Diffstat (limited to 'llvm/lib/ProfileData')
-rw-r--r--  llvm/lib/ProfileData/Coverage/CoverageMapping.cpp        |  12
-rw-r--r--  llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp  |   4
-rw-r--r--  llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp  |   8
-rw-r--r--  llvm/lib/ProfileData/GCOV.cpp                            |   8
-rw-r--r--  llvm/lib/ProfileData/InstrProf.cpp                       |  81
-rw-r--r--  llvm/lib/ProfileData/InstrProfCorrelator.cpp             |  11
-rw-r--r--  llvm/lib/ProfileData/InstrProfReader.cpp                 | 146
-rw-r--r--  llvm/lib/ProfileData/InstrProfWriter.cpp                 | 143
-rw-r--r--  llvm/lib/ProfileData/MemProf.cpp                         | 110
-rw-r--r--  llvm/lib/ProfileData/ProfileSummaryBuilder.cpp           |  32
-rw-r--r--  llvm/lib/ProfileData/RawMemProfReader.cpp                | 543
-rw-r--r--  llvm/lib/ProfileData/SampleProf.cpp                      |  32
-rw-r--r--  llvm/lib/ProfileData/SampleProfReader.cpp                |  31
-rw-r--r--  llvm/lib/ProfileData/SampleProfWriter.cpp                |  30
14 files changed, 994 insertions, 197 deletions
diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp index 94c2bee3590c..f9e58fd6afa5 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -123,13 +123,15 @@ Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) { return C; } -Counter CounterExpressionBuilder::add(Counter LHS, Counter RHS) { - return simplify(get(CounterExpression(CounterExpression::Add, LHS, RHS))); +Counter CounterExpressionBuilder::add(Counter LHS, Counter RHS, bool Simplify) { + auto Cnt = get(CounterExpression(CounterExpression::Add, LHS, RHS)); + return Simplify ? simplify(Cnt) : Cnt; } -Counter CounterExpressionBuilder::subtract(Counter LHS, Counter RHS) { - return simplify( - get(CounterExpression(CounterExpression::Subtract, LHS, RHS))); +Counter CounterExpressionBuilder::subtract(Counter LHS, Counter RHS, + bool Simplify) { + auto Cnt = get(CounterExpression(CounterExpression::Subtract, LHS, RHS)); + return Simplify ? simplify(Cnt) : Cnt; } void CounterMappingContext::dump(const Counter &C, raw_ostream &OS) const { diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp index c6691e321b3c..1a187795a8a0 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/Object/Archive.h" #include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" #include "llvm/Object/Error.h" @@ -174,7 +175,8 @@ Error RawCoverageFilenamesReader::readUncompressed(CovMapVersion Version, else P.assign(CWD); llvm::sys::path::append(P, Filename); - Filenames.push_back(static_cast<std::string>(P)); + sys::path::remove_dots(P, /*remove_dot_dot=*/true); + Filenames.push_back(static_cast<std::string>(P.str())); } } } diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp index ceb2d7dcb5b9..781a2901dbb9 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp @@ -49,12 +49,8 @@ void CoverageFilenamesSectionWriter::write(raw_ostream &OS, bool Compress) { SmallString<128> CompressedStr; bool doCompression = Compress && zlib::isAvailable() && DoInstrProfNameCompression; - if (doCompression) { - auto E = - zlib::compress(FilenamesStr, CompressedStr, zlib::BestSizeCompression); - if (E) - report_bad_alloc_error("Failed to zlib compress coverage data"); - } + if (doCompression) + zlib::compress(FilenamesStr, CompressedStr, zlib::BestSizeCompression); // ::= <num-filenames> // <uncompressed-len> diff --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp index 72d1addab01e..feacf40b8d0a 100644 --- a/llvm/lib/ProfileData/GCOV.cpp +++ b/llvm/lib/ProfileData/GCOV.cpp @@ -13,6 +13,7 @@ #include "llvm/ProfileData/GCOV.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/Config/llvm-config.h" #include "llvm/Demangle/Demangle.h" #include "llvm/Support/Debug.h" @@ -23,7 +24,6 @@ #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <system_error> -#include <unordered_map> using namespace llvm; @@ -663,6 +663,8 @@ void Context::collectFunction(GCOVFunction &f, Summary &summary) { if (f.startLine >= si.startLineToFunctions.size()) 
si.startLineToFunctions.resize(f.startLine + 1); si.startLineToFunctions[f.startLine].push_back(&f); + SmallSet<uint32_t, 16> lines; + SmallSet<uint32_t, 16> linesExec; for (const GCOVBlock &b : f.blocksRange()) { if (b.lines.empty()) continue; @@ -671,9 +673,9 @@ void Context::collectFunction(GCOVFunction &f, Summary &summary) { si.lines.resize(maxLineNum + 1); for (uint32_t lineNum : b.lines) { LineInfo &line = si.lines[lineNum]; - if (!line.exists) + if (lines.insert(lineNum).second) ++summary.lines; - if (line.count == 0 && b.count) + if (b.count && linesExec.insert(lineNum).second) ++summary.linesExec; line.exists = true; line.count += b.count; diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 07d467305ae5..48ac5ce0d607 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -51,6 +51,7 @@ #include <memory> #include <string> #include <system_error> +#include <type_traits> #include <utility> #include <vector> @@ -466,12 +467,8 @@ Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs, } SmallString<128> CompressedNameStrings; - Error E = zlib::compress(StringRef(UncompressedNameStrings), - CompressedNameStrings, zlib::BestSizeCompression); - if (E) { - consumeError(std::move(E)); - return make_error<InstrProfError>(instrprof_error::compress_failed); - } + zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings, + zlib::BestSizeCompression); return WriteStringToResult(CompressedNameStrings.size(), CompressedNameStrings); @@ -1311,4 +1308,76 @@ void OverlapStats::dump(raw_fd_ostream &OS) const { } } +namespace IndexedInstrProf { +// A C++14 compatible version of the offsetof macro. +template <typename T1, typename T2> +inline size_t constexpr offsetOf(T1 T2::*Member) { + constexpr T2 Object{}; + return size_t(&(Object.*Member)) - size_t(&Object); +} + +static inline uint64_t read(const unsigned char *Buffer, size_t Offset) { + return *reinterpret_cast<const uint64_t *>(Buffer + Offset); +} + +uint64_t Header::formatVersion() const { + using namespace support; + return endian::byte_swap<uint64_t, little>(Version); +} + +Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) { + using namespace support; + static_assert(std::is_standard_layout<Header>::value, + "The header should be standard layout type since we use offset " + "of fields to read."); + Header H; + + H.Magic = read(Buffer, offsetOf(&Header::Magic)); + // Check the magic number. + uint64_t Magic = endian::byte_swap<uint64_t, little>(H.Magic); + if (Magic != IndexedInstrProf::Magic) + return make_error<InstrProfError>(instrprof_error::bad_magic); + + // Read the version. + H.Version = read(Buffer, offsetOf(&Header::Version)); + if (GET_VERSION(H.formatVersion()) > + IndexedInstrProf::ProfVersion::CurrentVersion) + return make_error<InstrProfError>(instrprof_error::unsupported_version); + + switch (GET_VERSION(H.formatVersion())) { + // When a new field is added in the header add a case statement here to + // populate it. + static_assert( + IndexedInstrProf::ProfVersion::CurrentVersion == Version8, + "Please update the reading code below if a new field has been added, " + "if not add a case statement to fall through to the latest version."); + case 8ull: + H.MemProfOffset = read(Buffer, offsetOf(&Header::MemProfOffset)); + LLVM_FALLTHROUGH; + default: // Version7 (when the backwards compatible header was introduced). 
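The backward-compatible header read above follows a small pattern: compute each field's on-disk position with the C++14-compatible offsetOf template, then read fields newest-first and fall through to the older ones. A minimal standalone sketch of that pattern; DemoHeader, readU64, and readDemoHeader are illustrative stand-ins, not LLVM API:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Illustrative stand-in for the real indexed-profile header.
struct DemoHeader {
  uint64_t Magic;
  uint64_t Version;
  uint64_t HashType;
  uint64_t HashOffset;
  uint64_t MemProfOffset; // Appended in version 8.
};

// The C++14-compatible offsetof from the patch: measure the member's
// displacement inside a default-constructed object.
template <typename T1, typename T2>
constexpr size_t offsetOf(T1 T2::*Member) {
  constexpr T2 Object{};
  return size_t(&(Object.*Member)) - size_t(&Object);
}

// memcpy sidesteps the unaligned-load concerns of a reinterpret_cast read.
static uint64_t readU64(const unsigned char *Buffer, size_t Offset) {
  uint64_t V;
  std::memcpy(&V, Buffer + Offset, sizeof(V));
  return V;
}

static DemoHeader readDemoHeader(const unsigned char *Buffer,
                                 uint64_t Version) {
  DemoHeader H{};
  H.Magic = readU64(Buffer, offsetOf(&DemoHeader::Magic));
  H.Version = readU64(Buffer, offsetOf(&DemoHeader::Version));
  switch (Version) {
  case 8: // Newest fields first, then fall through to the older ones.
    H.MemProfOffset = readU64(Buffer, offsetOf(&DemoHeader::MemProfOffset));
    [[fallthrough]];
  default: // Version 7: only the fields up to HashOffset exist on disk.
    H.HashType = readU64(Buffer, offsetOf(&DemoHeader::HashType));
    H.HashOffset = readU64(Buffer, offsetOf(&DemoHeader::HashOffset));
  }
  return H;
}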
+ H.HashType = read(Buffer, offsetOf(&Header::HashType)); + H.HashOffset = read(Buffer, offsetOf(&Header::HashOffset)); + } + + return H; +} + +size_t Header::size() const { + switch (GET_VERSION(formatVersion())) { + // When a new field is added to the header add a case statement here to + // compute the size as offset of the new field + size of the new field. This + // relies on the field being added to the end of the list. + static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version8, + "Please update the size computation below if a new field has " + "been added to the header, if not add a case statement to " + "fall through to the latest version."); + case 8ull: + return offsetOf(&Header::MemProfOffset) + sizeof(Header::MemProfOffset); + default: // Version7 (when the backwards compatible header was introduced). + return offsetOf(&Header::HashOffset) + sizeof(Header::HashOffset); + } +} + +} // namespace IndexedInstrProf + } // end namespace llvm diff --git a/llvm/lib/ProfileData/InstrProfCorrelator.cpp b/llvm/lib/ProfileData/InstrProfCorrelator.cpp index 8e38a6869d07..4b8212c546f7 100644 --- a/llvm/lib/ProfileData/InstrProfCorrelator.cpp +++ b/llvm/lib/ProfileData/InstrProfCorrelator.cpp @@ -7,10 +7,15 @@ //===----------------------------------------------------------------------===// #include "llvm/ProfileData/InstrProfCorrelator.h" +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDie.h" +#include "llvm/DebugInfo/DWARF/DWARFExpression.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/DebugInfo/DWARF/DWARFLocationExpression.h" +#include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Object/MachO.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h" #define DEBUG_TYPE "correlator" @@ -279,7 +284,7 @@ void DwarfInstrProfCorrelator<IntPtrT>::correlateProfileDataImpl() { LLVM_DEBUG(Die.dump(dbgs())); } this->addProbe(*FunctionName, *CFGHash, *CounterPtr - CountersStart, - FunctionPtr.getValueOr(0), *NumCounters); + FunctionPtr.value_or(0), *NumCounters); }; for (auto &CU : DICtx->normal_units()) for (const auto &Entry : CU->dies()) diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 138b1532d778..ee8989979a26 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -14,11 +14,11 @@ #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" @@ -27,7 +27,6 @@ #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/SymbolRemappingReader.h" #include <algorithm> -#include <cctype> #include <cstddef> #include <cstdint> #include <limits> @@ -43,13 +42,13 @@ using namespace llvm; static InstrProfKind getProfileKindFromVersion(uint64_t Version) { InstrProfKind ProfileKind = InstrProfKind::Unknown; if (Version & VARIANT_MASK_IR_PROF) { - ProfileKind |= InstrProfKind::IR; + ProfileKind |= InstrProfKind::IRInstrumentation; } if (Version & VARIANT_MASK_CSIR_PROF) { - ProfileKind |= InstrProfKind::CS; + ProfileKind |= InstrProfKind::ContextSensitive; } if (Version & 
VARIANT_MASK_INSTR_ENTRY) { - ProfileKind |= InstrProfKind::BB; + ProfileKind |= InstrProfKind::FunctionEntryInstrumentation; } if (Version & VARIANT_MASK_BYTE_COVERAGE) { ProfileKind |= InstrProfKind::SingleByteCoverage; @@ -57,6 +56,9 @@ static InstrProfKind getProfileKindFromVersion(uint64_t Version) { if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) { ProfileKind |= InstrProfKind::FunctionEntryOnly; } + if (Version & VARIANT_MASK_MEMPROF) { + ProfileKind |= InstrProfKind::MemProf; + } return ProfileKind; } @@ -153,14 +155,6 @@ IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, return std::move(Result); } -void InstrProfIterator::Increment() { - if (auto E = Reader->readNextRecord(Record)) { - // Handle errors in the reader. - InstrProfError::take(std::move(E)); - *this = InstrProfIterator(); - } -} - bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { // Verify that this really looks like plain ASCII text by checking a // 'reasonable' number of characters (up to profile magic size). @@ -180,16 +174,16 @@ Error TextInstrProfReader::readHeader() { while (Line->startswith(":")) { StringRef Str = Line->substr(1); if (Str.equals_insensitive("ir")) - ProfileKind |= InstrProfKind::IR; + ProfileKind |= InstrProfKind::IRInstrumentation; else if (Str.equals_insensitive("fe")) - ProfileKind |= InstrProfKind::FE; + ProfileKind |= InstrProfKind::FrontendInstrumentation; else if (Str.equals_insensitive("csir")) { - ProfileKind |= InstrProfKind::IR; - ProfileKind |= InstrProfKind::CS; + ProfileKind |= InstrProfKind::IRInstrumentation; + ProfileKind |= InstrProfKind::ContextSensitive; } else if (Str.equals_insensitive("entry_first")) - ProfileKind |= InstrProfKind::BB; + ProfileKind |= InstrProfKind::FunctionEntryInstrumentation; else if (Str.equals_insensitive("not_entry_first")) - ProfileKind &= ~InstrProfKind::BB; + ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation; else return error(instrprof_error::bad_header); ++Line; @@ -454,7 +448,7 @@ Error RawInstrProfReader<IntPtrT>::readHeader( return error(instrprof_error::bad_header); std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); - if (Error E = createSymtab(*NewSymtab.get())) + if (Error E = createSymtab(*NewSymtab)) return E; Symtab = std::move(NewSymtab); @@ -942,24 +936,17 @@ Error IndexedInstrProfReader::readHeader() { if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24) return error(instrprof_error::truncated); - auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur); - Cur += sizeof(IndexedInstrProf::Header); + auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Start); + if (!HeaderOr) + return HeaderOr.takeError(); - // Check the magic number. - uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic); - if (Magic != IndexedInstrProf::Magic) - return error(instrprof_error::bad_magic); - - // Read the version. 
- uint64_t FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version); - if (GET_VERSION(FormatVersion) > - IndexedInstrProf::ProfVersion::CurrentVersion) - return error(instrprof_error::unsupported_version); + const IndexedInstrProf::Header *Header = &HeaderOr.get(); + Cur += Header->size(); - Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, + Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur, /* UseCS */ false); - if (FormatVersion & VARIANT_MASK_CSIR_PROF) - Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, + if (Header->formatVersion() & VARIANT_MASK_CSIR_PROF) + Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur, /* UseCS */ true); // Read the hash type and start offset. @@ -970,10 +957,46 @@ Error IndexedInstrProfReader::readHeader() { uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset); - // The rest of the file is an on disk hash table. - auto IndexPtr = - std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>( - Start + HashOffset, Cur, Start, HashType, FormatVersion); + // The hash table with profile counts comes next. + auto IndexPtr = std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>( + Start + HashOffset, Cur, Start, HashType, Header->formatVersion()); + + // The MemProfOffset field in the header is only valid when the format version + // is higher than 8 (when it was introduced). + if (GET_VERSION(Header->formatVersion()) >= 8 && + Header->formatVersion() & VARIANT_MASK_MEMPROF) { + uint64_t MemProfOffset = + endian::byte_swap<uint64_t, little>(Header->MemProfOffset); + + const unsigned char *Ptr = Start + MemProfOffset; + // The value returned from RecordTableGenerator.Emit. + const uint64_t RecordTableOffset = + support::endian::readNext<uint64_t, little, unaligned>(Ptr); + // The offset in the stream right before invoking FrameTableGenerator.Emit. + const uint64_t FramePayloadOffset = + support::endian::readNext<uint64_t, little, unaligned>(Ptr); + // The value returned from FrameTableGenerator.Emit. + const uint64_t FrameTableOffset = + support::endian::readNext<uint64_t, little, unaligned>(Ptr); + + // Read the schema. + auto SchemaOr = memprof::readMemProfSchema(Ptr); + if (!SchemaOr) + return SchemaOr.takeError(); + Schema = SchemaOr.get(); + + // Now initialize the table reader with a pointer into data buffer. + MemProfRecordTable.reset(MemProfRecordHashTable::Create( + /*Buckets=*/Start + RecordTableOffset, + /*Payload=*/Ptr, + /*Base=*/Start, memprof::RecordLookupTrait(Schema))); + + // Initialize the frame table reader with the payload and bucket offsets. + MemProfFrameTable.reset(MemProfFrameHashTable::Create( + /*Buckets=*/Start + FrameTableOffset, + /*Payload=*/Start + FramePayloadOffset, + /*Base=*/Start, memprof::FrameLookupTrait())); + } // Load the remapping table now if requested. 
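For consumers, the new record and frame tables surface through IndexedInstrProfReader::getMemProfRecord, defined just below. A hedged usage sketch: it assumes the path-based create overload, and dumpOneMemProfRecord is an illustrative helper, not in-tree code.

#include "llvm/ADT/Twine.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>

using namespace llvm;

// Looks up the MemProf record for one function-name hash and prints it.
static Error dumpOneMemProfRecord(const Twine &ProfilePath,
                                  uint64_t FuncNameHash, raw_ostream &OS) {
  auto ReaderOr = IndexedInstrProfReader::create(ProfilePath);
  if (!ReaderOr)
    return ReaderOr.takeError();
  std::unique_ptr<IndexedInstrProfReader> Reader = std::move(*ReaderOr);

  // Fails with invalid_prof when the profile has no MemProf section, or
  // unknown_function when the hash is absent from the record table.
  auto RecordOr = Reader->getMemProfRecord(FuncNameHash);
  if (!RecordOr)
    return RecordOr.takeError();

  RecordOr->print(OS);
  return Error::success();
}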
if (RemappingBuffer) { @@ -991,16 +1014,16 @@ Error IndexedInstrProfReader::readHeader() { } InstrProfSymtab &IndexedInstrProfReader::getSymtab() { - if (Symtab.get()) - return *Symtab.get(); + if (Symtab) + return *Symtab; std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); - if (Error E = Index->populateSymtab(*NewSymtab.get())) { + if (Error E = Index->populateSymtab(*NewSymtab)) { consumeError(error(InstrProfError::take(std::move(E)))); } Symtab = std::move(NewSymtab); - return *Symtab.get(); + return *Symtab; } Expected<InstrProfRecord> @@ -1019,6 +1042,43 @@ IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, return error(instrprof_error::hash_mismatch); } +Expected<memprof::MemProfRecord> +IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) { + // TODO: Add memprof specific errors. + if (MemProfRecordTable == nullptr) + return make_error<InstrProfError>(instrprof_error::invalid_prof, + "no memprof data available in profile"); + auto Iter = MemProfRecordTable->find(FuncNameHash); + if (Iter == MemProfRecordTable->end()) + return make_error<InstrProfError>( + instrprof_error::unknown_function, + "memprof record not found for function hash " + Twine(FuncNameHash)); + + // Setup a callback to convert from frame ids to frame using the on-disk + // FrameData hash table. + memprof::FrameId LastUnmappedFrameId = 0; + bool HasFrameMappingError = false; + auto IdToFrameCallback = [&](const memprof::FrameId Id) { + auto FrIter = MemProfFrameTable->find(Id); + if (FrIter == MemProfFrameTable->end()) { + LastUnmappedFrameId = Id; + HasFrameMappingError = true; + return memprof::Frame(0, 0, 0, false); + } + return *FrIter; + }; + + memprof::MemProfRecord Record(*Iter, IdToFrameCallback); + + // Check that all frame ids were successfully converted to frames. + if (HasFrameMappingError) { + return make_error<InstrProfError>(instrprof_error::hash_mismatch, + "memprof frame not found for frame id " + + Twine(LastUnmappedFrameId)); + } + return Record; +} + Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, uint64_t FuncHash, std::vector<uint64_t> &Counts) { diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 8ded1c0426e5..cd4e8900c963 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" @@ -23,7 +24,6 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/OnDiskHashTable.h" #include "llvm/Support/raw_ostream.h" -#include <algorithm> #include <cstdint> #include <memory> #include <string> @@ -32,7 +32,6 @@ #include <vector> using namespace llvm; -extern cl::opt<bool> DebugInfoCorrelate; // A struct to define how the data stream should be patched. For Indexed // profiling, only uint64_t data type is needed. @@ -64,11 +63,16 @@ public: if (IsFDOStream) { raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS); + const uint64_t LastPos = FDOStream.tell(); for (int K = 0; K < NItems; K++) { FDOStream.seek(P[K].Pos); for (int I = 0; I < P[K].N; I++) write(P[K].D[I]); } + // Reset the stream to the last position after patching so that users + // don't accidentally overwrite data. 
This makes it consistent with + // the string stream below which replaces the data directly. + FDOStream.seek(LastPos); } else { raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS); std::string &Data = SOStream.str(); // with flush @@ -249,11 +253,51 @@ void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash, Dest.sortValueData(); } +void InstrProfWriter::addMemProfRecord( + const Function::GUID Id, const memprof::IndexedMemProfRecord &Record) { + auto Result = MemProfRecordData.insert({Id, Record}); + // If we inserted a new record then we are done. + if (Result.second) { + return; + } + memprof::IndexedMemProfRecord &Existing = Result.first->second; + Existing.merge(Record); +} + +bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id, + const memprof::Frame &Frame, + function_ref<void(Error)> Warn) { + auto Result = MemProfFrameData.insert({Id, Frame}); + // If a mapping already exists for the current frame id and it does not + // match the new mapping provided then reset the existing contents and bail + // out. We don't support the merging of memprof data whose Frame -> Id + // mapping across profiles is inconsistent. + if (!Result.second && Result.first->second != Frame) { + Warn(make_error<InstrProfError>(instrprof_error::malformed, + "frame to id mapping mismatch")); + return false; + } + return true; +} + void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, function_ref<void(Error)> Warn) { for (auto &I : IPW.FunctionData) for (auto &Func : I.getValue()) addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn); + + MemProfFrameData.reserve(IPW.MemProfFrameData.size()); + for (auto &I : IPW.MemProfFrameData) { + // If we weren't able to add the frame mappings then it doesn't make sense + // to try to merge the records from this profile. + if (!addMemProfFrame(I.first, I.second, Warn)) + return; + } + + MemProfRecordData.reserve(IPW.MemProfRecordData.size()); + for (auto &I : IPW.MemProfRecordData) { + addMemProfRecord(I.first, I.second); + } } bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) { @@ -298,30 +342,34 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { for (const auto &I : FunctionData) if (shouldEncodeData(I.getValue())) Generator.insert(I.getKey(), &I.getValue()); + // Write the header. 
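The seek-and-restore fix above is easy to get wrong in isolation: after back-patching a reserved slot, the file cursor must not stay parked at the patch site, or the next write clobbers data. A minimal sketch of the pattern; patchU64 is illustrative, not the in-tree helper:

#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"

// Overwrites a previously reserved uint64_t slot at Pos, then restores the
// cursor so subsequent writes keep appending at the end of the file.
static void patchU64(llvm::raw_fd_ostream &OS, uint64_t Pos, uint64_t Value) {
  const uint64_t LastPos = OS.tell(); // Remember where appends continue.
  OS.seek(Pos);
  llvm::support::endian::Writer LE(OS, llvm::support::little);
  LE.write<uint64_t>(Value);
  OS.seek(LastPos); // Restore; mirrors the FDOStream.seek(LastPos) above.
}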
IndexedInstrProf::Header Header; Header.Magic = IndexedInstrProf::Magic; Header.Version = IndexedInstrProf::ProfVersion::CurrentVersion; - if (static_cast<bool>(ProfileKind & InstrProfKind::IR)) + if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation)) Header.Version |= VARIANT_MASK_IR_PROF; - if (static_cast<bool>(ProfileKind & InstrProfKind::CS)) + if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) Header.Version |= VARIANT_MASK_CSIR_PROF; - if (static_cast<bool>(ProfileKind & InstrProfKind::BB)) + if (static_cast<bool>(ProfileKind & + InstrProfKind::FunctionEntryInstrumentation)) Header.Version |= VARIANT_MASK_INSTR_ENTRY; if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage)) Header.Version |= VARIANT_MASK_BYTE_COVERAGE; if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly)) Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY; + if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) + Header.Version |= VARIANT_MASK_MEMPROF; Header.Unused = 0; Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType); Header.HashOffset = 0; + Header.MemProfOffset = 0; int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t); - // Only write out all the fields except 'HashOffset'. We need - // to remember the offset of that field to allow back patching - // later. - for (int I = 0; I < N - 1; I++) + // Only write out all the fields except 'HashOffset' and 'MemProfOffset'. We + // need to remember the offset of these fields to allow back patching later. + for (int I = 0; I < N - 2; I++) OS.write(reinterpret_cast<uint64_t *>(&Header)[I]); // Save the location of Header.HashOffset field in \c OS. @@ -329,6 +377,13 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { // Reserve the space for HashOffset field. OS.write(0); + // Save the location of MemProf profile data. This is stored in two parts as + // the schema and as a separate on-disk chained hashtable. + uint64_t MemProfSectionOffset = OS.tell(); + // Reserve space for the MemProf table field to be patched later if this + // profile contains memory profile information. + OS.write(0); + // Reserve space to write profile summary data. uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size(); uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries); @@ -338,7 +393,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { OS.write(0); uint64_t CSSummaryOffset = 0; uint64_t CSSummarySize = 0; - if (static_cast<bool>(ProfileKind & InstrProfKind::CS)) { + if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) { CSSummaryOffset = OS.tell(); CSSummarySize = SummarySize / sizeof(uint64_t); for (unsigned I = 0; I < CSSummarySize; I++) @@ -348,6 +403,63 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { // Write the hash table. uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj); + // Write the MemProf profile data if we have it. This includes a simple schema + // with the format described below followed by the hashtable: + // uint64_t RecordTableOffset = RecordTableGenerator.Emit + // uint64_t FramePayloadOffset = Stream offset before emitting the frame table + // uint64_t FrameTableOffset = FrameTableGenerator.Emit + // uint64_t Num schema entries + // uint64_t Schema entry 0 + // uint64_t Schema entry 1 + // .... 
+ // uint64_t Schema entry N - 1 + // OnDiskChainedHashTable MemProfRecordData + // OnDiskChainedHashTable MemProfFrameData + uint64_t MemProfSectionStart = 0; + if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) { + MemProfSectionStart = OS.tell(); + OS.write(0ULL); // Reserve space for the memprof record table offset. + OS.write(0ULL); // Reserve space for the memprof frame payload offset. + OS.write(0ULL); // Reserve space for the memprof frame table offset. + + auto Schema = memprof::PortableMemInfoBlock::getSchema(); + OS.write(static_cast<uint64_t>(Schema.size())); + for (const auto Id : Schema) { + OS.write(static_cast<uint64_t>(Id)); + } + + auto RecordWriter = std::make_unique<memprof::RecordWriterTrait>(); + RecordWriter->Schema = &Schema; + OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait> + RecordTableGenerator; + for (auto &I : MemProfRecordData) { + // Insert the key (func hash) and value (memprof record). + RecordTableGenerator.insert(I.first, I.second); + } + + uint64_t RecordTableOffset = + RecordTableGenerator.Emit(OS.OS, *RecordWriter); + + uint64_t FramePayloadOffset = OS.tell(); + + auto FrameWriter = std::make_unique<memprof::FrameWriterTrait>(); + OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait> + FrameTableGenerator; + for (auto &I : MemProfFrameData) { + // Insert the key (frame id) and value (frame contents). + FrameTableGenerator.insert(I.first, I.second); + } + + uint64_t FrameTableOffset = FrameTableGenerator.Emit(OS.OS, *FrameWriter); + + PatchItem PatchItems[] = { + {MemProfSectionStart, &RecordTableOffset, 1}, + {MemProfSectionStart + sizeof(uint64_t), &FramePayloadOffset, 1}, + {MemProfSectionStart + 2 * sizeof(uint64_t), &FrameTableOffset, 1}, + }; + OS.patch(PatchItems, 3); + } + // Allocate space for data to be serialized out. std::unique_ptr<IndexedInstrProf::Summary> TheSummary = IndexedInstrProf::allocSummary(SummarySize); @@ -359,7 +471,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { // For Context Sensitive summary. std::unique_ptr<IndexedInstrProf::Summary> TheCSSummary = nullptr; - if (static_cast<bool>(ProfileKind & InstrProfKind::CS)) { + if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) { TheCSSummary = IndexedInstrProf::allocSummary(SummarySize); std::unique_ptr<ProfileSummary> CSPS = CSISB.getSummary(); setSummary(TheCSSummary.get(), *CSPS); @@ -370,6 +482,8 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { PatchItem PatchItems[] = { // Patch the Header.HashOffset field. {HashTableStartFieldOffset, &HashTableStart, 1}, + // Patch the Header.MemProfOffset (=0 for profiles without MemProf data). + {MemProfSectionOffset, &MemProfSectionStart, 1}, // Patch the summary data. {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()), (int)(SummarySize / sizeof(uint64_t))}, @@ -472,12 +586,13 @@ void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash, Error InstrProfWriter::writeText(raw_fd_ostream &OS) { // Check CS first since it implies an IR level profile. 
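Recapping the MemProf section layout emitted above: three offsets reserved and patched later, a count-prefixed schema, then the record and frame hash tables. The schema block alone reduces to the following sketch; writeSchema is an illustrative helper, whereas the in-tree writer emits the tags inline:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"

// Emits a count-prefixed list of schema tags, one little-endian uint64_t
// each, matching the layout documented in writeImpl above.
static void writeSchema(llvm::raw_ostream &OS,
                        llvm::ArrayRef<uint64_t> Tags) {
  llvm::support::endian::Writer LE(OS, llvm::support::little);
  LE.write<uint64_t>(Tags.size());
  for (uint64_t Tag : Tags)
    LE.write<uint64_t>(Tag);
}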
- if (static_cast<bool>(ProfileKind & InstrProfKind::CS)) + if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) OS << "# CSIR level Instrumentation Flag\n:csir\n"; - else if (static_cast<bool>(ProfileKind & InstrProfKind::IR)) + else if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation)) OS << "# IR level Instrumentation Flag\n:ir\n"; - if (static_cast<bool>(ProfileKind & InstrProfKind::BB)) + if (static_cast<bool>(ProfileKind & + InstrProfKind::FunctionEntryInstrumentation)) OS << "# Always instrument the function entry block\n:entry_first\n"; InstrProfSymtab Symtab; diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp new file mode 100644 index 000000000000..3d44cf0b4c37 --- /dev/null +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -0,0 +1,110 @@ +#include "llvm/ProfileData/MemProf.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Function.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/EndianStream.h" + +namespace llvm { +namespace memprof { + +void IndexedMemProfRecord::serialize(const MemProfSchema &Schema, + raw_ostream &OS) { + using namespace support; + + endian::Writer LE(OS, little); + + LE.write<uint64_t>(AllocSites.size()); + for (const IndexedAllocationInfo &N : AllocSites) { + LE.write<uint64_t>(N.CallStack.size()); + for (const FrameId &Id : N.CallStack) + LE.write<FrameId>(Id); + N.Info.serialize(Schema, OS); + } + + // Related contexts. + LE.write<uint64_t>(CallSites.size()); + for (const auto &Frames : CallSites) { + LE.write<uint64_t>(Frames.size()); + for (const FrameId &Id : Frames) + LE.write<FrameId>(Id); + } +} + +IndexedMemProfRecord +IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, + const unsigned char *Ptr) { + using namespace support; + + IndexedMemProfRecord Record; + + // Read the meminfo nodes. + const uint64_t NumNodes = endian::readNext<uint64_t, little, unaligned>(Ptr); + for (uint64_t I = 0; I < NumNodes; I++) { + IndexedAllocationInfo Node; + const uint64_t NumFrames = + endian::readNext<uint64_t, little, unaligned>(Ptr); + for (uint64_t J = 0; J < NumFrames; J++) { + const FrameId Id = endian::readNext<FrameId, little, unaligned>(Ptr); + Node.CallStack.push_back(Id); + } + Node.Info.deserialize(Schema, Ptr); + Ptr += PortableMemInfoBlock::serializedSize(); + Record.AllocSites.push_back(Node); + } + + // Read the callsite information. + const uint64_t NumCtxs = endian::readNext<uint64_t, little, unaligned>(Ptr); + for (uint64_t J = 0; J < NumCtxs; J++) { + const uint64_t NumFrames = + endian::readNext<uint64_t, little, unaligned>(Ptr); + llvm::SmallVector<FrameId> Frames; + Frames.reserve(NumFrames); + for (uint64_t K = 0; K < NumFrames; K++) { + const FrameId Id = endian::readNext<FrameId, little, unaligned>(Ptr); + Frames.push_back(Id); + } + Record.CallSites.push_back(Frames); + } + + return Record; +} + +GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) { + const auto Pos = FunctionName.find(".llvm."); + + // We use the function guid which we expect to be a uint64_t. At + // this time, it is the lower 64 bits of the md5 of the function + // name. Any suffix with .llvm. is trimmed since these are added by + // thinLTO global promotion. At the time the profile is consumed, + // these suffixes will not be present. 
+ return Function::getGUID(FunctionName.take_front(Pos)); +} + +Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) { + using namespace support; + + const unsigned char *Ptr = Buffer; + const uint64_t NumSchemaIds = + endian::readNext<uint64_t, little, unaligned>(Ptr); + if (NumSchemaIds > static_cast<uint64_t>(Meta::Size)) { + return make_error<InstrProfError>(instrprof_error::malformed, + "memprof schema invalid"); + } + + MemProfSchema Result; + for (size_t I = 0; I < NumSchemaIds; I++) { + const uint64_t Tag = endian::readNext<uint64_t, little, unaligned>(Ptr); + if (Tag >= static_cast<uint64_t>(Meta::Size)) { + return make_error<InstrProfError>(instrprof_error::malformed, + "memprof schema invalid"); + } + Result.push_back(static_cast<Meta>(Tag)); + } + // Advace the buffer to one past the schema if we succeeded. + Buffer = Ptr; + return Result; +} + +} // namespace memprof +} // namespace llvm diff --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp index bbb640cfaee8..755e25b355a8 100644 --- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp +++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp @@ -10,20 +10,16 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Attributes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Type.h" +#include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" -#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" using namespace llvm; cl::opt<bool> UseContextLessSummary( - "profile-summary-contextless", cl::Hidden, cl::init(false), cl::ZeroOrMore, + "profile-summary-contextless", cl::Hidden, cl::desc("Merge context profiles before calculating thresholds.")); // The following two parameters determine the threshold for a count to be @@ -34,38 +30,38 @@ cl::opt<bool> UseContextLessSummary( // threshold for determining cold count (everything <= this threshold is // considered cold). cl::opt<int> ProfileSummaryCutoffHot( - "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::ZeroOrMore, + "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::desc("A count is hot if it exceeds the minimum count to" " reach this percentile of total counts.")); cl::opt<int> ProfileSummaryCutoffCold( - "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999), cl::ZeroOrMore, + "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999), cl::desc("A count is cold if it is below the minimum count" " to reach this percentile of total counts.")); cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold( "profile-summary-huge-working-set-size-threshold", cl::Hidden, - cl::init(15000), cl::ZeroOrMore, + cl::init(15000), cl::desc("The code working set size is considered huge if the number of" " blocks required to reach the -profile-summary-cutoff-hot" " percentile exceeds this count.")); cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold( "profile-summary-large-working-set-size-threshold", cl::Hidden, - cl::init(12500), cl::ZeroOrMore, + cl::init(12500), cl::desc("The code working set size is considered large if the number of" " blocks required to reach the -profile-summary-cutoff-hot" " percentile exceeds this count.")); // The next two options override the counts derived from summary computation and // are useful for debugging purposes. 
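The suffix handling in IndexedMemProfRecord::getGUID above relies on a StringRef convenience worth spelling out; guidForName is an illustrative wrapper:

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Function.h"

// StringRef::find returns npos when ".llvm." is absent, and take_front(npos)
// keeps the whole string, so unsuffixed names hash unchanged while promoted
// names like "foo.llvm.123" hash as "foo".
static llvm::GlobalValue::GUID guidForName(llvm::StringRef FunctionName) {
  return llvm::Function::getGUID(
      FunctionName.take_front(FunctionName.find(".llvm.")));
}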
-cl::opt<int> ProfileSummaryHotCount( - "profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore, +cl::opt<uint64_t> ProfileSummaryHotCount( + "profile-summary-hot-count", cl::ReallyHidden, cl::desc("A fixed hot count that overrides the count derived from" " profile-summary-cutoff-hot")); -cl::opt<int> ProfileSummaryColdCount( - "profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore, +cl::opt<uint64_t> ProfileSummaryColdCount( + "profile-summary-cold-count", cl::ReallyHidden, cl::desc("A fixed cold count that overrides the count derived from" " profile-summary-cutoff-cold")); @@ -110,7 +106,13 @@ void SampleProfileSummaryBuilder::addRecord( NumFunctions++; if (FS.getHeadSamples() > MaxFunctionCount) MaxFunctionCount = FS.getHeadSamples(); + } else if (FS.getContext().hasAttribute( + sampleprof::ContextDuplicatedIntoBase)) { + // Do not recount callee samples if they are already merged into their base + // profiles. This can happen to CS nested profile. + return; } + for (const auto &I : FS.getBodySamples()) { uint64_t Count = I.second.getSamples(); addCount(Count); @@ -194,7 +196,7 @@ SampleProfileSummaryBuilder::computeSummaryForProfiles( // more function profiles each with lower counts, which in turn leads to lower // hot thresholds. To compensate for that, by default we merge context // profiles before computing profile summary. - if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCSFlat && + if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS && !UseContextLessSummary.getNumOccurrences())) { for (const auto &I : Profiles) { ContextLessProfiles[I.second.getName()].merge(I.second); diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp index f8d13c74fac3..2423fd38e9a2 100644 --- a/llvm/lib/ProfileData/RawMemProfReader.cpp +++ b/llvm/lib/ProfileData/RawMemProfReader.cpp @@ -10,69 +10,55 @@ // //===----------------------------------------------------------------------===// +#include <algorithm> #include <cstdint> +#include <memory> #include <type_traits> +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" +#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/MemProfData.inc" #include "llvm/ProfileData/RawMemProfReader.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Path.h" + +#define DEBUG_TYPE "memprof" namespace llvm { namespace memprof { namespace { - -struct Summary { - uint64_t Version; - uint64_t TotalSizeBytes; - uint64_t NumSegments; - uint64_t NumMIBInfo; - uint64_t NumStackOffsets; -}; - template <class T = uint64_t> inline T alignedRead(const char *Ptr) { static_assert(std::is_pod<T>::value, "Not a pod type."); assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read"); return *reinterpret_cast<const T *>(Ptr); } -Summary computeSummary(const char *Start) { - auto *H = reinterpret_cast<const Header *>(Start); - - // Check alignment while reading the number of items in each section. 
- return Summary{ - H->Version, - H->TotalSize, - alignedRead(Start + H->SegmentOffset), - alignedRead(Start + H->MIBOffset), - alignedRead(Start + H->StackOffset), - }; -} - -} // namespace - -Expected<std::unique_ptr<RawMemProfReader>> -RawMemProfReader::create(const Twine &Path) { - auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true); - if (std::error_code EC = BufferOr.getError()) - return errorCodeToError(EC); - - std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release()); +Error checkBuffer(const MemoryBuffer &Buffer) { + if (!RawMemProfReader::hasFormat(Buffer)) + return make_error<InstrProfError>(instrprof_error::bad_magic); - if (Buffer->getBufferSize() == 0) + if (Buffer.getBufferSize() == 0) return make_error<InstrProfError>(instrprof_error::empty_raw_profile); - if (!RawMemProfReader::hasFormat(*Buffer)) - return make_error<InstrProfError>(instrprof_error::bad_magic); - - if (Buffer->getBufferSize() < sizeof(Header)) { + if (Buffer.getBufferSize() < sizeof(Header)) { return make_error<InstrProfError>(instrprof_error::truncated); } // The size of the buffer can be > header total size since we allow repeated // serialization of memprof profiles to the same file. uint64_t TotalSize = 0; - const char *Next = Buffer->getBufferStart(); - while (Next < Buffer->getBufferEnd()) { + const char *Next = Buffer.getBufferStart(); + while (Next < Buffer.getBufferEnd()) { auto *H = reinterpret_cast<const Header *>(Next); if (H->Version != MEMPROF_RAW_VERSION) { return make_error<InstrProfError>(instrprof_error::unsupported_version); @@ -82,11 +68,143 @@ RawMemProfReader::create(const Twine &Path) { Next += H->TotalSize; } - if (Buffer->getBufferSize() != TotalSize) { + if (Buffer.getBufferSize() != TotalSize) { return make_error<InstrProfError>(instrprof_error::malformed); } + return Error::success(); +} + +llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) { + using namespace support; + + const uint64_t NumItemsToRead = + endian::readNext<uint64_t, little, unaligned>(Ptr); + llvm::SmallVector<SegmentEntry> Items; + for (uint64_t I = 0; I < NumItemsToRead; I++) { + Items.push_back(*reinterpret_cast<const SegmentEntry *>( + Ptr + I * sizeof(SegmentEntry))); + } + return Items; +} + +llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> +readMemInfoBlocks(const char *Ptr) { + using namespace support; + + const uint64_t NumItemsToRead = + endian::readNext<uint64_t, little, unaligned>(Ptr); + llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items; + for (uint64_t I = 0; I < NumItemsToRead; I++) { + const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr); + const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr); + Items.push_back({Id, MIB}); + // Only increment by size of MIB since readNext implicitly increments. 
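The multi-profile walk in checkBuffer above reduces to one invariant: the headers' TotalSize fields must tile the buffer exactly. A self-contained model of that check, where RawHeader stands in for the real header from MemProfData.inc and the version check is omitted:

#include <cstdint>
#include <cstring>

// Stand-in with just the fields the walk needs.
struct RawHeader {
  uint64_t Magic;
  uint64_t Version;
  uint64_t TotalSize;
};

// Returns true iff the concatenated profiles' TotalSize fields cover the
// buffer exactly, which the reader requires before parsing.
static bool coversWholeBuffer(const char *Begin, uint64_t Size) {
  uint64_t Walked = 0;
  while (Walked < Size) {
    if (Size - Walked < sizeof(RawHeader))
      return false; // Truncated trailing header.
    RawHeader H;
    std::memcpy(&H, Begin + Walked, sizeof(RawHeader));
    if (H.TotalSize == 0)
      return false; // Guard against looping forever on corrupt input.
    Walked += H.TotalSize;
  }
  return Walked == Size;
}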
+ Ptr += sizeof(MemInfoBlock); + } + return Items; +} + +CallStackMap readStackInfo(const char *Ptr) { + using namespace support; + + const uint64_t NumItemsToRead = + endian::readNext<uint64_t, little, unaligned>(Ptr); + CallStackMap Items; + + for (uint64_t I = 0; I < NumItemsToRead; I++) { + const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr); + const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr); + + SmallVector<uint64_t> CallStack; + for (uint64_t J = 0; J < NumPCs; J++) { + CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr)); + } + + Items[StackId] = CallStack; + } + return Items; +} + +// Merges the contents of stack information in \p From to \p To. Returns true if +// any stack ids observed previously map to a different set of program counter +// addresses. +bool mergeStackMap(const CallStackMap &From, CallStackMap &To) { + for (const auto &IdStack : From) { + auto I = To.find(IdStack.first); + if (I == To.end()) { + To[IdStack.first] = IdStack.second; + } else { + // Check that the PCs are the same (in order). + if (IdStack.second != I->second) + return true; + } + } + return false; +} - return std::make_unique<RawMemProfReader>(std::move(Buffer)); +Error report(Error E, const StringRef Context) { + return joinErrors(createStringError(inconvertibleErrorCode(), Context), + std::move(E)); +} + +bool isRuntimePath(const StringRef Path) { + return StringRef(llvm::sys::path::convert_to_slash(Path)) + .contains("memprof/memprof_"); +} + +std::string getBuildIdString(const SegmentEntry &Entry) { + constexpr size_t Size = sizeof(Entry.BuildId) / sizeof(uint8_t); + constexpr uint8_t Zeros[Size] = {0}; + // If the build id is unset print a helpful string instead of all zeros. + if (memcmp(Entry.BuildId, Zeros, Size) == 0) + return "<None>"; + + std::string Str; + raw_string_ostream OS(Str); + for (size_t I = 0; I < Size; I++) { + OS << format_hex_no_prefix(Entry.BuildId[I], 2); + } + return OS.str(); +} +} // namespace + +Expected<std::unique_ptr<RawMemProfReader>> +RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary, + bool KeepName) { + auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path); + if (std::error_code EC = BufferOr.getError()) + return report(errorCodeToError(EC), Path.getSingleStringRef()); + + std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release()); + if (Error E = checkBuffer(*Buffer)) + return report(std::move(E), Path.getSingleStringRef()); + + if (ProfiledBinary.empty()) + return report( + errorCodeToError(make_error_code(std::errc::invalid_argument)), + "Path to profiled binary is empty!"); + + auto BinaryOr = llvm::object::createBinary(ProfiledBinary); + if (!BinaryOr) { + return report(BinaryOr.takeError(), ProfiledBinary); + } + + // Use new here since constructor is private. 
+ std::unique_ptr<RawMemProfReader> Reader( + new RawMemProfReader(std::move(BinaryOr.get()), KeepName)); + if (Error E = Reader->initialize(std::move(Buffer))) { + return std::move(E); + } + return std::move(Reader); +} + +bool RawMemProfReader::hasFormat(const StringRef Path) { + auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path); + if (!BufferOr) + return false; + + std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release()); + return hasFormat(*Buffer); } bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) { @@ -98,24 +216,343 @@ bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) { return Magic == MEMPROF_RAW_MAGIC_64; } -void RawMemProfReader::printSummaries(raw_ostream &OS) const { - int Count = 0; +void RawMemProfReader::printYAML(raw_ostream &OS) { + uint64_t NumAllocFunctions = 0, NumMibInfo = 0; + for (const auto &KV : FunctionProfileData) { + const size_t NumAllocSites = KV.second.AllocSites.size(); + if (NumAllocSites > 0) { + NumAllocFunctions++; + NumMibInfo += NumAllocSites; + } + } + + OS << "MemprofProfile:\n"; + OS << " Summary:\n"; + OS << " Version: " << MEMPROF_RAW_VERSION << "\n"; + OS << " NumSegments: " << SegmentInfo.size() << "\n"; + OS << " NumMibInfo: " << NumMibInfo << "\n"; + OS << " NumAllocFunctions: " << NumAllocFunctions << "\n"; + OS << " NumStackOffsets: " << StackMap.size() << "\n"; + // Print out the segment information. + OS << " Segments:\n"; + for (const auto &Entry : SegmentInfo) { + OS << " -\n"; + OS << " BuildId: " << getBuildIdString(Entry) << "\n"; + OS << " Start: 0x" << llvm::utohexstr(Entry.Start) << "\n"; + OS << " End: 0x" << llvm::utohexstr(Entry.End) << "\n"; + OS << " Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n"; + } + // Print out the merged contents of the profiles. + OS << " Records:\n"; + for (const auto &Entry : *this) { + OS << " -\n"; + OS << " FunctionGUID: " << Entry.first << "\n"; + Entry.second.print(OS); + } +} + +Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) { + const StringRef FileName = Binary.getBinary()->getFileName(); + + auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary()); + if (!ElfObject) { + return report(make_error<StringError>(Twine("Not an ELF file: "), + inconvertibleErrorCode()), + FileName); + } + + // Check whether the profiled binary was built with position independent code + // (PIC). For now we provide a error message until symbolization support + // is added for pic. 
+ auto* Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject); + const llvm::object::ELF64LEFile& ElfFile = Elf64LEObject->getELFFile(); + auto PHdrsOr = ElfFile.program_headers(); + if(!PHdrsOr) + return report(make_error<StringError>(Twine("Could not read program headers: "), + inconvertibleErrorCode()), + FileName); + auto FirstLoadHeader = PHdrsOr->begin(); + while (FirstLoadHeader->p_type != llvm::ELF::PT_LOAD) + ++FirstLoadHeader; + if(FirstLoadHeader->p_vaddr == 0) + return report(make_error<StringError>(Twine("Unsupported position independent code"), + inconvertibleErrorCode()), + FileName); + + auto Triple = ElfObject->makeTriple(); + if (!Triple.isX86()) + return report(make_error<StringError>(Twine("Unsupported target: ") + + Triple.getArchName(), + inconvertibleErrorCode()), + FileName); + + auto *Object = cast<object::ObjectFile>(Binary.getBinary()); + std::unique_ptr<DIContext> Context = DWARFContext::create( + *Object, DWARFContext::ProcessDebugRelocations::Process); + + auto SOFOr = symbolize::SymbolizableObjectFile::create( + Object, std::move(Context), /*UntagAddresses=*/false); + if (!SOFOr) + return report(SOFOr.takeError(), FileName); + Symbolizer = std::move(SOFOr.get()); + + if (Error E = readRawProfile(std::move(DataBuffer))) + return E; + + if (Error E = symbolizeAndFilterStackFrames()) + return E; + + return mapRawProfileToRecords(); +} + +Error RawMemProfReader::mapRawProfileToRecords() { + // Hold a mapping from function to each callsite location we encounter within + // it that is part of some dynamic allocation context. The location is stored + // as a pointer to a symbolized list of inline frames. + using LocationPtr = const llvm::SmallVector<FrameId> *; + llvm::DenseMap<GlobalValue::GUID, llvm::SetVector<LocationPtr>> + PerFunctionCallSites; + + // Convert the raw profile callstack data into memprof records. While doing so + // keep track of related contexts so that we can fill these in later. + for (const auto &Entry : CallstackProfileData) { + const uint64_t StackId = Entry.first; + + auto It = StackMap.find(StackId); + if (It == StackMap.end()) + return make_error<InstrProfError>( + instrprof_error::malformed, + "memprof callstack record does not contain id: " + Twine(StackId)); + + // Construct the symbolized callstack. + llvm::SmallVector<FrameId> Callstack; + Callstack.reserve(It->getSecond().size()); + + llvm::ArrayRef<uint64_t> Addresses = It->getSecond(); + for (size_t I = 0; I < Addresses.size(); I++) { + const uint64_t Address = Addresses[I]; + assert(SymbolizedFrame.count(Address) > 0 && + "Address not found in SymbolizedFrame map"); + const SmallVector<FrameId> &Frames = SymbolizedFrame[Address]; + + assert(!idToFrame(Frames.back()).IsInlineFrame && + "The last frame should not be inlined"); + + // Record the callsites for each function. Skip the first frame of the + // first address since it is the allocation site itself that is recorded + // as an alloc site. + for (size_t J = 0; J < Frames.size(); J++) { + if (I == 0 && J == 0) + continue; + // We attach the entire bottom-up frame here for the callsite even + // though we only need the frames up to and including the frame for + // Frames[J].Function. This will enable better deduplication for + // compression in the future. + const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function; + PerFunctionCallSites[Guid].insert(&Frames); + } + + // Add all the frames to the current allocation callstack. 
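The attribution loop just below follows one rule: call stacks are leaf-first, and an allocation is credited to every inlined frame plus the first non-inlined frame that contains them. A standalone model with illustrative types:

#include <cstdio>
#include <vector>

struct DemoFrame {
  unsigned Function; // Stand-in for the function GUID.
  bool IsInlineFrame;
};

// Credits the allocation bottom-up, stopping once the first real
// (non-inlined) frame has been credited.
static void attributeAllocation(const std::vector<DemoFrame> &Callstack) {
  for (const DemoFrame &F : Callstack) {
    std::printf("credit function %u\n", F.Function);
    if (!F.IsInlineFrame)
      break;
  }
}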
+ Callstack.append(Frames.begin(), Frames.end()); + } + + // We attach the memprof record to each function bottom-up including the + // first non-inline frame. + for (size_t I = 0; /*Break out using the condition below*/; I++) { + const Frame &F = idToFrame(Callstack[I]); + auto Result = + FunctionProfileData.insert({F.Function, IndexedMemProfRecord()}); + IndexedMemProfRecord &Record = Result.first->second; + Record.AllocSites.emplace_back(Callstack, Entry.second); + + if (!F.IsInlineFrame) + break; + } + } + + // Fill in the related callsites per function. + for (auto I = PerFunctionCallSites.begin(), E = PerFunctionCallSites.end(); + I != E; I++) { + const GlobalValue::GUID Id = I->first; + // Some functions may have only callsite data and no allocation data. Here + // we insert a new entry for callsite data if we need to. + auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()}); + IndexedMemProfRecord &Record = Result.first->second; + for (LocationPtr Loc : I->getSecond()) { + Record.CallSites.push_back(*Loc); + } + } + + return Error::success(); +} + +Error RawMemProfReader::symbolizeAndFilterStackFrames() { + // The specifier to use when symbolization is requested. + const DILineInfoSpecifier Specifier( + DILineInfoSpecifier::FileLineInfoKind::RawValue, + DILineInfoSpecifier::FunctionNameKind::LinkageName); + + // For entries where all PCs in the callstack are discarded, we erase the + // entry from the stack map. + llvm::SmallVector<uint64_t> EntriesToErase; + // We keep track of all prior discarded entries so that we can avoid invoking + // the symbolizer for such entries. + llvm::DenseSet<uint64_t> AllVAddrsToDiscard; + for (auto &Entry : StackMap) { + for (const uint64_t VAddr : Entry.getSecond()) { + // Check if we have already symbolized and cached the result or if we + // don't want to attempt symbolization since we know this address is bad. + // In this case the address is also removed from the current callstack. + if (SymbolizedFrame.count(VAddr) > 0 || + AllVAddrsToDiscard.contains(VAddr)) + continue; + + Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode( + getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false); + if (!DIOr) + return DIOr.takeError(); + DIInliningInfo DI = DIOr.get(); + + // Drop frames which we can't symbolize or if they belong to the runtime. + if (DI.getFrame(0).FunctionName == DILineInfo::BadString || + isRuntimePath(DI.getFrame(0).FileName)) { + AllVAddrsToDiscard.insert(VAddr); + continue; + } + + for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames; + I++) { + const auto &DIFrame = DI.getFrame(I); + const uint64_t Guid = + IndexedMemProfRecord::getGUID(DIFrame.FunctionName); + const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column, + // Only the last entry is not an inlined location. + I != NumFrames - 1); + // Here we retain a mapping from the GUID to symbol name instead of + // adding it to the frame object directly to reduce memory overhead. + // This is because there can be many unique frames, particularly for + // callsite frames. 
+ if (KeepSymbolName) + GuidToSymbolName.insert({Guid, DIFrame.FunctionName}); + + const FrameId Hash = F.hash(); + IdToFrame.insert({Hash, F}); + SymbolizedFrame[VAddr].push_back(Hash); + } + } + + auto &CallStack = Entry.getSecond(); + llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) { + return AllVAddrsToDiscard.contains(A); + }); + if (CallStack.empty()) + EntriesToErase.push_back(Entry.getFirst()); + } + + // Drop the entries where the callstack is empty. + for (const uint64_t Id : EntriesToErase) { + StackMap.erase(Id); + CallstackProfileData.erase(Id); + } + + if (StackMap.empty()) + return make_error<InstrProfError>( + instrprof_error::malformed, + "no entries in callstack map after symbolization"); + + return Error::success(); +} + +Error RawMemProfReader::readRawProfile( + std::unique_ptr<MemoryBuffer> DataBuffer) { const char *Next = DataBuffer->getBufferStart(); + while (Next < DataBuffer->getBufferEnd()) { - auto Summary = computeSummary(Next); - OS << "MemProf Profile " << ++Count << "\n"; - OS << " Version: " << Summary.Version << "\n"; - OS << " TotalSizeBytes: " << Summary.TotalSizeBytes << "\n"; - OS << " NumSegments: " << Summary.NumSegments << "\n"; - OS << " NumMIBInfo: " << Summary.NumMIBInfo << "\n"; - OS << " NumStackOffsets: " << Summary.NumStackOffsets << "\n"; - // TODO: Print the build ids once we can record them using the - // sanitizer_procmaps library for linux. + auto *Header = reinterpret_cast<const memprof::Header *>(Next); - auto *H = reinterpret_cast<const Header *>(Next); - Next += H->TotalSize; + // Read in the segment information, check whether its the same across all + // profiles in this binary file. + const llvm::SmallVector<SegmentEntry> Entries = + readSegmentEntries(Next + Header->SegmentOffset); + if (!SegmentInfo.empty() && SegmentInfo != Entries) { + // We do not expect segment information to change when deserializing from + // the same binary profile file. This can happen if dynamic libraries are + // loaded/unloaded between profile dumping. + return make_error<InstrProfError>( + instrprof_error::malformed, + "memprof raw profile has different segment information"); + } + SegmentInfo.assign(Entries.begin(), Entries.end()); + + // Read in the MemInfoBlocks. Merge them based on stack id - we assume that + // raw profiles in the same binary file are from the same process so the + // stackdepot ids are the same. + for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) { + if (CallstackProfileData.count(Value.first)) { + CallstackProfileData[Value.first].Merge(Value.second); + } else { + CallstackProfileData[Value.first] = Value.second; + } + } + + // Read in the callstack for each ids. For multiple raw profiles in the same + // file, we expect that the callstack is the same for a unique id. + const CallStackMap CSM = readStackInfo(Next + Header->StackOffset); + if (StackMap.empty()) { + StackMap = CSM; + } else { + if (mergeStackMap(CSM, StackMap)) + return make_error<InstrProfError>( + instrprof_error::malformed, + "memprof raw profile got different call stack for same id"); + } + + Next += Header->TotalSize; + } + + return Error::success(); +} + +object::SectionedAddress +RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) { + LLVM_DEBUG({ + SegmentEntry *ContainingSegment = nullptr; + for (auto &SE : SegmentInfo) { + if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) { + ContainingSegment = &SE; + } } + + // Ensure that the virtual address is valid. 
+
+object::SectionedAddress
+RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
+  LLVM_DEBUG({
+    SegmentEntry *ContainingSegment = nullptr;
+    for (auto &SE : SegmentInfo) {
+      if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) {
+        ContainingSegment = &SE;
+      }
+    }
+
+    // Ensure that the virtual address is valid.
+    assert(ContainingSegment && "Could not find a segment entry");
+  });
+
+  // TODO: Compute the file offset based on the maps and program headers. For
+  // now this only works for non-PIE binaries.
+  return object::SectionedAddress{VirtualAddress};
 }
 
+Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) {
+  if (FunctionProfileData.empty())
+    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
+
+  if (Iter == FunctionProfileData.end())
+    return make_error<InstrProfError>(instrprof_error::eof);
+
+  auto IdToFrameCallback = [this](const FrameId Id) {
+    Frame F = this->idToFrame(Id);
+    if (!this->KeepSymbolName)
+      return F;
+    auto Iter = this->GuidToSymbolName.find(F.Function);
+    assert(Iter != this->GuidToSymbolName.end());
+    F.SymbolName = Iter->getSecond();
+    return F;
+  };
+
+  const IndexedMemProfRecord &IndexedRecord = Iter->second;
+  GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, IdToFrameCallback)};
+  Iter++;
+  return Error::success();
+}
 } // namespace memprof
 } // namespace llvm
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 9b01a386a360..f794e64a13e7 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -19,9 +19,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LEB128.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/raw_ostream.h"
 #include <string>
@@ -31,22 +29,21 @@ using namespace llvm;
 using namespace sampleprof;
 
 static cl::opt<uint64_t> ProfileSymbolListCutOff(
-    "profile-symbol-list-cutoff", cl::Hidden, cl::init(-1), cl::ZeroOrMore,
+    "profile-symbol-list-cutoff", cl::Hidden, cl::init(-1),
     cl::desc("Cutoff value about how many symbols in profile symbol list "
              "will be used. This is very useful for performance debugging"));
 
 cl::opt<bool> GenerateMergedBaseProfiles(
-    "generate-merged-base-profiles", cl::init(true), cl::ZeroOrMore,
+    "generate-merged-base-profiles",
     cl::desc("When generating nested context-sensitive profiles, always "
              "generate extra base profile for function with all its context "
              "profiles merged into it."));
 
 namespace llvm {
 namespace sampleprof {
-SampleProfileFormat FunctionSamples::Format;
 bool FunctionSamples::ProfileIsProbeBased = false;
-bool FunctionSamples::ProfileIsCSFlat = false;
-bool FunctionSamples::ProfileIsCSNested = false;
+bool FunctionSamples::ProfileIsCS = false;
+bool FunctionSamples::ProfileIsPreInlined = false;
 bool FunctionSamples::UseMD5 = false;
 bool FunctionSamples::HasUniqSuffix = true;
 bool FunctionSamples::ProfileIsFS = false;
@@ -88,8 +85,6 @@ class SampleProfErrorCategoryType : public std::error_category {
       return "Counter overflow";
     case sampleprof_error::ostream_seek_unsupported:
       return "Ostream does not support seek";
-    case sampleprof_error::compress_failed:
-      return "Compress failure";
     case sampleprof_error::uncompress_failed:
       return "Uncompress failure";
     case sampleprof_error::zlib_unavailable:
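The CSProfileConverter change in the next hunk keeps sample counts consistent when a child context profile is nested under its parent: the callsite body sample that previously stood in for the child is removed, and the parent's total is reduced by the same amount. A toy illustration of that accounting, using plain counters rather than the real FunctionSamples API:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Parent foo has 1000 total samples, 100 of which are a body sample at
      // the callsite of child bar. Nesting bar's profile under foo adds bar's
      // 100 samples as a callsite profile...
      uint64_t FooTotal = 1000, CallsiteBodySample = 100, BarTotal = 100;
      FooTotal += BarTotal; // addTotalSamples(ChildProfile->getTotalSamples())

      // ...so the old body sample would now be counted twice. Removing it
      // restores the invariant that each sample is counted exactly once.
      FooTotal -= CallsiteBodySample; // removeTotalSamples(Count)
      assert(FooTotal == 1000);
      return 0;
    }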
@@ -523,6 +518,12 @@ void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) {
     auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
     SamplesMap.emplace(OrigChildContext.getName().str(), *ChildProfile);
     NodeProfile->addTotalSamples(ChildProfile->getTotalSamples());
+    // Remove the corresponding body sample for the callsite and update the
+    // total weight.
+    auto Count = NodeProfile->removeCalledTargetAndBodySample(
+        ChildNode.CallSiteLoc.LineOffset, ChildNode.CallSiteLoc.Discriminator,
+        OrigChildContext.getName());
+    NodeProfile->removeTotalSamples(Count);
   }
 
   // Separate child profile to be a standalone profile, if the current parent
@@ -531,13 +532,14 @@ void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) {
   // thus done optionally. It is seen that duplicating context profiles into
   // base profiles improves the code quality for thinlto build by allowing a
   // profile in the prelink phase for to-be-fully-inlined functions.
-  if (!NodeProfile || GenerateMergedBaseProfiles)
+  if (!NodeProfile) {
     ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
-
-  // Contexts coming with a `ContextShouldBeInlined` attribute indicate this
-  // is a preinliner-computed profile.
-  if (OrigChildContext.hasAttribute(ContextShouldBeInlined))
-    FunctionSamples::ProfileIsCSNested = true;
+  } else if (GenerateMergedBaseProfiles) {
+    ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
+    auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
+    SamplesMap[ChildProfile->getName().str()].getContext().setAttribute(
+        ContextDuplicatedIntoBase);
+  }
 
   // Remove the original child profile.
   ProfileMap.erase(OrigChildContext);
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 80c02faaba04..280e3c6cb8d1 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -23,6 +23,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Module.h"
 #include "llvm/IR/ProfileSummary.h"
 #include "llvm/ProfileData/ProfileCommon.h"
 #include "llvm/ProfileData/SampleProf.h"
@@ -39,7 +40,6 @@
 #include <cstdint>
 #include <limits>
 #include <memory>
-#include <set>
 #include <system_error>
 #include <vector>
 
@@ -348,7 +348,7 @@ std::error_code SampleProfileReaderText::readImpl() {
       }
       FProfile.getContext().setAllAttributes(Attributes);
       if (Attributes & (uint32_t)ContextShouldBeInlined)
-        ProfileIsCSNested = true;
+        ProfileIsPreInlined = true;
       DepthMetadata = Depth;
       break;
     }
@@ -358,14 +358,14 @@ std::error_code SampleProfileReaderText::readImpl() {
 
   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
          "Cannot have both context-sensitive and regular profile");
-  ProfileIsCSFlat = (CSProfileCount > 0);
+  ProfileIsCS = (CSProfileCount > 0);
   assert((TopLevelProbeProfileCount == 0 ||
           TopLevelProbeProfileCount == Profiles.size()) &&
          "Cannot have both probe-based profiles and regular profiles");
   ProfileIsProbeBased = (TopLevelProbeProfileCount > 0);
   FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
-  FunctionSamples::ProfileIsCSFlat = ProfileIsCSFlat;
-  FunctionSamples::ProfileIsCSNested = ProfileIsCSNested;
+  FunctionSamples::ProfileIsCS = ProfileIsCS;
+  FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined;
 
   if (Result == sampleprof_error::success)
     computeSummary();
@@ -630,7 +630,7 @@ SampleProfileReaderExtBinaryBase::readContextFromTable() {
 
 ErrorOr<SampleContext>
 SampleProfileReaderExtBinaryBase::readSampleContextFromTable() {
-  if (ProfileIsCSFlat) {
+  if (ProfileIsCS) {
     auto FContext(readContextFromTable());
     if (std::error_code EC = FContext.getError())
       return EC;
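The readOneSection hunk below only re-tests bits that the writer set in the section header (see the addSectionFlag calls in the SampleProfWriter.cpp hunks later in this diff). The flag check is a plain bitmask test; a minimal sketch of the idea, with hypothetical enum values rather than the exact LLVM definitions:

    #include <cstdint>

    enum class SecProfSummaryFlags : uint64_t {
      SecFlagPartial = 1u << 0,
      SecFlagFullContext = 1u << 1,  // profile is context-sensitive (CS)
      SecFlagIsPreInlined = 1u << 2, // contexts were shaped by the preinliner
    };

    struct SecHdrTableEntry {
      uint64_t Flags = 0;
    };

    // True if the section header has the given summary flag set.
    static bool hasSecFlag(const SecHdrTableEntry &E, SecProfSummaryFlags F) {
      return (E.Flags & static_cast<uint64_t>(F)) != 0;
    }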
@@ -654,9 +654,9 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
       Summary->setPartialProfile(true);
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
-      FunctionSamples::ProfileIsCSFlat = ProfileIsCSFlat = true;
-    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsCSNested))
-      FunctionSamples::ProfileIsCSNested = ProfileIsCSNested;
+      FunctionSamples::ProfileIsCS = ProfileIsCS = true;
+    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
+      FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true;
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
       FunctionSamples::ProfileIsFS = ProfileIsFS = true;
     break;
@@ -777,7 +777,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
     }
   }
 
-  if (ProfileIsCSFlat) {
+  if (ProfileIsCS) {
     DenseSet<uint64_t> FuncGuidsToUse;
     if (useMD5()) {
       for (auto Name : FuncsToUse)
@@ -847,7 +847,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
   }
   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
          "Cannot have both context-sensitive and regular profile");
-  assert((!CSProfileCount || ProfileIsCSFlat) &&
+  assert((!CSProfileCount || ProfileIsCS) &&
          "Section flag should be consistent with actual profile");
   return sampleprof_error::success;
 }
@@ -1105,7 +1105,7 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
     FProfile->getContext().setAllAttributes(*Attributes);
   }
 
-  if (!ProfileIsCSFlat) {
+  if (!ProfileIsCS) {
     // Read all the attributes for inlined function calls.
     auto NumCallsites = readNumber<uint32_t>();
     if (std::error_code EC = NumCallsites.getError())
@@ -1275,8 +1275,8 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
       Flags.append("partial,");
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
       Flags.append("context,");
-    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsCSNested))
-      Flags.append("context-nested,");
+    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
+      Flags.append("preInlined,");
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
       Flags.append("fs-discriminator,");
     break;
@@ -1828,7 +1828,7 @@ SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
                                            SampleProfileReader &Reader,
                                            LLVMContext &C) {
   auto Remappings = std::make_unique<SymbolRemappingReader>();
-  if (Error E = Remappings->read(*B.get())) {
+  if (Error E = Remappings->read(*B)) {
     handleAllErrors(
         std::move(E), [&](const SymbolRemappingParseError &ParseError) {
           C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
@@ -1882,7 +1882,6 @@ SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
     Reader->Remapper = std::move(ReaderOrErr.get());
   }
 
-  FunctionSamples::Format = Reader->getFormat();
   if (std::error_code EC = Reader->readHeader()) {
     return EC;
   }
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index b575425d4e94..8ec6b7ebc29e 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -19,7 +19,6 @@
 #include "llvm/ProfileData/SampleProfWriter.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
 #include "llvm/ProfileData/ProfileCommon.h"
 #include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/Compression.h"
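The compressAndOutput hunk that follows drops the error path because the updated zlib wrapper no longer returns an Error; the framing it writes is unchanged: the uncompressed size, then the compressed size, then the compressed bytes, with each size encoded as ULEB128. A standalone sketch of that framing with a hand-rolled ULEB128 encoder (stand-in payload, not LLVM's implementation):

    #include <cstdint>
    #include <vector>

    // Append Value as ULEB128: 7 bits per byte, high bit set means "more".
    static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value)
          Byte |= 0x80;
        Out.push_back(Byte);
      } while (Value);
    }

    int main() {
      // Frame an (already compressed) payload the way compressAndOutput does:
      // uncompressed size, compressed size, then the bytes themselves.
      std::vector<uint8_t> Payload = {0x78, 0x9c}; // stand-in compressed bytes
      std::vector<uint8_t> Out;
      encodeULEB128(/*UncompressedSize=*/128, Out);
      encodeULEB128(Payload.size(), Out);
      Out.insert(Out.end(), Payload.begin(), Payload.end());
      return 0;
    }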
@@ -87,10 +86,8 @@ std::error_code SampleProfileWriterExtBinaryBase::compressAndOutput() {
     return sampleprof_error::success;
   auto &OS = *OutputStream;
   SmallString<128> CompressedStrings;
-  llvm::Error E = zlib::compress(UncompressedStrings, CompressedStrings,
-                                 zlib::BestSizeCompression);
-  if (E)
-    return sampleprof_error::compress_failed;
+  zlib::compress(UncompressedStrings, CompressedStrings,
+                 zlib::BestSizeCompression);
   encodeULEB128(UncompressedStrings.size(), OS);
   encodeULEB128(CompressedStrings.size(), OS);
   OS << CompressedStrings.str();
@@ -172,7 +169,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeFuncOffsetTable() {
     return (std::error_code)sampleprof_error::success;
   };
 
-  if (FunctionSamples::ProfileIsCSFlat) {
+  if (FunctionSamples::ProfileIsCS) {
     // Sort the contexts before writing them out. This is to help fast load all
     // context profiles for a function as well as their callee contexts which
     // can help profile-guided importing for ThinLTO.
@@ -202,11 +199,11 @@ std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata(
   if (FunctionSamples::ProfileIsProbeBased)
     encodeULEB128(FunctionProfile.getFunctionHash(), OS);
-  if (FunctionSamples::ProfileIsCSFlat || FunctionSamples::ProfileIsCSNested) {
+  if (FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsPreInlined) {
     encodeULEB128(FunctionProfile.getContext().getAllAttributes(), OS);
   }
 
-  if (!FunctionSamples::ProfileIsCSFlat) {
+  if (!FunctionSamples::ProfileIsCS) {
     // Recursively emit attributes for all callee samples.
     uint64_t NumCallsites = 0;
     for (const auto &J : FunctionProfile.getCallsiteSamples())
@@ -228,8 +225,8 @@ std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata(
 std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata(
     const SampleProfileMap &Profiles) {
-  if (!FunctionSamples::ProfileIsProbeBased &&
-      !FunctionSamples::ProfileIsCSFlat && !FunctionSamples::ProfileIsCSNested)
+  if (!FunctionSamples::ProfileIsProbeBased && !FunctionSamples::ProfileIsCS &&
+      !FunctionSamples::ProfileIsPreInlined)
     return sampleprof_error::success;
   for (const auto &Entry : Profiles) {
     if (std::error_code EC = writeFuncMetadata(Entry.second))
@@ -324,12 +321,12 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
   if (Type == SecFuncMetadata && FunctionSamples::ProfileIsProbeBased)
     addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagIsProbeBased);
   if (Type == SecFuncMetadata &&
-      (FunctionSamples::ProfileIsCSFlat || FunctionSamples::ProfileIsCSNested))
+      (FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsPreInlined))
     addSectionFlag(SecFuncMetadata, SecFuncMetadataFlags::SecFlagHasAttribute);
-  if (Type == SecProfSummary && FunctionSamples::ProfileIsCSFlat)
+  if (Type == SecProfSummary && FunctionSamples::ProfileIsCS)
     addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFullContext);
-  if (Type == SecProfSummary && FunctionSamples::ProfileIsCSNested)
-    addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagIsCSNested);
+  if (Type == SecProfSummary && FunctionSamples::ProfileIsPreInlined)
+    addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagIsPreInlined);
   if (Type == SecProfSummary && FunctionSamples::ProfileIsFS)
     addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator);
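For context on the writeSample hunk below: the rename does not change the emitted text. A regular (non-CS) function header line is name:total_samples:total_head_samples, while a CS profile prints the full context in brackets; an illustrative excerpt with made-up counts (inlined callees appear as indented nested profiles):

    main:1000:0
     4: 200
     5: foo:600
      1: 600
    [main:5 @ foo]:600:0
     1: 600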
@@ -471,7 +468,7 @@ SampleProfileWriterCompactBinary::write(const SampleProfileMap &ProfileMap) {
 /// it needs to be parsed by the SampleProfileReaderText class.
 std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
   auto &OS = *OutputStream;
-  if (FunctionSamples::ProfileIsCSFlat)
+  if (FunctionSamples::ProfileIsCS)
     OS << "[" << S.getContext().toString() << "]:" << S.getTotalSamples();
   else
     OS << S.getName() << ":" << S.getTotalSamples();
@@ -871,8 +868,7 @@ SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
   std::unique_ptr<SampleProfileWriter> Writer;
 
   // Currently only Text and Extended Binary format are supported for CSSPGO.
-  if ((FunctionSamples::ProfileIsCSFlat ||
-       FunctionSamples::ProfileIsProbeBased) &&
+  if ((FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsProbeBased) &&
       (Format == SPF_Binary || Format == SPF_Compact_Binary))
     return sampleprof_error::unsupported_writing_format;
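Per the check above, a CS or probe-based profile can only be persisted as text or extended binary. A hedged usage sketch of the writer factory (error handling kept minimal; create and write are the public SampleProfileWriter entry points, the rest is illustrative):

    #include "llvm/ProfileData/SampleProfWriter.h"
    using namespace llvm::sampleprof;

    // Write a profile map to Path; SPF_Binary/SPF_Compact_Binary would be
    // rejected with unsupported_writing_format for CS or probe-based input.
    static std::error_code writeProfiles(const SampleProfileMap &Profiles,
                                         llvm::StringRef Path) {
      auto WriterOrErr = SampleProfileWriter::create(Path, SPF_Ext_Binary);
      if (std::error_code EC = WriterOrErr.getError())
        return EC;
      return WriterOrErr.get()->write(Profiles);
    }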