1 files changed, 1634 insertions, 0 deletions
diff --git a/contrib/llvm-project/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp b/contrib/llvm-project/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp
new file mode 100644
index 000000000000..879f07fb4760
--- /dev/null
+++ b/contrib/llvm-project/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp
@@ -0,0 +1,1634 @@
+//===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+///
+/// \file Converts from in-memory normalized mach-o to in-memory Atoms.
+///
+///                  +------------+
+///                  | normalized |
+///                  +------------+
+///                        |
+///                        |
+///                        v
+///                    +-------+
+///                    | Atoms |
+///                    +-------+
+
+#include "ArchHandler.h"
+#include "Atoms.h"
+#include "File.h"
+#include "MachONormalizedFile.h"
+#include "MachONormalizedFileBinaryUtils.h"
+#include "lld/Common/LLVM.h"
+#include "lld/Core/Error.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm::MachO;
+using namespace lld::mach_o::normalized;
+
+#define DEBUG_TYPE "normalized-file-to-atoms"
+
+namespace lld {
+namespace mach_o {
+
+
+namespace { // anonymous
+
+
+#define ENTRY(seg, sect, type, atomType) \
+  {seg, sect, type, DefinedAtom::atomType }
+
+struct MachORelocatableSectionToAtomType {
+  StringRef                 segmentName;
+  StringRef                 sectionName;
+  SectionType               sectionType;
+  DefinedAtom::ContentType  atomType;
+};
+
+const MachORelocatableSectionToAtomType sectsToAtomType[] = {
+  ENTRY("__TEXT", "__text",           S_REGULAR,          typeCode),
+  ENTRY("__TEXT", "__text",           S_REGULAR,          typeResolver),
+  ENTRY("__TEXT", "__cstring",        S_CSTRING_LITERALS, typeCString),
+  ENTRY("",       "",                 S_CSTRING_LITERALS, typeCString),
+  ENTRY("__TEXT", "__ustring",        S_REGULAR,          typeUTF16String),
+  ENTRY("__TEXT", "__const",          S_REGULAR,          typeConstant),
+  ENTRY("__TEXT", "__const_coal",     S_COALESCED,        typeConstant),
+  ENTRY("__TEXT", "__eh_frame",       S_COALESCED,        typeCFI),
+  ENTRY("__TEXT", "__eh_frame",       S_REGULAR,          typeCFI),
+  ENTRY("__TEXT", "__literal4",       S_4BYTE_LITERALS,   typeLiteral4),
+  ENTRY("__TEXT", "__literal8",       S_8BYTE_LITERALS,   typeLiteral8),
+  ENTRY("__TEXT", "__literal16",      S_16BYTE_LITERALS,  typeLiteral16),
+  ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR,          typeLSDA),
+  ENTRY("__DATA", "__data",           S_REGULAR,          typeData),
+  ENTRY("__DATA", "__datacoal_nt",    S_COALESCED,        typeData),
+  ENTRY("__DATA", "__const",          S_REGULAR,          typeConstData),
+  ENTRY("__DATA", "__cfstring",       S_REGULAR,          typeCFString),
+  ENTRY("__DATA", "__mod_init_func",  S_MOD_INIT_FUNC_POINTERS,
+                                                          typeInitializerPtr),
+  ENTRY("__DATA", "__mod_term_func",  S_MOD_TERM_FUNC_POINTERS,
+                                                          typeTerminatorPtr),
+  ENTRY("__DATA", "__got",            S_NON_LAZY_SYMBOL_POINTERS,
+                                                          typeGOT),
+  ENTRY("__DATA", "__bss",            S_ZEROFILL,         typeZeroFill),
+  ENTRY("",       "",                 S_NON_LAZY_SYMBOL_POINTERS,
+                                                          typeGOT),
+  ENTRY("__DATA", "__interposing",    S_INTERPOSING,      typeInterposingTuples),
+  ENTRY("__DATA", "__thread_vars",    S_THREAD_LOCAL_VARIABLES,
+                                                          typeThunkTLV),
+  ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData),
+  ENTRY("__DATA", "__thread_bss",     S_THREAD_LOCAL_ZEROFILL,
+                                                        typeTLVInitialZeroFill),
+  ENTRY("__DATA", "__objc_imageinfo", S_REGULAR,          typeObjCImageInfo),
+  ENTRY("__DATA", "__objc_catlist",   S_REGULAR,          typeObjC2CategoryList),
+  ENTRY("",       "",                 S_INTERPOSING,      typeInterposingTuples),
+  ENTRY("__LD",   "__compact_unwind", S_REGULAR,
+                                                         typeCompactUnwindInfo),
+  ENTRY("",       "",                 S_REGULAR,          typeUnknown)
+};
+#undef ENTRY
+
+
+/// Figures out ContentType of a mach-o section.
+DefinedAtom::ContentType atomTypeFromSection(const Section &section,
+                                             bool &customSectionName) {
+  // First look for match of name and type. Empty names in table are wildcards.
+  customSectionName = false;
+  for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
+                                 p->atomType != DefinedAtom::typeUnknown; ++p) {
+    if (p->sectionType != section.type)
+      continue;
+    if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty())
+      continue;
+    if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty())
+      continue;
+    customSectionName = p->segmentName.empty() && p->sectionName.empty();
+    return p->atomType;
+  }
+  // Look for code denoted by section attributes
+  if (section.attributes & S_ATTR_PURE_INSTRUCTIONS)
+    return DefinedAtom::typeCode;
+
+  return DefinedAtom::typeUnknown;
+}
+
+enum AtomizeModel {
+  atomizeAtSymbols,
+  atomizeFixedSize,
+  atomizePointerSize,
+  atomizeUTF8,
+  atomizeUTF16,
+  atomizeCFI,
+  atomizeCU,
+  atomizeCFString
+};
+
+/// Returns info on how to atomize a section of the specified ContentType.
+void sectionParseInfo(DefinedAtom::ContentType atomType,
+                      unsigned int &sizeMultiple,
+                      DefinedAtom::Scope &scope,
+                      DefinedAtom::Merge &merge,
+                      AtomizeModel &atomizeModel) {
+  struct ParseInfo {
+    DefinedAtom::ContentType  atomType;
+    unsigned int              sizeMultiple;
+    DefinedAtom::Scope        scope;
+    DefinedAtom::Merge        merge;
+    AtomizeModel              atomizeModel;
+  };
+
+  #define ENTRY(type, size, scope, merge, model) \
+    {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model }
+
+  static const ParseInfo parseInfo[] = {
+    ENTRY(typeCode,              1, scopeGlobal,          mergeNo,
+                                                            atomizeAtSymbols),
+    ENTRY(typeData,              1, scopeGlobal,          mergeNo,
+                                                            atomizeAtSymbols),
+    ENTRY(typeConstData,         1, scopeGlobal,          mergeNo,
+                                                            atomizeAtSymbols),
+    ENTRY(typeZeroFill,          1, scopeGlobal,          mergeNo,
+                                                            atomizeAtSymbols),
+    ENTRY(typeConstant,          1, scopeGlobal,          mergeNo,
+                                                            atomizeAtSymbols),
+    ENTRY(typeCString,           1, scopeLinkageUnit,     mergeByContent,
+                                                            atomizeUTF8),
+    ENTRY(typeUTF16String,       1, scopeLinkageUnit,     mergeByContent,
+                                                            atomizeUTF16),
+    ENTRY(typeCFI,               4, scopeTranslationUnit, mergeNo,
+                                                            atomizeCFI),
+    ENTRY(typeLiteral4,          4, scopeLinkageUnit,     mergeByContent,
+                                                            atomizeFixedSize),
+    ENTRY(typeLiteral8,          8, scopeLinkageUnit,     mergeByContent,
+                                                            atomizeFixedSize),
+    ENTRY(typeLiteral16,        16, scopeLinkageUnit,     mergeByContent,
+                                                            atomizeFixedSize),
+    ENTRY(typeCFString,          4, scopeLinkageUnit,     mergeByContent,
+                                                            atomizeCFString),
+    ENTRY(typeInitializerPtr,    4, scopeTranslationUnit, mergeNo,
+                                                            atomizePointerSize),
+    ENTRY(typeTerminatorPtr,     4, scopeTranslationUnit, mergeNo,
+                                                            atomizePointerSize),
+    ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo,
+                                                            atomizeCU),
+    ENTRY(typeGOT,               4, scopeLinkageUnit,     mergeByContent,
+                                                            atomizePointerSize),
+    ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent,
+                                                            atomizePointerSize),
+    ENTRY(typeUnknown,           1, scopeGlobal,          mergeNo,
+                                                            atomizeAtSymbols)
+  };
+  #undef ENTRY
+  const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo);
+  for (int i=0; i < tableLen; ++i) {
+    if (parseInfo[i].atomType == atomType) {
+      sizeMultiple = parseInfo[i].sizeMultiple;
+      scope        = parseInfo[i].scope;
+      merge        = parseInfo[i].merge;
+      atomizeModel = parseInfo[i].atomizeModel;
+      return;
+    }
+  }
+
+  // Unknown type is atomized by symbols.
+  sizeMultiple = 1;
+  scope = DefinedAtom::scopeGlobal;
+  merge = DefinedAtom::mergeNo;
+  atomizeModel = atomizeAtSymbols;
+}
+
+
+Atom::Scope atomScope(uint8_t scope) {
+  switch (scope) {
+  case N_EXT:
+    return Atom::scopeGlobal;
+  case N_PEXT:
+  case N_PEXT | N_EXT:
+    return Atom::scopeLinkageUnit;
+  case 0:
+    return Atom::scopeTranslationUnit;
+  }
+  llvm_unreachable("unknown scope value!");
+}
+
+void appendSymbolsInSection(const std::vector<Symbol> &inSymbols,
+                            uint32_t sectionIndex,
+                            SmallVector<const Symbol *, 64> &outSyms) {
+  for (const Symbol &sym : inSymbols) {
+    // Only look at definition symbols.
+    if ((sym.type & N_TYPE) != N_SECT)
+      continue;
+    if (sym.sect != sectionIndex)
+      continue;
+    outSyms.push_back(&sym);
+  }
+}
+
+void atomFromSymbol(DefinedAtom::ContentType atomType, const Section &section,
+                    MachOFile &file, uint64_t symbolAddr, StringRef symbolName,
+                    uint16_t symbolDescFlags, Atom::Scope symbolScope,
+                    uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) {
+  // Mach-O symbol table does have size in it. Instead the size is the
+  // difference between this and the next symbol.
+  uint64_t size = nextSymbolAddr - symbolAddr;
+  uint64_t offset = symbolAddr - section.address;
+  bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable;
+  if (isZeroFillSection(section.type)) {
+    file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size,
+                                noDeadStrip, copyRefs, &section);
+  } else {
+    DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF)
+                              ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo;
+    bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF);
+    if (atomType == DefinedAtom::typeUnknown) {
+      // Mach-O needs a segment and section name.  Concatentate those two
+      // with a / separator (e.g. "seg/sect") to fit into the lld model
+      // of just a section name.
+      std::string segSectName = section.segmentName.str()
+                                + "/" + section.sectionName.str();
+      file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType,
+                                         merge, thumb, noDeadStrip, offset,
+                                         size, segSectName, true, &section);
+    } else {
+      if ((atomType == lld::DefinedAtom::typeCode) &&
+          (symbolDescFlags & N_SYMBOL_RESOLVER)) {
+        atomType = lld::DefinedAtom::typeResolver;
+      }
+      file.addDefinedAtom(symbolName, symbolScope, atomType, merge,
+                          offset, size, thumb, noDeadStrip, copyRefs, &section);
+    }
+  }
+}
+
+llvm::Error processSymboledSection(DefinedAtom::ContentType atomType,
+                                   const Section &section,
+                                   const NormalizedFile &normalizedFile,
+                                   MachOFile &file, bool scatterable,
+                                   bool copyRefs) {
+  // Find section's index.
+  uint32_t sectIndex = 1;
+  for (auto &sect : normalizedFile.sections) {
+    if (&sect == &section)
+      break;
+    ++sectIndex;
+  }
+
+  // Find all symbols in this section.
+  SmallVector<const Symbol *, 64> symbols;
+  appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols);
+  appendSymbolsInSection(normalizedFile.localSymbols,  sectIndex, symbols);
+
+  // Sort symbols.
+  std::sort(symbols.begin(), symbols.end(),
+            [](const Symbol *lhs, const Symbol *rhs) -> bool {
+              if (lhs == rhs)
+                return false;
+              // First by address.
+              uint64_t lhsAddr = lhs->value;
+              uint64_t rhsAddr = rhs->value;
+              if (lhsAddr != rhsAddr)
+                return lhsAddr < rhsAddr;
+               // If same address, one is an alias so sort by scope.
+              Atom::Scope lScope = atomScope(lhs->scope);
+              Atom::Scope rScope = atomScope(rhs->scope);
+              if (lScope != rScope)
+                return lScope < rScope;
+              // If same address and scope, see if one might be better as
+              // the alias.
+              bool lPrivate = (lhs->name.front() == 'l');
+              bool rPrivate = (rhs->name.front() == 'l');
+              if (lPrivate != rPrivate)
+                return lPrivate;
+              // If same address and scope, sort by name.
+              return lhs->name < rhs->name;
+            });
+
+  // Debug logging of symbols.
+  //for (const Symbol *sym : symbols)
+  //  llvm::errs() << "  sym: "
+  //    << llvm::format("0x%08llx ", (uint64_t)sym->value)
+  //    << ", " << sym->name << "\n";
+
+  // If section has no symbols and no content, there are no atoms.
+  if (symbols.empty() && section.content.empty())
+    return llvm::Error::success();
+
+  if (symbols.empty()) {
+    // Section has no symbols, put all content in one anoymous atom.
+    atomFromSymbol(atomType, section, file, section.address, StringRef(),
+                  0, Atom::scopeTranslationUnit,
+                  section.address + section.content.size(),
+                  scatterable, copyRefs);
+  }
+  else if (symbols.front()->value != section.address) {
+    // Section has anonymous content before first symbol.
+    atomFromSymbol(atomType, section, file, section.address, StringRef(),
+                   0, Atom::scopeTranslationUnit, symbols.front()->value,
+                   scatterable, copyRefs);
+  }
+
+  const Symbol *lastSym = nullptr;
+  for (const Symbol *sym : symbols) {
+    if (lastSym != nullptr) {
+      // Ignore any assembler added "ltmpNNN" symbol at start of section
+      // if there is another symbol at the start.
+      if ((lastSym->value != sym->value)
+          || lastSym->value != section.address
+          || !lastSym->name.startswith("ltmp")) {
+        atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
+                       lastSym->desc, atomScope(lastSym->scope), sym->value,
+                       scatterable, copyRefs);
+      }
+    }
+    lastSym = sym;
+  }
+  if (lastSym != nullptr) {
+    atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
+                   lastSym->desc, atomScope(lastSym->scope),
+                   section.address + section.content.size(),
+                   scatterable, copyRefs);
+  }
+
+  // If object built without .subsections_via_symbols, add reference chain.
+  if (!scatterable) {
+    MachODefinedAtom *prevAtom = nullptr;
+    file.eachAtomInSection(section,
+                           [&](MachODefinedAtom *atom, uint64_t offset)->void {
+      if (prevAtom)
+        prevAtom->addReference(Reference::KindNamespace::all,
+                               Reference::KindArch::all,
+                               Reference::kindLayoutAfter, 0, atom, 0);
+      prevAtom = atom;
+    });
+  }
+
+  return llvm::Error::success();
+}
+
+llvm::Error processSection(DefinedAtom::ContentType atomType,
+                           const Section &section,
+                           bool customSectionName,
+                           const NormalizedFile &normalizedFile,
+                           MachOFile &file, bool scatterable,
+                           bool copyRefs) {
+  const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
+  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
+
+  // Get info on how to atomize section.
+  unsigned int       sizeMultiple;
+  DefinedAtom::Scope scope;
+  DefinedAtom::Merge merge;
+  AtomizeModel       atomizeModel;
+  sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel);
+
+  // Validate section size.
+  if ((section.content.size() % sizeMultiple) != 0)
+    return llvm::make_error<GenericError>(Twine("Section ")
+                                          + section.segmentName
+                                          + "/" + section.sectionName
+                                          + " has size ("
+                                          + Twine(section.content.size())
+                                          + ") which is not a multiple of "
+                                          + Twine(sizeMultiple));
+
+  if (atomizeModel == atomizeAtSymbols) {
+    // Break section up into atoms each with a fixed size.
+    return processSymboledSection(atomType, section, normalizedFile, file,
+                                  scatterable, copyRefs);
+  } else {
+    unsigned int size;
+    for (unsigned int offset = 0, e = section.content.size(); offset != e;) {
+      switch (atomizeModel) {
+      case atomizeFixedSize:
+        // Break section up into atoms each with a fixed size.
+        size = sizeMultiple;
+        break;
+      case atomizePointerSize:
+        // Break section up into atoms each the size of a pointer.
+        size = is64 ? 8 : 4;
+        break;
+      case atomizeUTF8:
+        // Break section up into zero terminated c-strings.
+        size = 0;
+        for (unsigned int i = offset; i < e; ++i) {
+          if (section.content[i] == 0) {
+            size = i + 1 - offset;
+            break;
+          }
+        }
+        break;
+      case atomizeUTF16:
+        // Break section up into zero terminated UTF16 strings.
+        size = 0;
+        for (unsigned int i = offset; i < e; i += 2) {
+          if ((section.content[i] == 0) && (section.content[i + 1] == 0)) {
+            size = i + 2 - offset;
+            break;
+          }
+        }
+        break;
+      case atomizeCFI:
+        // Break section up into dwarf unwind CFIs (FDE or CIE).
+        size = read32(&section.content[offset], isBig) + 4;
+        if (offset+size > section.content.size()) {
+          return llvm::make_error<GenericError>(Twine("Section ")
+                                                + section.segmentName
+                                                + "/" + section.sectionName
+                                                + " is malformed.  Size of CFI "
+                                                "starting at offset ("
+                                                + Twine(offset)
+                                                + ") is past end of section.");
+        }
+        break;
+      case atomizeCU:
+        // Break section up into compact unwind entries.
+        size = is64 ? 32 : 20;
+        break;
+      case atomizeCFString:
+        // Break section up into NS/CFString objects.
+        size = is64 ? 32 : 16;
+        break;
+      case atomizeAtSymbols:
+        break;
+      }
+      if (size == 0) {
+        return llvm::make_error<GenericError>(Twine("Section ")
+                                              + section.segmentName
+                                              + "/" + section.sectionName
+                                              + " is malformed.  The last atom "
+                                              "is not zero terminated.");
+      }
+      if (customSectionName) {
+        // Mach-O needs a segment and section name.  Concatentate those two
+        // with a / separator (e.g. "seg/sect") to fit into the lld model
+        // of just a section name.
+        std::string segSectName = section.segmentName.str()
+                                  + "/" + section.sectionName.str();
+        file.addDefinedAtomInCustomSection(StringRef(), scope, atomType,
+                                           merge, false, false, offset,
+                                           size, segSectName, true, &section);
+      } else {
+        file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size,
+                            false, false, copyRefs, &section);
+      }
+      offset += size;
+    }
+  }
+  return llvm::Error::success();
+}
+
+const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile,
+                                          uint64_t address) {
+  for (const Section &s : normalizedFile.sections) {
+    uint64_t sAddr = s.address;
+    if ((sAddr <= address) && (address < sAddr+s.content.size())) {
+      return &s;
+    }
+  }
+  return nullptr;
+}
+
+const MachODefinedAtom *
+findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file,
+                        uint64_t addr, Reference::Addend &addend) {
+  const Section *sect = nullptr;
+  sect = findSectionCoveringAddress(normalizedFile, addr);
+  if (!sect)
+    return nullptr;
+
+  uint32_t offsetInTarget;
+  uint64_t offsetInSect = addr - sect->address;
+  auto atom =
+      file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
+  addend = offsetInTarget;
+  return atom;
+}
+
+// Walks all relocations for a section in a normalized .o file and
+// creates corresponding lld::Reference objects.
+llvm::Error convertRelocs(const Section &section,
+                          const NormalizedFile &normalizedFile,
+                          bool scatterable,
+                          MachOFile &file,
+                          ArchHandler &handler) {
+  // Utility function for ArchHandler to find atom by its address.
+  auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr,
+                         const lld::Atom **atom, Reference::Addend *addend)
+                         -> llvm::Error {
+    if (sectIndex > normalizedFile.sections.size())
+      return llvm::make_error<GenericError>(Twine("out of range section "
+                                     "index (") + Twine(sectIndex) + ")");
+    const Section *sect = nullptr;
+    if (sectIndex == 0) {
+      sect = findSectionCoveringAddress(normalizedFile, addr);
+      if (!sect)
+        return llvm::make_error<GenericError>(Twine("address (" + Twine(addr)
+                                       + ") is not in any section"));
+    } else {
+      sect = &normalizedFile.sections[sectIndex-1];
+    }
+    uint32_t offsetInTarget;
+    uint64_t offsetInSect = addr - sect->address;
+    *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
+    *addend = offsetInTarget;
+    return llvm::Error::success();
+  };
+
+  // Utility function for ArchHandler to find atom by its symbol index.
+  auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result)
+                           -> llvm::Error {
+    // Find symbol from index.
+    const Symbol *sym = nullptr;
+    uint32_t numStabs  = normalizedFile.stabsSymbols.size();
+    uint32_t numLocal  = normalizedFile.localSymbols.size();
+    uint32_t numGlobal = normalizedFile.globalSymbols.size();
+    uint32_t numUndef  = normalizedFile.undefinedSymbols.size();
+    assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?");
+    if (symbolIndex < numStabs+numLocal) {
+      sym = &normalizedFile.localSymbols[symbolIndex-numStabs];
+    } else if (symbolIndex < numStabs+numLocal+numGlobal) {
+      sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal];
+    } else if (symbolIndex < numStabs+numLocal+numGlobal+numUndef) {
+      sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal-
+                                             numGlobal];
+    } else {
+      return llvm::make_error<GenericError>(Twine("symbol index (")
+                                     + Twine(symbolIndex) + ") out of range");
+    }
+
+    // Find atom from symbol.
+    if ((sym->type & N_TYPE) == N_SECT) {
+      if (sym->sect > normalizedFile.sections.size())
+        return llvm::make_error<GenericError>(Twine("symbol section index (")
+                                        + Twine(sym->sect) + ") out of range ");
+      const Section &symSection = normalizedFile.sections[sym->sect-1];
+      uint64_t targetOffsetInSect = sym->value - symSection.address;
+      MachODefinedAtom *target = file.findAtomCoveringAddress(symSection,
+                                                            targetOffsetInSect);
+      if (target) {
+        *result = target;
+        return llvm::Error::success();
+      }
+      return llvm::make_error<GenericError>("no atom found for defined symbol");
+    } else if ((sym->type & N_TYPE) == N_UNDF) {
+      const lld::Atom *target = file.findUndefAtom(sym->name);
+      if (target) {
+        *result = target;
+        return llvm::Error::success();
+      }
+      return llvm::make_error<GenericError>("no undefined atom found for sym");
+    } else {
+      // Search undefs
+      return llvm::make_error<GenericError>("no atom found for symbol");
+    }
+  };
+
+  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
+  // Use old-school iterator so that paired relocations can be grouped.
+  for (auto it=section.relocations.begin(), e=section.relocations.end();
+                                                                it != e; ++it) {
+    const Relocation &reloc = *it;
+    // Find atom this relocation is in.
+    if (reloc.offset > section.content.size())
+      return llvm::make_error<GenericError>(
+                                    Twine("r_address (") + Twine(reloc.offset)
+                                    + ") is larger than section size ("
+                                    + Twine(section.content.size()) + ")");
+    uint32_t offsetInAtom;
+    MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section,
+                                                            reloc.offset,
+                                                            &offsetInAtom);
+    assert(inAtom && "r_address in range, should have found atom");
+    uint64_t fixupAddress = section.address + reloc.offset;
+
+    const lld::Atom *target = nullptr;
+    Reference::Addend addend = 0;
+    Reference::KindValue kind;
+    if (handler.isPairedReloc(reloc)) {
+      // Handle paired relocations together.
+      const Relocation &reloc2 = *++it;
+      auto relocErr = handler.getPairReferenceInfo(
+          reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable,
+          atomByAddr, atomBySymbol, &kind, &target, &addend);
+      if (relocErr) {
+        return handleErrors(std::move(relocErr),
+                            [&](std::unique_ptr<GenericError> GE) {
+          return llvm::make_error<GenericError>(
+            Twine("bad relocation (") + GE->getMessage()
+             + ") in section "
+             + section.segmentName + "/" + section.sectionName
+             + " (r1_address=" + Twine::utohexstr(reloc.offset)
+             + ", r1_type=" + Twine(reloc.type)
+             + ", r1_extern=" + Twine(reloc.isExtern)
+             + ", r1_length=" + Twine((int)reloc.length)
+             + ", r1_pcrel=" + Twine(reloc.pcRel)
+             + (!reloc.scattered ? (Twine(", r1_symbolnum=")
+                                    + Twine(reloc.symbol))
+                                 : (Twine(", r1_scattered=1, r1_value=")
+                                    + Twine(reloc.value)))
+             + ")"
+             + ", (r2_address=" + Twine::utohexstr(reloc2.offset)
+             + ", r2_type=" + Twine(reloc2.type)
+             + ", r2_extern=" + Twine(reloc2.isExtern)
+             + ", r2_length=" + Twine((int)reloc2.length)
+             + ", r2_pcrel=" + Twine(reloc2.pcRel)
+             + (!reloc2.scattered ? (Twine(", r2_symbolnum=")
+                                     + Twine(reloc2.symbol))
+                                  : (Twine(", r2_scattered=1, r2_value=")
+                                     + Twine(reloc2.value)))
+             + ")" );
+          });
+      }
+    }
+    else {
+      // Use ArchHandler to convert relocation record into information
+      // needed to instantiate an lld::Reference object.
+      auto relocErr = handler.getReferenceInfo(
+          reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr,
+          atomBySymbol, &kind, &target, &addend);
+      if (relocErr) {
+        return handleErrors(std::move(relocErr),
+                            [&](std::unique_ptr<GenericError> GE) {
+          return llvm::make_error<GenericError>(
+            Twine("bad relocation (") + GE->getMessage()
+             + ") in section "
+             + section.segmentName + "/" + section.sectionName
+             + " (r_address=" + Twine::utohexstr(reloc.offset)
+             + ", r_type=" + Twine(reloc.type)
+             + ", r_extern=" + Twine(reloc.isExtern)
+             + ", r_length=" + Twine((int)reloc.length)
+             + ", r_pcrel=" + Twine(reloc.pcRel)
+             + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol))
+                                 : (Twine(", r_scattered=1, r_value=")
+                                    + Twine(reloc.value)))
+             + ")" );
+          });
+      }
+    }
+    // Instantiate an lld::Reference object and add to its atom.
+    inAtom->addReference(Reference::KindNamespace::mach_o,
+                         handler.kindArch(),
+                         kind, offsetInAtom, target, addend);
+  }
+
+  return llvm::Error::success();
+}
+
+bool isDebugInfoSection(const Section &section) {
+  if ((section.attributes & S_ATTR_DEBUG) == 0)
+    return false;
+  return section.segmentName.equals("__DWARF");
+}
+
+static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) {
+  std::string strName = name.str();
+  for (auto *atom : file.defined())
+    if (atom->name() == strName)
+      return atom;
+  return nullptr;
+}
+
+static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) {
+  char *strCopy = alloc.Allocate<char>(str.size() + 1);
+  memcpy(strCopy, str.data(), str.size());
+  strCopy[str.size()] = '\0';
+  return strCopy;
+}
+
+llvm::Error parseStabs(MachOFile &file,
+                       const NormalizedFile &normalizedFile,
+                       bool copyRefs) {
+
+  if (normalizedFile.stabsSymbols.empty())
+    return llvm::Error::success();
+
+  // FIXME: Kill this off when we can move to sane yaml parsing.
+  std::unique_ptr<BumpPtrAllocator> allocator;
+  if (copyRefs)
+    allocator = llvm::make_unique<BumpPtrAllocator>();
+
+  enum { start, inBeginEnd } state = start;
+
+  const Atom *currentAtom = nullptr;
+  uint64_t currentAtomAddress = 0;
+  StabsDebugInfo::StabsList stabsList;
+  for (const auto &stabSym : normalizedFile.stabsSymbols) {
+    Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc,
+              stabSym.value, stabSym.name);
+    switch (state) {
+    case start:
+      switch (static_cast<StabType>(stabSym.type)) {
+      case N_BNSYM:
+        state = inBeginEnd;
+        currentAtomAddress = stabSym.value;
+        Reference::Addend addend;
+        currentAtom = findAtomCoveringAddress(normalizedFile, file,
+                                              currentAtomAddress, addend);
+        if (addend != 0)
+          return llvm::make_error<GenericError>(
+                   "Non-zero addend for BNSYM '" + stabSym.name + "' in " +
+                   file.path());
+        if (currentAtom)
+          stab.atom = currentAtom;
+        else {
+          // FIXME: ld64 just issues a warning here - should we match that?
+          return llvm::make_error<GenericError>(
+                   "can't find atom for stabs BNSYM at " +
+                   Twine::utohexstr(stabSym.value) + " in " + file.path());
+        }
+        break;
+      case N_SO:
+      case N_OSO:
+        // Not associated with an atom, just copy.
+        if (copyRefs)
+          stab.str = copyDebugString(stabSym.name, *allocator);
+        else
+          stab.str = stabSym.name;
+        break;
+      case N_GSYM: {
+        auto colonIdx = stabSym.name.find(':');
+        if (colonIdx != StringRef::npos) {
+          StringRef name = stabSym.name.substr(0, colonIdx);
+          currentAtom = findDefinedAtomByName(file, "_" + name);
+          stab.atom = currentAtom;
+          if (copyRefs)
+            stab.str = copyDebugString(stabSym.name, *allocator);
+          else
+            stab.str = stabSym.name;
+        } else {
+          currentAtom = findDefinedAtomByName(file, stabSym.name);
+          stab.atom = currentAtom;
+          if (copyRefs)
+            stab.str = copyDebugString(stabSym.name, *allocator);
+          else
+            stab.str = stabSym.name;
+        }
+        if (stab.atom == nullptr)
+          return llvm::make_error<GenericError>(
+                   "can't find atom for N_GSYM stabs" + stabSym.name +
+                   " in " + file.path());
+        break;
+      }
+      case N_FUN:
+        return llvm::make_error<GenericError>(
+                 "old-style N_FUN stab '" + stabSym.name + "' unsupported");
+      default:
+        return llvm::make_error<GenericError>(
+                 "unrecognized stab symbol '" + stabSym.name + "'");
+      }
+      break;
+    case inBeginEnd:
+      stab.atom = currentAtom;
+      switch (static_cast<StabType>(stabSym.type)) {
+      case N_ENSYM:
+        state = start;
+        currentAtom = nullptr;
+        break;
+      case N_FUN:
+        // Just copy the string.
+        if (copyRefs)
+          stab.str = copyDebugString(stabSym.name, *allocator);
+        else
+          stab.str = stabSym.name;
+        break;
+      default:
+        return llvm::make_error<GenericError>(
+                 "unrecognized stab symbol '" + stabSym.name + "'");
+      }
+    }
+    llvm::dbgs() << "Adding to stabsList: " << stab << "\n";
+    stabsList.push_back(stab);
+  }
+
+  file.setDebugInfo(llvm::make_unique<StabsDebugInfo>(std::move(stabsList)));
+
+  // FIXME: Kill this off when we fix YAML memory ownership.
+  file.debugInfo()->setAllocator(std::move(allocator));
+
+  return llvm::Error::success();
+}
+
+static llvm::DataExtractor
+dataExtractorFromSection(const NormalizedFile &normalizedFile,
+                         const Section &S) {
+  const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
+  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
+  StringRef SecData(reinterpret_cast<const char*>(S.content.data()),
+                    S.content.size());
+  return llvm::DataExtractor(SecData, !isBig, is64 ? 8 : 4);
+}
+
+// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
+//        inspection" code if possible.
+static uint32_t getCUAbbrevOffset(llvm::DataExtractor abbrevData,
+                                  uint64_t abbrCode) {
+  uint64_t curCode;
+  uint32_t offset = 0;
+  while ((curCode = abbrevData.getULEB128(&offset)) != abbrCode) {
+    // Tag
+    abbrevData.getULEB128(&offset);
+    // DW_CHILDREN
+    abbrevData.getU8(&offset);
+    // Attributes
+    while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset))
+      ;
+  }
+  return offset;
+}
+
+// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
+//        inspection" code if possible.
+static Expected<const char *>
+getIndexedString(const NormalizedFile &normalizedFile,
+                 llvm::dwarf::Form form, llvm::DataExtractor infoData,
+                 uint32_t &infoOffset, const Section &stringsSection) {
+  if (form == llvm::dwarf::DW_FORM_string)
+   return infoData.getCStr(&infoOffset);
+  if (form != llvm::dwarf::DW_FORM_strp)
+    return llvm::make_error<GenericError>(
+        "string field encoded without DW_FORM_strp");
+  uint32_t stringOffset = infoData.getU32(&infoOffset);
+  llvm::DataExtractor stringsData =
+    dataExtractorFromSection(normalizedFile, stringsSection);
+  return stringsData.getCStr(&stringOffset);
+}
+
+// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
+//        inspection" code if possible.
+static llvm::Expected<TranslationUnitSource>
+readCompUnit(const NormalizedFile &normalizedFile,
+             const Section &info,
+             const Section &abbrev,
+             const Section &strings,
+             StringRef path) {
+  // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
+  //        inspection" code if possible.
+  uint32_t offset = 0;
+  llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32;
+  auto infoData = dataExtractorFromSection(normalizedFile, info);
+  uint32_t length = infoData.getU32(&offset);
+  if (length == 0xffffffff) {
+    Format = llvm::dwarf::DwarfFormat::DWARF64;
+    infoData.getU64(&offset);
+  }
+  else if (length > 0xffffff00)
+    return llvm::make_error<GenericError>("Malformed DWARF in " + path);
+
+  uint16_t version = infoData.getU16(&offset);
+
+  if (version < 2 || version > 4)
+    return llvm::make_error<GenericError>("Unsupported DWARF version in " +
+                                          path);
+
+  infoData.getU32(&offset); // Abbrev offset (should be zero)
+  uint8_t addrSize = infoData.getU8(&offset);
+
+  uint32_t abbrCode = infoData.getULEB128(&offset);
+  auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev);
+  uint32_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode);
+  uint64_t tag = abbrevData.getULEB128(&abbrevOffset);
+  if (tag != llvm::dwarf::DW_TAG_compile_unit)
+    return llvm::make_error<GenericError>("top level DIE is not a compile unit");
+  // DW_CHILDREN
+  abbrevData.getU8(&abbrevOffset);
+  uint32_t name;
+  llvm::dwarf::Form form;
+  llvm::dwarf::FormParams formParams = {version, addrSize, Format};
+  TranslationUnitSource tu;
+  while ((name = abbrevData.getULEB128(&abbrevOffset)) |
+         (form = static_cast<llvm::dwarf::Form>(
+             abbrevData.getULEB128(&abbrevOffset))) &&
+         (name != 0 || form != 0)) {
+    switch (name) {
+    case llvm::dwarf::DW_AT_name: {
+      if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
+                                        strings))
+          tu.name = *eName;
+      else
+        return eName.takeError();
+      break;
+    }
+    case llvm::dwarf::DW_AT_comp_dir: {
+      if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
+                                        strings))
+        tu.path = *eName;
+      else
+        return eName.takeError();
+      break;
+    }
+    default:
+      llvm::DWARFFormValue::skipValue(form, infoData, &offset, formParams);
+    }
+  }
+  return tu;
+}
+
+llvm::Error parseDebugInfo(MachOFile &file,
+                           const NormalizedFile &normalizedFile, bool copyRefs) {
+
+  // Find the interesting debug info sections.
+  const Section *debugInfo = nullptr;
+  const Section *debugAbbrev = nullptr;
+  const Section *debugStrings = nullptr;
+
+  for (auto &s : normalizedFile.sections) {
+    if (s.segmentName == "__DWARF") {
+      if (s.sectionName == "__debug_info")
+        debugInfo = &s;
+      else if (s.sectionName == "__debug_abbrev")
+        debugAbbrev = &s;
+      else if (s.sectionName == "__debug_str")
+        debugStrings = &s;
+    }
+  }
+
+  if (!debugInfo)
+    return parseStabs(file, normalizedFile, copyRefs);
+
+  if (debugInfo->content.size() == 0)
+    return llvm::Error::success();
+
+  if (debugInfo->content.size() < 12)
+    return llvm::make_error<GenericError>("Malformed __debug_info section in " +
+                                          file.path() + ": too small");
+
+  if (!debugAbbrev)
+    return llvm::make_error<GenericError>("Missing __dwarf_abbrev section in " +
+                                          file.path());
+
+  if (auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev,
+                                  *debugStrings, file.path())) {
+    // FIXME: Kill of allocator and code under 'copyRefs' when we fix YAML
+    //        memory ownership.
+    std::unique_ptr<BumpPtrAllocator> allocator;
+    if (copyRefs) {
+      allocator = llvm::make_unique<BumpPtrAllocator>();
+      tuOrErr->name = copyDebugString(tuOrErr->name, *allocator);
+      tuOrErr->path = copyDebugString(tuOrErr->path, *allocator);
+    }
+    file.setDebugInfo(llvm::make_unique<DwarfDebugInfo>(std::move(*tuOrErr)));
+    if (copyRefs)
+      file.debugInfo()->setAllocator(std::move(allocator));
+  } else
+    return tuOrErr.takeError();
+
+  return llvm::Error::success();
+}
+
+static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) {
+  if (is64)
+    return read64(addr, isBig);
+
+  int32_t res = read32(addr, isBig);
+  return res;
+}
+
+/// --- Augmentation String Processing ---
+
+struct CIEInfo {
+  bool _augmentationDataPresent = false;
+  bool _mayHaveEH = false;
+  uint32_t _offsetOfLSDA = ~0U;
+  uint32_t _offsetOfPersonality = ~0U;
+  uint32_t _offsetOfFDEPointerEncoding = ~0U;
+  uint32_t _augmentationDataLength = ~0U;
+};
+
+typedef llvm::DenseMap<const MachODefinedAtom*, CIEInfo> CIEInfoMap;
+
+static llvm::Error processAugmentationString(const uint8_t *augStr,
+                                             CIEInfo &cieInfo,
+                                             unsigned &len) {
+
+  if (augStr[0] == '\0') {
+    len = 1;
+    return llvm::Error::success();
+  }
+
+  if (augStr[0] != 'z')
+    return llvm::make_error<GenericError>("expected 'z' at start of "
+                                          "augmentation string");
+
+  cieInfo._augmentationDataPresent = true;
+  uint64_t idx = 1;
+
+  uint32_t offsetInAugmentationData = 0;
+  while (augStr[idx] != '\0') {
+    if (augStr[idx] == 'L') {
+      cieInfo._offsetOfLSDA = offsetInAugmentationData;
+      // This adds a single byte to the augmentation data.
+      ++offsetInAugmentationData;
+      ++idx;
+      continue;
+    }
+    if (augStr[idx] == 'P') {
+      cieInfo._offsetOfPersonality = offsetInAugmentationData;
+      // This adds a single byte to the augmentation data for the encoding,
+      // then a number of bytes for the pointer data.
+      // FIXME: We are assuming 4 is correct here for the pointer size as we
+      // always currently use delta32ToGOT.
+      offsetInAugmentationData += 5;
+      ++idx;
+      continue;
+    }
+    if (augStr[idx] == 'R') {
+      cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData;
+      // This adds a single byte to the augmentation data.
+      ++offsetInAugmentationData;
+      ++idx;
+      continue;
+    }
+    if (augStr[idx] == 'e') {
+      if (augStr[idx + 1] != 'h')
+        return llvm::make_error<GenericError>("expected 'eh' in "
+                                              "augmentation string");
+      cieInfo._mayHaveEH = true;
+      idx += 2;
+      continue;
+    }
+    ++idx;
+  }
+
+  cieInfo._augmentationDataLength = offsetInAugmentationData;
+
+  len = idx + 1;
+  return llvm::Error::success();
+}
+
+static llvm::Error processCIE(const NormalizedFile &normalizedFile,
+                              MachOFile &file,
+                              mach_o::ArchHandler &handler,
+                              const Section *ehFrameSection,
+                              MachODefinedAtom *atom,
+                              uint64_t offset,
+                              CIEInfoMap &cieInfos) {
+  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
+  const uint8_t *frameData = atom->rawContent().data();
+
+  CIEInfo cieInfo;
+
+  uint32_t size = read32(frameData, isBig);
+  uint64_t cieIDField = size == 0xffffffffU
+                          ? sizeof(uint32_t) + sizeof(uint64_t)
+                          : sizeof(uint32_t);
+  uint64_t versionField = cieIDField + sizeof(uint32_t);
+  uint64_t augmentationStringField = versionField + sizeof(uint8_t);
+
+  unsigned augmentationStringLength = 0;
+  if (auto err = processAugmentationString(frameData + augmentationStringField,
+                                           cieInfo, augmentationStringLength))
+    return err;
+
+  if (cieInfo._offsetOfPersonality != ~0U) {
+    // If we have augmentation data for the personality function, then we may
+    // need to implicitly generate its relocation.
+
+    // Parse the EH Data field which is pointer sized.
+    uint64_t EHDataField = augmentationStringField + augmentationStringLength;
+    const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
+    unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0);
+
+    // Parse Code Align Factor which is a ULEB128.
+    uint64_t CodeAlignField = EHDataField + EHDataFieldSize;
+    unsigned lengthFieldSize = 0;
+    llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize);
+
+    // Parse Data Align Factor which is a SLEB128.
+    uint64_t DataAlignField = CodeAlignField + lengthFieldSize;
+    llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize);
+
+    // Parse Return Address Register which is a byte.
+    uint64_t ReturnAddressField = DataAlignField + lengthFieldSize;
+
+    // Parse the augmentation length which is a ULEB128.
+    uint64_t AugmentationLengthField = ReturnAddressField + 1;
+    uint64_t AugmentationLength =
+      llvm::decodeULEB128(frameData + AugmentationLengthField,
+                          &lengthFieldSize);
+
+    if (AugmentationLength != cieInfo._augmentationDataLength)
+      return llvm::make_error<GenericError>("CIE augmentation data length "
+                                            "mismatch");
+
+    // Get the start address of the augmentation data.
+    uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize;
+
+    // Parse the personality function from the augmentation data.
+    uint64_t PersonalityField =
+      AugmentationDataField + cieInfo._offsetOfPersonality;
+
+    // Parse the personality encoding.
+    // FIXME: Verify that this is a 32-bit pcrel offset.
+    uint64_t PersonalityFunctionField = PersonalityField + 1;
+
+    if (atom->begin() != atom->end()) {
+      // If we have an explicit relocation, then make sure it matches this
+      // offset as this is where we'd expect it to be applied to.
+      DefinedAtom::reference_iterator CurrentRef = atom->begin();
+      if (CurrentRef->offsetInAtom() != PersonalityFunctionField)
+        return llvm::make_error<GenericError>("CIE personality reloc at "
+                                              "wrong offset");
+
+      if (++CurrentRef != atom->end())
+        return llvm::make_error<GenericError>("CIE contains too many relocs");
+    } else {
+      // Implicitly generate the personality function reloc.  It's assumed to
+      // be a delta32 offset to a GOT entry.
+      // FIXME: Parse the encoding and check this.
+      int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig);
+      uint64_t funcAddress = ehFrameSection->address + offset +
+                             PersonalityFunctionField;
+      funcAddress += funcDelta;
+
+      const MachODefinedAtom *func = nullptr;
+      Reference::Addend addend;
+      func = findAtomCoveringAddress(normalizedFile, file, funcAddress,
+                                     addend);
+      atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
+                         handler.unwindRefToPersonalityFunctionKind(),
+                         PersonalityFunctionField, func, addend);
+    }
+  } else if (atom->begin() != atom->end()) {
+    // Otherwise, we expect there to be no relocations in this atom as the only
+    // relocation would have been to the personality function.
+    return llvm::make_error<GenericError>("unexpected relocation in CIE");
+  }
+
+
+  cieInfos[atom] = std::move(cieInfo);
+
+  return llvm::Error::success();
+}
+
+static llvm::Error processFDE(const NormalizedFile &normalizedFile,
+                              MachOFile &file,
+                              mach_o::ArchHandler &handler,
+                              const Section *ehFrameSection,
+                              MachODefinedAtom *atom,
+                              uint64_t offset,
+                              const CIEInfoMap &cieInfos) {
+
+  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
+  const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
+
+  // Compiler wasn't lazy and actually told us what it meant.
+  // Unfortunately, the compiler may not have generated references for all of
+  // [cie, func, lsda] and so we still need to parse the FDE and add references
+  // for any the compiler didn't generate.
+  if (atom->begin() != atom->end())
+    atom->sortReferences();
+
+  DefinedAtom::reference_iterator CurrentRef = atom->begin();
+
+  // This helper returns the reference (if one exists) at the offset we are
+  // currently processing.  It automatically increments the ref iterator if we
+  // do return a ref, and throws an error if we pass over a ref without
+  // comsuming it.
+  auto currentRefGetter = [&CurrentRef,
+                           &atom](uint64_t Offset)->const Reference* {
+    // If there are no more refs found, then we are done.
+    if (CurrentRef == atom->end())
+      return nullptr;
+
+    const Reference *Ref = *CurrentRef;
+
+    // If we haven't reached the offset for this reference, then return that
+    // we don't yet have a reference to process.
+    if (Offset < Ref->offsetInAtom())
+      return nullptr;
+
+    // If the offset is equal, then we want to process this ref.
+    if (Offset == Ref->offsetInAtom()) {
+      ++CurrentRef;
+      return Ref;
+    }
+
+    // The current ref is at an offset which is earlier than the current
+    // offset, then we failed to consume it when we should have.  In this case
+    // throw an error.
+    llvm::report_fatal_error("Skipped reference when processing FDE");
+  };
+
+  // Helper to either get the reference at this current location, and verify
+  // that it is of the expected type, or add a reference of that type.
+  // Returns the reference target.
+  auto verifyOrAddReference = [&](uint64_t targetAddress,
+                                  Reference::KindValue refKind,
+                                  uint64_t refAddress,
+                                  bool allowsAddend)->const Atom* {
+    if (auto *ref = currentRefGetter(refAddress)) {
+      // The compiler already emitted a relocation for the CIE ref.  This should
+      // have been converted to the correct type of reference in
+      // get[Pair]ReferenceInfo().
+      assert(ref->kindValue() == refKind &&
+             "Incorrect EHFrame reference kind");
+      return ref->target();
+    }
+    Reference::Addend addend;
+    auto *target = findAtomCoveringAddress(normalizedFile, file,
+                                           targetAddress, addend);
+    atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
+                       refKind, refAddress, target, addend);
+
+    if (!allowsAddend)
+      assert(!addend && "EHFrame reference cannot have addend");
+    return target;
+  };
+
+  const uint8_t *startFrameData = atom->rawContent().data();
+  const uint8_t *frameData = startFrameData;
+
+  uint32_t size = read32(frameData, isBig);
+  uint64_t cieFieldInFDE = size == 0xffffffffU
+    ? sizeof(uint32_t) + sizeof(uint64_t)
+    : sizeof(uint32_t);
+
+  // Linker needs to fixup a reference from the FDE to its parent CIE (a
+  // 32-bit byte offset backwards in the __eh_frame section).
+  uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig);
+  uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE;
+  cieAddress -= cieDelta;
+
+  auto *cieRefTarget = verifyOrAddReference(cieAddress,
+                                            handler.unwindRefToCIEKind(),
+                                            cieFieldInFDE, false);
+  const MachODefinedAtom *cie = dyn_cast<MachODefinedAtom>(cieRefTarget);
+  assert(cie && cie->contentType() == DefinedAtom::typeCFI &&
+         "FDE's CIE field does not point at the start of a CIE.");
+
+  const CIEInfo &cieInfo = cieInfos.find(cie)->second;
+
+  // Linker needs to fixup reference from the FDE to the function it's
+  // describing. FIXME: there are actually different ways to do this, and the
+  // particular method used is specified in the CIE's augmentation fields
+  // (hopefully)
+  uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t);
+
+  int64_t functionFromFDE = readSPtr(is64, isBig,
+                                     frameData + rangeFieldInFDE);
+  uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE;
+  rangeStart += functionFromFDE;
+
+  verifyOrAddReference(rangeStart,
+                       handler.unwindRefToFunctionKind(),
+                       rangeFieldInFDE, true);
+
+  // Handle the augmentation data if there is any.
+  if (cieInfo._augmentationDataPresent) {
+    // First process the augmentation data length field.
+    uint64_t augmentationDataLengthFieldInFDE =
+      rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t));
+    unsigned lengthFieldSize = 0;
+    uint64_t augmentationDataLength =
+      llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE,
+                          &lengthFieldSize);
+
+    if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) {
+
+      // Look at the augmentation data field.
+      uint64_t augmentationDataFieldInFDE =
+        augmentationDataLengthFieldInFDE + lengthFieldSize;
+
+      int64_t lsdaFromFDE = readSPtr(is64, isBig,
+                                     frameData + augmentationDataFieldInFDE);
+      uint64_t lsdaStart =
+        ehFrameSection->address + offset + augmentationDataFieldInFDE +
+        lsdaFromFDE;
+
+      verifyOrAddReference(lsdaStart,
+                           handler.unwindRefToFunctionKind(),
+                           augmentationDataFieldInFDE, true);
+    }
+  }
+
+  return llvm::Error::success();
+}
+
+llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile,
+                                 MachOFile &file,
+                                 mach_o::ArchHandler &handler) {
+
+  const Section *ehFrameSection = nullptr;
+  for (auto &section : normalizedFile.sections)
+    if (section.segmentName == "__TEXT" &&
+        section.sectionName == "__eh_frame") {
+      ehFrameSection = &section;
+      break;
+    }
+
+  // No __eh_frame so nothing to do.
+  if (!ehFrameSection)
+    return llvm::Error::success();
+
+  llvm::Error ehFrameErr = llvm::Error::success();
+  CIEInfoMap cieInfos;
+
+  file.eachAtomInSection(*ehFrameSection,
+                         [&](MachODefinedAtom *atom, uint64_t offset) -> void {
+    assert(atom->contentType() == DefinedAtom::typeCFI);
+
+    // Bail out if we've encountered an error.
+    if (ehFrameErr)
+      return;
+
+    const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
+    if (ArchHandler::isDwarfCIE(isBig, atom))
+      ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection,
+                              atom, offset, cieInfos);
+    else
+      ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection,
+                              atom, offset, cieInfos);
+  });
+
+  return ehFrameErr;
+}
+
+llvm::Error parseObjCImageInfo(const Section &sect,
+                               const NormalizedFile &normalizedFile,
+                               MachOFile &file) {
+
+  //	struct objc_image_info  {
+  //		uint32_t	version;	// initially 0
+  //		uint32_t	flags;
+  //	};
+
+  ArrayRef<uint8_t> content = sect.content;
+  if (content.size() != 8)
+    return llvm::make_error<GenericError>(sect.segmentName + "/" +
+                                          sect.sectionName +
+                                          " in file " + file.path() +
+                                          " should be 8 bytes in size");
+
+  const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
+  uint32_t version = read32(content.data(), isBig);
+  if (version)
+    return llvm::make_error<GenericError>(sect.segmentName + "/" +
+                                          sect.sectionName +
+                                          " in file " + file.path() +
+                                          " should have version=0");
+
+  uint32_t flags = read32(content.data() + 4, isBig);
+  if (flags & (MachOLinkingContext::objc_supports_gc |
+               MachOLinkingContext::objc_gc_only))
+    return llvm::make_error<GenericError>(sect.segmentName + "/" +
+                                          sect.sectionName +
+                                          " in file " + file.path() +
+                                          " uses GC.  This is not supported");
+
+  if (flags & MachOLinkingContext::objc_retainReleaseForSimulator)
+    file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator);
+  else
+    file.setObjcConstraint(MachOLinkingContext::objc_retainRelease);
+
+  file.setSwiftVersion((flags >> 8) & 0xFF);
+
+  return llvm::Error::success();
+}
+
+/// Converts normalized mach-o file into an lld::File and lld::Atoms.
+llvm::Expected<std::unique_ptr<lld::File>>
+objectToAtoms(const NormalizedFile &normalizedFile, StringRef path,
+              bool copyRefs) {
+  std::unique_ptr<MachOFile> file(new MachOFile(path));
+  if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs))
+    return std::move(ec);
+  return std::unique_ptr<File>(std::move(file));
+}
+
+llvm::Expected<std::unique_ptr<lld::File>>
+dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path,
+             bool copyRefs) {
+  // Instantiate SharedLibraryFile object.
+  std::unique_ptr<MachODylibFile> file(new MachODylibFile(path));
+  if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs))
+    return std::move(ec);
+  return std::unique_ptr<File>(std::move(file));
+}
+
+} // anonymous namespace
+
+namespace normalized {
+
+static bool isObjCImageInfo(const Section &sect) {
+  return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") ||
+    (sect.segmentName == "__DATA" && sect.sectionName == "__objc_imageinfo");
+}
+
+llvm::Error
+normalizedObjectToAtoms(MachOFile *file,
+                        const NormalizedFile &normalizedFile,
+                        bool copyRefs) {
+  LLVM_DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: "
+                          << file->path() << "\n");
+  bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0);
+
+  // Create atoms from each section.
+  for (auto &sect : normalizedFile.sections) {
+
+    // If this is a debug-info section parse it specially.
+    if (isDebugInfoSection(sect))
+      continue;
+
+    // If the file contains an objc_image_info struct, then we should parse the
+    // ObjC flags and Swift version.
+    if (isObjCImageInfo(sect)) {
+      if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file))
+        return ec;
+      // We then skip adding atoms for this section as we use the ObjCPass to
+      // re-emit this data after it has been aggregated for all files.
+      continue;
+    }
+
+    bool customSectionName;
+    DefinedAtom::ContentType atomType = atomTypeFromSection(sect,
+                                                            customSectionName);
+    if (auto ec =  processSection(atomType, sect, customSectionName,
+                                  normalizedFile, *file, scatterable, copyRefs))
+      return ec;
+  }
+  // Create atoms from undefined symbols.
+  for (auto &sym : normalizedFile.undefinedSymbols) {
+    // Undefinded symbols with n_value != 0 are actually tentative definitions.
+    if (sym.value == Hex64(0)) {
+      file->addUndefinedAtom(sym.name, copyRefs);
+    } else {
+      file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value,
+                                DefinedAtom::Alignment(1 << (sym.desc >> 8)),
+                                copyRefs);
+    }
+  }
+
+  // Convert mach-o relocations to References
+  std::unique_ptr<mach_o::ArchHandler> handler
+                                     = ArchHandler::create(normalizedFile.arch);
+  for (auto &sect : normalizedFile.sections) {
+    if (isDebugInfoSection(sect))
+      continue;
+    if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable,
+                                       *file, *handler))
+      return ec;
+  }
+
+  // Add additional arch-specific References
+  file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void {
+    handler->addAdditionalReferences(*atom);
+  });
+
+  // Each __eh_frame section needs references to both __text (the function we're
+  // providing unwind info for) and itself (FDE -> CIE). These aren't
+  // represented in the relocations on some architectures, so we have to add
+  // them back in manually there.
+  if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler))
+    return ec;
+
+  // Process mach-o data-in-code regions array. That information is encoded in
+  // atoms as References at each transition point.
+  unsigned nextIndex = 0;
+  for (const DataInCode &entry : normalizedFile.dataInCode) {
+    ++nextIndex;
+    const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset);
+    if (!s) {
+      return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address ("
+                                                  + Twine(entry.offset)
+                                                  + ") is not in any section"));
+    }
+    uint64_t offsetInSect = entry.offset - s->address;
+    uint32_t offsetInAtom;
+    MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect,
+                                                           &offsetInAtom);
+    if (offsetInAtom + entry.length > atom->size()) {
+      return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry "
+                                                  "(offset="
+                                                  + Twine(entry.offset)
+                                                  + ", length="
+                                                  + Twine(entry.length)
+                                                  + ") crosses atom boundary."));
+    }
+    // Add reference that marks start of data-in-code.
+    atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
+                       handler->dataInCodeTransitionStart(*atom),
+                       offsetInAtom, atom, entry.kind);
+
+    // Peek at next entry, if it starts where this one ends, skip ending ref.
+    if (nextIndex < normalizedFile.dataInCode.size()) {
+      const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex];
+      if (nextEntry.offset == (entry.offset + entry.length))
+        continue;
+    }
+
+    // If data goes to end of function, skip ending ref.
+    if ((offsetInAtom + entry.length) == atom->size())
+      continue;
+
+    // Add reference that marks end of data-in-code.
+    atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
+                       handler->dataInCodeTransitionEnd(*atom),
+                       offsetInAtom+entry.length, atom, 0);
+  }
+
+  // Cache some attributes on the file for use later.
+  file->setFlags(normalizedFile.flags);
+  file->setArch(normalizedFile.arch);
+  file->setOS(normalizedFile.os);
+  file->setMinVersion(normalizedFile.minOSverson);
+  file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind);
+
+  // Sort references in each atom to their canonical order.
+  for (const DefinedAtom* defAtom : file->defined()) {
+    reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences();
+  }
+
+  if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs))
+    return err;
+
+  return llvm::Error::success();
+}
+
+llvm::Error
+normalizedDylibToAtoms(MachODylibFile *file,
+                       const NormalizedFile &normalizedFile,
+                       bool copyRefs) {
+  file->setInstallName(normalizedFile.installName);
+  file->setCompatVersion(normalizedFile.compatVersion);
+  file->setCurrentVersion(normalizedFile.currentVersion);
+
+  // Tell MachODylibFile object about all symbols it exports.
+  if (!normalizedFile.exportInfo.empty()) {
+    // If exports trie exists, use it instead of traditional symbol table.
+    for (const Export &exp : normalizedFile.exportInfo) {
+      bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
+      // StringRefs from export iterator are ephemeral, so force copy.
+      file->addExportedSymbol(exp.name, weakDef, true);
+    }
+  } else {
+    for (auto &sym : normalizedFile.globalSymbols) {
+      assert((sym.scope & N_EXT) && "only expect external symbols here");
+      bool weakDef = (sym.desc & N_WEAK_DEF);
+      file->addExportedSymbol(sym.name, weakDef, copyRefs);
+    }
+  }
+  // Tell MachODylibFile object about all dylibs it re-exports.
+  for (const DependentDylib &dep : normalizedFile.dependentDylibs) {
+    if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB)
+      file->addReExportedDylib(dep.path);
+  }
+  return llvm::Error::success();
+}
+
+void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType,
+                                          StringRef &segmentName,
+                                          StringRef &sectionName,
+                                          SectionType &sectionType,
+                                          SectionAttr &sectionAttrs,
+                                          bool &relocsToDefinedCanBeImplicit) {
+
+  for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
+                                 p->atomType != DefinedAtom::typeUnknown; ++p) {
+    if (p->atomType != atomType)
+      continue;
+    // Wild carded entries are ignored for reverse lookups.
+    if (p->segmentName.empty() || p->sectionName.empty())
+      continue;
+    segmentName = p->segmentName;
+    sectionName = p->sectionName;
+    sectionType = p->sectionType;
+    sectionAttrs = 0;
+    relocsToDefinedCanBeImplicit = false;
+    if (atomType == DefinedAtom::typeCode)
+      sectionAttrs = S_ATTR_PURE_INSTRUCTIONS;
+    if (atomType == DefinedAtom::typeCFI)
+      relocsToDefinedCanBeImplicit = true;
+    return;
+  }
+  llvm_unreachable("content type not yet supported");
+}
+
+llvm::Expected<std::unique_ptr<lld::File>>
+normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
+                  bool copyRefs) {
+  switch (normalizedFile.fileType) {
+  case MH_DYLIB:
+  case MH_DYLIB_STUB:
+    return dylibToAtoms(normalizedFile, path, copyRefs);
+  case MH_OBJECT:
+    return objectToAtoms(normalizedFile, path, copyRefs);
+  default:
+    llvm_unreachable("unhandled MachO file type!");
+  }
+}
+
+} // namespace normalized
+} // namespace mach_o
+} // namespace lld