summaryrefslogtreecommitdiff
path: root/lib/ReaderWriter/Native/NativeFileFormat.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/ReaderWriter/Native/NativeFileFormat.h')
-rw-r--r--lib/ReaderWriter/Native/NativeFileFormat.h258
1 files changed, 258 insertions, 0 deletions
diff --git a/lib/ReaderWriter/Native/NativeFileFormat.h b/lib/ReaderWriter/Native/NativeFileFormat.h
new file mode 100644
index 000000000000..535072fe2314
--- /dev/null
+++ b/lib/ReaderWriter/Native/NativeFileFormat.h
@@ -0,0 +1,258 @@
+//===- lib/ReaderWriter/Native/NativeFileFormat.h -------------------------===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_READER_WRITER_NATIVE_NATIVE_FILE_FORMAT_H
+#define LLD_READER_WRITER_NATIVE_NATIVE_FILE_FORMAT_H
+
+#include "llvm/Support/DataTypes.h"
+#include <cstdint>
+
+namespace lld {
+
+//
+// Overview:
+//
+// The number one design goal of this file format is enable the linker to
+// read object files into in-memory Atom objects extremely quickly.
+// The second design goal is to enable future modifications to the
+// Atom attribute model.
+//
+// The llvm native object file format is not like traditional object file
+// formats (e.g. ELF, COFF, mach-o). There is no symbol table and no
+// sections. Instead the file is essentially an array of archived Atoms.
+// It is *not* serialized Atoms which would require deserialization into
+// in memory objects. Instead it is an array of read-only info about each
+// Atom. The NativeReader bulk creates in-memory Atoms which just have
+// an ivar which points to the read-only info for that Atom. No additional
+// processing is done to construct the in-memory Atoms. All Atom attribute
+// getter methods are virtual calls which dig up the info they need from the
+// ivar data.
+//
+// To support the gradual evolution of Atom attributes, the Atom read-only
+// data is versioned. The NativeReader chooses which in-memory Atom class
+// to use based on the version. What this means is that if new attributes
+// are added (or changed) in the Atom model, a new native atom class and
+// read-only atom info struct needs to be defined. Then, all the existing
+// native reader atom classes need to be modified to do their best effort
+// to map their old style read-only data to the new Atom model. At some point
+// some classes to support old versions may be dropped.
+//
+//
+// Details:
+//
+// The native object file format consists of a header that specifies the
+// endianness of the file and the architecture along with a list of "chunks"
+// in the file. A Chunk is simply a tagged range of the file. There is
+// one chunk for the array of atom infos. There is another chunk for the
+// string pool, and another for the content pool.
+//
+// It turns out there most atoms have very similar sets of attributes, only
+// the name and content attribute vary. To exploit this fact to reduce the file
+// size, the atom read-only info contains just the name and content info plus
+// a reference to which attribute set it uses. The attribute sets are stored
+// in another chunk.
+//
+
+
+//
+// An entry in the NativeFileHeader that describes one chunk of the file.
+//
+struct NativeChunk {
+ uint32_t signature;
+ uint32_t fileOffset;
+ uint32_t fileSize;
+ uint32_t elementCount;
+};
+
+
+//
+// The header in a native object file
+//
+struct NativeFileHeader {
+ uint8_t magic[16];
+ uint32_t endian;
+ uint32_t architecture;
+ uint32_t fileSize;
+ uint32_t chunkCount;
+ // NativeChunk chunks[]
+};
+
+//
+// Possible values for NativeChunk.signature field
+//
+enum NativeChunkSignatures {
+ NCS_DefinedAtomsV1 = 1,
+ NCS_AttributesArrayV1 = 2,
+ NCS_AbsoluteAttributesV1 = 12,
+ NCS_UndefinedAtomsV1 = 3,
+ NCS_SharedLibraryAtomsV1 = 4,
+ NCS_AbsoluteAtomsV1 = 5,
+ NCS_Strings = 6,
+ NCS_ReferencesArrayV1 = 7,
+ NCS_ReferencesArrayV2 = 8,
+ NCS_TargetsTable = 9,
+ NCS_AddendsTable = 10,
+ NCS_Content = 11,
+};
+
+//
+// The 16-bytes at the start of a native object file
+//
+#define NATIVE_FILE_HEADER_MAGIC "llvm nat obj v1 "
+
+//
+// Possible values for the NativeFileHeader.endian field
+//
+enum {
+ NFH_BigEndian = 0x42696745,
+ NFH_LittleEndian = 0x4574696c
+};
+
+
+//
+// Possible values for the NativeFileHeader.architecture field
+//
+enum {
+ NFA_x86 = 1,
+ NFA_x86_64 = 2,
+ NFA_armv6 = 3,
+ NFA_armv7 = 4,
+};
+
+
+//
+// The NCS_DefinedAtomsV1 chunk contains an array of these structs
+//
+struct NativeDefinedAtomIvarsV1 {
+ uint32_t nameOffset;
+ uint32_t attributesOffset;
+ uint32_t referencesStartIndex;
+ uint32_t referencesCount;
+ uint32_t contentOffset;
+ uint32_t contentSize;
+ uint64_t sectionSize;
+};
+
+
+//
+// The NCS_AttributesArrayV1 chunk contains an array of these structs
+//
+struct NativeAtomAttributesV1 {
+ uint32_t sectionNameOffset;
+ uint16_t align2;
+ uint16_t alignModulus;
+ uint8_t scope;
+ uint8_t interposable;
+ uint8_t merge;
+ uint8_t contentType;
+ uint8_t sectionChoice;
+ uint8_t deadStrip;
+ uint8_t dynamicExport;
+ uint8_t permissions;
+ uint8_t alias;
+ uint8_t codeModel;
+};
+
+
+
+//
+// The NCS_UndefinedAtomsV1 chunk contains an array of these structs
+//
+struct NativeUndefinedAtomIvarsV1 {
+ uint32_t nameOffset;
+ uint32_t flags;
+ uint32_t fallbackNameOffset;
+};
+
+
+//
+// The NCS_SharedLibraryAtomsV1 chunk contains an array of these structs
+//
+struct NativeSharedLibraryAtomIvarsV1 {
+ uint64_t size;
+ uint32_t nameOffset;
+ uint32_t loadNameOffset;
+ uint32_t type;
+ uint32_t flags;
+};
+
+
+
+//
+// The NCS_AbsoluteAtomsV1 chunk contains an array of these structs
+//
+struct NativeAbsoluteAtomIvarsV1 {
+ uint32_t nameOffset;
+ uint32_t attributesOffset;
+ uint32_t reserved;
+ uint64_t value;
+};
+
+
+
+//
+// The NCS_ReferencesArrayV1 chunk contains an array of these structs
+//
+struct NativeReferenceIvarsV1 {
+ enum {
+ noTarget = UINT16_MAX
+ };
+ uint32_t offsetInAtom;
+ uint16_t kindValue;
+ uint8_t kindNamespace;
+ uint8_t kindArch;
+ uint16_t targetIndex;
+ uint16_t addendIndex;
+};
+
+
+//
+// The NCS_ReferencesArrayV2 chunk contains an array of these structs
+//
+struct NativeReferenceIvarsV2 {
+ enum : unsigned {
+ noTarget = UINT32_MAX
+ };
+ uint64_t offsetInAtom;
+ int64_t addend;
+ uint16_t kindValue;
+ uint8_t kindNamespace;
+ uint8_t kindArch;
+ uint32_t targetIndex;
+ uint32_t tag;
+};
+
+
+//
+// The NCS_TargetsTable chunk contains an array of uint32_t entries.
+// The C++ class Reference has a target() method that returns a
+// pointer to another Atom. We can't have pointers in object files,
+// so instead NativeReferenceIvarsV1 contains an index to the target.
+// The index is into this NCS_TargetsTable of uint32_t entries.
+// The values in this table are the index of the (target) atom in this file.
+// For DefinedAtoms the value is from 0 to NCS_DefinedAtomsV1.elementCount.
+// For UndefinedAtoms the value is from NCS_DefinedAtomsV1.elementCount to
+// NCS_DefinedAtomsV1.elementCount+NCS_UndefinedAtomsV1.elementCount.
+//
+
+
+//
+// The NCS_AddendsTable chunk contains an array of int64_t entries.
+// If we allocated space for addends directly in NativeReferenceIvarsV1
+// it would double the size of that struct. But since addends are rare,
+// we instead just keep a pool of addends and have NativeReferenceIvarsV1
+// (if it needs an addend) just store the index (into the pool) of the
+// addend it needs.
+//
+
+
+
+} // namespace lld
+
+#endif // LLD_READER_WRITER_NATIVE_NATIVE_FILE_FORMAT_H