summaryrefslogtreecommitdiff
path: root/lib/esan
diff options
context:
space:
mode:
Diffstat (limited to 'lib/esan')
-rw-r--r--lib/esan/CMakeLists.txt43
-rw-r--r--lib/esan/cache_frag.cpp208
-rw-r--r--lib/esan/cache_frag.h29
-rw-r--r--lib/esan/esan.cpp270
-rw-r--r--lib/esan/esan.h60
-rw-r--r--lib/esan/esan.syms.extra4
-rw-r--r--lib/esan/esan_circular_buffer.h96
-rw-r--r--lib/esan/esan_flags.cpp58
-rw-r--r--lib/esan/esan_flags.h41
-rw-r--r--lib/esan/esan_flags.inc56
-rw-r--r--lib/esan/esan_interceptors.cpp547
-rw-r--r--lib/esan/esan_interface.cpp122
-rw-r--r--lib/esan/esan_interface_internal.h80
-rw-r--r--lib/esan/esan_linux.cpp83
-rw-r--r--lib/esan/esan_shadow.h203
-rw-r--r--lib/esan/esan_sideline.h61
-rw-r--r--lib/esan/esan_sideline_linux.cpp177
-rw-r--r--lib/esan/working_set.cpp279
-rw-r--r--lib/esan/working_set.h40
-rw-r--r--lib/esan/working_set_posix.cpp133
20 files changed, 2590 insertions, 0 deletions
diff --git a/lib/esan/CMakeLists.txt b/lib/esan/CMakeLists.txt
new file mode 100644
index 0000000000000..2a0a71b2e3482
--- /dev/null
+++ b/lib/esan/CMakeLists.txt
@@ -0,0 +1,43 @@
+# Build for the EfficiencySanitizer runtime support library.
+
+add_custom_target(esan)
+set_target_properties(esan PROPERTIES FOLDER "Compiler-RT Misc")
+
+set(ESAN_RTL_CFLAGS ${SANITIZER_COMMON_CFLAGS})
+append_rtti_flag(OFF ESAN_RTL_CFLAGS)
+
+# Pick up interception/ and sanitizer_common/ headers from the parent dir.
+include_directories(..)
+
+set(ESAN_SOURCES
+ esan.cpp
+ esan_flags.cpp
+ esan_interface.cpp
+ esan_interceptors.cpp
+ esan_linux.cpp
+ esan_sideline_linux.cpp
+ cache_frag.cpp
+ working_set.cpp
+ working_set_posix.cpp)
+
+# Build one static runtime (plus an exported-symbols file) per supported arch,
+# linking in the shared interception and sanitizer_common object libraries.
+foreach (arch ${ESAN_SUPPORTED_ARCH})
+ add_compiler_rt_runtime(clang_rt.esan
+ STATIC
+ ARCHS ${arch}
+ SOURCES ${ESAN_SOURCES}
+ $<TARGET_OBJECTS:RTInterception.${arch}>
+ $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
+ $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
+ CFLAGS ${ESAN_RTL_CFLAGS})
+ add_sanitizer_rt_symbols(clang_rt.esan
+ ARCHS ${arch}
+ EXTRA esan.syms.extra)
+ add_dependencies(esan
+ clang_rt.esan-${arch}
+ clang_rt.esan-${arch}-symbols)
+endforeach()
+
+add_dependencies(compiler-rt esan)
+
+if (COMPILER_RT_INCLUDE_TESTS)
+ # TODO(bruening): add tests via add_subdirectory(tests)
+endif()
diff --git a/lib/esan/cache_frag.cpp b/lib/esan/cache_frag.cpp
new file mode 100644
index 0000000000000..a3e612daceb1d
--- /dev/null
+++ b/lib/esan/cache_frag.cpp
@@ -0,0 +1,208 @@
+//===-- cache_frag.cpp ----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// This file contains cache fragmentation-specific code.
+//===----------------------------------------------------------------------===//
+
+#include "esan.h"
+#include "esan_flags.h"
+#include "sanitizer_common/sanitizer_addrhashmap.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include <string.h>
+
+namespace __esan {
+
+//===-- Struct field access counter runtime -------------------------------===//
+
+// This should be kept consistent with LLVM's EfficiencySanitizer StructInfo.
+// Describes one instrumented struct type; the compiler emits the counter
+// arrays and this runtime only reads and aggregates them.
+struct StructInfo {
+ const char *StructName;
+ u32 Size;
+ u32 NumFields;
+ u32 *FieldOffset; // auxiliary struct field info.
+ u32 *FieldSize; // auxiliary struct field info.
+ const char **FieldTypeName; // auxiliary struct field info.
+ u64 *FieldCounters;
+ u64 *ArrayCounter;
+ bool hasAuxFieldInfo() { return FieldOffset != nullptr; }
+};
+
+// This should be kept consistent with LLVM's EfficiencySanitizer CacheFragInfo.
+// The tool-specific information per compilation unit (module).
+struct CacheFragInfo {
+ const char *UnitName;
+ u32 NumStructs;
+ StructInfo *Structs;
+};
+
+struct StructCounter {
+ StructInfo *Struct;
+ u64 Count; // The total access count of the struct.
+ u64 Ratio; // Difference ratio for the struct layout access.
+};
+
+// We use StructHashMap to keep track of a unique copy of StructCounter.
+// The map is keyed by the address of a struct's FieldCounters array, which
+// registerStructInfo/unregisterStructInfo use to deduplicate entries.
+typedef AddrHashMap<StructCounter, 31051> StructHashMap;
+struct Context {
+ StructHashMap StructMap;
+ u32 NumStructs;
+ u64 TotalCount; // The total access count of all structs.
+};
+static Context *Ctx;
+
+// Prints the whole-run access total; emitted once when the last compilation
+// unit unregisters (see unregisterStructInfo).
+static void reportStructSummary() {
+ // FIXME: provide a better struct field access summary report.
+ Report("%s: total struct field access count = %llu\n", SanitizerToolName,
+ Ctx->TotalCount);
+}
+
+// FIXME: we are still exploring proper ways to evaluate the difference between
+// struct field counts. Currently, we use a simple formula to calculate the
+// difference ratio: V1/V2.
+static inline u64 computeDifferenceRatio(u64 Val1, u64 Val2) {
+ if (Val2 > Val1) {
+ Swap(Val1, Val2);
+ }
+ if (Val2 == 0)
+ Val2 = 1; // Avoid division by zero.
+ return (Val1 / Val2);
+}
+
+// Prints one struct's per-field counters in human-readable form.
+static void reportStructCounter(StructHashMap::Handle &Handle) {
+ const u32 TypePrintLimit = 512;
+ const char *type, *start, *end;
+ StructInfo *Struct = Handle->Struct;
+ // Union field address calculation is done via bitcast instead of GEP,
+ // so the count for union is always 0.
+ // We skip the union report to avoid confusion.
+ if (strncmp(Struct->StructName, "union.", 6) == 0)
+ return;
+ // Remove the '.' after class/struct during print.
+ if (strncmp(Struct->StructName, "class.", 6) == 0) {
+ type = "class";
+ start = &Struct->StructName[6];
+ } else {
+ type = "struct";
+ start = &Struct->StructName[7]; // Skips the 7 chars of "struct.".
+ }
+ // Remove the suffixes with '#' during print.
+ end = strchr(start, '#');
+ CHECK(end != nullptr);
+ // NOTE(review): "%.*s" expects an int precision but end - start is a
+ // ptrdiff_t; presumably benign through varargs on supported ABIs — confirm.
+ Report(" %s %.*s\n", type, end - start, start);
+ Report(" size = %u, count = %llu, ratio = %llu, array access = %llu\n",
+ Struct->Size, Handle->Count, Handle->Ratio, *Struct->ArrayCounter);
+ if (Struct->hasAuxFieldInfo()) {
+ for (u32 i = 0; i < Struct->NumFields; ++i) {
+ Report(" #%2u: offset = %u,\t size = %u,"
+ "\t count = %llu,\t type = %.*s\n",
+ i, Struct->FieldOffset[i], Struct->FieldSize[i],
+ Struct->FieldCounters[i], TypePrintLimit, Struct->FieldTypeName[i]);
+ }
+ } else {
+ for (u32 i = 0; i < Struct->NumFields; ++i) {
+ Report(" #%2u: count = %llu\n", i, Struct->FieldCounters[i]);
+ }
+ }
+}
+
+// Sums a struct's field counters into Count, accumulates adjacent-field
+// difference ratios into Ratio, folds Count into Ctx->TotalCount, and
+// reports the struct when it crosses report_threshold (or under -v).
+static void computeStructRatio(StructHashMap::Handle &Handle) {
+ Handle->Ratio = 0;
+ Handle->Count = Handle->Struct->FieldCounters[0];
+ for (u32 i = 1; i < Handle->Struct->NumFields; ++i) {
+ Handle->Count += Handle->Struct->FieldCounters[i];
+ Handle->Ratio += computeDifferenceRatio(
+ Handle->Struct->FieldCounters[i - 1], Handle->Struct->FieldCounters[i]);
+ }
+ Ctx->TotalCount += Handle->Count;
+ if (Handle->Ratio >= (u64)getFlags()->report_threshold ||
+ (Verbosity() >= 1 && Handle->Count > 0))
+ reportStructCounter(Handle);
+}
+
+// Adds each struct of a compilation unit to the global map, using the
+// FieldCounters address as the dedup key.
+static void registerStructInfo(CacheFragInfo *CacheFrag) {
+ for (u32 i = 0; i < CacheFrag->NumStructs; ++i) {
+ StructInfo *Struct = &CacheFrag->Structs[i];
+ StructHashMap::Handle H(&Ctx->StructMap, (uptr)Struct->FieldCounters);
+ if (H.created()) {
+ VPrintf(2, " Register %s: %u fields\n", Struct->StructName,
+ Struct->NumFields);
+ H->Struct = Struct;
+ ++Ctx->NumStructs;
+ } else {
+ VPrintf(2, " Duplicated %s: %u fields\n", Struct->StructName,
+ Struct->NumFields);
+ }
+ }
+}
+
+// Removes a unit's structs from the map, computing/reporting each struct's
+// ratio on the way out (the third Handle arg requests removal).
+static void unregisterStructInfo(CacheFragInfo *CacheFrag) {
+ // FIXME: if the library is unloaded before finalizeCacheFrag, we should
+ // collect the result for later report.
+ for (u32 i = 0; i < CacheFrag->NumStructs; ++i) {
+ StructInfo *Struct = &CacheFrag->Structs[i];
+ StructHashMap::Handle H(&Ctx->StructMap, (uptr)Struct->FieldCounters, true);
+ if (H.exists()) {
+ VPrintf(2, " Unregister %s: %u fields\n", Struct->StructName,
+ Struct->NumFields);
+ // FIXME: we should move this call to finalizeCacheFrag once we can
+ // iterate over the hash map there.
+ computeStructRatio(H);
+ --Ctx->NumStructs;
+ } else {
+ VPrintf(2, " Duplicated %s: %u fields\n", Struct->StructName,
+ Struct->NumFields);
+ }
+ }
+ // Emit the summary exactly once, when the last unit unregisters.
+ static bool Reported = false;
+ if (Ctx->NumStructs == 0 && !Reported) {
+ Reported = true;
+ reportStructSummary();
+ }
+}
+
+//===-- Init/exit functions -----------------------------------------------===//
+
+void processCacheFragCompilationUnitInit(void *Ptr) {
+ CacheFragInfo *CacheFrag = (CacheFragInfo *)Ptr;
+ VPrintf(2, "in esan::%s: %s with %u class(es)/struct(s)\n", __FUNCTION__,
+ CacheFrag->UnitName, CacheFrag->NumStructs);
+ registerStructInfo(CacheFrag);
+}
+
+void processCacheFragCompilationUnitExit(void *Ptr) {
+ CacheFragInfo *CacheFrag = (CacheFragInfo *)Ptr;
+ VPrintf(2, "in esan::%s: %s with %u class(es)/struct(s)\n", __FUNCTION__,
+ CacheFrag->UnitName, CacheFrag->NumStructs);
+ unregisterStructInfo(CacheFrag);
+}
+
+void initializeCacheFrag() {
+ VPrintf(2, "in esan::%s\n", __FUNCTION__);
+ // We use placement new to initialize Ctx before C++ static initialization.
+ // We make CtxMem 8-byte aligned for atomic operations in AddrHashMap.
+ static u64 CtxMem[sizeof(Context) / sizeof(u64) + 1];
+ Ctx = new (CtxMem) Context();
+ Ctx->NumStructs = 0;
+}
+
+int finalizeCacheFrag() {
+ VPrintf(2, "in esan::%s\n", __FUNCTION__);
+ return 0;
+}
+
+void reportCacheFrag() {
+ VPrintf(2, "in esan::%s\n", __FUNCTION__);
+ // FIXME: Not yet implemented. We need to iterate over all of the
+ // compilation unit data.
+}
+
+} // namespace __esan
diff --git a/lib/esan/cache_frag.h b/lib/esan/cache_frag.h
new file mode 100644
index 0000000000000..646d3f85ed97a
--- /dev/null
+++ b/lib/esan/cache_frag.h
@@ -0,0 +1,29 @@
+//===-- cache_frag.h --------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Header for cache-fragmentation-specific code.
+//===----------------------------------------------------------------------===//
+
+#ifndef CACHE_FRAG_H
+#define CACHE_FRAG_H
+
+namespace __esan {
+
+void processCacheFragCompilationUnitInit(void *Ptr);
+void processCacheFragCompilationUnitExit(void *Ptr);
+
+void initializeCacheFrag();
+int finalizeCacheFrag();
+void reportCacheFrag();
+
+} // namespace __esan
+
+#endif // CACHE_FRAG_H
diff --git a/lib/esan/esan.cpp b/lib/esan/esan.cpp
new file mode 100644
index 0000000000000..2fb77894d4fb6
--- /dev/null
+++ b/lib/esan/esan.cpp
@@ -0,0 +1,270 @@
+//===-- esan.cpp ----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Main file (entry points) for the Esan run-time.
+//===----------------------------------------------------------------------===//
+
+#include "esan.h"
+#include "esan_flags.h"
+#include "esan_interface_internal.h"
+#include "esan_shadow.h"
+#include "cache_frag.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_flags.h"
+#include "working_set.h"
+
+// See comment below.
+// NOTE(review): this local prototype deviates from the Itanium C++ ABI's
+// __cxa_atexit(void (*)(void *), void *, void *); presumably the omitted
+// arguments are benign for the registration done in initializeLibrary, but
+// confirm this is safe on all supported platforms/ABIs.
+extern "C" {
+extern void __cxa_atexit(void (*function)(void));
+}
+
+namespace __esan {
+
+// Init state: set/cleared only by initializeLibrary, which uses them to
+// guard against double-init from early interceptors.
+bool EsanIsInitialized;
+bool EsanDuringInit;
+ShadowMapping Mapping;
+
+// Different tools use different scales within the same shadow mapping scheme.
+// The scale used here must match that used by the compiler instrumentation.
+// This array is indexed by the ToolType enum.
+static const uptr ShadowScale[] = {
+ 0, // ESAN_None.
+ 2, // ESAN_CacheFrag: 4B:1B, so 4 to 1 == >>2.
+ 6, // ESAN_WorkingSet: 64B:1B, so 64 to 1 == >>6.
+};
+
+// We are combining multiple performance tuning tools under the umbrella of
+// one EfficiencySanitizer super-tool. Most of our tools have very similar
+// memory access instrumentation, shadow memory mapping, libc interception,
+// etc., and there is typically more shared code than distinct code.
+//
+// We are not willing to dispatch on tool dynamically in our fastpath
+// instrumentation: thus, which tool to use is a static option selected
+// at compile time and passed to __esan_init().
+//
+// We are willing to pay the overhead of tool dispatch in the slowpath to more
+// easily share code. We expect to only come here rarely.
+// If this becomes a performance hit, we can add separate interface
+// routines for each subtool (e.g., __esan_cache_frag_aligned_load_4).
+// But for libc interceptors, we'll have to do one of the following:
+// A) Add multiple-include support to sanitizer_common_interceptors.inc,
+// instantiate it separately for each tool, and call the selected
+// tool's intercept setup code.
+// B) Build separate static runtime libraries, one for each tool.
+// C) Completely split the tools into separate sanitizers.
+
+// Slowpath dispatch for a memory access of Size bytes at Addr from PC.
+void processRangeAccess(uptr PC, uptr Addr, int Size, bool IsWrite) {
+ VPrintf(3, "in esan::%s %p: %c %p %d\n", __FUNCTION__, PC,
+ IsWrite ? 'w' : 'r', Addr, Size);
+ if (__esan_which_tool == ESAN_CacheFrag) {
+ // TODO(bruening): add shadow mapping and update shadow bits here.
+ // We'll move this to cache_frag.cpp once we have something.
+ } else if (__esan_which_tool == ESAN_WorkingSet) {
+ processRangeAccessWorkingSet(PC, Addr, Size, IsWrite);
+ }
+}
+
+// The three process* routines below return whether the intercepted libc
+// call should proceed to the real function; only the working-set tool,
+// which owns a signal for its sideline thread, ever filters them.
+bool processSignal(int SigNum, void (*Handler)(int), void (**Result)(int)) {
+ if (__esan_which_tool == ESAN_WorkingSet)
+ return processWorkingSetSignal(SigNum, Handler, Result);
+ return true;
+}
+
+bool processSigaction(int SigNum, const void *Act, void *OldAct) {
+ if (__esan_which_tool == ESAN_WorkingSet)
+ return processWorkingSetSigaction(SigNum, Act, OldAct);
+ return true;
+}
+
+bool processSigprocmask(int How, void *Set, void *OldSet) {
+ if (__esan_which_tool == ESAN_WorkingSet)
+ return processWorkingSetSigprocmask(How, Set, OldSet);
+ return true;
+}
+
+#if SANITIZER_DEBUG
+// Debug-build-only consistency check of the app<->shadow translation for
+// every candidate scale; always returns true so it can sit inside a DCHECK.
+static bool verifyShadowScheme() {
+ // Sanity checks for our shadow mapping scheme.
+ uptr AppStart, AppEnd;
+ if (Verbosity() >= 3) {
+ for (int i = 0; getAppRegion(i, &AppStart, &AppEnd); ++i) {
+ VPrintf(3, "App #%d: [%zx-%zx) (%zuGB)\n", i, AppStart, AppEnd,
+ (AppEnd - AppStart) >> 30);
+ }
+ }
+ for (int Scale = 0; Scale < 8; ++Scale) {
+ Mapping.initialize(Scale);
+ if (Verbosity() >= 3) {
+ VPrintf(3, "\nChecking scale %d\n", Scale);
+ uptr ShadowStart, ShadowEnd;
+ for (int i = 0; getShadowRegion(i, &ShadowStart, &ShadowEnd); ++i) {
+ VPrintf(3, "Shadow #%d: [%zx-%zx) (%zuGB)\n", i, ShadowStart,
+ ShadowEnd, (ShadowEnd - ShadowStart) >> 30);
+ }
+ for (int i = 0; getShadowRegion(i, &ShadowStart, &ShadowEnd); ++i) {
+ VPrintf(3, "Shadow(Shadow) #%d: [%zx-%zx)\n", i,
+ appToShadow(ShadowStart), appToShadow(ShadowEnd - 1)+1);
+ }
+ }
+ // Region boundaries must be exact: app ends are not shadow, shadow of
+ // app is shadow, and shadow-of-shadow never lands back in shadow.
+ for (int i = 0; getAppRegion(i, &AppStart, &AppEnd); ++i) {
+ DCHECK(isAppMem(AppStart));
+ DCHECK(!isAppMem(AppStart - 1));
+ DCHECK(isAppMem(AppEnd - 1));
+ DCHECK(!isAppMem(AppEnd));
+ DCHECK(!isShadowMem(AppStart));
+ DCHECK(!isShadowMem(AppEnd - 1));
+ DCHECK(isShadowMem(appToShadow(AppStart)));
+ DCHECK(isShadowMem(appToShadow(AppEnd - 1)));
+ // Double-shadow checks.
+ DCHECK(!isShadowMem(appToShadow(appToShadow(AppStart))));
+ DCHECK(!isShadowMem(appToShadow(appToShadow(AppEnd - 1))));
+ }
+ // Ensure no shadow regions overlap each other.
+ uptr ShadowAStart, ShadowBStart, ShadowAEnd, ShadowBEnd;
+ for (int i = 0; getShadowRegion(i, &ShadowAStart, &ShadowAEnd); ++i) {
+ for (int j = 0; getShadowRegion(j, &ShadowBStart, &ShadowBEnd); ++j) {
+ DCHECK(i == j || ShadowAStart >= ShadowBEnd ||
+ ShadowAEnd <= ShadowBStart);
+ }
+ }
+ }
+ return true;
+}
+#endif
+
+// Reserves and maps the shadow regions for the selected tool's scale.
+static void initializeShadow() {
+ verifyAddressSpace();
+
+ // verifyShadowScheme is only defined under SANITIZER_DEBUG (above);
+ // presumably DCHECK expands to nothing otherwise, so this line vanishes
+ // in release builds — note it also leaves Mapping at the last test scale,
+ // which the initialize() call below immediately overwrites.
+ DCHECK(verifyShadowScheme());
+
+ Mapping.initialize(ShadowScale[__esan_which_tool]);
+
+ VPrintf(1, "Shadow scale=%d offset=%p\n", Mapping.Scale, Mapping.Offset);
+
+ uptr ShadowStart, ShadowEnd;
+ for (int i = 0; getShadowRegion(i, &ShadowStart, &ShadowEnd); ++i) {
+ VPrintf(1, "Shadow #%d: [%zx-%zx) (%zuGB)\n", i, ShadowStart, ShadowEnd,
+ (ShadowEnd - ShadowStart) >> 30);
+
+ uptr Map;
+ if (__esan_which_tool == ESAN_WorkingSet) {
+ // We want to identify all shadow pages that are touched so we start
+ // out inaccessible.
+ Map = (uptr)MmapFixedNoAccess(ShadowStart, ShadowEnd- ShadowStart,
+ "shadow");
+ } else {
+ Map = (uptr)MmapFixedNoReserve(ShadowStart, ShadowEnd - ShadowStart,
+ "shadow");
+ }
+ if (Map != ShadowStart) {
+ Printf("FATAL: EfficiencySanitizer failed to map its shadow memory.\n");
+ Die();
+ }
+
+ if (common_flags()->no_huge_pages_for_shadow)
+ NoHugePagesInRegion(ShadowStart, ShadowEnd - ShadowStart);
+ if (common_flags()->use_madv_dontdump)
+ DontDumpShadowMemory(ShadowStart, ShadowEnd - ShadowStart);
+
+ // TODO: Call MmapNoAccess() on in-between regions.
+ }
+}
+
+// Runtime entry point: called from __esan_init with the compile-time tool.
+void initializeLibrary(ToolType Tool) {
+ // We assume there is only one thread during init, but we need to
+ // guard against double-init when we're (re-)called from an
+ // early interceptor.
+ if (EsanIsInitialized || EsanDuringInit)
+ return;
+ EsanDuringInit = true;
+ CHECK(Tool == __esan_which_tool);
+ SanitizerToolName = "EfficiencySanitizer";
+ CacheBinaryName();
+ initializeFlags();
+
+ // Intercepting libc _exit or exit via COMMON_INTERCEPTOR_ON_EXIT only
+ // finalizes on an explicit exit call by the app. To handle a normal
+ // exit we register an atexit handler.
+ // NOTE(review): the cast discards finalizeLibrary's int return to match
+ // the local __cxa_atexit declaration above.
+ ::__cxa_atexit((void (*)())finalizeLibrary);
+
+ VPrintf(1, "in esan::%s\n", __FUNCTION__);
+ if (__esan_which_tool <= ESAN_None || __esan_which_tool >= ESAN_Max) {
+ Printf("ERROR: unknown tool %d requested\n", __esan_which_tool);
+ Die();
+ }
+
+ initializeShadow();
+ if (__esan_which_tool == ESAN_WorkingSet)
+ initializeShadowWorkingSet();
+
+ initializeInterceptors();
+
+ if (__esan_which_tool == ESAN_CacheFrag) {
+ initializeCacheFrag();
+ } else if (__esan_which_tool == ESAN_WorkingSet) {
+ initializeWorkingSet();
+ }
+
+ EsanIsInitialized = true;
+ EsanDuringInit = false;
+}
+
+// Per-tool finalization; the return value becomes the tool's exit status
+// contribution (0 when no tool-specific finalizer applies).
+int finalizeLibrary() {
+ VPrintf(1, "in esan::%s\n", __FUNCTION__);
+ if (__esan_which_tool == ESAN_CacheFrag) {
+ return finalizeCacheFrag();
+ } else if (__esan_which_tool == ESAN_WorkingSet) {
+ return finalizeWorkingSet();
+ }
+ return 0;
+}
+
+void reportResults() {
+ VPrintf(1, "in esan::%s\n", __FUNCTION__);
+ if (__esan_which_tool == ESAN_CacheFrag) {
+ return reportCacheFrag();
+ } else if (__esan_which_tool == ESAN_WorkingSet) {
+ return reportWorkingSet();
+ }
+}
+
+// Ptr is the compiler-emitted per-unit data; only the cache-frag tool
+// passes a non-null pointer.
+void processCompilationUnitInit(void *Ptr) {
+ VPrintf(2, "in esan::%s\n", __FUNCTION__);
+ if (__esan_which_tool == ESAN_CacheFrag) {
+ DCHECK(Ptr != nullptr);
+ processCacheFragCompilationUnitInit(Ptr);
+ } else {
+ DCHECK(Ptr == nullptr);
+ }
+}
+
+// This is called when the containing module is unloaded.
+// For the main executable module, this is called after finalizeLibrary.
+void processCompilationUnitExit(void *Ptr) {
+ VPrintf(2, "in esan::%s\n", __FUNCTION__);
+ if (__esan_which_tool == ESAN_CacheFrag) {
+ DCHECK(Ptr != nullptr);
+ processCacheFragCompilationUnitExit(Ptr);
+ } else {
+ DCHECK(Ptr == nullptr);
+ }
+}
+
+unsigned int getSampleCount() {
+ VPrintf(1, "in esan::%s\n", __FUNCTION__);
+ if (__esan_which_tool == ESAN_WorkingSet) {
+ return getSampleCountWorkingSet();
+ }
+ return 0;
+}
+
+} // namespace __esan
diff --git a/lib/esan/esan.h b/lib/esan/esan.h
new file mode 100644
index 0000000000000..5a0dde627888f
--- /dev/null
+++ b/lib/esan/esan.h
@@ -0,0 +1,60 @@
+//===-- esan.h --------------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Main internal esan header file.
+//
+// Ground rules:
+// - C++ run-time should not be used (static CTORs, RTTI, exceptions, static
+// function-scope locals)
+// - All functions/classes/etc reside in namespace __esan, except for those
+// declared in esan_interface_internal.h.
+// - Platform-specific files should be used instead of ifdefs (*).
+// - No system headers included in header files (*).
+// - Platform specific headers included only into platform-specific files (*).
+//
+// (*) Except when inlining is critical for performance.
+//===----------------------------------------------------------------------===//
+
+#ifndef ESAN_H
+#define ESAN_H
+
+#include "interception/interception.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "esan_interface_internal.h"
+
+namespace __esan {
+
+// Init state, managed by initializeLibrary (see esan.cpp).
+extern bool EsanIsInitialized;
+extern bool EsanDuringInit;
+
+void initializeLibrary(ToolType Tool);
+int finalizeLibrary();
+void reportResults();
+unsigned int getSampleCount();
+// Esan creates the variable per tool per compilation unit at compile time
+// and passes its pointer Ptr to the runtime library.
+void processCompilationUnitInit(void *Ptr);
+void processCompilationUnitExit(void *Ptr);
+// Slowpath handler for an instrumented memory access (see esan.cpp).
+void processRangeAccess(uptr PC, uptr Addr, int Size, bool IsWrite);
+void initializeInterceptors();
+
+// Platform-dependent routines.
+void verifyAddressSpace();
+bool fixMmapAddr(void **Addr, SIZE_T Size, int Flags);
+uptr checkMmapResult(uptr Addr, SIZE_T Size);
+// The return value indicates whether to call the real version or not.
+bool processSignal(int SigNum, void (*Handler)(int), void (**Result)(int));
+bool processSigaction(int SigNum, const void *Act, void *OldAct);
+bool processSigprocmask(int How, void *Set, void *OldSet);
+
+} // namespace __esan
+
+#endif // ESAN_H
diff --git a/lib/esan/esan.syms.extra b/lib/esan/esan.syms.extra
new file mode 100644
index 0000000000000..d6397d4c350ff
--- /dev/null
+++ b/lib/esan/esan.syms.extra
@@ -0,0 +1,4 @@
+__esan_init
+__esan_exit
+__esan_aligned*
+__esan_unaligned*
diff --git a/lib/esan/esan_circular_buffer.h b/lib/esan/esan_circular_buffer.h
new file mode 100644
index 0000000000000..9ce102d04d8f9
--- /dev/null
+++ b/lib/esan/esan_circular_buffer.h
@@ -0,0 +1,96 @@
+//===-- esan_circular_buffer.h ----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Circular buffer data structure.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_common.h"
+
+namespace __esan {
+
+// A circular buffer for POD data whose memory is allocated using mmap.
+// There are two usage models: one is to use initialize/free (for global
+// instances) and the other is to use placement new with the
+// constructor and to call the destructor or free (they are equivalent).
+template<typename T>
+class CircularBuffer {
+ public:
+ // To support global instances we cannot initialize any field in the
+ // default constructor.
+ // NOTE(review): a default-constructed instance that never calls
+ // initialize() leaves WasConstructed indeterminate, which the destructor
+ // then reads — fine for zero-initialized globals; confirm stack instances
+ // always call initialize().
+ explicit CircularBuffer() {}
+ CircularBuffer(uptr BufferCapacity) {
+ initialize(BufferCapacity);
+ WasConstructed = true;
+ }
+ ~CircularBuffer() {
+ if (WasConstructed) // Else caller will call free() explicitly.
+ free();
+ }
+ // Allocates storage for BufferCapacity elements and resets the buffer.
+ void initialize(uptr BufferCapacity) {
+ Capacity = BufferCapacity;
+ // MmapOrDie rounds up to the page size for us.
+ Data = (T *)MmapOrDie(Capacity * sizeof(T), "CircularBuffer");
+ StartIdx = 0;
+ Count = 0;
+ WasConstructed = false;
+ }
+ // Releases the mmap'd storage; Data must have come from initialize().
+ void free() {
+ UnmapOrDie(Data, Capacity * sizeof(T));
+ }
+ // Idx 0 is the logically oldest element; the modulo maps it to storage.
+ T &operator[](uptr Idx) {
+ CHECK_LT(Idx, Count);
+ uptr ArrayIdx = (StartIdx + Idx) % Capacity;
+ return Data[ArrayIdx];
+ }
+ const T &operator[](uptr Idx) const {
+ CHECK_LT(Idx, Count);
+ uptr ArrayIdx = (StartIdx + Idx) % Capacity;
+ return Data[ArrayIdx];
+ }
+ // Appends Item; when full, overwrites the oldest element by advancing
+ // StartIdx instead of growing.
+ void push_back(const T &Item) {
+ CHECK_GT(Capacity, 0);
+ uptr ArrayIdx = (StartIdx + Count) % Capacity;
+ Data[ArrayIdx] = Item;
+ if (Count < Capacity)
+ ++Count;
+ else
+ StartIdx = (StartIdx + 1) % Capacity;
+ }
+ // Returns the most recently pushed element.
+ T &back() {
+ CHECK_GT(Count, 0);
+ uptr ArrayIdx = (StartIdx + Count - 1) % Capacity;
+ return Data[ArrayIdx];
+ }
+ void pop_back() {
+ CHECK_GT(Count, 0);
+ --Count;
+ }
+ uptr size() const {
+ return Count;
+ }
+ // Drops all elements without releasing the storage.
+ void clear() {
+ StartIdx = 0;
+ Count = 0;
+ }
+ bool empty() const { return size() == 0; }
+
+ private:
+ // Declared but never defined: disallows copying (pre-C++11 =delete idiom).
+ CircularBuffer(const CircularBuffer&);
+ void operator=(const CircularBuffer&);
+
+ bool WasConstructed;
+ T *Data;
+ uptr Capacity;
+ uptr StartIdx;
+ uptr Count;
+};
+
+} // namespace __esan
diff --git a/lib/esan/esan_flags.cpp b/lib/esan/esan_flags.cpp
new file mode 100644
index 0000000000000..3b047e28be225
--- /dev/null
+++ b/lib/esan/esan_flags.cpp
@@ -0,0 +1,58 @@
+//===-- esan_flags.cpp ------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Esan flag parsing logic.
+//===----------------------------------------------------------------------===//
+
+#include "esan_flags.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_flags.h"
+
+namespace __esan {
+
+// Environment variable holding the runtime option string.
+static const char EsanOptsEnv[] = "ESAN_OPTIONS";
+
+Flags EsanFlagsDontUseDirectly;
+
+// X-macro over esan_flags.inc: assign each flag its declared default.
+void Flags::setDefaults() {
+#define ESAN_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
+#include "esan_flags.inc"
+#undef ESAN_FLAG
+}
+
+// X-macro over esan_flags.inc: register each flag by name with the parser.
+static void registerEsanFlags(FlagParser *Parser, Flags *F) {
+#define ESAN_FLAG(Type, Name, DefaultValue, Description) \
+ RegisterFlag(Parser, #Name, Description, &F->Name);
+#include "esan_flags.inc"
+#undef ESAN_FLAG
+}
+
+// Sets defaults, then overrides from ESAN_OPTIONS, and applies the common
+// sanitizer flags (help text, report path, etc.).
+void initializeFlags() {
+ SetCommonFlagsDefaults();
+ Flags *F = getFlags();
+ F->setDefaults();
+
+ FlagParser Parser;
+ registerEsanFlags(&Parser, F);
+ RegisterCommonFlags(&Parser);
+ Parser.ParseString(GetEnv(EsanOptsEnv));
+
+ InitializeCommonFlags();
+ if (Verbosity())
+ ReportUnrecognizedFlags();
+ if (common_flags()->help)
+ Parser.PrintFlagDescriptions();
+
+ __sanitizer_set_report_path(common_flags()->log_path);
+}
+
+} // namespace __esan
diff --git a/lib/esan/esan_flags.h b/lib/esan/esan_flags.h
new file mode 100644
index 0000000000000..c8f4ef5ab2b57
--- /dev/null
+++ b/lib/esan/esan_flags.h
@@ -0,0 +1,41 @@
+//===-- esan_flags.h --------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Esan runtime flags.
+//===----------------------------------------------------------------------===//
+
+#ifndef ESAN_FLAGS_H
+#define ESAN_FLAGS_H
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+
+namespace __esan {
+
+// One member per ESAN_FLAG entry in esan_flags.inc, via X-macro expansion.
+class Flags {
+public:
+#define ESAN_FLAG(Type, Name, DefaultValue, Description) Type Name;
+#include "esan_flags.inc"
+#undef ESAN_FLAG
+
+ void setDefaults();
+};
+
+extern Flags EsanFlagsDontUseDirectly;
+// Accessor for the flag singleton; use this rather than the global above.
+inline Flags *getFlags() {
+ return &EsanFlagsDontUseDirectly;
+}
+
+void initializeFlags();
+
+} // namespace __esan
+
+#endif // ESAN_FLAGS_H
diff --git a/lib/esan/esan_flags.inc b/lib/esan/esan_flags.inc
new file mode 100644
index 0000000000000..5687caca29892
--- /dev/null
+++ b/lib/esan/esan_flags.inc
@@ -0,0 +1,56 @@
+//===-- esan_flags.inc ------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Esan runtime flags.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ESAN_FLAG
+# error "Define ESAN_FLAG prior to including this file!"
+#endif
+
+// ESAN_FLAG(Type, Name, DefaultValue, Description)
+// See COMMON_FLAG in sanitizer_flags.inc for more details.
+
+//===----------------------------------------------------------------------===//
+// Cross-tool options
+//===----------------------------------------------------------------------===//
+
+ESAN_FLAG(int, cache_line_size, 64,
+ "The number of bytes in a cache line. For the working-set tool, this "
+ "cannot be changed without also changing the compiler "
+ "instrumentation.")
+
+//===----------------------------------------------------------------------===//
+// Working set tool options
+//===----------------------------------------------------------------------===//
+
+ESAN_FLAG(bool, record_snapshots, true,
+ "Working set tool: whether to sample snapshots during a run.")
+
+// Typical profiling uses a 10ms timer. Our snapshots take some work
+// to scan memory so we reduce to 20ms.
+// To disable samples, turn off record_snapshots.
+ESAN_FLAG(int, sample_freq, 20,
+ "Working set tool: sampling frequency in milliseconds.")
+
+// This controls the difference in frequency between each successive series
+// of snapshots. There are 8 in total, with number 0 using sample_freq.
+// Number N samples number N-1 every (1 << snapshot_step) instance of N-1.
+ESAN_FLAG(int, snapshot_step, 2, "Working set tool: the log of the sampling "
+ "performed for the next-higher-frequency snapshot series.")
+
+//===----------------------------------------------------------------------===//
+// Cache Fragmentation tool options
+//===----------------------------------------------------------------------===//
+
+// The difference information of a struct is reported if the struct's difference
+// score is greater than the report_threshold.
+// (The score is the sum of adjacent-field count ratios; see computeStructRatio
+// in cache_frag.cpp.)
+ESAN_FLAG(int, report_threshold, 1<<10, "Cache-frag tool: the struct difference"
+ " score threshold for reporting.")
diff --git a/lib/esan/esan_interceptors.cpp b/lib/esan/esan_interceptors.cpp
new file mode 100644
index 0000000000000..647f010852b09
--- /dev/null
+++ b/lib/esan/esan_interceptors.cpp
@@ -0,0 +1,547 @@
+//===-- esan_interceptors.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Interception routines for the esan run-time.
+//===----------------------------------------------------------------------===//
+
+#include "esan.h"
+#include "esan_shadow.h"
+#include "interception/interception.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_linux.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+
+using namespace __esan; // NOLINT
+
+#define CUR_PC() (StackTrace::GetCurrentPc())
+
+//===----------------------------------------------------------------------===//
+// Interception via sanitizer common interceptors
+//===----------------------------------------------------------------------===//
+
+// Get the per-platform defines for what is possible to intercept
+#include "sanitizer_common/sanitizer_platform_interceptors.h"
+
+// TODO(bruening): tsan disables several interceptors (getpwent, etc.) claiming
+// that interception is a perf hit: should we do the same?
+
+// We have no need to intercept:
+#undef SANITIZER_INTERCEPT_TLS_GET_ADDR
+
+// TODO(bruening): the common realpath interceptor assumes malloc is
+// intercepted! We should try to parametrize that, though we'll
+// intercept malloc soon ourselves and can then remove this undef.
+#undef SANITIZER_INTERCEPT_REALPATH
+
+// We provide our own version:
+#undef SANITIZER_INTERCEPT_SIGPROCMASK
+
+#define COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED (!EsanIsInitialized)
+
+#define COMMON_INTERCEPT_FUNCTION(name) INTERCEPT_FUNCTION(name)
+#define COMMON_INTERCEPT_FUNCTION_VER(name, ver) \
+ INTERCEPT_FUNCTION_VER(name, ver)
+
+// We must initialize during early interceptors, to support tcmalloc.
+// This means that for some apps we fully initialize prior to
+// __esan_init() being called.
+// We currently do not use ctx.
+#define COMMON_INTERCEPTOR_ENTER(ctx, func, ...) \
+ do { \
+ if (UNLIKELY(COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)) { \
+ if (!UNLIKELY(EsanDuringInit)) \
+ initializeLibrary(__esan_which_tool); \
+ return REAL(func)(__VA_ARGS__); \
+ } \
+ ctx = nullptr; \
+ (void)ctx; \
+ } while (false)
+
+#define COMMON_INTERCEPTOR_ENTER_NOIGNORE(ctx, func, ...) \
+ COMMON_INTERCEPTOR_ENTER(ctx, func, __VA_ARGS__)
+
+#define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \
+ processRangeAccess(CUR_PC(), (uptr)ptr, size, true)
+
+#define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size) \
+ processRangeAccess(CUR_PC(), (uptr)ptr, size, false)
+
+// This is only called if the app explicitly calls exit(), not on
+// a normal exit.
+#define COMMON_INTERCEPTOR_ON_EXIT(ctx) finalizeLibrary()
+
+#define COMMON_INTERCEPTOR_FILE_OPEN(ctx, file, path) \
+ do { \
+ (void)(ctx); \
+ (void)(file); \
+ (void)(path); \
+ } while (false)
+#define COMMON_INTERCEPTOR_FILE_CLOSE(ctx, file) \
+ do { \
+ (void)(ctx); \
+ (void)(file); \
+ } while (false)
+#define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle) \
+ do { \
+ (void)(filename); \
+ (void)(handle); \
+ } while (false)
+#define COMMON_INTERCEPTOR_LIBRARY_UNLOADED() \
+ do { \
+ } while (false)
+#define COMMON_INTERCEPTOR_ACQUIRE(ctx, u) \
+ do { \
+ (void)(ctx); \
+ (void)(u); \
+ } while (false)
+#define COMMON_INTERCEPTOR_RELEASE(ctx, u) \
+ do { \
+ (void)(ctx); \
+ (void)(u); \
+ } while (false)
+#define COMMON_INTERCEPTOR_DIR_ACQUIRE(ctx, path) \
+ do { \
+ (void)(ctx); \
+ (void)(path); \
+ } while (false)
+#define COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd) \
+ do { \
+ (void)(ctx); \
+ (void)(fd); \
+ } while (false)
+#define COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd) \
+ do { \
+ (void)(ctx); \
+ (void)(fd); \
+ } while (false)
+#define COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd) \
+ do { \
+ (void)(ctx); \
+ (void)(fd); \
+ } while (false)
+#define COMMON_INTERCEPTOR_FD_SOCKET_ACCEPT(ctx, fd, newfd) \
+ do { \
+ (void)(ctx); \
+ (void)(fd); \
+ (void)(newfd); \
+ } while (false)
+#define COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name) \
+ do { \
+ (void)(ctx); \
+ (void)(name); \
+ } while (false)
+#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name) \
+ do { \
+ (void)(ctx); \
+ (void)(thread); \
+ (void)(name); \
+ } while (false)
+#define COMMON_INTERCEPTOR_BLOCK_REAL(name) REAL(name)
+#define COMMON_INTERCEPTOR_MUTEX_LOCK(ctx, m) \
+ do { \
+ (void)(ctx); \
+ (void)(m); \
+ } while (false)
+#define COMMON_INTERCEPTOR_MUTEX_UNLOCK(ctx, m) \
+ do { \
+ (void)(ctx); \
+ (void)(m); \
+ } while (false)
+#define COMMON_INTERCEPTOR_MUTEX_REPAIR(ctx, m) \
+ do { \
+ (void)(ctx); \
+ (void)(m); \
+ } while (false)
+#define COMMON_INTERCEPTOR_HANDLE_RECVMSG(ctx, msg) \
+ do { \
+ (void)(ctx); \
+ (void)(msg); \
+ } while (false)
+#define COMMON_INTERCEPTOR_USER_CALLBACK_START() \
+ do { \
+ } while (false)
+#define COMMON_INTERCEPTOR_USER_CALLBACK_END() \
+ do { \
+ } while (false)
+
+#include "sanitizer_common/sanitizer_common_interceptors.inc"
+
+//===----------------------------------------------------------------------===//
+// Syscall interception
+//===----------------------------------------------------------------------===//
+
+// We want the caller's PC b/c unlike the other function interceptors these
+// are separate pre and post functions called around the app's syscall().
+
+#define COMMON_SYSCALL_PRE_READ_RANGE(ptr, size) \
+ processRangeAccess(GET_CALLER_PC(), (uptr)ptr, size, false)
+
+#define COMMON_SYSCALL_PRE_WRITE_RANGE(ptr, size) \
+ do { \
+ (void)(ptr); \
+ (void)(size); \
+ } while (false)
+
+#define COMMON_SYSCALL_POST_READ_RANGE(ptr, size) \
+ do { \
+ (void)(ptr); \
+ (void)(size); \
+ } while (false)
+
+// The actual amount written is in post, not pre.
+#define COMMON_SYSCALL_POST_WRITE_RANGE(ptr, size) \
+ processRangeAccess(GET_CALLER_PC(), (uptr)ptr, size, true)
+
+#define COMMON_SYSCALL_ACQUIRE(addr) \
+ do { \
+ (void)(addr); \
+ } while (false)
+#define COMMON_SYSCALL_RELEASE(addr) \
+ do { \
+ (void)(addr); \
+ } while (false)
+#define COMMON_SYSCALL_FD_CLOSE(fd) \
+ do { \
+ (void)(fd); \
+ } while (false)
+#define COMMON_SYSCALL_FD_ACQUIRE(fd) \
+ do { \
+ (void)(fd); \
+ } while (false)
+#define COMMON_SYSCALL_FD_RELEASE(fd) \
+ do { \
+ (void)(fd); \
+ } while (false)
+#define COMMON_SYSCALL_PRE_FORK() \
+ do { \
+ } while (false)
+#define COMMON_SYSCALL_POST_FORK(res) \
+ do { \
+ (void)(res); \
+ } while (false)
+
+#include "sanitizer_common/sanitizer_common_syscalls.inc"
+
+//===----------------------------------------------------------------------===//
+// Custom interceptors
+//===----------------------------------------------------------------------===//
+
+// TODO(bruening): move more of these to the common interception pool as they
+// are shared with tsan and asan.
+// While our other files match LLVM style, here we match sanitizer style as we
+// expect to move these to the common pool.
+
+// Intercepts strcpy: records the read of src and the write of dst in the
+// shadow (both ranges include the terminating NUL) before delegating to the
+// real strcpy.
+INTERCEPTOR(char *, strcpy, char *dst, const char *src) { // NOLINT
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, strcpy, dst, src);
+  uptr srclen = internal_strlen(src);
+  // Report the accesses before performing the copy.
+  COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, srclen + 1);
+  COMMON_INTERCEPTOR_READ_RANGE(ctx, src, srclen + 1);
+  return REAL(strcpy)(dst, src); // NOLINT
+}
+
+// Intercepts strncpy: records the bytes actually transferred from src
+// (up to and including the NUL, capped at n) before delegating.
+// src is const-qualified to match the libc prototype; the qualifier is
+// ABI-neutral so symbol interception is unaffected.
+// NOTE(review): the real strncpy zero-fills dst out to n bytes when
+// srclen + 1 < n; those padding writes are not reported here -- confirm
+// whether the tools should see the full n-byte write.
+INTERCEPTOR(char *, strncpy, char *dst, const char *src, uptr n) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, strncpy, dst, src, n);
+  uptr srclen = internal_strnlen(src, n);
+  // Include the NUL only if it fits within the n-byte limit.
+  uptr copied_size = srclen + 1 > n ? n : srclen + 1;
+  COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, copied_size);
+  COMMON_INTERCEPTOR_READ_RANGE(ctx, src, copied_size);
+  return REAL(strncpy)(dst, src, n);
+}
+
+// Intercepts open: records the read of the NUL-terminated path string.
+// NOTE(review): open is variadic in libc; mode is read unconditionally here,
+// which is the usual sanitizer-interceptor simplification.
+INTERCEPTOR(int, open, const char *name, int flags, int mode) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, open, name, flags, mode);
+  COMMON_INTERCEPTOR_READ_STRING(ctx, name, 0);
+  return REAL(open)(name, flags, mode);
+}
+
+#if SANITIZER_LINUX
+// 64-bit-off_t variant of open, present on Linux (LFS interface).
+INTERCEPTOR(int, open64, const char *name, int flags, int mode) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, open64, name, flags, mode);
+  COMMON_INTERCEPTOR_READ_STRING(ctx, name, 0);
+  return REAL(open64)(name, flags, mode);
+}
+#define ESAN_MAYBE_INTERCEPT_OPEN64 INTERCEPT_FUNCTION(open64)
+#else
+#define ESAN_MAYBE_INTERCEPT_OPEN64
+#endif
+
+// Intercepts creat: records the read of the path string.
+INTERCEPTOR(int, creat, const char *name, int mode) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, creat, name, mode);
+  COMMON_INTERCEPTOR_READ_STRING(ctx, name, 0);
+  return REAL(creat)(name, mode);
+}
+
+#if SANITIZER_LINUX
+// 64-bit-off_t variant of creat, present on Linux (LFS interface).
+INTERCEPTOR(int, creat64, const char *name, int mode) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, creat64, name, mode);
+  COMMON_INTERCEPTOR_READ_STRING(ctx, name, 0);
+  return REAL(creat64)(name, mode);
+}
+#define ESAN_MAYBE_INTERCEPT_CREAT64 INTERCEPT_FUNCTION(creat64)
+#else
+#define ESAN_MAYBE_INTERCEPT_CREAT64
+#endif
+
+// Intercepts unlink: records the read of the path string.
+INTERCEPTOR(int, unlink, char *path) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, unlink, path);
+  COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
+  return REAL(unlink)(path);
+}
+
+// Intercepts fread: records the write of the destination buffer.
+// NOTE(review): size * nmemb can overflow uptr for hostile arguments, and
+// the reported range assumes a full read; a short read over-reports.
+INTERCEPTOR(uptr, fread, void *ptr, uptr size, uptr nmemb, void *f) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, fread, ptr, size, nmemb, f);
+  COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size * nmemb);
+  return REAL(fread)(ptr, size, nmemb, f);
+}
+
+// Intercepts fwrite: records the read of the source buffer.
+INTERCEPTOR(uptr, fwrite, const void *p, uptr size, uptr nmemb, void *f) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, fwrite, p, size, nmemb, f);
+  COMMON_INTERCEPTOR_READ_RANGE(ctx, p, size * nmemb);
+  return REAL(fwrite)(p, size, nmemb, f);
+}
+
+// Intercepts puts: records the read of the string.
+// NOTE(review): the reported range excludes the terminating NUL, which the
+// real puts does read -- confirm whether the extra byte matters to the tools.
+INTERCEPTOR(int, puts, const char *s) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, puts, s);
+  COMMON_INTERCEPTOR_READ_RANGE(ctx, s, internal_strlen(s));
+  return REAL(puts)(s);
+}
+
+// Intercepts rmdir: records the read of the path string.
+INTERCEPTOR(int, rmdir, char *path) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, rmdir, path);
+  COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
+  return REAL(rmdir)(path);
+}
+
+//===----------------------------------------------------------------------===//
+// Shadow-related interceptors
+//===----------------------------------------------------------------------===//
+
+// These are candidates for sharing with all sanitizers if shadow memory
+// support is also standardized.
+
+// Intercepts mmap to keep application mappings inside the app regions that
+// our shadow translation supports: fixMmapAddr vets/clears the hint address
+// and checkMmapResult aborts on an unsupported placement.
+INTERCEPTOR(void *, mmap, void *addr, SIZE_T sz, int prot, int flags,
+            int fd, OFF_T off) {
+  if (UNLIKELY(REAL(mmap) == nullptr)) {
+    // With esan init during interceptor init and a static libc preventing
+    // our early-calloc from triggering, we can end up here before our
+    // REAL pointer is set up.
+    return (void *)internal_mmap(addr, sz, prot, flags, fd, off);
+  }
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, mmap, addr, sz, prot, flags, fd, off);
+  if (!fixMmapAddr(&addr, sz, flags))
+    // fixMmapAddr already set errno; (void *)-1 is MAP_FAILED.
+    return (void *)-1;
+  void *result = REAL(mmap)(addr, sz, prot, flags, fd, off);
+  return (void *)checkMmapResult((uptr)result, sz);
+}
+
+#if SANITIZER_LINUX
+// 64-bit-offset variant of the mmap interceptor (LFS interface).
+// NOTE(review): unlike mmap above, there is no REAL(mmap64) == nullptr
+// fallback here -- confirm mmap64 cannot be reached before interceptor init.
+INTERCEPTOR(void *, mmap64, void *addr, SIZE_T sz, int prot, int flags,
+            int fd, OFF64_T off) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, mmap64, addr, sz, prot, flags, fd, off);
+  if (!fixMmapAddr(&addr, sz, flags))
+    return (void *)-1;
+  void *result = REAL(mmap64)(addr, sz, prot, flags, fd, off);
+  return (void *)checkMmapResult((uptr)result, sz);
+}
+#define ESAN_MAYBE_INTERCEPT_MMAP64 INTERCEPT_FUNCTION(mmap64)
+#else
+#define ESAN_MAYBE_INTERCEPT_MMAP64
+#endif
+
+//===----------------------------------------------------------------------===//
+// Signal-related interceptors
+//===----------------------------------------------------------------------===//
+
+#if SANITIZER_LINUX
+typedef void (*signal_handler_t)(int);
+// Intercepts signal so the runtime can veto or virtualize handler changes
+// for signals it owns: when processSignal returns false it has handled the
+// request itself and filled in *result; otherwise we pass it through.
+INTERCEPTOR(signal_handler_t, signal, int signum, signal_handler_t handler) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, signal, signum, handler);
+  signal_handler_t result;
+  if (!processSignal(signum, handler, &result))
+    return result;
+  else
+    return REAL(signal)(signum, handler);
+}
+#define ESAN_MAYBE_INTERCEPT_SIGNAL INTERCEPT_FUNCTION(signal)
+#else
+// No non-Linux port exists yet; fail the build rather than silently skip.
+#error Platform not supported
+#define ESAN_MAYBE_INTERCEPT_SIGNAL
+#endif
+
+#if SANITIZER_LINUX
+DECLARE_REAL(int, sigaction, int signum, const struct sigaction *act,
+             struct sigaction *oldact)
+// Intercepts sigaction analogously to signal above: processSigaction
+// returning false means the runtime handled the request (we report success).
+INTERCEPTOR(int, sigaction, int signum, const struct sigaction *act,
+            struct sigaction *oldact) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, sigaction, signum, act, oldact);
+  if (!processSigaction(signum, act, oldact))
+    return 0;
+  else
+    return REAL(sigaction)(signum, act, oldact);
+}
+
+// This is required to properly use internal_sigaction.
+namespace __sanitizer {
+// Bridge used by sanitizer_common to reach the uninstrumented sigaction.
+// Takes void* to avoid leaking struct sigaction into common headers.
+int real_sigaction(int signum, const void *act, void *oldact) {
+  if (REAL(sigaction) == nullptr) {
+    // With an instrumented allocator, this is called during interceptor init
+    // and we need a raw syscall solution.
+    return internal_sigaction_syscall(signum, act, oldact);
+  }
+  return REAL(sigaction)(signum, (const struct sigaction *)act,
+                         (struct sigaction *)oldact);
+}
+} // namespace __sanitizer
+
+#define ESAN_MAYBE_INTERCEPT_SIGACTION INTERCEPT_FUNCTION(sigaction)
+#else
+// No non-Linux port exists yet; fail the build rather than silently skip.
+#error Platform not supported
+#define ESAN_MAYBE_INTERCEPT_SIGACTION
+#endif
+
+#if SANITIZER_LINUX
+// Intercepts sigprocmask so processSigprocmask can observe/adjust mask
+// changes (e.g. to protect signals the runtime relies on). The real call is
+// made only when processSigprocmask asks for it; on success the written
+// *oldset is reported to the shadow.
+INTERCEPTOR(int, sigprocmask, int how, __sanitizer_sigset_t *set,
+            __sanitizer_sigset_t *oldset) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, sigprocmask, how, set, oldset);
+  int res = 0;
+  if (processSigprocmask(how, set, oldset))
+    res = REAL(sigprocmask)(how, set, oldset);
+  if (!res && oldset)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldset, sizeof(*oldset));
+  return res;
+}
+#define ESAN_MAYBE_INTERCEPT_SIGPROCMASK INTERCEPT_FUNCTION(sigprocmask)
+#else
+#define ESAN_MAYBE_INTERCEPT_SIGPROCMASK
+#endif
+
+#if !SANITIZER_WINDOWS
+// Intercepts pthread_sigmask so processSigprocmask sees every change to the
+// calling thread's signal mask, mirroring the sigprocmask interceptor above.
+INTERCEPTOR(int, pthread_sigmask, int how, __sanitizer_sigset_t *set,
+            __sanitizer_sigset_t *oldset) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, pthread_sigmask, how, set, oldset);
+  int res = 0;
+  if (processSigprocmask(how, set, oldset))
+    // Call the real pthread_sigmask, not sigprocmask: pthread_sigmask
+    // reports failure via its return value (an errno code) rather than by
+    // setting errno, and callers of this interceptor expect that contract.
+    res = REAL(pthread_sigmask)(how, set, oldset);
+  if (!res && oldset)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldset, sizeof(*oldset));
+  return res;
+}
+#define ESAN_MAYBE_INTERCEPT_PTHREAD_SIGMASK INTERCEPT_FUNCTION(pthread_sigmask)
+#else
+#define ESAN_MAYBE_INTERCEPT_PTHREAD_SIGMASK
+#endif
+
+//===----------------------------------------------------------------------===//
+// Malloc interceptors
+//===----------------------------------------------------------------------===//
+
+// One-shot static buffer used to satisfy dlsym's single early calloc; see
+// handleEarlyAlloc below.
+static char early_alloc_buf[128];
+static bool used_early_alloc_buf;
+
+// Serves the single calloc that dlsym issues during interceptor init by
+// handing out early_alloc_buf instead of recursing into the allocator.
+static void *handleEarlyAlloc(uptr size) {
+  // If esan is initialized during an interceptor (which happens with some
+  // tcmalloc implementations that call pthread_mutex_lock), the call from
+  // dlsym to calloc will deadlock. There is only one such calloc (dlsym
+  // allocates a single pthread key), so we work around it by using a
+  // static buffer for the calloc request. The loader currently needs
+  // 32 bytes but we size at 128 to allow for future changes.
+  // This solution will also allow us to deliberately intercept malloc & family
+  // in the future (to perform tool actions on each allocation, without
+  // replacing the allocator), as it also solves the problem of intercepting
+  // calloc when it will itself be called before its REAL pointer is
+  // initialized.
+  // Note the strict '<': a request of exactly sizeof(early_alloc_buf) fails.
+  CHECK(!used_early_alloc_buf && size < sizeof(early_alloc_buf));
+  // We do not handle multiple threads here. This only happens at process init
+  // time, and while it's possible for a shared library to create early threads
+  // that race here, we consider that to be a corner case extreme enough that
+  // it's not worth the effort to handle.
+  used_early_alloc_buf = true;
+  return (void *)early_alloc_buf;
+}
+
+// Intercepts calloc, both to serve dlsym's early allocation (above) and to
+// report the zero-fill as a write of the whole block.
+// NOTE(review): parameter names are swapped relative to POSIX
+// calloc(nmemb, size); harmless since only the product is used, but the
+// size * n product itself is not checked for overflow here -- the real
+// calloc performs that check before allocating.
+INTERCEPTOR(void*, calloc, uptr size, uptr n) {
+  if (EsanDuringInit && REAL(calloc) == nullptr)
+    return handleEarlyAlloc(size * n);
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, calloc, size, n);
+  void *res = REAL(calloc)(size, n);
+  // The memory is zeroed and thus is all written.
+  COMMON_INTERCEPTOR_WRITE_RANGE(nullptr, (uptr)res, size * n);
+  return res;
+}
+
+// Intercepts free so that releasing the early-alloc buffer does not reach
+// the real allocator (which never allocated it).
+INTERCEPTOR(void, free, void *p) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, free, p);
+  if (p == (void *)early_alloc_buf) {
+    // We expect just a singleton use but we clear this for cleanliness.
+    used_early_alloc_buf = false;
+    return;
+  }
+  REAL(free)(p);
+}
+
+namespace __esan {
+
+// Installs all esan interceptors: the common sanitizer pool plus the custom,
+// shadow-related, signal-related, and malloc-family interceptors defined in
+// this file. Called once from library initialization.
+void initializeInterceptors() {
+  InitializeCommonInterceptors();
+
+  INTERCEPT_FUNCTION(strcpy); // NOLINT
+  INTERCEPT_FUNCTION(strncpy);
+
+  INTERCEPT_FUNCTION(open);
+  ESAN_MAYBE_INTERCEPT_OPEN64;
+  INTERCEPT_FUNCTION(creat);
+  ESAN_MAYBE_INTERCEPT_CREAT64;
+  INTERCEPT_FUNCTION(unlink);
+  INTERCEPT_FUNCTION(fread);
+  INTERCEPT_FUNCTION(fwrite);
+  INTERCEPT_FUNCTION(puts);
+  INTERCEPT_FUNCTION(rmdir);
+
+  INTERCEPT_FUNCTION(mmap);
+  ESAN_MAYBE_INTERCEPT_MMAP64;
+
+  ESAN_MAYBE_INTERCEPT_SIGNAL;
+  ESAN_MAYBE_INTERCEPT_SIGACTION;
+  ESAN_MAYBE_INTERCEPT_SIGPROCMASK;
+  ESAN_MAYBE_INTERCEPT_PTHREAD_SIGMASK;
+
+  INTERCEPT_FUNCTION(calloc);
+  INTERCEPT_FUNCTION(free);
+
+  // TODO(bruening): intercept routines that other sanitizers intercept that
+  // are not in the common pool or here yet, ideally by adding to the common
+  // pool. Examples include wcslen and bcopy.
+
+  // TODO(bruening): there are many more libc routines that read or write data
+  // structures that no sanitizer is intercepting: sigaction, strtol, etc.
+}
+
+} // namespace __esan
diff --git a/lib/esan/esan_interface.cpp b/lib/esan/esan_interface.cpp
new file mode 100644
index 0000000000000..43b3dff86f778
--- /dev/null
+++ b/lib/esan/esan_interface.cpp
@@ -0,0 +1,122 @@
+//===-- esan_interface.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+//===----------------------------------------------------------------------===//
+
+#include "esan_interface_internal.h"
+#include "esan.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+using namespace __esan; // NOLINT
+
+// Compiler-inserted entry point: verifies the instrumented module was built
+// for the same tool the runtime was linked for, then initializes the library
+// and registers the module's compilation-unit data (Ptr).
+void __esan_init(ToolType Tool, void *Ptr) {
+  if (Tool != __esan_which_tool) {
+    Printf("ERROR: tool mismatch: %d vs %d\n", Tool, __esan_which_tool);
+    Die();
+  }
+  initializeLibrary(Tool);
+  processCompilationUnitInit(Ptr);
+}
+
+// Compiler-inserted exit hook for a compilation unit's data (Ptr).
+void __esan_exit(void *Ptr) {
+  processCompilationUnitExit(Ptr);
+}
+
+// Slowpath entry points inserted by the instrumentation pass for each
+// non-fastpath load/store. Each forwards to processRangeAccess with the
+// caller's PC, the address, the access size, and IsWrite.
+// Aligned loads:
+void __esan_aligned_load1(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 1, false);
+}
+
+void __esan_aligned_load2(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 2, false);
+}
+
+void __esan_aligned_load4(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 4, false);
+}
+
+void __esan_aligned_load8(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 8, false);
+}
+
+void __esan_aligned_load16(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 16, false);
+}
+
+// Aligned stores:
+void __esan_aligned_store1(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 1, true);
+}
+
+void __esan_aligned_store2(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 2, true);
+}
+
+void __esan_aligned_store4(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 4, true);
+}
+
+void __esan_aligned_store8(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 8, true);
+}
+
+void __esan_aligned_store16(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 16, true);
+}
+
+// Unaligned loads (no 1-byte variant; byte accesses are always aligned):
+void __esan_unaligned_load2(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 2, false);
+}
+
+void __esan_unaligned_load4(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 4, false);
+}
+
+void __esan_unaligned_load8(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 8, false);
+}
+
+void __esan_unaligned_load16(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 16, false);
+}
+
+// Unaligned stores:
+void __esan_unaligned_store2(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 2, true);
+}
+
+void __esan_unaligned_store4(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 4, true);
+}
+
+void __esan_unaligned_store8(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 8, true);
+}
+
+void __esan_unaligned_store16(void *Addr) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, 16, true);
+}
+
+// Variable-size accesses for unusually-sized operands:
+void __esan_unaligned_loadN(void *Addr, uptr Size) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, Size, false);
+}
+
+void __esan_unaligned_storeN(void *Addr, uptr Size) {
+  processRangeAccess(GET_CALLER_PC(), (uptr)Addr, Size, true);
+}
+
+// Public interface:
+extern "C" {
+// Prints the tool's results so far (callable by the app at any time).
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_report() {
+  reportResults();
+}
+
+// Returns the number of snapshots/samples taken so far.
+SANITIZER_INTERFACE_ATTRIBUTE unsigned int __esan_get_sample_count() {
+  return getSampleCount();
+}
+} // extern "C"
diff --git a/lib/esan/esan_interface_internal.h b/lib/esan/esan_interface_internal.h
new file mode 100644
index 0000000000000..3b915d03e07a8
--- /dev/null
+++ b/lib/esan/esan_interface_internal.h
@@ -0,0 +1,80 @@
+//===-- esan_interface_internal.h -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Calls to the functions declared in this header will be inserted by
+// the instrumentation module.
+//===----------------------------------------------------------------------===//
+
+#ifndef ESAN_INTERFACE_INTERNAL_H
+#define ESAN_INTERFACE_INTERNAL_H
+
+#include <sanitizer_common/sanitizer_internal_defs.h>
+
+// This header should NOT include any other headers.
+// All functions in this header are extern "C" and start with __esan_.
+
+extern "C" {
+
+// This should be kept consistent with LLVM's EfficiencySanitizerOptions.
+// The value is passed as a 32-bit integer by the compiler.
+typedef enum Type : u32 {
+ ESAN_None = 0,
+ ESAN_CacheFrag,
+ ESAN_WorkingSet,
+ ESAN_Max,
+} ToolType;
+
+// To handle interceptors that invoke instrumented code prior to
+// __esan_init() being called, the instrumentation module creates this
+// global variable specifying the tool.
+extern ToolType __esan_which_tool;
+
+// This function should be called at the very beginning of the process,
+// before any instrumented code is executed and before any call to malloc.
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_init(ToolType Tool, void *Ptr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_exit(void *Ptr);
+
+// The instrumentation module will insert a call to one of these routines prior
+// to each load and store instruction for which we do not have "fastpath"
+// inlined instrumentation. These calls constitute the "slowpath" for our
+// tools. We have separate routines for each type of memory access to enable
+// targeted optimization.
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_load1(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_load2(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_load4(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_load8(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_load16(void *Addr);
+
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_store1(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_store2(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_store4(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_store8(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_aligned_store16(void *Addr);
+
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_unaligned_load2(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_unaligned_load4(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_unaligned_load8(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_unaligned_load16(void *Addr);
+
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_unaligned_store2(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_unaligned_store4(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_unaligned_store8(void *Addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __esan_unaligned_store16(void *Addr);
+
+// These cover unusually-sized accesses.
+SANITIZER_INTERFACE_ATTRIBUTE
+void __esan_unaligned_loadN(void *Addr, uptr Size);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __esan_unaligned_storeN(void *Addr, uptr Size);
+
+} // extern "C"
+
+#endif // ESAN_INTERFACE_INTERNAL_H
diff --git a/lib/esan/esan_linux.cpp b/lib/esan/esan_linux.cpp
new file mode 100644
index 0000000000000..aa961b66116bd
--- /dev/null
+++ b/lib/esan/esan_linux.cpp
@@ -0,0 +1,83 @@
+//===-- esan.cpp ----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Linux-specific code for the Esan run-time.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_FREEBSD || SANITIZER_LINUX
+
+#include "esan.h"
+#include "esan_shadow.h"
+#include "interception/interception.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include <sys/mman.h>
+#include <errno.h>
+
+namespace __esan {
+
+// Ensures the process address-space layout is compatible with our shadow
+// mapping; on x86-64 Linux this means capping the stack size limit and
+// re-exec'ing the process if the current limit is too large.
+void verifyAddressSpace() {
+#if SANITIZER_LINUX && defined(__x86_64__)
+  // The kernel determines its mmap base from the stack size limit.
+  // Our Linux 64-bit shadow mapping assumes the stack limit is less than a
+  // terabyte, which keeps the mmap region above 0x7e00'00000000.
+  uptr StackLimit = GetStackSizeLimitInBytes();
+  if (StackSizeIsUnlimited() || StackLimit > MaxStackSize) {
+    VReport(1, "The stack size limit is beyond the maximum supported.\n"
+            "Re-execing with a stack size below 1TB.\n");
+    SetStackSizeLimitInBytes(MaxStackSize);
+    ReExec();
+  }
+#endif
+}
+
+// Returns true iff [Start, Start+Size) is fully contained in one of the
+// application regions enumerated by getAppRegion.
+// NOTE(review): the last-byte test uses `<= AppEnd`; if getAppRegion reports
+// AppEnd as an exclusive bound this is off by one byte -- confirm the
+// convention. Also, Size == 0 underflows Start + Size - 1.
+static bool liesWithinSingleAppRegion(uptr Start, SIZE_T Size) {
+  uptr AppStart, AppEnd;
+  for (int i = 0; getAppRegion(i, &AppStart, &AppEnd); ++i) {
+    if (Start >= AppStart && Start + Size - 1 <= AppEnd) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Vets an mmap hint address against our app regions. A conflicting MAP_FIXED
+// request is rejected (errno = EINVAL, returns false so the caller reports
+// MAP_FAILED); a conflicting non-fixed hint is simply cleared so the kernel
+// picks a placement, which checkMmapResult then validates.
+bool fixMmapAddr(void **Addr, SIZE_T Size, int Flags) {
+  if (*Addr) {
+    if (!liesWithinSingleAppRegion((uptr)*Addr, Size)) {
+      VPrintf(1, "mmap conflict: [%p-%p) is not in an app region\n",
+              *Addr, (uptr)*Addr + Size);
+      if (Flags & MAP_FIXED) {
+        errno = EINVAL;
+        return false;
+      } else {
+        *Addr = 0;
+      }
+    }
+  }
+  return true;
+}
+
+// Validates the kernel's chosen mapping. MAP_FAILED is passed through to the
+// caller; a successful mapping outside our supported app regions is fatal,
+// since shadow translation for it would be wrong.
+uptr checkMmapResult(uptr Addr, SIZE_T Size) {
+  if ((void *)Addr == MAP_FAILED)
+    return Addr;
+  if (!liesWithinSingleAppRegion(Addr, Size)) {
+    // FIXME: attempt to dynamically add this as an app region if it
+    // fits our shadow criteria.
+    // We could also try to remap somewhere else.
+    Printf("ERROR: unsupported mapping at [%p-%p)\n", Addr, Addr+Size);
+    Die();
+  }
+  return Addr;
+}
+
+} // namespace __esan
+
+#endif // SANITIZER_FREEBSD || SANITIZER_LINUX
diff --git a/lib/esan/esan_shadow.h b/lib/esan/esan_shadow.h
new file mode 100644
index 0000000000000..f8f154ef7cca5
--- /dev/null
+++ b/lib/esan/esan_shadow.h
@@ -0,0 +1,203 @@
+//===-- esan_shadow.h -------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Shadow memory mappings for the esan run-time.
+//===----------------------------------------------------------------------===//
+
+#ifndef ESAN_SHADOW_H
+#define ESAN_SHADOW_H
+
+#include <sanitizer_common/sanitizer_platform.h>
+
+#if SANITIZER_WORDSIZE != 64
+#error Only 64-bit is supported
+#endif
+
+namespace __esan {
+
+#if SANITIZER_LINUX && defined(__x86_64__)
+// Linux x86_64
+//
+// Application memory falls into these 5 regions (ignoring the corner case
+// of PIE with a non-zero PT_LOAD base):
+//
+// [0x00000000'00000000, 0x00000100'00000000) non-PIE + heap
+// [0x00005500'00000000, 0x00005700'00000000) PIE
+// [0x00007e00'00000000, 0x00007fff'ff600000) libraries + stack, part 1
+// [0x00007fff'ff601000, 0x00008000'00000000) libraries + stack, part 2
+// [0xffffffff'ff600000, 0xffffffff'ff601000) vsyscall
+//
+// Although we can ignore the vsyscall for the most part as there are few data
+// references there (other sanitizers ignore it), we enforce a gap inside the
+// library region to distinguish the vsyscall's shadow, considering this gap to
+// be an invalid app region.
+// We disallow application memory outside of those 5 regions.
+// Our regions assume that the stack rlimit is less than a terabyte (otherwise
+// the Linux kernel's default mmap region drops below 0x7e00'), which we enforce
+// at init time (we can support larger and unlimited sizes for shadow
+// scaledowns, but it is difficult for 1:1 mappings).
+//
+// Our shadow memory is scaled from a 1:1 mapping and supports a scale
+// specified at library initialization time that can be any power-of-2
+// scaledown (1x, 2x, 4x, 8x, 16x, etc.).
+//
+// We model our shadow memory after Umbra, a library used by the Dr. Memory
+// tool: https://github.com/DynamoRIO/drmemory/blob/master/umbra/umbra_x64.c.
+// We use Umbra's scheme as it was designed to support different
+// offsets, it supports two different shadow mappings (which we may want to
+// use for future tools), and it ensures that the shadow of a shadow will
+// not overlap either shadow memory or application memory.
+//
+// This formula translates from application memory to shadow memory:
+//
+// shadow(app) = ((app & 0x00000fff'ffffffff) + offset) >> scale
+//
+// Where the offset for 1:1 is 0x00001300'00000000. For other scales, the
+// offset is shifted left by the scale, except for scales of 1 and 2 where
+// it must be tweaked in order to pass the double-shadow test
+// (see the "shadow(shadow)" comments below):
+//   scale == 0: 0x00001300'00000000
+//   scale == 1: 0x00002200'00000000
+//   scale == 2: 0x00004400'00000000
+//   scale >= 3: (0x00001300'00000000 << scale)
+//
+// Do not pass in the open-ended end value to the formula as it will fail.
+//
+// The resulting shadow memory regions for a 0 scaling are:
+//
+// [0x00001300'00000000, 0x00001400'00000000)
+// [0x00001800'00000000, 0x00001a00'00000000)
+// [0x00002100'00000000, 0x000022ff'ff600000)
+// [0x000022ff'ff601000, 0x00002300'00000000)
+// [0x000022ff'ff600000, 0x000022ff'ff601000]
+//
+// We also want to ensure that a wild access by the application into the shadow
+// regions will not corrupt our own shadow memory. shadow(shadow) ends up
+// disjoint from shadow(app):
+//
+// [0x00001600'00000000, 0x00001700'00000000)
+// [0x00001b00'00000000, 0x00001d00'00000000)
+// [0x00001400'00000000, 0x000015ff'ff600000]
+// [0x000015ff'ff601000, 0x00001600'00000000]
+// [0x000015ff'ff600000, 0x000015ff'ff601000]
+
+// One contiguous range of application address space.
+struct ApplicationRegion {
+  uptr Start;  // Inclusive start address.
+  uptr End;    // Exclusive end address.
+  // True if this region's shadow is folded into the previous entry's
+  // shadow mapping (see getShadowRegion()).
+  bool ShadowMergedWithPrev;
+};
+
+// The 5 app regions described in the comment above: non-PIE + heap, PIE,
+// libraries + stack on either side of the vsyscall gap, and vsyscall.
+static const struct ApplicationRegion AppRegions[] = {
+  {0x0000000000000000ull, 0x0000010000000000u, false},
+  {0x0000550000000000u, 0x0000570000000000u, false},
+  // We make one shadow mapping to hold the shadow regions for all 3 of these
+  // app regions, as the mappings interleave, and the gap between the 3rd and
+  // 4th scales down below a page.
+  {0x00007e0000000000u, 0x00007fffff600000u, false},
+  {0x00007fffff601000u, 0x0000800000000000u, true},
+  {0xffffffffff600000u, 0xffffffffff601000u, true},
+};
+static const u32 NumAppRegions = sizeof(AppRegions)/sizeof(AppRegions[0]);
+
+// See the comment above: we do not currently support a stack size rlimit
+// equal to or larger than 1TB.
+static const uptr MaxStackSize = (1ULL << 40) - 4096;
+
+// Holds the scale and offset used by appToShadow().  A single global
+// instance (Mapping, declared below) is initialized at startup.
+class ShadowMapping {
+public:
+  // Masks off the high bits of an app address before the offset is added.
+  static const uptr Mask = 0x00000fffffffffffu;
+  // The scale and offset vary by tool.
+  uptr Scale;
+  uptr Offset;
+  // Sets Scale/Offset for a power-of-2 scaledown (ShadowScale is the shift
+  // count).  Scales 1 and 2 use specially-tweaked offsets to keep
+  // shadow(shadow) disjoint from shadow(app); see the table above.
+  void initialize(uptr ShadowScale) {
+    static const uptr OffsetArray[3] = {
+      0x0000130000000000u,
+      0x0000220000000000u,
+      0x0000440000000000u,
+    };
+    Scale = ShadowScale;
+    if (Scale <= 2)
+      Offset = OffsetArray[Scale];
+    else
+      Offset = OffsetArray[0] << Scale;
+  }
+};
+extern ShadowMapping Mapping;
+#else
+// We'll want to use templatized functions over the ShadowMapping once
+// we support more platforms.
+#error Platform not supported
+#endif
+
+// Retrieves app region i's bounds into *Start (inclusive) and *End
+// (exclusive); returns false once i runs past the last region.
+static inline bool getAppRegion(u32 i, uptr *Start, uptr *End) {
+  if (i >= NumAppRegions)
+    return false;
+  const ApplicationRegion &Region = AppRegions[i];
+  *Start = Region.Start;
+  *End = Region.End;
+  return true;
+}
+
+ALWAYS_INLINE
+bool isAppMem(uptr Mem) {
+  // Linear scan of the small, constant-sized region table.
+  for (u32 Idx = 0; Idx < NumAppRegions; ++Idx) {
+    if (AppRegions[Idx].Start <= Mem && Mem < AppRegions[Idx].End)
+      return true;
+  }
+  return false;
+}
+
+// Translates an app address to its shadow address:
+//   shadow(app) = ((app & Mask) + Offset) >> Scale
+// Per the comment above, this is only valid for addresses inside an app
+// region (a region's open-ended End value gives a wrong result).
+ALWAYS_INLINE
+uptr appToShadow(uptr App) {
+  return (((App & ShadowMapping::Mask) + Mapping.Offset) >> Mapping.Scale);
+}
+
+// Retrieves the bounds of the i-th *merged* shadow region.  App regions
+// flagged ShadowMergedWithPrev share one shadow mapping with their
+// predecessor and thus do not get their own index.  Returns false when i is
+// past the last unmerged shadow region.
+static inline bool getShadowRegion(u32 i, uptr *Start, uptr *End) {
+  if (i >= NumAppRegions)
+    return false;
+  u32 UnmergedShadowCount = 0;
+  u32 AppIdx;
+  // Find the i-th app region that starts its own shadow mapping.
+  for (AppIdx = 0; AppIdx < NumAppRegions; ++AppIdx) {
+    if (!AppRegions[AppIdx].ShadowMergedWithPrev) {
+      if (UnmergedShadowCount == i)
+        break;
+      UnmergedShadowCount++;
+    }
+  }
+  if (AppIdx >= NumAppRegions || UnmergedShadowCount != i)
+    return false;
+  *Start = appToShadow(AppRegions[AppIdx].Start);
+  // The formula fails for the end itself.
+  *End = appToShadow(AppRegions[AppIdx].End - 1) + 1;
+  // Merge with adjacent shadow regions:
+  for (++AppIdx; AppIdx < NumAppRegions; ++AppIdx) {
+    if (!AppRegions[AppIdx].ShadowMergedWithPrev)
+      break;
+    *Start = Min(*Start, appToShadow(AppRegions[AppIdx].Start));
+    *End = Max(*End, appToShadow(AppRegions[AppIdx].End - 1) + 1);
+  }
+  return true;
+}
+
+ALWAYS_INLINE
+bool isShadowMem(uptr Mem) {
+  // We assume this is not used on any critical performance path and so there's
+  // no need to hardcode the mapping results.
+  for (uptr Idx = 0; Idx < NumAppRegions; ++Idx) {
+    uptr ShadowStart = appToShadow(AppRegions[Idx].Start);
+    uptr ShadowEnd = appToShadow(AppRegions[Idx].End - 1) + 1;
+    if (ShadowStart <= Mem && Mem < ShadowEnd)
+      return true;
+  }
+  return false;
+}
+
+} // namespace __esan
+
+#endif /* ESAN_SHADOW_H */
diff --git a/lib/esan/esan_sideline.h b/lib/esan/esan_sideline.h
new file mode 100644
index 0000000000000..aa3fae1db3183
--- /dev/null
+++ b/lib/esan/esan_sideline.h
@@ -0,0 +1,61 @@
+//===-- esan_sideline.h -----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Esan sideline thread support.
+//===----------------------------------------------------------------------===//
+
+#ifndef ESAN_SIDELINE_H
+#define ESAN_SIDELINE_H
+
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+namespace __esan {
+
+typedef void (*SidelineFunc)(void *Arg);
+
+// Currently only one sideline thread is supported.
+// It calls the SidelineFunc passed to launchThread once on each sample at the
+// given frequency in real time (i.e., wall clock time).
+class SidelineThread {
+public:
+  // We cannot initialize any fields in the constructor as it will be called
+  // *after* launchThread for a static instance, as esan.module_ctor is called
+  // before static initializers.
+  SidelineThread() {}
+  ~SidelineThread() {}
+
+  // To simplify declaration in sanitizer code where we want to avoid
+  // heap allocations, the constructor and destructor do nothing and
+  // launchThread and joinThread do the real work.
+  // They should each be called just once.
+  bool launchThread(SidelineFunc takeSample, void *Arg, u32 FreqMilliSec);
+  bool joinThread();
+
+  // Must be called from the sideline thread itself.
+  bool adjustTimer(u32 FreqMilliSec);
+
+private:
+  static int runSideline(void *Arg);
+  static void registerSignal(int SigNum);
+  static void handleSidelineSignal(int SigNum, void *SigInfo, void *Ctx);
+
+  char *Stack;                   // Base of the sideline thread's stack.
+  SidelineFunc sampleFunc;       // Callback invoked on each timer sample.
+  void *FuncArg;                 // Opaque argument passed to sampleFunc.
+  u32 Freq;                      // Sampling period in milliseconds.
+  uptr SidelineId;               // Id returned by the thread-create call.
+  atomic_uintptr_t SidelineExit; // Set non-zero to ask the thread to exit.
+};
+
+} // namespace __esan
+
+#endif // ESAN_SIDELINE_H
diff --git a/lib/esan/esan_sideline_linux.cpp b/lib/esan/esan_sideline_linux.cpp
new file mode 100644
index 0000000000000..d04f5909d6a27
--- /dev/null
+++ b/lib/esan/esan_sideline_linux.cpp
@@ -0,0 +1,177 @@
+//===-- esan_sideline_linux.cpp ---------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Support for a separate or "sideline" tool thread on Linux.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_LINUX
+
+#include "esan_sideline.h"
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_linux.h"
+#include <errno.h>
+#include <sched.h>
+#include <sys/prctl.h>
+#include <sys/signal.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+namespace __esan {
+
+// Stack sizes (in bytes) for the signal alternate stack and for the
+// sideline thread's own stack.
+static const int SigAltStackSize = 4*1024;
+static const int SidelineStackSize = 4*1024;
+// Sentinel stored in SidelineId until internal_clone()'s result is assigned.
+static const uptr SidelineIdUninitialized = 1;
+
+// FIXME: we'll need some kind of TLS (can we trust that a pthread key will
+// work in our non-POSIX thread?) to access our data in our signal handler
+// with multiple sideline threads. For now we assume there is only one
+// sideline thread and we use a dirty solution of a global var.
+static SidelineThread *TheThread;
+
+// SIGALRM handler for the sideline thread: forwards each timer sample to
+// the registered sampleFunc.
+// We aren't passing SA_NODEFER so the same signal is blocked while here.
+void SidelineThread::handleSidelineSignal(int SigNum, void *SigInfo,
+                                          void *Ctx) {
+  VPrintf(3, "Sideline signal %d\n", SigNum);
+  CHECK_EQ(SigNum, SIGALRM);
+  // See above about needing TLS to avoid this global var.
+  SidelineThread *Thread = TheThread;
+  // Drop samples that arrive after joinThread() has requested an exit.
+  if (atomic_load(&Thread->SidelineExit, memory_order_relaxed) != 0)
+    return;
+  Thread->sampleFunc(Thread->FuncArg);
+}
+
+// Installs handleSidelineSignal for SigNum, running on the alternate stack
+// that runSideline() sets up.
+void SidelineThread::registerSignal(int SigNum) {
+  __sanitizer_sigaction SigAct;
+  internal_memset(&SigAct, 0, sizeof(SigAct));
+  SigAct.sigaction = handleSidelineSignal;
+  // We do not pass SA_NODEFER as we want to block the same signal.
+  SigAct.sa_flags = SA_ONSTACK | SA_SIGINFO;
+  int Res = internal_sigaction(SigNum, &SigAct, nullptr);
+  CHECK_EQ(Res, 0);
+}
+
+// Entry point of the sideline thread.  Sets up an alternate signal stack
+// and an interval timer, then spins handling SIGALRM samples until
+// SidelineExit is set by joinThread().  Arg is the owning SidelineThread.
+int SidelineThread::runSideline(void *Arg) {
+  VPrintf(1, "Sideline thread starting\n");
+  SidelineThread *Thread = static_cast<SidelineThread*>(Arg);
+
+  // If the parent dies, we want to exit also.
+  internal_prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
+
+  // Set up a signal handler on an alternate stack for safety.
+  InternalScopedBuffer<char> StackMap(SigAltStackSize);
+  struct sigaltstack SigAltStack;
+  SigAltStack.ss_sp = StackMap.data();
+  SigAltStack.ss_size = SigAltStackSize;
+  SigAltStack.ss_flags = 0;
+  internal_sigaltstack(&SigAltStack, nullptr);
+
+  // We inherit the signal mask from the app thread. In case
+  // we weren't created at init time, we ensure the mask is empty.
+  __sanitizer_sigset_t SigSet;
+  internal_sigfillset(&SigSet);
+  int Res = internal_sigprocmask(SIG_UNBLOCK, &SigSet, nullptr);
+  CHECK_EQ(Res, 0);
+
+  registerSignal(SIGALRM);
+
+  bool TimerSuccess = Thread->adjustTimer(Thread->Freq);
+  CHECK(TimerSuccess);
+
+  // We loop, doing nothing but handling itimer signals.
+  // Use the local Thread pointer consistently (TheThread aliases it but is
+  // only a stopgap global for the signal handler: see the FIXME above).
+  while (atomic_load(&Thread->SidelineExit, memory_order_relaxed) == 0)
+    sched_yield();
+
+  if (!Thread->adjustTimer(0))
+    VPrintf(1, "Failed to disable timer\n");
+
+  VPrintf(1, "Sideline thread exiting\n");
+  return 0;
+}
+
+// Records the sampling callback and frequency, allocates the child stack,
+// and starts the sideline thread via internal_clone().  Aborts the process
+// on clone failure.  May be called only once per process.
+bool SidelineThread::launchThread(SidelineFunc takeSample, void *Arg,
+                                  u32 FreqMilliSec) {
+  // This can only be called once. However, we can't clear a field in
+  // the constructor and check for that here as the constructor for
+  // a static instance is called *after* our module_ctor and thus after
+  // this routine! Thus we rely on the TheThread check below.
+  CHECK(TheThread == nullptr); // Only one sideline thread is supported.
+  TheThread = this;
+  sampleFunc = takeSample;
+  FuncArg = Arg;
+  Freq = FreqMilliSec;
+  atomic_store(&SidelineExit, 0, memory_order_relaxed);
+
+  // We do without a guard page.
+  Stack = static_cast<char*>(MmapOrDie(SidelineStackSize, "SidelineStack"));
+  // We need to handle the return value from internal_clone() not having been
+  // assigned yet (for our CHECK in adjustTimer()) so we ensure this has a
+  // sentinel value.
+  SidelineId = SidelineIdUninitialized;
+  // By omitting CLONE_THREAD, the child is in its own thread group and will not
+  // receive any of the application's signals.
+  SidelineId = internal_clone(
+      runSideline, Stack + SidelineStackSize,
+      CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_UNTRACED,
+      this, nullptr /* parent_tidptr */,
+      nullptr /* newtls */, nullptr /* child_tidptr */);
+  int ErrCode;
+  if (internal_iserror(SidelineId, &ErrCode)) {
+    Printf("FATAL: EfficiencySanitizer failed to spawn a thread (code %d).\n",
+           ErrCode);
+    Die();
+    return false; // Not reached.
+  }
+  return true;
+}
+
+// Asks the sideline thread to exit, reaps it with waitpid (it is a separate
+// thread group, hence __WALL), and frees its stack.  Returns false only if
+// the wait failed for a reason other than EINTR.
+bool SidelineThread::joinThread() {
+  VPrintf(1, "Joining sideline thread\n");
+  bool Res = true;
+  // runSideline()'s loop polls this flag.
+  atomic_store(&SidelineExit, 1, memory_order_relaxed);
+  while (true) {
+    uptr Status = internal_waitpid(SidelineId, nullptr, __WALL);
+    int ErrCode;
+    if (!internal_iserror(Status, &ErrCode))
+      break;
+    // Retry when interrupted by a signal.
+    if (ErrCode == EINTR)
+      continue;
+    VPrintf(1, "Failed to join sideline thread (errno %d)\n", ErrCode);
+    Res = false;
+    break;
+  }
+  UnmapOrDie(Stack, SidelineStackSize);
+  return Res;
+}
+
+// Arms (or, for FreqMilliSec == 0, disarms) the real-time interval timer
+// that drives sampling.  Must be called from the sideline thread itself.
+bool SidelineThread::adjustTimer(u32 FreqMilliSec) {
+  // The return value of internal_clone() may not have been assigned yet:
+  CHECK(internal_getpid() == SidelineId ||
+        SidelineId == SidelineIdUninitialized);
+  Freq = FreqMilliSec;
+  // Split the millisecond period into whole seconds plus microseconds.
+  time_t Sec = (time_t) Freq / 1000;
+  time_t Usec = (time_t) (Freq % 1000) * 1000;
+  struct itimerval TimerVal;
+  TimerVal.it_interval.tv_sec = Sec;
+  TimerVal.it_interval.tv_usec = Usec;
+  TimerVal.it_value.tv_sec = Sec;
+  TimerVal.it_value.tv_usec = Usec;
+  // As we're in a different thread group, we cannot use either
+  // ITIMER_PROF or ITIMER_VIRTUAL without taking up scheduled
+  // time ourselves: thus we must use real time.
+  return setitimer(ITIMER_REAL, &TimerVal, nullptr) == 0;
+}
+
+} // namespace __esan
+
+#endif // SANITIZER_LINUX
diff --git a/lib/esan/working_set.cpp b/lib/esan/working_set.cpp
new file mode 100644
index 0000000000000..f39111993c337
--- /dev/null
+++ b/lib/esan/working_set.cpp
@@ -0,0 +1,279 @@
+//===-- working_set.cpp ---------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// This file contains working-set-specific code.
+//===----------------------------------------------------------------------===//
+
+#include "working_set.h"
+#include "esan.h"
+#include "esan_circular_buffer.h"
+#include "esan_flags.h"
+#include "esan_shadow.h"
+#include "esan_sideline.h"
+#include "sanitizer_common/sanitizer_procmaps.h"
+
+// We shadow every cache line of app memory with one shadow byte.
+// - The highest bit of each shadow byte indicates whether the corresponding
+// cache line has ever been accessed.
+// - The lowest bit of each shadow byte indicates whether the corresponding
+// cache line was accessed since the last sample.
+// - The other bits are used for working set snapshots at successively
+// lower frequencies, each bit to the left from the lowest bit stepping
+// down the frequency by 2 to the power of getFlags()->snapshot_step.
+// Thus we have something like this:
+// Bit 0: Since last sample
+// Bit 1: Since last 2^2 samples
+// Bit 2: Since last 2^4 samples
+// Bit 3: ...
+// Bit 7: Ever accessed.
+// We live with races in accessing each shadow byte.
+typedef unsigned char byte;
+
+namespace __esan {
+
+// Our shadow memory assumes that the line size is 64.
+static const u32 CacheLineSize = 64;
+
+// See the shadow byte layout description above.
+static const u32 TotalWorkingSetBitIdx = 7;
+// We accumulate to the left until we hit this bit.
+// We don't need to accumulate to the final bit as it's set on each ref
+// by the compiler instrumentation.
+static const u32 MaxAccumBitIdx = 6;
+static const u32 CurWorkingSetBitIdx = 0;
+// Value OR-ed into a shadow byte on each access: the "ever accessed" bit
+// plus the "accessed since last sample" bit.
+static const byte ShadowAccessedVal =
+  (1 << TotalWorkingSetBitIdx) | (1 << CurWorkingSetBitIdx);
+
+// The sideline thread that takes the periodic snapshots.
+static SidelineThread Thread;
+// If we use real-time-based timer samples this won't overflow in any realistic
+// scenario, but if we switch to some other unit (such as memory accesses) we
+// may want to consider a 64-bit int.
+static u32 SnapshotNum;
+
+// We store the wset size for each of 8 different sampling frequencies.
+static const u32 NumFreq = 8; // One for each bit of our shadow bytes.
+// We cannot use static objects as the global destructor is called
+// prior to our finalize routine.
+// These are each circular buffers, sized up front.
+CircularBuffer<u32> SizePerFreq[NumFreq];
+// We cannot rely on static initializers (they may run too late) but
+// we record the size here for clarity:
+u32 CircularBufferSizes[NumFreq] = {
+  // These are each mmap-ed so our minimum is one page.
+  32*1024,
+  16*1024,
+  8*1024,
+  4*1024,
+  4*1024,
+  4*1024,
+  4*1024,
+  4*1024,
+};
+
+// Marks every cache line touched by [Addr, Addr+Size) as accessed by OR-ing
+// ShadowAccessedVal into the corresponding shadow bytes.  PC and IsWrite
+// are not used by this routine.
+void processRangeAccessWorkingSet(uptr PC, uptr Addr, SIZE_T Size,
+                                  bool IsWrite) {
+  if (Size == 0)
+    return;
+  SIZE_T I = 0;
+  uptr LineSize = getFlags()->cache_line_size;
+  // As Addr+Size could overflow at the top of a 32-bit address space,
+  // we avoid the simpler formula that rounds the start and end.
+  SIZE_T NumLines = Size / LineSize +
+    // Add any extra at the start or end adding on an extra line:
+    (LineSize - 1 + Addr % LineSize + Size % LineSize) / LineSize;
+  byte *Shadow = (byte *)appToShadow(Addr);
+  // Write shadow bytes until we're word-aligned.
+  while (I < NumLines && (uptr)Shadow % 4 != 0) {
+    // Test before writing to avoid dirtying an already-marked byte.
+    if ((*Shadow & ShadowAccessedVal) != ShadowAccessedVal)
+      *Shadow |= ShadowAccessedVal;
+    ++Shadow;
+    ++I;
+  }
+  // Write whole shadow words at a time.
+  // Using a word-stride loop improves the runtime of a microbenchmark of
+  // memset calls by 10%.
+  u32 WordValue = ShadowAccessedVal | ShadowAccessedVal << 8 |
+      ShadowAccessedVal << 16 | ShadowAccessedVal << 24;
+  while (I + 4 <= NumLines) {
+    if ((*(u32*)Shadow & WordValue) != WordValue)
+      *(u32*)Shadow |= WordValue;
+    Shadow += 4;
+    I += 4;
+  }
+  // Write any trailing shadow bytes.
+  while (I < NumLines) {
+    if ((*Shadow & ShadowAccessedVal) != ShadowAccessedVal)
+      *Shadow |= ShadowAccessedVal;
+    ++Shadow;
+    ++I;
+  }
+}
+
+// This routine will word-align ShadowStart and ShadowEnd prior to scanning.
+// It does *not* clear for BitIdx==TotalWorkingSetBitIdx, as that top bit
+// measures the access during the entire execution and should never be cleared.
+// Counts the shadow bytes in [ShadowStart, ShadowEnd) with BitIdx set,
+// clearing that bit afterward (unless it is the whole-execution bit) and,
+// when snapshots are recorded, propagating each hit into the next
+// lower-frequency bit to the left.
+static u32 countAndClearShadowValues(u32 BitIdx, uptr ShadowStart,
+                                     uptr ShadowEnd) {
+  u32 WorkingSetSize = 0;
+  u32 ByteValue = 0x1 << BitIdx;
+  // Replicate the byte mask into each byte of a word for the fast scan.
+  u32 WordValue = ByteValue | ByteValue << 8 | ByteValue << 16 |
+      ByteValue << 24;
+  // Get word aligned start.
+  ShadowStart = RoundDownTo(ShadowStart, sizeof(u32));
+  bool Accum = getFlags()->record_snapshots && BitIdx < MaxAccumBitIdx;
+  // Do not clear the bit that measures access during the entire execution.
+  bool Clear = BitIdx < TotalWorkingSetBitIdx;
+  for (u32 *Ptr = (u32 *)ShadowStart; Ptr < (u32 *)ShadowEnd; ++Ptr) {
+    // Skip words with no bit set: the common case for sparse access.
+    if ((*Ptr & WordValue) != 0) {
+      byte *BytePtr = (byte *)Ptr;
+      for (u32 j = 0; j < sizeof(u32); ++j) {
+        if (BytePtr[j] & ByteValue) {
+          ++WorkingSetSize;
+          if (Accum) {
+            // Accumulate to the lower-frequency bit to the left.
+            BytePtr[j] |= (ByteValue << 1);
+          }
+        }
+      }
+      if (Clear) {
+        // Clear this bit from every shadow byte.
+        *Ptr &= ~WordValue;
+      }
+    }
+  }
+  return WorkingSetSize;
+}
+
+// Scan shadow memory to calculate the number of cache lines being accessed,
+// i.e., the number of non-zero bits indexed by BitIdx in each shadow byte.
+// We also clear the lowest bits (most recent working set snapshot).
+// We do *not* clear for BitIdx==TotalWorkingSetBitIdx, as that top bit
+// measures the access during the entire execution and should never be cleared.
+static u32 computeWorkingSizeAndReset(u32 BitIdx) {
+  u32 WorkingSetSize = 0;
+  MemoryMappingLayout MemIter(true/*cache*/);
+  uptr Start, End, Prot;
+  // Walk the address space and count+clear BitIdx in every writable
+  // shadow region.
+  while (MemIter.Next(&Start, &End, nullptr/*offs*/, nullptr/*file*/,
+                      0/*file size*/, &Prot)) {
+    // Pass the arguments in the order the format string expects
+    // (app=%d shadow=%d prot=%u); the original order printed Prot under
+    // app= and dropped it from prot=.
+    VPrintf(4, "%s: considering %p-%p app=%d shadow=%d prot=%u\n",
+            __FUNCTION__, Start, End, isAppMem(Start), isShadowMem(Start),
+            Prot);
+    if (isShadowMem(Start) && (Prot & MemoryMappingLayout::kProtectionWrite)) {
+      VPrintf(3, "%s: walking %p-%p\n", __FUNCTION__, Start, End);
+      WorkingSetSize += countAndClearShadowValues(BitIdx, Start, End);
+    }
+  }
+  return WorkingSetSize;
+}
+
+// This is invoked from a signal handler but in a sideline thread doing nothing
+// else so it is a little less fragile than a typical signal handler.
+// Sideline-thread sampling callback: records one snapshot for every
+// frequency bit whose period divides the current sample number.
+static void takeSample(void *Arg) {
+  u32 BitIdx = CurWorkingSetBitIdx;
+  u32 Freq = 1;
+  ++SnapshotNum; // Simpler to skip 0 whose mod matches everything.
+  // Each successive bit's period is 2^snapshot_step times longer.
+  while (BitIdx <= MaxAccumBitIdx && (SnapshotNum % Freq) == 0) {
+    u32 NumLines = computeWorkingSizeAndReset(BitIdx);
+    VReport(1, "%s: snapshot #%5d bit %d freq %4d: %8u\n", SanitizerToolName,
+            SnapshotNum, BitIdx, Freq, NumLines);
+    SizePerFreq[BitIdx].push_back(NumLines);
+    Freq = Freq << getFlags()->snapshot_step;
+    BitIdx++;
+  }
+}
+
+// Returns the number of timer samples taken so far.
+unsigned int getSampleCountWorkingSet() { return SnapshotNum; }
+
+// Initialization that must be done before any instrumented code is executed.
+void initializeShadowWorkingSet() {
+  // Our shadow byte layout is hardcoded for 64-byte cache lines.
+  CHECK(getFlags()->cache_line_size == CacheLineSize);
+  registerMemoryFaultHandler();
+}
+
+// Sets up the snapshot buffers and launches the sampling thread, but only
+// when snapshot recording is enabled.
+void initializeWorkingSet() {
+  if (getFlags()->record_snapshots) {
+    for (u32 i = 0; i < NumFreq; ++i)
+      SizePerFreq[i].initialize(CircularBufferSizes[i]);
+    Thread.launchThread(takeSample, nullptr, getFlags()->sample_freq);
+  }
+}
+
+// Converts MilliSec into a human-friendly magnitude, storing the unit name
+// ("min", "sec", or "ms") into Unit and returning the scaled value.
+static u32 getPeriodForPrinting(u32 MilliSec, const char *&Unit) {
+  if (MilliSec > 600000) {
+    Unit = "min";
+    return MilliSec / 60000;
+  }
+  if (MilliSec > 10000) {
+    Unit = "sec";
+    return MilliSec / 1000;
+  }
+  Unit = "ms";
+  return MilliSec;
+}
+
+// Converts a cache-line count into a human-friendly size, storing the unit
+// name ("MB", "KB", or "Bytes") into Unit and returning the scaled value.
+static u32 getSizeForPrinting(u32 NumOfCachelines, const char *&Unit) {
+  // We need a constant to avoid software divide support:
+  static const u32 KilobyteCachelines = (0x1 << 10) / CacheLineSize;
+  static const u32 MegabyteCachelines = KilobyteCachelines << 10;
+
+  if (NumOfCachelines > 10 * MegabyteCachelines) {
+    Unit = "MB";
+    return NumOfCachelines / MegabyteCachelines;
+  }
+  if (NumOfCachelines > 10 * KilobyteCachelines) {
+    Unit = "KB";
+    return NumOfCachelines / KilobyteCachelines;
+  }
+  Unit = "Bytes";
+  return NumOfCachelines * CacheLineSize;
+}
+
+// Prints the recorded per-frequency snapshots (when enabled) followed by
+// the total working set size over the entire execution.
+void reportWorkingSet() {
+  const char *Unit;
+  if (getFlags()->record_snapshots) {
+    u32 Freq = 1;
+    Report(" Total number of samples: %u\n", SnapshotNum);
+    for (u32 i = 0; i < NumFreq; ++i) {
+      u32 Time = getPeriodForPrinting(getFlags()->sample_freq*Freq, Unit);
+      Report(" Samples array #%d at period %u %s\n", i, Time, Unit);
+      // FIXME: report whether we wrapped around and thus whether we
+      // have data on the whole run or just the last N samples.
+      for (u32 j = 0; j < SizePerFreq[i].size(); ++j) {
+        u32 Size = getSizeForPrinting(SizePerFreq[i][j], Unit);
+        Report("#%4d: %8u %s (%9u cache lines)\n", j, Size, Unit,
+               SizePerFreq[i][j]);
+      }
+      // Each successive array samples 2^snapshot_step times less often.
+      Freq = Freq << getFlags()->snapshot_step;
+    }
+  }
+
+  // Get the working set size for the entire execution.
+  u32 NumOfCachelines = computeWorkingSizeAndReset(TotalWorkingSetBitIdx);
+  u32 Size = getSizeForPrinting(NumOfCachelines, Unit);
+  Report(" %s: the total working set size: %u %s (%u cache lines)\n",
+         SanitizerToolName, Size, Unit, NumOfCachelines);
+}
+
+// Tool tear-down: stops the sampling thread, emits the report, and frees
+// the snapshot buffers.  Returns the tool's exit status contribution (0).
+int finalizeWorkingSet() {
+  if (getFlags()->record_snapshots)
+    Thread.joinThread();
+  // Report after joining so no sample can run concurrently with us.
+  reportWorkingSet();
+  if (getFlags()->record_snapshots) {
+    for (u32 i = 0; i < NumFreq; ++i)
+      SizePerFreq[i].free();
+  }
+  return 0;
+}
+
+} // namespace __esan
diff --git a/lib/esan/working_set.h b/lib/esan/working_set.h
new file mode 100644
index 0000000000000..6a976c3f9b22e
--- /dev/null
+++ b/lib/esan/working_set.h
@@ -0,0 +1,40 @@
+//===-- working_set.h -------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// Header for working-set-specific code.
+//===----------------------------------------------------------------------===//
+
+#ifndef WORKING_SET_H
+#define WORKING_SET_H
+
+#include "interception/interception.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+namespace __esan {
+
+void initializeWorkingSet();       // Called at startup when the tool runs.
+void initializeShadowWorkingSet(); // Called before any instrumented code.
+int finalizeWorkingSet();          // Joins the sampler and prints the report.
+void reportWorkingSet();
+unsigned int getSampleCountWorkingSet();
+// Instrumentation callback: marks the cache lines of [Addr, Addr+Size).
+void processRangeAccessWorkingSet(uptr PC, uptr Addr, SIZE_T Size,
+                                  bool IsWrite);
+
+// Platform-dependent.
+void registerMemoryFaultHandler();
+// Interceptor helpers: each returns false when the tool emulates the call
+// and the real libc routine should be skipped.
+bool processWorkingSetSignal(int SigNum, void (*Handler)(int),
+                             void (**Result)(int));
+bool processWorkingSetSigaction(int SigNum, const void *Act, void *OldAct);
+bool processWorkingSetSigprocmask(int How, void *Set, void *OldSet);
+
+} // namespace __esan
+
+#endif // WORKING_SET_H
diff --git a/lib/esan/working_set_posix.cpp b/lib/esan/working_set_posix.cpp
new file mode 100644
index 0000000000000..fcfa87128857f
--- /dev/null
+++ b/lib/esan/working_set_posix.cpp
@@ -0,0 +1,133 @@
+//===-- working_set_posix.cpp -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners.
+//
+// POSIX-specific working set tool code.
+//===----------------------------------------------------------------------===//
+
+#include "working_set.h"
+#include "esan_flags.h"
+#include "esan_shadow.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_linux.h"
+#include <signal.h>
+#include <sys/mman.h>
+
+namespace __esan {
+
+// We only support regular POSIX threads with a single signal handler
+// for the whole process == thread group.
+// Thus we only need to store one app signal handler.
+// FIXME: Store and use any alternate stack and signal flags set by
+// the app. For now we just call the app handler from our handler.
+// Holds the app's SIGSEGV disposition: captured when we install our own
+// handler in registerMemoryFaultHandler() and kept up to date by the
+// signal()/sigaction() interceptor hooks below.
+static __sanitizer_sigaction AppSigAct;
+
+// Interceptor hook for signal().  For SIGSEGV we emulate the call: the
+// app's new handler is recorded in AppSigAct (so our fault handler can
+// chain to it) and the previously recorded handler is returned through
+// Result.  Returns false when the real signal() call must be skipped,
+// true to let it proceed (all other signals).
+bool processWorkingSetSignal(int SigNum, void (*Handler)(int),
+                             void (**Result)(int)) {
+  VPrintf(2, "%s: %d\n", __FUNCTION__, SigNum);
+  if (SigNum != SIGSEGV)
+    return true; // Not ours: run the real signal().
+  *Result = AppSigAct.handler;
+  AppSigAct.sigaction = (void (*)(int, void*, void*))Handler;
+  return false; // Skip real call.
+}
+
+// Interceptor hook for sigaction().  For SIGSEGV we emulate the call so
+// our shadow-fault handler stays installed: the previously recorded app
+// disposition is copied out through OldActVoid and any new disposition is
+// recorded into AppSigAct for handleMemoryFault to chain to.  Returns
+// false when the real sigaction() call must be skipped, true otherwise.
+bool processWorkingSetSigaction(int SigNum, const void *ActVoid,
+                                void *OldActVoid) {
+  VPrintf(2, "%s: %d\n", __FUNCTION__, SigNum);
+  if (SigNum == SIGSEGV) {
+    const struct sigaction *Act = (const struct sigaction *) ActVoid;
+    struct sigaction *OldAct = (struct sigaction *) OldActVoid;
+    if (OldAct)
+      // Fix: was sizeof(OldAct), which copied only pointer-size bytes of
+      // the stored disposition.  Copy the whole struct; the reverse copy
+      // below already assumes struct sigaction and __sanitizer_sigaction
+      // share a layout.
+      internal_memcpy(OldAct, &AppSigAct, sizeof(AppSigAct));
+    if (Act)
+      internal_memcpy(&AppSigAct, Act, sizeof(AppSigAct));
+    return false; // Skip real call.
+  }
+  return true;
+}
+
+// Interceptor hook for sigprocmask().  The tool needs SIGSEGV deliverable
+// at all times (shadow faults), so any mask the app is about to block with
+// has SIGSEGV stripped from it before the real call runs.  Always returns
+// true: the (possibly edited) real sigprocmask() call still executes.
+// FIXME: we are not fully transparent as we do not pretend that
+// SIGSEGV is still blocked on app queries: that would require
+// per-thread mask tracking.
+bool processWorkingSetSigprocmask(int How, void *Set, void *OldSet) {
+  VPrintf(2, "%s\n", __FUNCTION__);
+  __sanitizer_sigset_t *Mask = (__sanitizer_sigset_t *)Set;
+  bool MayBlock = Mask && (How == SIG_BLOCK || How == SIG_SETMASK);
+  if (MayBlock && internal_sigismember(Mask, SIGSEGV)) {
+    VPrintf(1, "%s: removing SIGSEGV from the blocked set\n", __FUNCTION__);
+    internal_sigdelset(Mask, SIGSEGV);
+  }
+  return true;
+}
+
+// Restores the default disposition (SIG_DFL) for SigNum so an unhandled
+// fault terminates the process instead of re-entering our handler forever.
+static void reinstateDefaultHandler(int SigNum) {
+  __sanitizer_sigaction DefAct;
+  internal_memset(&DefAct, 0, sizeof(DefAct));
+  DefAct.sigaction = (void (*)(int, void*, void*)) SIG_DFL;
+  int Rc = internal_sigaction(SigNum, &DefAct, nullptr);
+  CHECK(Rc == 0);
+  VPrintf(1, "Unregistered for %d handler\n", SigNum);
+}
+
+// SIGSEGV handler installed by registerMemoryFaultHandler().  A fault whose
+// address lies in the shadow region means the tool touched a not-yet-
+// accessible shadow page: we make that page read-write and return, letting
+// the faulting shadow access retry.  Any other fault is forwarded to the
+// app's recorded handler, or, when the app has none, the default
+// disposition is reinstated so the process dies rather than looping.
+// If this is a shadow fault, we handle it here; otherwise, we pass it to the
+// app to handle it just as the app would do without our tool in place.
+static void handleMemoryFault(int SigNum, void *Info, void *Ctx) {
+  if (SigNum == SIGSEGV) {
+    // We rely on si_addr being filled in (thus we do not support old kernels).
+    siginfo_t *SigInfo = (siginfo_t *)Info;
+    uptr Addr = (uptr)SigInfo->si_addr;
+    if (isShadowMem(Addr)) {
+      VPrintf(3, "Shadow fault @%p\n", Addr);
+      // Lazily map in the faulting shadow page, one page at a time.
+      uptr PageSize = GetPageSizeCached();
+      int Res = internal_mprotect((void *)RoundDownTo(Addr, PageSize),
+                                  PageSize, PROT_READ|PROT_WRITE);
+      CHECK(Res == 0);
+    } else if (AppSigAct.sigaction) {
+      // FIXME: For simplicity we ignore app options including its signal stack
+      // (we just use ours) and all the delivery flags.
+      AppSigAct.sigaction(SigNum, Info, Ctx);
+    } else {
+      // Crash instead of spinning with infinite faults.
+      reinstateDefaultHandler(SigNum);
+    }
+  } else
+    UNREACHABLE("signal not registered");
+}
+
+// Installs handleMemoryFault as the process-wide SIGSEGV handler (saving
+// the app's prior disposition into AppSigAct for chaining) and ensures
+// SIGSEGV is not blocked in the current thread.
+void registerMemoryFaultHandler() {
+  // We do not use an alternate signal stack, as doing so would require
+  // setting it up for each app thread.
+  // FIXME: This could result in problems with emulating the app's signal
+  // handling if the app relies on an alternate stack for SIGSEGV.
+
+  // We require that SIGSEGV is not blocked.  We use a sigprocmask
+  // interceptor to ensure that in the future.  Here we ensure it for
+  // the current thread.  We assume there are no other threads at this
+  // point during initialization, or that at least they do not block
+  // SIGSEGV.
+  // Fix: the previous code issued SIG_BLOCK with an empty set, which is a
+  // no-op and never unblocked anything.  Instead, query the current mask
+  // and clear SIGSEGV from it if present.
+  __sanitizer_sigset_t SigSet;
+  internal_sigprocmask(SIG_BLOCK, nullptr, &SigSet); // Query current mask.
+  if (internal_sigismember(&SigSet, SIGSEGV)) {
+    internal_sigdelset(&SigSet, SIGSEGV);
+    internal_sigprocmask(SIG_SETMASK, &SigSet, nullptr);
+  }
+
+  __sanitizer_sigaction SigAct;
+  internal_memset(&SigAct, 0, sizeof(SigAct));
+  SigAct.sigaction = handleMemoryFault;
+  // We want to handle nested signals b/c we need to handle a
+  // shadow fault in an app signal handler.
+  SigAct.sa_flags = SA_SIGINFO | SA_NODEFER;
+  int Res = internal_sigaction(SIGSEGV, &SigAct, &AppSigAct);
+  CHECK(Res == 0);
+  VPrintf(1, "Registered for SIGSEGV handler\n");
+}
+
+} // namespace __esan