Diffstat (limited to 'llvm/lib/Transforms/Instrumentation')
-rw-r--r--  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp  3337
-rw-r--r--  llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp  248
-rw-r--r--  llvm/lib/Transforms/Instrumentation/CFGMST.h  288
-rw-r--r--  llvm/lib/Transforms/Instrumentation/CGProfile.cpp  98
-rw-r--r--  llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp  2110
-rw-r--r--  llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp  1778
-rw-r--r--  llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp  1229
-rw-r--r--  llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp  1521
-rw-r--r--  llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp  443
-rw-r--r--  llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp  212
-rw-r--r--  llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp  1048
-rw-r--r--  llvm/lib/Transforms/Instrumentation/Instrumentation.cpp  128
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h  109
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp  4602
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp  1814
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp  452
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp  357
-rw-r--r--  llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp  947
-rw-r--r--  llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp  735
-rw-r--r--  llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp  78
-rw-r--r--  llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h  79
-rw-r--r--  llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc  75
22 files changed, 21688 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
new file mode 100644
index 000000000000..d92ee11c2e1a
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -0,0 +1,3337 @@
+//===- AddressSanitizer.cpp - memory error detector -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+// Details of the algorithm:
+// https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iomanip>
+#include <limits>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <tuple>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "asan"
+
+static const uint64_t kDefaultShadowScale = 3;
+static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
+static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
+static const uint64_t kDynamicShadowSentinel =
+ std::numeric_limits<uint64_t>::max();
+static const uint64_t kSmallX86_64ShadowOffsetBase = 0x7FFFFFFF; // < 2G.
+static const uint64_t kSmallX86_64ShadowOffsetAlignMask = ~0xFFFULL;
+static const uint64_t kLinuxKasan_ShadowOffset64 = 0xdffffc0000000000;
+static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 44;
+static const uint64_t kSystemZ_ShadowOffset64 = 1ULL << 52;
+static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000;
+static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37;
+static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36;
+static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;
+static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46;
+static const uint64_t kNetBSD_ShadowOffset32 = 1ULL << 30;
+static const uint64_t kNetBSD_ShadowOffset64 = 1ULL << 46;
+static const uint64_t kNetBSDKasan_ShadowOffset64 = 0xdfff900000000000;
+static const uint64_t kPS4CPU_ShadowOffset64 = 1ULL << 40;
+static const uint64_t kWindowsShadowOffset32 = 3ULL << 28;
+static const uint64_t kEmscriptenShadowOffset = 0;
+
+static const uint64_t kMyriadShadowScale = 5;
+static const uint64_t kMyriadMemoryOffset32 = 0x80000000ULL;
+static const uint64_t kMyriadMemorySize32 = 0x20000000ULL;
+static const uint64_t kMyriadTagShift = 29;
+static const uint64_t kMyriadDDRTag = 4;
+static const uint64_t kMyriadCacheBitMask32 = 0x40000000ULL;
+
+// The shadow memory space is dynamically allocated.
+static const uint64_t kWindowsShadowOffset64 = kDynamicShadowSentinel;
+
+static const size_t kMinStackMallocSize = 1 << 6; // 64B
+static const size_t kMaxStackMallocSize = 1 << 16; // 64K
+static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
+static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
+
+static const char *const kAsanModuleCtorName = "asan.module_ctor";
+static const char *const kAsanModuleDtorName = "asan.module_dtor";
+static const uint64_t kAsanCtorAndDtorPriority = 1;
+// On Emscripten, the system needs more than one priority for constructors.
+static const uint64_t kAsanEmscriptenCtorAndDtorPriority = 50;
+static const char *const kAsanReportErrorTemplate = "__asan_report_";
+static const char *const kAsanRegisterGlobalsName = "__asan_register_globals";
+static const char *const kAsanUnregisterGlobalsName =
+ "__asan_unregister_globals";
+static const char *const kAsanRegisterImageGlobalsName =
+ "__asan_register_image_globals";
+static const char *const kAsanUnregisterImageGlobalsName =
+ "__asan_unregister_image_globals";
+static const char *const kAsanRegisterElfGlobalsName =
+ "__asan_register_elf_globals";
+static const char *const kAsanUnregisterElfGlobalsName =
+ "__asan_unregister_elf_globals";
+static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
+static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
+static const char *const kAsanInitName = "__asan_init";
+static const char *const kAsanVersionCheckNamePrefix =
+ "__asan_version_mismatch_check_v";
+static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp";
+static const char *const kAsanPtrSub = "__sanitizer_ptr_sub";
+static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return";
+static const int kMaxAsanStackMallocSizeClass = 10;
+static const char *const kAsanStackMallocNameTemplate = "__asan_stack_malloc_";
+static const char *const kAsanStackFreeNameTemplate = "__asan_stack_free_";
+static const char *const kAsanGenPrefix = "___asan_gen_";
+static const char *const kODRGenPrefix = "__odr_asan_gen_";
+static const char *const kSanCovGenPrefix = "__sancov_gen_";
+static const char *const kAsanSetShadowPrefix = "__asan_set_shadow_";
+static const char *const kAsanPoisonStackMemoryName =
+ "__asan_poison_stack_memory";
+static const char *const kAsanUnpoisonStackMemoryName =
+ "__asan_unpoison_stack_memory";
+
+// The ASan version script has an __asan_* wildcard. The triple underscore
+// prevents a linker (gold) warning about attempting to export a local symbol.
+static const char *const kAsanGlobalsRegisteredFlagName =
+ "___asan_globals_registered";
+
+static const char *const kAsanOptionDetectUseAfterReturn =
+ "__asan_option_detect_stack_use_after_return";
+
+static const char *const kAsanShadowMemoryDynamicAddress =
+ "__asan_shadow_memory_dynamic_address";
+
+static const char *const kAsanAllocaPoison = "__asan_alloca_poison";
+static const char *const kAsanAllocasUnpoison = "__asan_allocas_unpoison";
+
+// Access sizes are powers of two: 1, 2, 4, 8, 16.
+static const size_t kNumberOfAccessSizes = 5;
+
+static const unsigned kAllocaRzSize = 32;
+
+// Command-line flags.
+
+static cl::opt<bool> ClEnableKasan(
+ "asan-kernel", cl::desc("Enable KernelAddressSanitizer instrumentation"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClRecover(
+ "asan-recover",
+ cl::desc("Enable recovery mode (continue-after-error)."),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClInsertVersionCheck(
+ "asan-guard-against-version-mismatch",
+ cl::desc("Guard against compiler/runtime version mismatch."),
+ cl::Hidden, cl::init(true));
+
+// This flag may need to be replaced with -f[no-]asan-reads.
+static cl::opt<bool> ClInstrumentReads("asan-instrument-reads",
+ cl::desc("instrument read instructions"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClInstrumentWrites(
+ "asan-instrument-writes", cl::desc("instrument write instructions"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClInstrumentAtomics(
+ "asan-instrument-atomics",
+ cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
+ cl::init(true));
+
+static cl::opt<bool> ClAlwaysSlowPath(
+ "asan-always-slow-path",
+ cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> ClForceDynamicShadow(
+ "asan-force-dynamic-shadow",
+ cl::desc("Load shadow address into a local variable for each function"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+ ClWithIfunc("asan-with-ifunc",
+ cl::desc("Access dynamic shadow through an ifunc global on "
+ "platforms that support this"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClWithIfuncSuppressRemat(
+ "asan-with-ifunc-suppress-remat",
+ cl::desc("Suppress rematerialization of dynamic shadow address by passing "
+ "it through inline asm in prologue."),
+ cl::Hidden, cl::init(true));
+
+// This flag limits the number of instructions to be instrumented
+// in any given BB. Normally, this should be set to unlimited (INT_MAX),
+// but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporarily
+// set it to 10000.
+static cl::opt<int> ClMaxInsnsToInstrumentPerBB(
+ "asan-max-ins-per-bb", cl::init(10000),
+ cl::desc("maximal number of instructions to instrument in any given BB"),
+ cl::Hidden);
+
+// This flag may need to be replaced with -f[no]asan-stack.
+static cl::opt<bool> ClStack("asan-stack", cl::desc("Handle stack memory"),
+ cl::Hidden, cl::init(true));
+static cl::opt<uint32_t> ClMaxInlinePoisoningSize(
+ "asan-max-inline-poisoning-size",
+ cl::desc(
+ "Inline shadow poisoning for blocks up to the given size in bytes."),
+ cl::Hidden, cl::init(64));
+
+static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
+ cl::desc("Check stack-use-after-return"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClRedzoneByvalArgs("asan-redzone-byval-args",
+ cl::desc("Create redzones for byval "
+ "arguments (extra copy "
+ "required)"), cl::Hidden,
+ cl::init(true));
+
+static cl::opt<bool> ClUseAfterScope("asan-use-after-scope",
+ cl::desc("Check stack-use-after-scope"),
+ cl::Hidden, cl::init(false));
+
+// This flag may need to be replaced with -f[no]asan-globals.
+static cl::opt<bool> ClGlobals("asan-globals",
+ cl::desc("Handle global objects"), cl::Hidden,
+ cl::init(true));
+
+static cl::opt<bool> ClInitializers("asan-initialization-order",
+ cl::desc("Handle C++ initializer order"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClInvalidPointerPairs(
+ "asan-detect-invalid-pointer-pair",
+ cl::desc("Instrument <, <=, >, >=, - with pointer operands"), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> ClInvalidPointerCmp(
+ "asan-detect-invalid-pointer-cmp",
+ cl::desc("Instrument <, <=, >, >= with pointer operands"), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> ClInvalidPointerSub(
+ "asan-detect-invalid-pointer-sub",
+ cl::desc("Instrument - operations with pointer operands"), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<unsigned> ClRealignStack(
+ "asan-realign-stack",
+ cl::desc("Realign stack to the value of this flag (power of two)"),
+ cl::Hidden, cl::init(32));
+
+static cl::opt<int> ClInstrumentationWithCallsThreshold(
+ "asan-instrumentation-with-call-threshold",
+ cl::desc(
+ "If the function being instrumented contains more than "
+ "this number of memory accesses, use callbacks instead of "
+ "inline checks (-1 means never use callbacks)."),
+ cl::Hidden, cl::init(7000));
+
+static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
+ "asan-memory-access-callback-prefix",
+ cl::desc("Prefix for memory access callbacks"), cl::Hidden,
+ cl::init("__asan_"));
+
+static cl::opt<bool>
+ ClInstrumentDynamicAllocas("asan-instrument-dynamic-allocas",
+ cl::desc("instrument dynamic allocas"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClSkipPromotableAllocas(
+ "asan-skip-promotable-allocas",
+ cl::desc("Do not instrument promotable allocas"), cl::Hidden,
+ cl::init(true));
+
+// These flags allow changing the shadow mapping.
+// The shadow mapping looks like
+// Shadow = (Mem >> scale) + offset
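+// For example, with the defaults defined above (Scale = kDefaultShadowScale = 3
+// and Offset = kDefaultShadowOffset64 = 1ULL << 44), each shadow byte describes
+// an 8-byte granule of application memory, and the shadow byte for an address
+// Mem lives at (Mem >> 3) + 0x0000100000000000.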
+
+static cl::opt<int> ClMappingScale("asan-mapping-scale",
+ cl::desc("scale of asan shadow mapping"),
+ cl::Hidden, cl::init(0));
+
+static cl::opt<uint64_t>
+ ClMappingOffset("asan-mapping-offset",
+ cl::desc("offset of asan shadow mapping [EXPERIMENTAL]"),
+ cl::Hidden, cl::init(0));
+
+// Optimization flags. Not user visible, used mostly for testing
+// and benchmarking the tool.
+
+static cl::opt<bool> ClOpt("asan-opt", cl::desc("Optimize instrumentation"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClOptSameTemp(
+ "asan-opt-same-temp", cl::desc("Instrument the same temp just once"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClOptGlobals("asan-opt-globals",
+ cl::desc("Don't instrument scalar globals"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClOptStack(
+ "asan-opt-stack", cl::desc("Don't instrument scalar stack variables"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClDynamicAllocaStack(
+ "asan-stack-dynamic-alloca",
+ cl::desc("Use dynamic alloca to represent stack variables"), cl::Hidden,
+ cl::init(true));
+
+static cl::opt<uint32_t> ClForceExperiment(
+ "asan-force-experiment",
+ cl::desc("Force optimization experiment (for testing)"), cl::Hidden,
+ cl::init(0));
+
+static cl::opt<bool>
+ ClUsePrivateAlias("asan-use-private-alias",
+ cl::desc("Use private aliases for global variables"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+ ClUseOdrIndicator("asan-use-odr-indicator",
+ cl::desc("Use odr indicators to improve ODR reporting"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+ ClUseGlobalsGC("asan-globals-live-support",
+ cl::desc("Use linker features to support dead "
+ "code stripping of globals"),
+ cl::Hidden, cl::init(true));
+
+// This is on by default even though there is a bug in gold:
+// https://sourceware.org/bugzilla/show_bug.cgi?id=19002
+static cl::opt<bool>
+ ClWithComdat("asan-with-comdat",
+ cl::desc("Place ASan constructors in comdat sections"),
+ cl::Hidden, cl::init(true));
+
+// Debug flags.
+
+static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
+ cl::init(0));
+
+static cl::opt<int> ClDebugStack("asan-debug-stack", cl::desc("debug stack"),
+ cl::Hidden, cl::init(0));
+
+static cl::opt<std::string> ClDebugFunc("asan-debug-func", cl::Hidden,
+ cl::desc("Debug func"));
+
+static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"),
+ cl::Hidden, cl::init(-1));
+
+static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug max inst"),
+ cl::Hidden, cl::init(-1));
+
+STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
+STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
+STATISTIC(NumOptimizedAccessesToGlobalVar,
+ "Number of optimized accesses to global vars");
+STATISTIC(NumOptimizedAccessesToStackVar,
+ "Number of optimized accesses to stack vars");
+
+namespace {
+
+/// This struct defines the shadow mapping using the rule:
+/// shadow = (mem >> Scale) ADD-or-OR Offset.
+/// If InGlobal is true, then
+/// extern char __asan_shadow[];
+/// shadow = (mem >> Scale) + &__asan_shadow
+struct ShadowMapping {
+ int Scale;
+ uint64_t Offset;
+ bool OrShadowOffset;
+ bool InGlobal;
+};
+
+} // end anonymous namespace
+
+static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
+ bool IsKasan) {
+ bool IsAndroid = TargetTriple.isAndroid();
+ bool IsIOS = TargetTriple.isiOS() || TargetTriple.isWatchOS();
+ bool IsFreeBSD = TargetTriple.isOSFreeBSD();
+ bool IsNetBSD = TargetTriple.isOSNetBSD();
+ bool IsPS4CPU = TargetTriple.isPS4CPU();
+ bool IsLinux = TargetTriple.isOSLinux();
+ bool IsPPC64 = TargetTriple.getArch() == Triple::ppc64 ||
+ TargetTriple.getArch() == Triple::ppc64le;
+ bool IsSystemZ = TargetTriple.getArch() == Triple::systemz;
+ bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
+ bool IsMIPS32 = TargetTriple.isMIPS32();
+ bool IsMIPS64 = TargetTriple.isMIPS64();
+ bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb();
+ bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64;
+ bool IsWindows = TargetTriple.isOSWindows();
+ bool IsFuchsia = TargetTriple.isOSFuchsia();
+ bool IsMyriad = TargetTriple.getVendor() == llvm::Triple::Myriad;
+ bool IsEmscripten = TargetTriple.isOSEmscripten();
+
+ ShadowMapping Mapping;
+
+ Mapping.Scale = IsMyriad ? kMyriadShadowScale : kDefaultShadowScale;
+ if (ClMappingScale.getNumOccurrences() > 0) {
+ Mapping.Scale = ClMappingScale;
+ }
+
+ if (LongSize == 32) {
+ if (IsAndroid)
+ Mapping.Offset = kDynamicShadowSentinel;
+ else if (IsMIPS32)
+ Mapping.Offset = kMIPS32_ShadowOffset32;
+ else if (IsFreeBSD)
+ Mapping.Offset = kFreeBSD_ShadowOffset32;
+ else if (IsNetBSD)
+ Mapping.Offset = kNetBSD_ShadowOffset32;
+ else if (IsIOS)
+ Mapping.Offset = kDynamicShadowSentinel;
+ else if (IsWindows)
+ Mapping.Offset = kWindowsShadowOffset32;
+ else if (IsEmscripten)
+ Mapping.Offset = kEmscriptenShadowOffset;
+ else if (IsMyriad) {
+ uint64_t ShadowOffset = (kMyriadMemoryOffset32 + kMyriadMemorySize32 -
+ (kMyriadMemorySize32 >> Mapping.Scale));
+ Mapping.Offset = ShadowOffset - (kMyriadMemoryOffset32 >> Mapping.Scale);
+ }
+ else
+ Mapping.Offset = kDefaultShadowOffset32;
+ } else { // LongSize == 64
+ // Fuchsia is always PIE, which means that the beginning of the address
+ // space is always available.
+ if (IsFuchsia)
+ Mapping.Offset = 0;
+ else if (IsPPC64)
+ Mapping.Offset = kPPC64_ShadowOffset64;
+ else if (IsSystemZ)
+ Mapping.Offset = kSystemZ_ShadowOffset64;
+ else if (IsFreeBSD && !IsMIPS64)
+ Mapping.Offset = kFreeBSD_ShadowOffset64;
+ else if (IsNetBSD) {
+ if (IsKasan)
+ Mapping.Offset = kNetBSDKasan_ShadowOffset64;
+ else
+ Mapping.Offset = kNetBSD_ShadowOffset64;
+ } else if (IsPS4CPU)
+ Mapping.Offset = kPS4CPU_ShadowOffset64;
+ else if (IsLinux && IsX86_64) {
+ if (IsKasan)
+ Mapping.Offset = kLinuxKasan_ShadowOffset64;
+ else
+ Mapping.Offset = (kSmallX86_64ShadowOffsetBase &
+ (kSmallX86_64ShadowOffsetAlignMask << Mapping.Scale));
+ } else if (IsWindows && IsX86_64) {
+ Mapping.Offset = kWindowsShadowOffset64;
+ } else if (IsMIPS64)
+ Mapping.Offset = kMIPS64_ShadowOffset64;
+ else if (IsIOS)
+ Mapping.Offset = kDynamicShadowSentinel;
+ else if (IsAArch64)
+ Mapping.Offset = kAArch64_ShadowOffset64;
+ else
+ Mapping.Offset = kDefaultShadowOffset64;
+ }
+
+ if (ClForceDynamicShadow) {
+ Mapping.Offset = kDynamicShadowSentinel;
+ }
+
+ if (ClMappingOffset.getNumOccurrences() > 0) {
+ Mapping.Offset = ClMappingOffset;
+ }
+
+// OR-ing the shadow offset is more efficient (at least on x86) if the offset
+// is a power of two, but on ppc64 we have to use add since the shadow
+// offset is not necessarily 1/8-th of the address space. On SystemZ,
+ // we could OR the constant in a single instruction, but it's more
+ // efficient to load it once and use indexed addressing.
+ Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ && !IsPS4CPU &&
+ !(Mapping.Offset & (Mapping.Offset - 1)) &&
+ Mapping.Offset != kDynamicShadowSentinel;
+ bool IsAndroidWithIfuncSupport =
+ IsAndroid && !TargetTriple.isAndroidVersionLT(21);
+ Mapping.InGlobal = ClWithIfunc && IsAndroidWithIfuncSupport && IsArmOrThumb;
+
+ return Mapping;
+}
+
+static size_t RedzoneSizeForScale(int MappingScale) {
+ // Redzone used for stack and globals is at least 32 bytes.
+ // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
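+ // E.g. with the default scale of 3 this returns max(32, 1 << 3) = 32 bytes,
+ // while a scale of 7 gives max(32, 128) = 128 bytes.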
+ return std::max(32U, 1U << MappingScale);
+}
+
+static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) {
+ if (TargetTriple.isOSEmscripten()) {
+ return kAsanEmscriptenCtorAndDtorPriority;
+ } else {
+ return kAsanCtorAndDtorPriority;
+ }
+}
+
+namespace {
+
+/// Module analysis for getting various metadata about the module.
+class ASanGlobalsMetadataWrapperPass : public ModulePass {
+public:
+ static char ID;
+
+ ASanGlobalsMetadataWrapperPass() : ModulePass(ID) {
+ initializeASanGlobalsMetadataWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override {
+ GlobalsMD = GlobalsMetadata(M);
+ return false;
+ }
+
+ StringRef getPassName() const override {
+ return "ASanGlobalsMetadataWrapperPass";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+ GlobalsMetadata &getGlobalsMD() { return GlobalsMD; }
+
+private:
+ GlobalsMetadata GlobalsMD;
+};
+
+char ASanGlobalsMetadataWrapperPass::ID = 0;
+
+/// AddressSanitizer: instrument the code in module to find memory bugs.
+struct AddressSanitizer {
+ AddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD,
+ bool CompileKernel = false, bool Recover = false,
+ bool UseAfterScope = false)
+ : UseAfterScope(UseAfterScope || ClUseAfterScope), GlobalsMD(*GlobalsMD) {
+ this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
+ this->CompileKernel =
+ ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan : CompileKernel;
+
+ C = &(M.getContext());
+ LongSize = M.getDataLayout().getPointerSizeInBits();
+ IntptrTy = Type::getIntNTy(*C, LongSize);
+ TargetTriple = Triple(M.getTargetTriple());
+
+ Mapping = getShadowMapping(TargetTriple, LongSize, this->CompileKernel);
+ }
+
+ uint64_t getAllocaSizeInBytes(const AllocaInst &AI) const {
+ uint64_t ArraySize = 1;
+ if (AI.isArrayAllocation()) {
+ const ConstantInt *CI = dyn_cast<ConstantInt>(AI.getArraySize());
+ assert(CI && "non-constant array size");
+ ArraySize = CI->getZExtValue();
+ }
+ Type *Ty = AI.getAllocatedType();
+ uint64_t SizeInBytes =
+ AI.getModule()->getDataLayout().getTypeAllocSize(Ty);
+ return SizeInBytes * ArraySize;
+ }
+
+ /// Check if we want (and can) handle this alloca.
+ bool isInterestingAlloca(const AllocaInst &AI);
+
+ /// If it is an interesting memory access, return the PointerOperand
+ /// and set IsWrite/Alignment. Otherwise return nullptr.
+ /// MaybeMask is an output parameter for the mask Value, if we're looking at a
+ /// masked load/store.
+ Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
+ uint64_t *TypeSize, unsigned *Alignment,
+ Value **MaybeMask = nullptr);
+
+ void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I,
+ bool UseCalls, const DataLayout &DL);
+ void instrumentPointerComparisonOrSubtraction(Instruction *I);
+ void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
+ Value *Addr, uint32_t TypeSize, bool IsWrite,
+ Value *SizeArgument, bool UseCalls, uint32_t Exp);
+ void instrumentUnusualSizeOrAlignment(Instruction *I,
+ Instruction *InsertBefore, Value *Addr,
+ uint32_t TypeSize, bool IsWrite,
+ Value *SizeArgument, bool UseCalls,
+ uint32_t Exp);
+ Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
+ Value *ShadowValue, uint32_t TypeSize);
+ Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
+ bool IsWrite, size_t AccessSizeIndex,
+ Value *SizeArgument, uint32_t Exp);
+ void instrumentMemIntrinsic(MemIntrinsic *MI);
+ Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
+ bool instrumentFunction(Function &F, const TargetLibraryInfo *TLI);
+ bool maybeInsertAsanInitAtFunctionEntry(Function &F);
+ void maybeInsertDynamicShadowAtFunctionEntry(Function &F);
+ void markEscapedLocalAllocas(Function &F);
+
+private:
+ friend struct FunctionStackPoisoner;
+
+ void initializeCallbacks(Module &M);
+
+ bool LooksLikeCodeInBug11395(Instruction *I);
+ bool GlobalIsLinkerInitialized(GlobalVariable *G);
+ bool isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, Value *Addr,
+ uint64_t TypeSize) const;
+
+ /// Helper to clean up per-function state.
+ struct FunctionStateRAII {
+ AddressSanitizer *Pass;
+
+ FunctionStateRAII(AddressSanitizer *Pass) : Pass(Pass) {
+ assert(Pass->ProcessedAllocas.empty() &&
+ "last pass forgot to clear cache");
+ assert(!Pass->LocalDynamicShadow);
+ }
+
+ ~FunctionStateRAII() {
+ Pass->LocalDynamicShadow = nullptr;
+ Pass->ProcessedAllocas.clear();
+ }
+ };
+
+ LLVMContext *C;
+ Triple TargetTriple;
+ int LongSize;
+ bool CompileKernel;
+ bool Recover;
+ bool UseAfterScope;
+ Type *IntptrTy;
+ ShadowMapping Mapping;
+ FunctionCallee AsanHandleNoReturnFunc;
+ FunctionCallee AsanPtrCmpFunction, AsanPtrSubFunction;
+ Constant *AsanShadowGlobal;
+
+ // These arrays are indexed by AccessIsWrite, Experiment and log2(AccessSize).
+ FunctionCallee AsanErrorCallback[2][2][kNumberOfAccessSizes];
+ FunctionCallee AsanMemoryAccessCallback[2][2][kNumberOfAccessSizes];
+
+ // These arrays are indexed by AccessIsWrite and Experiment.
+ FunctionCallee AsanErrorCallbackSized[2][2];
+ FunctionCallee AsanMemoryAccessCallbackSized[2][2];
+
+ FunctionCallee AsanMemmove, AsanMemcpy, AsanMemset;
+ InlineAsm *EmptyAsm;
+ Value *LocalDynamicShadow = nullptr;
+ const GlobalsMetadata &GlobalsMD;
+ DenseMap<const AllocaInst *, bool> ProcessedAllocas;
+};
+
+class AddressSanitizerLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ explicit AddressSanitizerLegacyPass(bool CompileKernel = false,
+ bool Recover = false,
+ bool UseAfterScope = false)
+ : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover),
+ UseAfterScope(UseAfterScope) {
+ initializeAddressSanitizerLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "AddressSanitizerFunctionPass";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ASanGlobalsMetadataWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override {
+ GlobalsMetadata &GlobalsMD =
+ getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD();
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ AddressSanitizer ASan(*F.getParent(), &GlobalsMD, CompileKernel, Recover,
+ UseAfterScope);
+ return ASan.instrumentFunction(F, TLI);
+ }
+
+private:
+ bool CompileKernel;
+ bool Recover;
+ bool UseAfterScope;
+};
+
+class ModuleAddressSanitizer {
+public:
+ ModuleAddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD,
+ bool CompileKernel = false, bool Recover = false,
+ bool UseGlobalsGC = true, bool UseOdrIndicator = false)
+ : GlobalsMD(*GlobalsMD), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC),
+ // Enable aliases as they should have no downside with ODR indicators.
+ UsePrivateAlias(UseOdrIndicator || ClUsePrivateAlias),
+ UseOdrIndicator(UseOdrIndicator || ClUseOdrIndicator),
+ // Not a typo: ClWithComdat is almost completely pointless without
+ // ClUseGlobalsGC (because then it only works on modules without
+ // globals, which are rare); it is a prerequisite for ClUseGlobalsGC;
+ // and both suffer from gold PR19002 for which UseGlobalsGC constructor
+ // argument is designed as workaround. Therefore, disable both
+ // ClWithComdat and ClUseGlobalsGC unless the frontend says it's ok to
+ // do globals-gc.
+ UseCtorComdat(UseGlobalsGC && ClWithComdat) {
+ this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
+ this->CompileKernel =
+ ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan : CompileKernel;
+
+ C = &(M.getContext());
+ int LongSize = M.getDataLayout().getPointerSizeInBits();
+ IntptrTy = Type::getIntNTy(*C, LongSize);
+ TargetTriple = Triple(M.getTargetTriple());
+ Mapping = getShadowMapping(TargetTriple, LongSize, this->CompileKernel);
+ }
+
+ bool instrumentModule(Module &);
+
+private:
+ void initializeCallbacks(Module &M);
+
+ bool InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat);
+ void InstrumentGlobalsCOFF(IRBuilder<> &IRB, Module &M,
+ ArrayRef<GlobalVariable *> ExtendedGlobals,
+ ArrayRef<Constant *> MetadataInitializers);
+ void InstrumentGlobalsELF(IRBuilder<> &IRB, Module &M,
+ ArrayRef<GlobalVariable *> ExtendedGlobals,
+ ArrayRef<Constant *> MetadataInitializers,
+ const std::string &UniqueModuleId);
+ void InstrumentGlobalsMachO(IRBuilder<> &IRB, Module &M,
+ ArrayRef<GlobalVariable *> ExtendedGlobals,
+ ArrayRef<Constant *> MetadataInitializers);
+ void
+ InstrumentGlobalsWithMetadataArray(IRBuilder<> &IRB, Module &M,
+ ArrayRef<GlobalVariable *> ExtendedGlobals,
+ ArrayRef<Constant *> MetadataInitializers);
+
+ GlobalVariable *CreateMetadataGlobal(Module &M, Constant *Initializer,
+ StringRef OriginalName);
+ void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata,
+ StringRef InternalSuffix);
+ IRBuilder<> CreateAsanModuleDtor(Module &M);
+
+ bool ShouldInstrumentGlobal(GlobalVariable *G);
+ bool ShouldUseMachOGlobalsSection() const;
+ StringRef getGlobalMetadataSection() const;
+ void poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName);
+ void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName);
+ size_t MinRedzoneSizeForGlobal() const {
+ return RedzoneSizeForScale(Mapping.Scale);
+ }
+ int GetAsanVersion(const Module &M) const;
+
+ const GlobalsMetadata &GlobalsMD;
+ bool CompileKernel;
+ bool Recover;
+ bool UseGlobalsGC;
+ bool UsePrivateAlias;
+ bool UseOdrIndicator;
+ bool UseCtorComdat;
+ Type *IntptrTy;
+ LLVMContext *C;
+ Triple TargetTriple;
+ ShadowMapping Mapping;
+ FunctionCallee AsanPoisonGlobals;
+ FunctionCallee AsanUnpoisonGlobals;
+ FunctionCallee AsanRegisterGlobals;
+ FunctionCallee AsanUnregisterGlobals;
+ FunctionCallee AsanRegisterImageGlobals;
+ FunctionCallee AsanUnregisterImageGlobals;
+ FunctionCallee AsanRegisterElfGlobals;
+ FunctionCallee AsanUnregisterElfGlobals;
+
+ Function *AsanCtorFunction = nullptr;
+ Function *AsanDtorFunction = nullptr;
+};
+
+class ModuleAddressSanitizerLegacyPass : public ModulePass {
+public:
+ static char ID;
+
+ explicit ModuleAddressSanitizerLegacyPass(bool CompileKernel = false,
+ bool Recover = false,
+ bool UseGlobalGC = true,
+ bool UseOdrIndicator = false)
+ : ModulePass(ID), CompileKernel(CompileKernel), Recover(Recover),
+ UseGlobalGC(UseGlobalGC), UseOdrIndicator(UseOdrIndicator) {
+ initializeModuleAddressSanitizerLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "ModuleAddressSanitizer"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ASanGlobalsMetadataWrapperPass>();
+ }
+
+ bool runOnModule(Module &M) override {
+ GlobalsMetadata &GlobalsMD =
+ getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD();
+ ModuleAddressSanitizer ASanModule(M, &GlobalsMD, CompileKernel, Recover,
+ UseGlobalGC, UseOdrIndicator);
+ return ASanModule.instrumentModule(M);
+ }
+
+private:
+ bool CompileKernel;
+ bool Recover;
+ bool UseGlobalGC;
+ bool UseOdrIndicator;
+};
+
+// Stack poisoning does not play well with exception handling.
+// When an exception is thrown, we essentially bypass the code
+// that unpoisons the stack. This is why the run-time library has
+// to intercept __cxa_throw (as well as longjmp, etc) and unpoison the entire
+// stack in the interceptor. This, however, does not work inside the
+// actual function which catches the exception, most likely because the
+// compiler hoists the load of the shadow value somewhere too high.
+// This causes asan to report a non-existent bug on 453.povray.
+// It sounds like an LLVM bug.
+struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
+ Function &F;
+ AddressSanitizer &ASan;
+ DIBuilder DIB;
+ LLVMContext *C;
+ Type *IntptrTy;
+ Type *IntptrPtrTy;
+ ShadowMapping Mapping;
+
+ SmallVector<AllocaInst *, 16> AllocaVec;
+ SmallVector<AllocaInst *, 16> StaticAllocasToMoveUp;
+ SmallVector<Instruction *, 8> RetVec;
+ unsigned StackAlignment;
+
+ FunctionCallee AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1],
+ AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
+ FunctionCallee AsanSetShadowFunc[0x100] = {};
+ FunctionCallee AsanPoisonStackMemoryFunc, AsanUnpoisonStackMemoryFunc;
+ FunctionCallee AsanAllocaPoisonFunc, AsanAllocasUnpoisonFunc;
+
+ // Stores the location and arguments of a poisoning/unpoisoning call for an alloca.
+ struct AllocaPoisonCall {
+ IntrinsicInst *InsBefore;
+ AllocaInst *AI;
+ uint64_t Size;
+ bool DoPoison;
+ };
+ SmallVector<AllocaPoisonCall, 8> DynamicAllocaPoisonCallVec;
+ SmallVector<AllocaPoisonCall, 8> StaticAllocaPoisonCallVec;
+ bool HasUntracedLifetimeIntrinsic = false;
+
+ SmallVector<AllocaInst *, 1> DynamicAllocaVec;
+ SmallVector<IntrinsicInst *, 1> StackRestoreVec;
+ AllocaInst *DynamicAllocaLayout = nullptr;
+ IntrinsicInst *LocalEscapeCall = nullptr;
+
+ // Maps a Value to the AllocaInst from which the Value originated.
+ using AllocaForValueMapTy = DenseMap<Value *, AllocaInst *>;
+ AllocaForValueMapTy AllocaForValue;
+
+ bool HasNonEmptyInlineAsm = false;
+ bool HasReturnsTwiceCall = false;
+ std::unique_ptr<CallInst> EmptyInlineAsm;
+
+ FunctionStackPoisoner(Function &F, AddressSanitizer &ASan)
+ : F(F), ASan(ASan), DIB(*F.getParent(), /*AllowUnresolved*/ false),
+ C(ASan.C), IntptrTy(ASan.IntptrTy),
+ IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping),
+ StackAlignment(1 << Mapping.Scale),
+ EmptyInlineAsm(CallInst::Create(ASan.EmptyAsm)) {}
+
+ bool runOnFunction() {
+ if (!ClStack) return false;
+
+ if (ClRedzoneByvalArgs)
+ copyArgsPassedByValToAllocas();
+
+ // Collect alloca, ret, lifetime instructions etc.
+ for (BasicBlock *BB : depth_first(&F.getEntryBlock())) visit(*BB);
+
+ if (AllocaVec.empty() && DynamicAllocaVec.empty()) return false;
+
+ initializeCallbacks(*F.getParent());
+
+ if (HasUntracedLifetimeIntrinsic) {
+ // If there are lifetime intrinsics which couldn't be traced back to an
+ // alloca, we may not know exactly when a variable enters scope, and
+ // therefore should "fail safe" by not poisoning them.
+ StaticAllocaPoisonCallVec.clear();
+ DynamicAllocaPoisonCallVec.clear();
+ }
+
+ processDynamicAllocas();
+ processStaticAllocas();
+
+ if (ClDebugStack) {
+ LLVM_DEBUG(dbgs() << F);
+ }
+ return true;
+ }
+
+ // Arguments marked with the "byval" attribute are implicitly copied without
+ // using an alloca instruction. To produce redzones for those arguments, we
+ // copy them a second time into memory allocated with an alloca instruction.
+ void copyArgsPassedByValToAllocas();
+
+ // Finds all Alloca instructions and puts
+ // poisoned red zones around all of them.
+ // Then unpoisons everything before the function returns.
+ void processStaticAllocas();
+ void processDynamicAllocas();
+
+ void createDynamicAllocasInitStorage();
+
+ // ----------------------- Visitors.
+ /// Collect all Ret instructions.
+ void visitReturnInst(ReturnInst &RI) { RetVec.push_back(&RI); }
+
+ /// Collect all Resume instructions.
+ void visitResumeInst(ResumeInst &RI) { RetVec.push_back(&RI); }
+
+ /// Collect all CleanupReturnInst instructions.
+ void visitCleanupReturnInst(CleanupReturnInst &CRI) { RetVec.push_back(&CRI); }
+
+ void unpoisonDynamicAllocasBeforeInst(Instruction *InstBefore,
+ Value *SavedStack) {
+ IRBuilder<> IRB(InstBefore);
+ Value *DynamicAreaPtr = IRB.CreatePtrToInt(SavedStack, IntptrTy);
+ // When we insert __asan_allocas_unpoison before @llvm.stackrestore, we
+ // need to adjust the extracted SP to compute the address of the most recent
+ // alloca. We have a special @llvm.get.dynamic.area.offset intrinsic for
+ // this purpose.
+ if (!isa<ReturnInst>(InstBefore)) {
+ Function *DynamicAreaOffsetFunc = Intrinsic::getDeclaration(
+ InstBefore->getModule(), Intrinsic::get_dynamic_area_offset,
+ {IntptrTy});
+
+ Value *DynamicAreaOffset = IRB.CreateCall(DynamicAreaOffsetFunc, {});
+
+ DynamicAreaPtr = IRB.CreateAdd(IRB.CreatePtrToInt(SavedStack, IntptrTy),
+ DynamicAreaOffset);
+ }
+
+ IRB.CreateCall(
+ AsanAllocasUnpoisonFunc,
+ {IRB.CreateLoad(IntptrTy, DynamicAllocaLayout), DynamicAreaPtr});
+ }
+
+ // Unpoison dynamic alloca redzones.
+ void unpoisonDynamicAllocas() {
+ for (auto &Ret : RetVec)
+ unpoisonDynamicAllocasBeforeInst(Ret, DynamicAllocaLayout);
+
+ for (auto &StackRestoreInst : StackRestoreVec)
+ unpoisonDynamicAllocasBeforeInst(StackRestoreInst,
+ StackRestoreInst->getOperand(0));
+ }
+
+ // Deploy and poison redzones around a dynamic alloca call. To do this, we
+ // replace the call with another one with changed parameters and
+ // replace all its uses with the new address, so
+ // addr = alloca type, old_size, align
+ // is replaced by
+ // new_size = (old_size + additional_size) * sizeof(type)
+ // tmp = alloca i8, new_size, max(align, 32)
+ // addr = tmp + 32 (first 32 bytes are for the left redzone).
+ // additional_size is added so that the new allocation contains not only the
+ // requested memory, but also the left, partial and right redzones.
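+ // For illustration (assuming the default 32-byte alloca redzone and an
+ // alignment of at most 32): a 100-byte dynamic alloca would be re-allocated
+ // as roughly 32 (left) + 100 + 28 (partial) + 32 (right) = 192 bytes, with the
+ // returned address pointing 32 bytes past the start of the new allocation.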
+ void handleDynamicAllocaCall(AllocaInst *AI);
+
+ /// Collect Alloca instructions we want (and can) handle.
+ void visitAllocaInst(AllocaInst &AI) {
+ if (!ASan.isInterestingAlloca(AI)) {
+ if (AI.isStaticAlloca()) {
+ // Skip over allocas that are present *before* the first instrumented
+ // alloca; we don't want to move those around.
+ if (AllocaVec.empty())
+ return;
+
+ StaticAllocasToMoveUp.push_back(&AI);
+ }
+ return;
+ }
+
+ StackAlignment = std::max(StackAlignment, AI.getAlignment());
+ if (!AI.isStaticAlloca())
+ DynamicAllocaVec.push_back(&AI);
+ else
+ AllocaVec.push_back(&AI);
+ }
+
+ /// Collect lifetime intrinsic calls to check for use-after-scope
+ /// errors.
+ void visitIntrinsicInst(IntrinsicInst &II) {
+ Intrinsic::ID ID = II.getIntrinsicID();
+ if (ID == Intrinsic::stackrestore) StackRestoreVec.push_back(&II);
+ if (ID == Intrinsic::localescape) LocalEscapeCall = &II;
+ if (!ASan.UseAfterScope)
+ return;
+ if (!II.isLifetimeStartOrEnd())
+ return;
+ // Found lifetime intrinsic, add ASan instrumentation if necessary.
+ auto *Size = cast<ConstantInt>(II.getArgOperand(0));
+ // If size argument is undefined, don't do anything.
+ if (Size->isMinusOne()) return;
+ // Check that size doesn't saturate uint64_t and can
+ // be stored in IntptrTy.
+ const uint64_t SizeValue = Size->getValue().getLimitedValue();
+ if (SizeValue == ~0ULL ||
+ !ConstantInt::isValueValidForType(IntptrTy, SizeValue))
+ return;
+ // Find alloca instruction that corresponds to llvm.lifetime argument.
+ AllocaInst *AI =
+ llvm::findAllocaForValue(II.getArgOperand(1), AllocaForValue);
+ if (!AI) {
+ HasUntracedLifetimeIntrinsic = true;
+ return;
+ }
+ // We're interested only in allocas we can handle.
+ if (!ASan.isInterestingAlloca(*AI))
+ return;
+ bool DoPoison = (ID == Intrinsic::lifetime_end);
+ AllocaPoisonCall APC = {&II, AI, SizeValue, DoPoison};
+ if (AI->isStaticAlloca())
+ StaticAllocaPoisonCallVec.push_back(APC);
+ else if (ClInstrumentDynamicAllocas)
+ DynamicAllocaPoisonCallVec.push_back(APC);
+ }
+
+ void visitCallSite(CallSite CS) {
+ Instruction *I = CS.getInstruction();
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ HasNonEmptyInlineAsm |= CI->isInlineAsm() &&
+ !CI->isIdenticalTo(EmptyInlineAsm.get()) &&
+ I != ASan.LocalDynamicShadow;
+ HasReturnsTwiceCall |= CI->canReturnTwice();
+ }
+ }
+
+ // ---------------------- Helpers.
+ void initializeCallbacks(Module &M);
+
+ // Copies bytes from ShadowBytes into shadow memory for indexes where
+ // ShadowMask is not zero. If ShadowMask[i] is zero, we assume that
+ // ShadowBytes[i] is constantly zero and doesn't need to be overwritten.
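+ // E.g. with ShadowMask = {1, 0, 1} and ShadowBytes = {0xf1, 0x00, 0xf3}, only
+ // shadow bytes 0 and 2 are written; index 1 is left untouched.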
+ void copyToShadow(ArrayRef<uint8_t> ShadowMask, ArrayRef<uint8_t> ShadowBytes,
+ IRBuilder<> &IRB, Value *ShadowBase);
+ void copyToShadow(ArrayRef<uint8_t> ShadowMask, ArrayRef<uint8_t> ShadowBytes,
+ size_t Begin, size_t End, IRBuilder<> &IRB,
+ Value *ShadowBase);
+ void copyToShadowInline(ArrayRef<uint8_t> ShadowMask,
+ ArrayRef<uint8_t> ShadowBytes, size_t Begin,
+ size_t End, IRBuilder<> &IRB, Value *ShadowBase);
+
+ void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> &IRB, bool DoPoison);
+
+ Value *createAllocaForLayout(IRBuilder<> &IRB, const ASanStackFrameLayout &L,
+ bool Dynamic);
+ PHINode *createPHI(IRBuilder<> &IRB, Value *Cond, Value *ValueIfTrue,
+ Instruction *ThenTerm, Value *ValueIfFalse);
+};
+
+} // end anonymous namespace
+
+void LocationMetadata::parse(MDNode *MDN) {
+ assert(MDN->getNumOperands() == 3);
+ MDString *DIFilename = cast<MDString>(MDN->getOperand(0));
+ Filename = DIFilename->getString();
+ LineNo = mdconst::extract<ConstantInt>(MDN->getOperand(1))->getLimitedValue();
+ ColumnNo =
+ mdconst::extract<ConstantInt>(MDN->getOperand(2))->getLimitedValue();
+}
+
+// FIXME: It would be cleaner to attach the relevant metadata directly to the
+// globals we want to sanitize and read that metadata on each pass over a
+// function, instead of reading module-level metadata up front.
+GlobalsMetadata::GlobalsMetadata(Module &M) {
+ NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
+ if (!Globals)
+ return;
+ for (auto MDN : Globals->operands()) {
+ // Metadata node contains the global and the fields of "Entry".
+ assert(MDN->getNumOperands() == 5);
+ auto *V = mdconst::extract_or_null<Constant>(MDN->getOperand(0));
+ // The optimizer may optimize away a global entirely.
+ if (!V)
+ continue;
+ auto *StrippedV = V->stripPointerCasts();
+ auto *GV = dyn_cast<GlobalVariable>(StrippedV);
+ if (!GV)
+ continue;
+ // We can already have an entry for GV if it was merged with another
+ // global.
+ Entry &E = Entries[GV];
+ if (auto *Loc = cast_or_null<MDNode>(MDN->getOperand(1)))
+ E.SourceLoc.parse(Loc);
+ if (auto *Name = cast_or_null<MDString>(MDN->getOperand(2)))
+ E.Name = Name->getString();
+ ConstantInt *IsDynInit = mdconst::extract<ConstantInt>(MDN->getOperand(3));
+ E.IsDynInit |= IsDynInit->isOne();
+ ConstantInt *IsBlacklisted =
+ mdconst::extract<ConstantInt>(MDN->getOperand(4));
+ E.IsBlacklisted |= IsBlacklisted->isOne();
+ }
+}
+
+AnalysisKey ASanGlobalsMetadataAnalysis::Key;
+
+GlobalsMetadata ASanGlobalsMetadataAnalysis::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ return GlobalsMetadata(M);
+}
+
+AddressSanitizerPass::AddressSanitizerPass(bool CompileKernel, bool Recover,
+ bool UseAfterScope)
+ : CompileKernel(CompileKernel), Recover(Recover),
+ UseAfterScope(UseAfterScope) {}
+
+PreservedAnalyses AddressSanitizerPass::run(Function &F,
+ AnalysisManager<Function> &AM) {
+ auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+ auto &MAM = MAMProxy.getManager();
+ Module &M = *F.getParent();
+ if (auto *R = MAM.getCachedResult<ASanGlobalsMetadataAnalysis>(M)) {
+ const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+ AddressSanitizer Sanitizer(M, R, CompileKernel, Recover, UseAfterScope);
+ if (Sanitizer.instrumentFunction(F, TLI))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+ }
+
+ report_fatal_error(
+ "The ASanGlobalsMetadataAnalysis is required to run before "
+ "AddressSanitizer can run");
+ return PreservedAnalyses::all();
+}
+
+ModuleAddressSanitizerPass::ModuleAddressSanitizerPass(bool CompileKernel,
+ bool Recover,
+ bool UseGlobalGC,
+ bool UseOdrIndicator)
+ : CompileKernel(CompileKernel), Recover(Recover), UseGlobalGC(UseGlobalGC),
+ UseOdrIndicator(UseOdrIndicator) {}
+
+PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M,
+ AnalysisManager<Module> &AM) {
+ GlobalsMetadata &GlobalsMD = AM.getResult<ASanGlobalsMetadataAnalysis>(M);
+ ModuleAddressSanitizer Sanitizer(M, &GlobalsMD, CompileKernel, Recover,
+ UseGlobalGC, UseOdrIndicator);
+ if (Sanitizer.instrumentModule(M))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+INITIALIZE_PASS(ASanGlobalsMetadataWrapperPass, "asan-globals-md",
+ "Read metadata to mark which globals should be instrumented "
+ "when running ASan.",
+ false, true)
+
+char AddressSanitizerLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(
+ AddressSanitizerLegacyPass, "asan",
+ "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(ASanGlobalsMetadataWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(
+ AddressSanitizerLegacyPass, "asan",
+ "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
+ false)
+
+FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel,
+ bool Recover,
+ bool UseAfterScope) {
+ assert(!CompileKernel || Recover);
+ return new AddressSanitizerLegacyPass(CompileKernel, Recover, UseAfterScope);
+}
+
+char ModuleAddressSanitizerLegacyPass::ID = 0;
+
+INITIALIZE_PASS(
+ ModuleAddressSanitizerLegacyPass, "asan-module",
+ "AddressSanitizer: detects use-after-free and out-of-bounds bugs."
+ "ModulePass",
+ false, false)
+
+ModulePass *llvm::createModuleAddressSanitizerLegacyPassPass(
+ bool CompileKernel, bool Recover, bool UseGlobalsGC, bool UseOdrIndicator) {
+ assert(!CompileKernel || Recover);
+ return new ModuleAddressSanitizerLegacyPass(CompileKernel, Recover,
+ UseGlobalsGC, UseOdrIndicator);
+}
+
+static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
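+ // E.g. a 64-bit (8-byte) access gives countTrailingZeros(64 / 8) = 3, the
+ // fourth of the kNumberOfAccessSizes (5) per-size callback slots.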
+ size_t Res = countTrailingZeros(TypeSize / 8);
+ assert(Res < kNumberOfAccessSizes);
+ return Res;
+}
+
+/// Create a global describing a source location.
+static GlobalVariable *createPrivateGlobalForSourceLoc(Module &M,
+ LocationMetadata MD) {
+ Constant *LocData[] = {
+ createPrivateGlobalForString(M, MD.Filename, true, kAsanGenPrefix),
+ ConstantInt::get(Type::getInt32Ty(M.getContext()), MD.LineNo),
+ ConstantInt::get(Type::getInt32Ty(M.getContext()), MD.ColumnNo),
+ };
+ auto LocStruct = ConstantStruct::getAnon(LocData);
+ auto GV = new GlobalVariable(M, LocStruct->getType(), true,
+ GlobalValue::PrivateLinkage, LocStruct,
+ kAsanGenPrefix);
+ GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ return GV;
+}
+
+/// Check if \p G has been created by a trusted compiler pass.
+static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) {
+ // Do not instrument @llvm.global_ctors, @llvm.used, etc.
+ if (G->getName().startswith("llvm."))
+ return true;
+
+ // Do not instrument asan globals.
+ if (G->getName().startswith(kAsanGenPrefix) ||
+ G->getName().startswith(kSanCovGenPrefix) ||
+ G->getName().startswith(kODRGenPrefix))
+ return true;
+
+ // Do not instrument gcov counter arrays.
+ if (G->getName() == "__llvm_gcov_ctr")
+ return true;
+
+ return false;
+}
+
+Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
+ // Shadow >> scale
+ Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
+ if (Mapping.Offset == 0) return Shadow;
+ // (Shadow >> scale) | offset
+ Value *ShadowBase;
+ if (LocalDynamicShadow)
+ ShadowBase = LocalDynamicShadow;
+ else
+ ShadowBase = ConstantInt::get(IntptrTy, Mapping.Offset);
+ if (Mapping.OrShadowOffset)
+ return IRB.CreateOr(Shadow, ShadowBase);
+ else
+ return IRB.CreateAdd(Shadow, ShadowBase);
+}
+
+// Instrument memset/memmove/memcpy
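+// (E.g. a call to llvm.memcpy is rewritten into a call to the memcpy callback --
+// __asan_memcpy with the default callback prefix -- passing the pointer-cast
+// destination, source and size operands.)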
+void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
+ IRBuilder<> IRB(MI);
+ if (isa<MemTransferInst>(MI)) {
+ IRB.CreateCall(
+ isa<MemMoveInst>(MI) ? AsanMemmove : AsanMemcpy,
+ {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+ } else if (isa<MemSetInst>(MI)) {
+ IRB.CreateCall(
+ AsanMemset,
+ {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+ }
+ MI->eraseFromParent();
+}
+
+/// Check if we want (and can) handle this alloca.
+bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
+ auto PreviouslySeenAllocaInfo = ProcessedAllocas.find(&AI);
+
+ if (PreviouslySeenAllocaInfo != ProcessedAllocas.end())
+ return PreviouslySeenAllocaInfo->getSecond();
+
+ bool IsInteresting =
+ (AI.getAllocatedType()->isSized() &&
+ // alloca() may be called with 0 size, ignore it.
+ ((!AI.isStaticAlloca()) || getAllocaSizeInBytes(AI) > 0) &&
+ // We are only interested in allocas not promotable to registers.
+ // Promotable allocas are common under -O0.
+ (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) &&
+ // inalloca allocas are not treated as static, and we don't want
+ // dynamic alloca instrumentation for them either.
+ !AI.isUsedWithInAlloca() &&
+ // swifterror allocas are register promoted by ISel
+ !AI.isSwiftError());
+
+ ProcessedAllocas[&AI] = IsInteresting;
+ return IsInteresting;
+}
+
+Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
+ bool *IsWrite,
+ uint64_t *TypeSize,
+ unsigned *Alignment,
+ Value **MaybeMask) {
+ // Skip memory accesses inserted by another instrumentation.
+ if (I->hasMetadata("nosanitize")) return nullptr;
+
+ // Do not instrument the load fetching the dynamic shadow address.
+ if (LocalDynamicShadow == I)
+ return nullptr;
+
+ Value *PtrOperand = nullptr;
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (!ClInstrumentReads) return nullptr;
+ *IsWrite = false;
+ *TypeSize = DL.getTypeStoreSizeInBits(LI->getType());
+ *Alignment = LI->getAlignment();
+ PtrOperand = LI->getPointerOperand();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (!ClInstrumentWrites) return nullptr;
+ *IsWrite = true;
+ *TypeSize = DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType());
+ *Alignment = SI->getAlignment();
+ PtrOperand = SI->getPointerOperand();
+ } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
+ if (!ClInstrumentAtomics) return nullptr;
+ *IsWrite = true;
+ *TypeSize = DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType());
+ *Alignment = 0;
+ PtrOperand = RMW->getPointerOperand();
+ } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
+ if (!ClInstrumentAtomics) return nullptr;
+ *IsWrite = true;
+ *TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType());
+ *Alignment = 0;
+ PtrOperand = XCHG->getPointerOperand();
+ } else if (auto CI = dyn_cast<CallInst>(I)) {
+ auto *F = dyn_cast<Function>(CI->getCalledValue());
+ if (F && (F->getName().startswith("llvm.masked.load.") ||
+ F->getName().startswith("llvm.masked.store."))) {
+ unsigned OpOffset = 0;
+ if (F->getName().startswith("llvm.masked.store.")) {
+ if (!ClInstrumentWrites)
+ return nullptr;
+ // Masked store has an initial operand for the value.
+ OpOffset = 1;
+ *IsWrite = true;
+ } else {
+ if (!ClInstrumentReads)
+ return nullptr;
+ *IsWrite = false;
+ }
+
+ auto BasePtr = CI->getOperand(0 + OpOffset);
+ auto Ty = cast<PointerType>(BasePtr->getType())->getElementType();
+ *TypeSize = DL.getTypeStoreSizeInBits(Ty);
+ if (auto AlignmentConstant =
+ dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
+ *Alignment = (unsigned)AlignmentConstant->getZExtValue();
+ else
+ *Alignment = 1; // No alignment guarantees. We probably got Undef
+ if (MaybeMask)
+ *MaybeMask = CI->getOperand(2 + OpOffset);
+ PtrOperand = BasePtr;
+ }
+ }
+
+ if (PtrOperand) {
+ // Do not instrument accesses from different address spaces; we cannot deal
+ // with them.
+ Type *PtrTy = cast<PointerType>(PtrOperand->getType()->getScalarType());
+ if (PtrTy->getPointerAddressSpace() != 0)
+ return nullptr;
+
+ // Ignore swifterror addresses.
+ // swifterror memory addresses are mem2reg promoted by instruction
+ // selection. As such they cannot have regular uses like an instrumentation
+ // function and it makes no sense to track them as memory.
+ if (PtrOperand->isSwiftError())
+ return nullptr;
+ }
+
+ // Treat memory accesses to promotable allocas as non-interesting since they
+ // will not cause memory violations. This greatly speeds up the instrumented
+ // executable at -O0.
+ if (ClSkipPromotableAllocas)
+ if (auto AI = dyn_cast_or_null<AllocaInst>(PtrOperand))
+ return isInterestingAlloca(*AI) ? AI : nullptr;
+
+ return PtrOperand;
+}
+
+static bool isPointerOperand(Value *V) {
+ return V->getType()->isPointerTy() || isa<PtrToIntInst>(V);
+}
+
+// This is a rough heuristic; it may cause both false positives and
+// false negatives. The proper implementation requires cooperation with
+// the frontend.
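+// (E.g. with -asan-detect-invalid-pointer-cmp, an icmp ult between two
+// pointer-typed operands is considered interesting and is instrumented with a
+// call to __sanitizer_ptr_cmp.)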
+static bool isInterestingPointerComparison(Instruction *I) {
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(I)) {
+ if (!Cmp->isRelational())
+ return false;
+ } else {
+ return false;
+ }
+ return isPointerOperand(I->getOperand(0)) &&
+ isPointerOperand(I->getOperand(1));
+}
+
+// This is a rough heuristic; it may cause both false positives and
+// false negatives. The proper implementation requires cooperation with
+// the frontend.
+static bool isInterestingPointerSubtraction(Instruction *I) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ if (BO->getOpcode() != Instruction::Sub)
+ return false;
+ } else {
+ return false;
+ }
+ return isPointerOperand(I->getOperand(0)) &&
+ isPointerOperand(I->getOperand(1));
+}
+
+bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
+ // If a global variable does not have dynamic initialization, we don't
+ // have to instrument it. However, if a global does not have an initializer
+ // at all, we assume it has a dynamic initializer (in another TU).
+ //
+ // FIXME: Metadata should be attached directly to the global instead
+ // of being added to llvm.asan.globals.
+ return G->hasInitializer() && !GlobalsMD.get(G).IsDynInit;
+}
+
+void AddressSanitizer::instrumentPointerComparisonOrSubtraction(
+ Instruction *I) {
+ IRBuilder<> IRB(I);
+ FunctionCallee F = isa<ICmpInst>(I) ? AsanPtrCmpFunction : AsanPtrSubFunction;
+ Value *Param[2] = {I->getOperand(0), I->getOperand(1)};
+ for (Value *&i : Param) {
+ if (i->getType()->isPointerTy())
+ i = IRB.CreatePointerCast(i, IntptrTy);
+ }
+ IRB.CreateCall(F, Param);
+}
+
+static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I,
+ Instruction *InsertBefore, Value *Addr,
+ unsigned Alignment, unsigned Granularity,
+ uint32_t TypeSize, bool IsWrite,
+ Value *SizeArgument, bool UseCalls,
+ uint32_t Exp) {
+ // Instrument a 1-, 2-, 4-, 8-, or 16-byte access with one check
+ // if the data is properly aligned.
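+ // (E.g. a naturally aligned 4-byte load gets a single inline shadow check,
+ // while a 6-byte or insufficiently aligned access falls through to the
+ // unusual-size/alignment path below.)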
+ if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 ||
+ TypeSize == 128) &&
+ (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8))
+ return Pass->instrumentAddress(I, InsertBefore, Addr, TypeSize, IsWrite,
+ nullptr, UseCalls, Exp);
+ Pass->instrumentUnusualSizeOrAlignment(I, InsertBefore, Addr, TypeSize,
+ IsWrite, nullptr, UseCalls, Exp);
+}
+
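+ // For masked loads and stores, every vector element is checked individually.
+ // As an illustration: for a masked access to <4 x i32> with the constant mask
+ // <1, 0, 1, 1>, elements 0, 2 and 3 get a regular address check and element 1
+ // is skipped; with a non-constant mask, each element's check is guarded by a
+ // branch on the corresponding mask bit.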
+static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
+ const DataLayout &DL, Type *IntptrTy,
+ Value *Mask, Instruction *I,
+ Value *Addr, unsigned Alignment,
+ unsigned Granularity, uint32_t TypeSize,
+ bool IsWrite, Value *SizeArgument,
+ bool UseCalls, uint32_t Exp) {
+ auto *VTy = cast<PointerType>(Addr->getType())->getElementType();
+ uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
+ unsigned Num = VTy->getVectorNumElements();
+ auto Zero = ConstantInt::get(IntptrTy, 0);
+ for (unsigned Idx = 0; Idx < Num; ++Idx) {
+ Value *InstrumentedAddress = nullptr;
+ Instruction *InsertBefore = I;
+ if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
+ // dyn_cast as we might get UndefValue
+ if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
+ if (Masked->isZero())
+ // Mask is constant false, so no instrumentation needed.
+ continue;
+ // If we have a true or undef value, fall through to doInstrumentAddress
+ // with InsertBefore == I
+ }
+ } else {
+ IRBuilder<> IRB(I);
+ Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
+ Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
+ InsertBefore = ThenTerm;
+ }
+
+ IRBuilder<> IRB(InsertBefore);
+ InstrumentedAddress =
+ IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
+ doInstrumentAddress(Pass, I, InsertBefore, InstrumentedAddress, Alignment,
+ Granularity, ElemTypeSize, IsWrite, SizeArgument,
+ UseCalls, Exp);
+ }
+}
+
+void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
+ Instruction *I, bool UseCalls,
+ const DataLayout &DL) {
+ bool IsWrite = false;
+ unsigned Alignment = 0;
+ uint64_t TypeSize = 0;
+ Value *MaybeMask = nullptr;
+ Value *Addr =
+ isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment, &MaybeMask);
+ assert(Addr);
+
+ // Optimization experiments.
+ // The experiments can be used to evaluate potential optimizations that remove
+ // instrumentation (i.e. assess false negatives). Instead of completely
+ // removing some instrumentation, you set Exp to a non-zero value (a mask of
+ // the optimization experiments that want to remove instrumentation of this
+ // instruction). If Exp is non-zero, this pass will emit special calls into
+ // the runtime (e.g. __asan_report_exp_load1 instead of __asan_report_load1).
+ // These calls make the runtime terminate the program in a special way (with
+ // a different exit status). Then you run the new compiler on a buggy corpus,
+ // collect the special terminations (ideally, you don't see them at all -- no
+ // false negatives) and make the decision on the optimization.
+ uint32_t Exp = ClForceExperiment;
+
+ if (ClOpt && ClOptGlobals) {
+ // If initialization order checking is disabled, a simple access to a
+ // dynamically initialized global is always valid.
+ GlobalVariable *G = dyn_cast<GlobalVariable>(GetUnderlyingObject(Addr, DL));
+ if (G && (!ClInitializers || GlobalIsLinkerInitialized(G)) &&
+ isSafeAccess(ObjSizeVis, Addr, TypeSize)) {
+ NumOptimizedAccessesToGlobalVar++;
+ return;
+ }
+ }
+
+ if (ClOpt && ClOptStack) {
+ // A direct inbounds access to a stack variable is always valid.
+ if (isa<AllocaInst>(GetUnderlyingObject(Addr, DL)) &&
+ isSafeAccess(ObjSizeVis, Addr, TypeSize)) {
+ NumOptimizedAccessesToStackVar++;
+ return;
+ }
+ }
+
+ if (IsWrite)
+ NumInstrumentedWrites++;
+ else
+ NumInstrumentedReads++;
+
+ unsigned Granularity = 1 << Mapping.Scale;
+ if (MaybeMask) {
+ instrumentMaskedLoadOrStore(this, DL, IntptrTy, MaybeMask, I, Addr,
+ Alignment, Granularity, TypeSize, IsWrite,
+ nullptr, UseCalls, Exp);
+ } else {
+ doInstrumentAddress(this, I, I, Addr, Alignment, Granularity, TypeSize,
+ IsWrite, nullptr, UseCalls, Exp);
+ }
+}
+
+Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore,
+ Value *Addr, bool IsWrite,
+ size_t AccessSizeIndex,
+ Value *SizeArgument,
+ uint32_t Exp) {
+ IRBuilder<> IRB(InsertBefore);
+ Value *ExpVal = Exp == 0 ? nullptr : ConstantInt::get(IRB.getInt32Ty(), Exp);
+ CallInst *Call = nullptr;
+ if (SizeArgument) {
+ if (Exp == 0)
+ Call = IRB.CreateCall(AsanErrorCallbackSized[IsWrite][0],
+ {Addr, SizeArgument});
+ else
+ Call = IRB.CreateCall(AsanErrorCallbackSized[IsWrite][1],
+ {Addr, SizeArgument, ExpVal});
+ } else {
+ if (Exp == 0)
+ Call =
+ IRB.CreateCall(AsanErrorCallback[IsWrite][0][AccessSizeIndex], Addr);
+ else
+ Call = IRB.CreateCall(AsanErrorCallback[IsWrite][1][AccessSizeIndex],
+ {Addr, ExpVal});
+ }
+
+ // We don't do Call->setDoesNotReturn() because the BB already has an
+ // UnreachableInst at the end.
+ // This EmptyAsm is required to avoid callback merge.
+ IRB.CreateCall(EmptyAsm, {});
+ return Call;
+}
+
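+ // A sketch of the slow-path check emitted below, assuming the default shadow
+ // granularity of 8 (Mapping.Scale == 3): a shadow byte value k in [1..7]
+ // means that only the first k bytes of that 8-byte granule are addressable,
+ // so an access of `size` bytes starting at Addr is bad iff
+ //   (Addr & 7) + size - 1 >= ShadowValue.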
+Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
+ Value *ShadowValue,
+ uint32_t TypeSize) {
+ size_t Granularity = static_cast<size_t>(1) << Mapping.Scale;
+ // Addr & (Granularity - 1)
+ Value *LastAccessedByte =
+ IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
+ // (Addr & (Granularity - 1)) + size - 1
+ if (TypeSize / 8 > 1)
+ LastAccessedByte = IRB.CreateAdd(
+ LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));
+ // (uint8_t) ((Addr & (Granularity-1)) + size - 1)
+ LastAccessedByte =
+ IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false);
+ // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
+ return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
+}
+
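+ // Roughly, the check emitted below (when not using callbacks) has the shape
+ //   int8_t SV = *(int8_t *)memToShadow(Addr); // (Addr >> Scale) + Offset
+ //   if (SV != 0 && <slow path check fails>) __asan_report_...(Addr);
+ // where the slow-path part is only needed for accesses smaller than the
+ // shadow granularity (or when ClAlwaysSlowPath is set).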
+void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
+ Instruction *InsertBefore, Value *Addr,
+ uint32_t TypeSize, bool IsWrite,
+ Value *SizeArgument, bool UseCalls,
+ uint32_t Exp) {
+ bool IsMyriad = TargetTriple.getVendor() == llvm::Triple::Myriad;
+
+ IRBuilder<> IRB(InsertBefore);
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+ size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
+
+ if (UseCalls) {
+ if (Exp == 0)
+ IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex],
+ AddrLong);
+ else
+ IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][1][AccessSizeIndex],
+ {AddrLong, ConstantInt::get(IRB.getInt32Ty(), Exp)});
+ return;
+ }
+
+ if (IsMyriad) {
+ // Strip the cache bit and do range check.
+ // AddrLong &= ~kMyriadCacheBitMask32
+ AddrLong = IRB.CreateAnd(AddrLong, ~kMyriadCacheBitMask32);
+ // Tag = AddrLong >> kMyriadTagShift
+ Value *Tag = IRB.CreateLShr(AddrLong, kMyriadTagShift);
+ // Tag == kMyriadDDRTag
+ Value *TagCheck =
+ IRB.CreateICmpEQ(Tag, ConstantInt::get(IntptrTy, kMyriadDDRTag));
+
+ Instruction *TagCheckTerm =
+ SplitBlockAndInsertIfThen(TagCheck, InsertBefore, false,
+ MDBuilder(*C).createBranchWeights(1, 100000));
+ assert(cast<BranchInst>(TagCheckTerm)->isUnconditional());
+ IRB.SetInsertPoint(TagCheckTerm);
+ InsertBefore = TagCheckTerm;
+ }
+
+ Type *ShadowTy =
+ IntegerType::get(*C, std::max(8U, TypeSize >> Mapping.Scale));
+ Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
+ Value *ShadowPtr = memToShadow(AddrLong, IRB);
+ Value *CmpVal = Constant::getNullValue(ShadowTy);
+ Value *ShadowValue =
+ IRB.CreateLoad(ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
+
+ Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
+ size_t Granularity = 1ULL << Mapping.Scale;
+ Instruction *CrashTerm = nullptr;
+
+ if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
+ // We use branch weights for the slow path check, to indicate that the slow
+ // path is rarely taken. This seems to be the case for SPEC benchmarks.
+ Instruction *CheckTerm = SplitBlockAndInsertIfThen(
+ Cmp, InsertBefore, false, MDBuilder(*C).createBranchWeights(1, 100000));
+ assert(cast<BranchInst>(CheckTerm)->isUnconditional());
+ BasicBlock *NextBB = CheckTerm->getSuccessor(0);
+ IRB.SetInsertPoint(CheckTerm);
+ Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);
+ if (Recover) {
+ CrashTerm = SplitBlockAndInsertIfThen(Cmp2, CheckTerm, false);
+ } else {
+ BasicBlock *CrashBlock =
+ BasicBlock::Create(*C, "", NextBB->getParent(), NextBB);
+ CrashTerm = new UnreachableInst(*C, CrashBlock);
+ BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2);
+ ReplaceInstWithInst(CheckTerm, NewTerm);
+ }
+ } else {
+ CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, !Recover);
+ }
+
+ Instruction *Crash = generateCrashCode(CrashTerm, AddrLong, IsWrite,
+ AccessSizeIndex, SizeArgument, Exp);
+ Crash->setDebugLoc(OrigIns->getDebugLoc());
+}
+
+// Instrument unusual size or unusual alignment.
+ // We cannot do it with a single check, so we do a 1-byte check for the first
+ // and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
+// to report the actual access size.
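+ // For example, a 10-byte access at Addr gets two 1-byte checks, at Addr and
+ // at Addr + 9, and a failing check reports via __asan_report_*_n(Addr, 10) so
+ // the real access size is preserved in the report.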
+void AddressSanitizer::instrumentUnusualSizeOrAlignment(
+ Instruction *I, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize,
+ bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp) {
+ IRBuilder<> IRB(InsertBefore);
+ Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+ if (UseCalls) {
+ if (Exp == 0)
+ IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][0],
+ {AddrLong, Size});
+ else
+ IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][1],
+ {AddrLong, Size, ConstantInt::get(IRB.getInt32Ty(), Exp)});
+ } else {
+ Value *LastByte = IRB.CreateIntToPtr(
+ IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
+ Addr->getType());
+ instrumentAddress(I, InsertBefore, Addr, 8, IsWrite, Size, false, Exp);
+ instrumentAddress(I, InsertBefore, LastByte, 8, IsWrite, Size, false, Exp);
+ }
+}
+
+void ModuleAddressSanitizer::poisonOneInitializer(Function &GlobalInit,
+ GlobalValue *ModuleName) {
+ // Set up the arguments to our poison/unpoison functions.
+ IRBuilder<> IRB(&GlobalInit.front(),
+ GlobalInit.front().getFirstInsertionPt());
+
+ // Add a call to poison all external globals before the given function starts.
+ Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy);
+ IRB.CreateCall(AsanPoisonGlobals, ModuleNameAddr);
+
+ // Add calls to unpoison all globals before each return instruction.
+ for (auto &BB : GlobalInit.getBasicBlockList())
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
+ CallInst::Create(AsanUnpoisonGlobals, "", RI);
+}
+
+void ModuleAddressSanitizer::createInitializerPoisonCalls(
+ Module &M, GlobalValue *ModuleName) {
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+ if (!GV)
+ return;
+
+ ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!CA)
+ return;
+
+ for (Use &OP : CA->operands()) {
+ if (isa<ConstantAggregateZero>(OP)) continue;
+ ConstantStruct *CS = cast<ConstantStruct>(OP);
+
+ // Must have a function or null ptr.
+ if (Function *F = dyn_cast<Function>(CS->getOperand(1))) {
+ if (F->getName() == kAsanModuleCtorName) continue;
+ auto *Priority = cast<ConstantInt>(CS->getOperand(0));
+ // Don't instrument CTORs that will run before asan.module_ctor.
+ if (Priority->getLimitedValue() <= GetCtorAndDtorPriority(TargetTriple))
+ continue;
+ poisonOneInitializer(*F, ModuleName);
+ }
+ }
+}
+
+bool ModuleAddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
+ Type *Ty = G->getValueType();
+ LLVM_DEBUG(dbgs() << "GLOBAL: " << *G << "\n");
+
+ // FIXME: Metadata should be attached directly to the global instead of
+ // being added to llvm.asan.globals.
+ if (GlobalsMD.get(G).IsBlacklisted) return false;
+ if (!Ty->isSized()) return false;
+ if (!G->hasInitializer()) return false;
+ if (GlobalWasGeneratedByCompiler(G)) return false; // Our own globals.
+ // Two problems with thread-locals:
+ // - The address of the main thread's copy can't be computed at link-time.
+ // - Need to poison all copies, not just the main thread's one.
+ if (G->isThreadLocal()) return false;
+ // For now, just ignore this global if the alignment is large.
+ if (G->getAlignment() > MinRedzoneSizeForGlobal()) return false;
+
+ // For non-COFF targets, only instrument globals known to be defined by this
+ // TU.
+ // FIXME: We can instrument comdat globals on ELF if we are using the
+ // GC-friendly metadata scheme.
+ if (!TargetTriple.isOSBinFormatCOFF()) {
+ if (!G->hasExactDefinition() || G->hasComdat())
+ return false;
+ } else {
+ // On COFF, don't instrument non-ODR linkages.
+ if (G->isInterposable())
+ return false;
+ }
+
+ // If a comdat is present, it must have a selection kind that implies ODR
+ // semantics: no duplicates, any, or exact match.
+ if (Comdat *C = G->getComdat()) {
+ switch (C->getSelectionKind()) {
+ case Comdat::Any:
+ case Comdat::ExactMatch:
+ case Comdat::NoDuplicates:
+ break;
+ case Comdat::Largest:
+ case Comdat::SameSize:
+ return false;
+ }
+ }
+
+ if (G->hasSection()) {
+ StringRef Section = G->getSection();
+
+ // Globals from llvm.metadata aren't emitted; do not instrument them.
+ if (Section == "llvm.metadata") return false;
+ // Do not instrument globals from special LLVM sections.
+ if (Section.find("__llvm") != StringRef::npos ||
+ Section.find("__LLVM") != StringRef::npos)
+ return false;
+
+ // Do not instrument function pointers to initialization and termination
+ // routines: the dynamic linker will not properly handle redzones.
+ if (Section.startswith(".preinit_array") ||
+ Section.startswith(".init_array") ||
+ Section.startswith(".fini_array")) {
+ return false;
+ }
+
+ // On COFF, if the section name contains '$', it is highly likely that the
+ // user is using section sorting to create an array of globals similar to
+ // the way initialization callbacks are registered in .init_array and
+ // .CRT$XCU. The ATL also registers things in .ATL$__[azm]. Adding redzones
+ // to such globals is counterproductive, because the intent is that they
+ // will form an array, and out-of-bounds accesses are expected.
+ // See https://github.com/google/sanitizers/issues/305
+ // and http://msdn.microsoft.com/en-US/en-en/library/bb918180(v=vs.120).aspx
+ if (TargetTriple.isOSBinFormatCOFF() && Section.contains('$')) {
+ LLVM_DEBUG(dbgs() << "Ignoring global in sorted section (contains '$'): "
+ << *G << "\n");
+ return false;
+ }
+
+ if (TargetTriple.isOSBinFormatMachO()) {
+ StringRef ParsedSegment, ParsedSection;
+ unsigned TAA = 0, StubSize = 0;
+ bool TAAParsed;
+ std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier(
+ Section, ParsedSegment, ParsedSection, TAA, TAAParsed, StubSize);
+ assert(ErrorCode.empty() && "Invalid section specifier.");
+
+ // Ignore the globals from the __OBJC section. The ObjC runtime assumes
+ // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
+ // them.
+ if (ParsedSegment == "__OBJC" ||
+ (ParsedSegment == "__DATA" && ParsedSection.startswith("__objc_"))) {
+ LLVM_DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G << "\n");
+ return false;
+ }
+ // See https://github.com/google/sanitizers/issues/32
+ // Constant CFString instances are compiled in the following way:
+ // -- the string buffer is emitted into
+ // __TEXT,__cstring,cstring_literals
+ // -- the constant NSConstantString structure referencing that buffer
+ // is placed into __DATA,__cfstring
+ // Therefore there's no point in placing redzones into __DATA,__cfstring.
+ // Moreover, it causes the linker to crash on OS X 10.7
+ if (ParsedSegment == "__DATA" && ParsedSection == "__cfstring") {
+ LLVM_DEBUG(dbgs() << "Ignoring CFString: " << *G << "\n");
+ return false;
+ }
+ // The linker merges the contents of cstring_literals and removes the
+ // trailing zeroes.
+ if (ParsedSegment == "__TEXT" && (TAA & MachO::S_CSTRING_LITERALS)) {
+ LLVM_DEBUG(dbgs() << "Ignoring a cstring literal: " << *G << "\n");
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+// On Mach-O platforms, we emit global metadata in a separate section of the
+// binary in order to allow the linker to properly dead strip. This is only
+// supported on recent versions of ld64.
+bool ModuleAddressSanitizer::ShouldUseMachOGlobalsSection() const {
+ if (!TargetTriple.isOSBinFormatMachO())
+ return false;
+
+ if (TargetTriple.isMacOSX() && !TargetTriple.isMacOSXVersionLT(10, 11))
+ return true;
+ if (TargetTriple.isiOS() /* or tvOS */ && !TargetTriple.isOSVersionLT(9))
+ return true;
+ if (TargetTriple.isWatchOS() && !TargetTriple.isOSVersionLT(2))
+ return true;
+
+ return false;
+}
+
+StringRef ModuleAddressSanitizer::getGlobalMetadataSection() const {
+ switch (TargetTriple.getObjectFormat()) {
+ case Triple::COFF: return ".ASAN$GL";
+ case Triple::ELF: return "asan_globals";
+ case Triple::MachO: return "__DATA,__asan_globals,regular";
+ case Triple::Wasm:
+ case Triple::XCOFF:
+ report_fatal_error(
+ "ModuleAddressSanitizer not implemented for object file format.");
+ case Triple::UnknownObjectFormat:
+ break;
+ }
+ llvm_unreachable("unsupported object format");
+}
+
+void ModuleAddressSanitizer::initializeCallbacks(Module &M) {
+ IRBuilder<> IRB(*C);
+
+ // Declare our poisoning and unpoisoning functions.
+ AsanPoisonGlobals =
+ M.getOrInsertFunction(kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy);
+ AsanUnpoisonGlobals =
+ M.getOrInsertFunction(kAsanUnpoisonGlobalsName, IRB.getVoidTy());
+
+ // Declare functions that register/unregister globals.
+ AsanRegisterGlobals = M.getOrInsertFunction(
+ kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy);
+ AsanUnregisterGlobals = M.getOrInsertFunction(
+ kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy);
+
+ // Declare the functions that find globals in a shared object and then invoke
+ // the (un)register function on them.
+ AsanRegisterImageGlobals = M.getOrInsertFunction(
+ kAsanRegisterImageGlobalsName, IRB.getVoidTy(), IntptrTy);
+ AsanUnregisterImageGlobals = M.getOrInsertFunction(
+ kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy);
+
+ AsanRegisterElfGlobals =
+ M.getOrInsertFunction(kAsanRegisterElfGlobalsName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, IntptrTy);
+ AsanUnregisterElfGlobals =
+ M.getOrInsertFunction(kAsanUnregisterElfGlobalsName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, IntptrTy);
+}
+
+// Put the metadata and the instrumented global in the same group. This ensures
+// that the metadata is discarded if the instrumented global is discarded.
+void ModuleAddressSanitizer::SetComdatForGlobalMetadata(
+ GlobalVariable *G, GlobalVariable *Metadata, StringRef InternalSuffix) {
+ Module &M = *G->getParent();
+ Comdat *C = G->getComdat();
+ if (!C) {
+ if (!G->hasName()) {
+ // If G is unnamed, it must be internal. Give it an artificial name
+ // so we can put it in a comdat.
+ assert(G->hasLocalLinkage());
+ G->setName(Twine(kAsanGenPrefix) + "_anon_global");
+ }
+
+ if (!InternalSuffix.empty() && G->hasLocalLinkage()) {
+ std::string Name = G->getName();
+ Name += InternalSuffix;
+ C = M.getOrInsertComdat(Name);
+ } else {
+ C = M.getOrInsertComdat(G->getName());
+ }
+
+ // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF. Also upgrade private
+ // linkage to internal linkage so that a symbol table entry is emitted. This
+ // is necessary in order to create the comdat group.
+ if (TargetTriple.isOSBinFormatCOFF()) {
+ C->setSelectionKind(Comdat::NoDuplicates);
+ if (G->hasPrivateLinkage())
+ G->setLinkage(GlobalValue::InternalLinkage);
+ }
+ G->setComdat(C);
+ }
+
+ assert(G->hasComdat());
+ Metadata->setComdat(G->getComdat());
+}
+
+// Create a separate metadata global and put it in the appropriate ASan
+// global registration section.
+GlobalVariable *
+ModuleAddressSanitizer::CreateMetadataGlobal(Module &M, Constant *Initializer,
+ StringRef OriginalName) {
+ auto Linkage = TargetTriple.isOSBinFormatMachO()
+ ? GlobalVariable::InternalLinkage
+ : GlobalVariable::PrivateLinkage;
+ GlobalVariable *Metadata = new GlobalVariable(
+ M, Initializer->getType(), false, Linkage, Initializer,
+ Twine("__asan_global_") + GlobalValue::dropLLVMManglingEscape(OriginalName));
+ Metadata->setSection(getGlobalMetadataSection());
+ return Metadata;
+}
+
+IRBuilder<> ModuleAddressSanitizer::CreateAsanModuleDtor(Module &M) {
+ AsanDtorFunction =
+ Function::Create(FunctionType::get(Type::getVoidTy(*C), false),
+ GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
+ BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
+
+ return IRBuilder<>(ReturnInst::Create(*C, AsanDtorBB));
+}
+
+void ModuleAddressSanitizer::InstrumentGlobalsCOFF(
+ IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
+ ArrayRef<Constant *> MetadataInitializers) {
+ assert(ExtendedGlobals.size() == MetadataInitializers.size());
+ auto &DL = M.getDataLayout();
+
+ for (size_t i = 0; i < ExtendedGlobals.size(); i++) {
+ Constant *Initializer = MetadataInitializers[i];
+ GlobalVariable *G = ExtendedGlobals[i];
+ GlobalVariable *Metadata =
+ CreateMetadataGlobal(M, Initializer, G->getName());
+
+ // The MSVC linker always inserts padding when linking incrementally. We
+ // cope with that by aligning each struct to its size, which must be a power
+ // of two.
+ unsigned SizeOfGlobalStruct = DL.getTypeAllocSize(Initializer->getType());
+ assert(isPowerOf2_32(SizeOfGlobalStruct) &&
+ "global metadata will not be padded appropriately");
+ Metadata->setAlignment(assumeAligned(SizeOfGlobalStruct));
+
+ SetComdatForGlobalMetadata(G, Metadata, "");
+ }
+}
+
+void ModuleAddressSanitizer::InstrumentGlobalsELF(
+ IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
+ ArrayRef<Constant *> MetadataInitializers,
+ const std::string &UniqueModuleId) {
+ assert(ExtendedGlobals.size() == MetadataInitializers.size());
+
+ SmallVector<GlobalValue *, 16> MetadataGlobals(ExtendedGlobals.size());
+ for (size_t i = 0; i < ExtendedGlobals.size(); i++) {
+ GlobalVariable *G = ExtendedGlobals[i];
+ GlobalVariable *Metadata =
+ CreateMetadataGlobal(M, MetadataInitializers[i], G->getName());
+ MDNode *MD = MDNode::get(M.getContext(), ValueAsMetadata::get(G));
+ Metadata->setMetadata(LLVMContext::MD_associated, MD);
+ MetadataGlobals[i] = Metadata;
+
+ SetComdatForGlobalMetadata(G, Metadata, UniqueModuleId);
+ }
+
+ // Update llvm.compiler.used, adding the new metadata globals. This is
+ // needed so that during LTO these variables stay alive.
+ if (!MetadataGlobals.empty())
+ appendToCompilerUsed(M, MetadataGlobals);
+
+ // RegisteredFlag serves two purposes. First, we can pass it to dladdr()
+ // to look up the loaded image that contains it. Second, we can store in it
+ // whether registration has already occurred, to prevent duplicate
+ // registration.
+ //
+ // Common linkage ensures that there is only one global per shared library.
+ GlobalVariable *RegisteredFlag = new GlobalVariable(
+ M, IntptrTy, false, GlobalVariable::CommonLinkage,
+ ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName);
+ RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility);
+
+ // Create start and stop symbols.
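+ // For ELF, the linker defines __start_<section> and __stop_<section> symbols
+ // for every section whose name is a valid C identifier (which is why
+ // getGlobalMetadataSection() returns "asan_globals" without a leading dot).
+ // Declaring them extern weak here lets the runtime walk all metadata entries
+ // emitted into that section across the final link unit.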
+ GlobalVariable *StartELFMetadata = new GlobalVariable(
+ M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr,
+ "__start_" + getGlobalMetadataSection());
+ StartELFMetadata->setVisibility(GlobalVariable::HiddenVisibility);
+ GlobalVariable *StopELFMetadata = new GlobalVariable(
+ M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr,
+ "__stop_" + getGlobalMetadataSection());
+ StopELFMetadata->setVisibility(GlobalVariable::HiddenVisibility);
+
+ // Create a call to register the globals with the runtime.
+ IRB.CreateCall(AsanRegisterElfGlobals,
+ {IRB.CreatePointerCast(RegisteredFlag, IntptrTy),
+ IRB.CreatePointerCast(StartELFMetadata, IntptrTy),
+ IRB.CreatePointerCast(StopELFMetadata, IntptrTy)});
+
+ // We also need to unregister globals at the end, e.g., when a shared library
+ // gets closed.
+ IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M);
+ IRB_Dtor.CreateCall(AsanUnregisterElfGlobals,
+ {IRB.CreatePointerCast(RegisteredFlag, IntptrTy),
+ IRB.CreatePointerCast(StartELFMetadata, IntptrTy),
+ IRB.CreatePointerCast(StopELFMetadata, IntptrTy)});
+}
+
+void ModuleAddressSanitizer::InstrumentGlobalsMachO(
+ IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
+ ArrayRef<Constant *> MetadataInitializers) {
+ assert(ExtendedGlobals.size() == MetadataInitializers.size());
+
+ // On recent Mach-O platforms, use a structure which binds the liveness of
+ // the global variable to the metadata struct. Keep the list of "Liveness" GV
+ // created to be added to llvm.compiler.used
+ StructType *LivenessTy = StructType::get(IntptrTy, IntptrTy);
+ SmallVector<GlobalValue *, 16> LivenessGlobals(ExtendedGlobals.size());
+
+ for (size_t i = 0; i < ExtendedGlobals.size(); i++) {
+ Constant *Initializer = MetadataInitializers[i];
+ GlobalVariable *G = ExtendedGlobals[i];
+ GlobalVariable *Metadata =
+ CreateMetadataGlobal(M, Initializer, G->getName());
+
+ // On recent Mach-O platforms, we emit the global metadata in a way that
+ // allows the linker to properly strip dead globals.
+ auto LivenessBinder =
+ ConstantStruct::get(LivenessTy, Initializer->getAggregateElement(0u),
+ ConstantExpr::getPointerCast(Metadata, IntptrTy));
+ GlobalVariable *Liveness = new GlobalVariable(
+ M, LivenessTy, false, GlobalVariable::InternalLinkage, LivenessBinder,
+ Twine("__asan_binder_") + G->getName());
+ Liveness->setSection("__DATA,__asan_liveness,regular,live_support");
+ LivenessGlobals[i] = Liveness;
+ }
+
+ // Update llvm.compiler.used, adding the new liveness globals. This is
+ // needed so that during LTO these variables stay alive. The alternative
+ // would be to have the linker handling the LTO symbols, but libLTO
+ // current API does not expose access to the section for each symbol.
+ if (!LivenessGlobals.empty())
+ appendToCompilerUsed(M, LivenessGlobals);
+
+ // RegisteredFlag serves two purposes. First, we can pass it to dladdr()
+ // to look up the loaded image that contains it. Second, we can store in it
+ // whether registration has already occurred, to prevent duplicate
+ // registration.
+ //
+ // Common linkage ensures that there is only one global per shared library.
+ GlobalVariable *RegisteredFlag = new GlobalVariable(
+ M, IntptrTy, false, GlobalVariable::CommonLinkage,
+ ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName);
+ RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility);
+
+ IRB.CreateCall(AsanRegisterImageGlobals,
+ {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
+
+ // We also need to unregister globals at the end, e.g., when a shared library
+ // gets closed.
+ IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M);
+ IRB_Dtor.CreateCall(AsanUnregisterImageGlobals,
+ {IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
+}
+
+void ModuleAddressSanitizer::InstrumentGlobalsWithMetadataArray(
+ IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals,
+ ArrayRef<Constant *> MetadataInitializers) {
+ assert(ExtendedGlobals.size() == MetadataInitializers.size());
+ unsigned N = ExtendedGlobals.size();
+ assert(N > 0);
+
+ // On platforms that don't have a custom metadata section, we emit an array
+ // of global metadata structures.
+ ArrayType *ArrayOfGlobalStructTy =
+ ArrayType::get(MetadataInitializers[0]->getType(), N);
+ auto AllGlobals = new GlobalVariable(
+ M, ArrayOfGlobalStructTy, false, GlobalVariable::InternalLinkage,
+ ConstantArray::get(ArrayOfGlobalStructTy, MetadataInitializers), "");
+ if (Mapping.Scale > 3)
+ AllGlobals->setAlignment(Align(1ULL << Mapping.Scale));
+
+ IRB.CreateCall(AsanRegisterGlobals,
+ {IRB.CreatePointerCast(AllGlobals, IntptrTy),
+ ConstantInt::get(IntptrTy, N)});
+
+ // We also need to unregister globals at the end, e.g., when a shared library
+ // gets closed.
+ IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M);
+ IRB_Dtor.CreateCall(AsanUnregisterGlobals,
+ {IRB.CreatePointerCast(AllGlobals, IntptrTy),
+ ConstantInt::get(IntptrTy, N)});
+}
+
+// This function replaces all global variables with new variables that have
+// trailing redzones. It also creates a function that poisons
+// redzones and inserts this function into llvm.global_ctors.
+// Sets *CtorComdat to true if the global registration code emitted into the
+// asan constructor is comdat-compatible.
+bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
+ bool *CtorComdat) {
+ *CtorComdat = false;
+
+ SmallVector<GlobalVariable *, 16> GlobalsToChange;
+
+ for (auto &G : M.globals()) {
+ if (ShouldInstrumentGlobal(&G)) GlobalsToChange.push_back(&G);
+ }
+
+ size_t n = GlobalsToChange.size();
+ if (n == 0) {
+ *CtorComdat = true;
+ return false;
+ }
+
+ auto &DL = M.getDataLayout();
+
+ // A global is described by a structure
+ // size_t beg;
+ // size_t size;
+ // size_t size_with_redzone;
+ // const char *name;
+ // const char *module_name;
+ // size_t has_dynamic_init;
+ // void *source_location;
+ // size_t odr_indicator;
+ // We initialize an array of such structures and pass it to a run-time call.
+ StructType *GlobalStructTy =
+ StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy,
+ IntptrTy, IntptrTy, IntptrTy);
+ SmallVector<GlobalVariable *, 16> NewGlobals(n);
+ SmallVector<Constant *, 16> Initializers(n);
+
+ bool HasDynamicallyInitializedGlobals = false;
+
+ // We shouldn't merge the module name string with others, as it serves as a
+ // unique module ID at runtime.
+ GlobalVariable *ModuleName = createPrivateGlobalForString(
+ M, M.getModuleIdentifier(), /*AllowMerging*/ false, kAsanGenPrefix);
+
+ for (size_t i = 0; i < n; i++) {
+ static const uint64_t kMaxGlobalRedzone = 1 << 18;
+ GlobalVariable *G = GlobalsToChange[i];
+
+ // FIXME: Metadata should be attached directly to the global instead of
+ // being added to llvm.asan.globals.
+ auto MD = GlobalsMD.get(G);
+ StringRef NameForGlobal = G->getName();
+ // Create string holding the global name (use global name from metadata
+ // if it's available, otherwise just write the name of global variable).
+ GlobalVariable *Name = createPrivateGlobalForString(
+ M, MD.Name.empty() ? NameForGlobal : MD.Name,
+ /*AllowMerging*/ true, kAsanGenPrefix);
+
+ Type *Ty = G->getValueType();
+ uint64_t SizeInBytes = DL.getTypeAllocSize(Ty);
+ uint64_t MinRZ = MinRedzoneSizeForGlobal();
+ // MinRZ <= RZ <= kMaxGlobalRedzone,
+ // aiming to make RZ ~ 1/4 of SizeInBytes.
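+ // A worked example, assuming MinRZ == 32: a 4000-byte global gets
+ // RZ = (4000 / 32 / 4) * 32 = 992, and since 4000 is already a multiple of
+ // 32, RightRedzoneSize stays 992; a 100-byte global gets the minimum RZ of
+ // 32 plus 28 bytes of rounding, i.e. RightRedzoneSize == 60.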
+ uint64_t RZ = std::max(
+ MinRZ, std::min(kMaxGlobalRedzone, (SizeInBytes / MinRZ / 4) * MinRZ));
+ uint64_t RightRedzoneSize = RZ;
+ // Round up to MinRZ
+ if (SizeInBytes % MinRZ) RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ);
+ assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0);
+ Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
+
+ StructType *NewTy = StructType::get(Ty, RightRedZoneTy);
+ Constant *NewInitializer = ConstantStruct::get(
+ NewTy, G->getInitializer(), Constant::getNullValue(RightRedZoneTy));
+
+ // Create a new global variable with enough space for a redzone.
+ GlobalValue::LinkageTypes Linkage = G->getLinkage();
+ if (G->isConstant() && Linkage == GlobalValue::PrivateLinkage)
+ Linkage = GlobalValue::InternalLinkage;
+ GlobalVariable *NewGlobal =
+ new GlobalVariable(M, NewTy, G->isConstant(), Linkage, NewInitializer,
+ "", G, G->getThreadLocalMode());
+ NewGlobal->copyAttributesFrom(G);
+ NewGlobal->setComdat(G->getComdat());
+ NewGlobal->setAlignment(MaybeAlign(MinRZ));
+ // Don't fold globals with redzones. ODR violation detector and redzone
+ // poisoning implicitly creates a dependence on the global's address, so it
+ // is no longer valid for it to be marked unnamed_addr.
+ NewGlobal->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
+
+ // Move null-terminated C strings to "__asan_cstring" section on Darwin.
+ if (TargetTriple.isOSBinFormatMachO() && !G->hasSection() &&
+ G->isConstant()) {
+ auto Seq = dyn_cast<ConstantDataSequential>(G->getInitializer());
+ if (Seq && Seq->isCString())
+ NewGlobal->setSection("__TEXT,__asan_cstring,regular");
+ }
+
+ // Transfer the debug info. The payload starts at offset zero so we can
+ // copy the debug info over as is.
+ SmallVector<DIGlobalVariableExpression *, 1> GVs;
+ G->getDebugInfo(GVs);
+ for (auto *GV : GVs)
+ NewGlobal->addDebugInfo(GV);
+
+ Value *Indices2[2];
+ Indices2[0] = IRB.getInt32(0);
+ Indices2[1] = IRB.getInt32(0);
+
+ G->replaceAllUsesWith(
+ ConstantExpr::getGetElementPtr(NewTy, NewGlobal, Indices2, true));
+ NewGlobal->takeName(G);
+ G->eraseFromParent();
+ NewGlobals[i] = NewGlobal;
+
+ Constant *SourceLoc;
+ if (!MD.SourceLoc.empty()) {
+ auto SourceLocGlobal = createPrivateGlobalForSourceLoc(M, MD.SourceLoc);
+ SourceLoc = ConstantExpr::getPointerCast(SourceLocGlobal, IntptrTy);
+ } else {
+ SourceLoc = ConstantInt::get(IntptrTy, 0);
+ }
+
+ Constant *ODRIndicator = ConstantExpr::getNullValue(IRB.getInt8PtrTy());
+ GlobalValue *InstrumentedGlobal = NewGlobal;
+
+ bool CanUsePrivateAliases =
+ TargetTriple.isOSBinFormatELF() || TargetTriple.isOSBinFormatMachO() ||
+ TargetTriple.isOSBinFormatWasm();
+ if (CanUsePrivateAliases && UsePrivateAlias) {
+ // Create local alias for NewGlobal to avoid crash on ODR between
+ // instrumented and non-instrumented libraries.
+ InstrumentedGlobal =
+ GlobalAlias::create(GlobalValue::PrivateLinkage, "", NewGlobal);
+ }
+
+ // ODR should not happen for local linkage.
+ if (NewGlobal->hasLocalLinkage()) {
+ ODRIndicator = ConstantExpr::getIntToPtr(ConstantInt::get(IntptrTy, -1),
+ IRB.getInt8PtrTy());
+ } else if (UseOdrIndicator) {
+ // With local aliases, we need to provide another externally visible
+ // symbol __odr_asan_XXX to detect ODR violation.
+ auto *ODRIndicatorSym =
+ new GlobalVariable(M, IRB.getInt8Ty(), false, Linkage,
+ Constant::getNullValue(IRB.getInt8Ty()),
+ kODRGenPrefix + NameForGlobal, nullptr,
+ NewGlobal->getThreadLocalMode());
+
+ // Set meaningful attributes for indicator symbol.
+ ODRIndicatorSym->setVisibility(NewGlobal->getVisibility());
+ ODRIndicatorSym->setDLLStorageClass(NewGlobal->getDLLStorageClass());
+ ODRIndicatorSym->setAlignment(Align::None());
+ ODRIndicator = ODRIndicatorSym;
+ }
+
+ Constant *Initializer = ConstantStruct::get(
+ GlobalStructTy,
+ ConstantExpr::getPointerCast(InstrumentedGlobal, IntptrTy),
+ ConstantInt::get(IntptrTy, SizeInBytes),
+ ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
+ ConstantExpr::getPointerCast(Name, IntptrTy),
+ ConstantExpr::getPointerCast(ModuleName, IntptrTy),
+ ConstantInt::get(IntptrTy, MD.IsDynInit), SourceLoc,
+ ConstantExpr::getPointerCast(ODRIndicator, IntptrTy));
+
+ if (ClInitializers && MD.IsDynInit) HasDynamicallyInitializedGlobals = true;
+
+ LLVM_DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
+
+ Initializers[i] = Initializer;
+ }
+
+ // Add instrumented globals to the llvm.compiler.used list to prevent LTO
+ // from ConstantMerge'ing them.
+ SmallVector<GlobalValue *, 16> GlobalsToAddToUsedList;
+ for (size_t i = 0; i < n; i++) {
+ GlobalVariable *G = NewGlobals[i];
+ if (G->getName().empty()) continue;
+ GlobalsToAddToUsedList.push_back(G);
+ }
+ appendToCompilerUsed(M, ArrayRef<GlobalValue *>(GlobalsToAddToUsedList));
+
+ std::string ELFUniqueModuleId =
+ (UseGlobalsGC && TargetTriple.isOSBinFormatELF()) ? getUniqueModuleId(&M)
+ : "";
+
+ if (!ELFUniqueModuleId.empty()) {
+ InstrumentGlobalsELF(IRB, M, NewGlobals, Initializers, ELFUniqueModuleId);
+ *CtorComdat = true;
+ } else if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) {
+ InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers);
+ } else if (UseGlobalsGC && ShouldUseMachOGlobalsSection()) {
+ InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers);
+ } else {
+ InstrumentGlobalsWithMetadataArray(IRB, M, NewGlobals, Initializers);
+ }
+
+ // Create calls for poisoning before initializers run and unpoisoning after.
+ if (HasDynamicallyInitializedGlobals)
+ createInitializerPoisonCalls(M, ModuleName);
+
+ LLVM_DEBUG(dbgs() << M);
+ return true;
+}
+
+int ModuleAddressSanitizer::GetAsanVersion(const Module &M) const {
+ int LongSize = M.getDataLayout().getPointerSizeInBits();
+ bool isAndroid = Triple(M.getTargetTriple()).isAndroid();
+ int Version = 8;
+ // 32-bit Android is one version ahead because of the switch to dynamic
+ // shadow.
+ Version += (LongSize == 32 && isAndroid);
+ return Version;
+}
+
+bool ModuleAddressSanitizer::instrumentModule(Module &M) {
+ initializeCallbacks(M);
+
+ if (CompileKernel)
+ return false;
+
+ // Create a module constructor. A destructor is created lazily because not
+ // all platforms and not all modules need it.
+ std::string AsanVersion = std::to_string(GetAsanVersion(M));
+ std::string VersionCheckName =
+ ClInsertVersionCheck ? (kAsanVersionCheckNamePrefix + AsanVersion) : "";
+ std::tie(AsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
+ M, kAsanModuleCtorName, kAsanInitName, /*InitArgTypes=*/{},
+ /*InitArgs=*/{}, VersionCheckName);
+
+ bool CtorComdat = true;
+ // TODO(glider): temporarily disabled globals instrumentation for KASan.
+ if (ClGlobals) {
+ IRBuilder<> IRB(AsanCtorFunction->getEntryBlock().getTerminator());
+ InstrumentGlobals(IRB, M, &CtorComdat);
+ }
+
+ const uint64_t Priority = GetCtorAndDtorPriority(TargetTriple);
+
+ // Put the constructor and destructor in comdat if both
+ // (1) global instrumentation is not TU-specific
+ // (2) target is ELF.
+ if (UseCtorComdat && TargetTriple.isOSBinFormatELF() && CtorComdat) {
+ AsanCtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleCtorName));
+ appendToGlobalCtors(M, AsanCtorFunction, Priority, AsanCtorFunction);
+ if (AsanDtorFunction) {
+ AsanDtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleDtorName));
+ appendToGlobalDtors(M, AsanDtorFunction, Priority, AsanDtorFunction);
+ }
+ } else {
+ appendToGlobalCtors(M, AsanCtorFunction, Priority);
+ if (AsanDtorFunction)
+ appendToGlobalDtors(M, AsanDtorFunction, Priority);
+ }
+
+ return true;
+}
+
+void AddressSanitizer::initializeCallbacks(Module &M) {
+ IRBuilder<> IRB(*C);
+ // Create __asan_report* callbacks.
+ // IsWrite, TypeSize and Exp are encoded in the function name.
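+ // With the default prefixes this produces names such as __asan_report_load4,
+ // __asan_report_exp_store8 and __asan_report_load_n_noabort for the error
+ // callbacks, and __asan_load4 / __asan_storeN for the check callbacks
+ // (illustrative examples, not an exhaustive list).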
+ for (int Exp = 0; Exp < 2; Exp++) {
+ for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
+ const std::string TypeStr = AccessIsWrite ? "store" : "load";
+ const std::string ExpStr = Exp ? "exp_" : "";
+ const std::string EndingStr = Recover ? "_noabort" : "";
+
+ SmallVector<Type *, 3> Args2 = {IntptrTy, IntptrTy};
+ SmallVector<Type *, 2> Args1{1, IntptrTy};
+ if (Exp) {
+ Type *ExpType = Type::getInt32Ty(*C);
+ Args2.push_back(ExpType);
+ Args1.push_back(ExpType);
+ }
+ AsanErrorCallbackSized[AccessIsWrite][Exp] = M.getOrInsertFunction(
+ kAsanReportErrorTemplate + ExpStr + TypeStr + "_n" + EndingStr,
+ FunctionType::get(IRB.getVoidTy(), Args2, false));
+
+ AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] = M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N" + EndingStr,
+ FunctionType::get(IRB.getVoidTy(), Args2, false));
+
+ for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
+ AccessSizeIndex++) {
+ const std::string Suffix = TypeStr + itostr(1ULL << AccessSizeIndex);
+ AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] =
+ M.getOrInsertFunction(
+ kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr,
+ FunctionType::get(IRB.getVoidTy(), Args1, false));
+
+ AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] =
+ M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + ExpStr + Suffix + EndingStr,
+ FunctionType::get(IRB.getVoidTy(), Args1, false));
+ }
+ }
+ }
+
+ const std::string MemIntrinCallbackPrefix =
+ CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
+ AsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy);
+ AsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy);
+ AsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt32Ty(), IntptrTy);
+
+ AsanHandleNoReturnFunc =
+ M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy());
+
+ AsanPtrCmpFunction =
+ M.getOrInsertFunction(kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy);
+ AsanPtrSubFunction =
+ M.getOrInsertFunction(kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy);
+ // We insert an empty inline asm after __asan_report* to avoid callback merge.
+ EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
+ StringRef(""), StringRef(""),
+ /*hasSideEffects=*/true);
+ if (Mapping.InGlobal)
+ AsanShadowGlobal = M.getOrInsertGlobal("__asan_shadow",
+ ArrayType::get(IRB.getInt8Ty(), 0));
+}
+
+bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
+ // For each NSObject descendant having a +load method, this method is invoked
+ // by the ObjC runtime before any of the static constructors is called.
+ // Therefore we need to instrument such methods with a call to __asan_init
+ // at the beginning in order to initialize our runtime before any access to
+ // the shadow memory.
+ // We cannot just ignore these methods, because they may call other
+ // instrumented functions.
+ if (F.getName().find(" load]") != std::string::npos) {
+ FunctionCallee AsanInitFunction =
+ declareSanitizerInitFunction(*F.getParent(), kAsanInitName, {});
+ IRBuilder<> IRB(&F.front(), F.front().begin());
+ IRB.CreateCall(AsanInitFunction, {});
+ return true;
+ }
+ return false;
+}
+
+void AddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) {
+ // Generate code only when dynamic addressing is needed.
+ if (Mapping.Offset != kDynamicShadowSentinel)
+ return;
+
+ IRBuilder<> IRB(&F.front().front());
+ if (Mapping.InGlobal) {
+ if (ClWithIfuncSuppressRemat) {
+ // An empty inline asm with input reg == output reg.
+ // An opaque pointer-to-int cast, basically.
+ InlineAsm *Asm = InlineAsm::get(
+ FunctionType::get(IntptrTy, {AsanShadowGlobal->getType()}, false),
+ StringRef(""), StringRef("=r,0"),
+ /*hasSideEffects=*/false);
+ LocalDynamicShadow =
+ IRB.CreateCall(Asm, {AsanShadowGlobal}, ".asan.shadow");
+ } else {
+ LocalDynamicShadow =
+ IRB.CreatePointerCast(AsanShadowGlobal, IntptrTy, ".asan.shadow");
+ }
+ } else {
+ Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
+ kAsanShadowMemoryDynamicAddress, IntptrTy);
+ LocalDynamicShadow = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
+ }
+}
+
+void AddressSanitizer::markEscapedLocalAllocas(Function &F) {
+ // Find the one possible call to llvm.localescape and pre-mark allocas passed
+ // to it as uninteresting. This assumes we haven't started processing allocas
+ // yet. This check is done up front because iterating the use list in
+ // isInterestingAlloca would be algorithmically slower.
+ assert(ProcessedAllocas.empty() && "must process localescape before allocas");
+
+ // Try to get the declaration of llvm.localescape. If it's not in the module,
+ // we can exit early.
+ if (!F.getParent()->getFunction("llvm.localescape")) return;
+
+ // Look for a call to llvm.localescape in the entry block. It can't be in
+ // any other block.
+ for (Instruction &I : F.getEntryBlock()) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
+ if (II && II->getIntrinsicID() == Intrinsic::localescape) {
+ // We found a call. Mark all the allocas passed in as uninteresting.
+ for (Value *Arg : II->arg_operands()) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts());
+ assert(AI && AI->isStaticAlloca() &&
+ "non-static alloca arg to localescape");
+ ProcessedAllocas[AI] = false;
+ }
+ break;
+ }
+ }
+}
+
+bool AddressSanitizer::instrumentFunction(Function &F,
+ const TargetLibraryInfo *TLI) {
+ if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
+ if (!ClDebugFunc.empty() && ClDebugFunc == F.getName()) return false;
+ if (F.getName().startswith("__asan_")) return false;
+
+ bool FunctionModified = false;
+
+ // If needed, insert __asan_init before checking for SanitizeAddress attr.
+ // This function needs to be called even if the function body is not
+ // instrumented.
+ if (maybeInsertAsanInitAtFunctionEntry(F))
+ FunctionModified = true;
+
+ // Leave if the function doesn't need instrumentation.
+ if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return FunctionModified;
+
+ LLVM_DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
+
+ initializeCallbacks(*F.getParent());
+
+ FunctionStateRAII CleanupObj(this);
+
+ maybeInsertDynamicShadowAtFunctionEntry(F);
+
+ // We can't instrument allocas used with llvm.localescape. Only static allocas
+ // can be passed to that intrinsic.
+ markEscapedLocalAllocas(F);
+
+ // We want to instrument every address only once per basic block (unless there
+ // are calls between uses).
+ SmallPtrSet<Value *, 16> TempsToInstrument;
+ SmallVector<Instruction *, 16> ToInstrument;
+ SmallVector<Instruction *, 8> NoReturnCalls;
+ SmallVector<BasicBlock *, 16> AllBlocks;
+ SmallVector<Instruction *, 16> PointerComparisonsOrSubtracts;
+ int NumAllocas = 0;
+ bool IsWrite;
+ unsigned Alignment;
+ uint64_t TypeSize;
+
+ // Fill the set of memory operations to instrument.
+ for (auto &BB : F) {
+ AllBlocks.push_back(&BB);
+ TempsToInstrument.clear();
+ int NumInsnsPerBB = 0;
+ for (auto &Inst : BB) {
+ if (LooksLikeCodeInBug11395(&Inst)) return false;
+ Value *MaybeMask = nullptr;
+ if (Value *Addr = isInterestingMemoryAccess(&Inst, &IsWrite, &TypeSize,
+ &Alignment, &MaybeMask)) {
+ if (ClOpt && ClOptSameTemp) {
+ // If we have a mask, skip instrumentation if we've already
+ // instrumented the full object. But don't add to TempsToInstrument
+ // because we might get another load/store with a different mask.
+ if (MaybeMask) {
+ if (TempsToInstrument.count(Addr))
+ continue; // We've seen this (whole) temp in the current BB.
+ } else {
+ if (!TempsToInstrument.insert(Addr).second)
+ continue; // We've seen this temp in the current BB.
+ }
+ }
+ } else if (((ClInvalidPointerPairs || ClInvalidPointerCmp) &&
+ isInterestingPointerComparison(&Inst)) ||
+ ((ClInvalidPointerPairs || ClInvalidPointerSub) &&
+ isInterestingPointerSubtraction(&Inst))) {
+ PointerComparisonsOrSubtracts.push_back(&Inst);
+ continue;
+ } else if (isa<MemIntrinsic>(Inst)) {
+ // ok, take it.
+ } else {
+ if (isa<AllocaInst>(Inst)) NumAllocas++;
+ CallSite CS(&Inst);
+ if (CS) {
+ // A call inside BB.
+ TempsToInstrument.clear();
+ if (CS.doesNotReturn() && !CS->hasMetadata("nosanitize"))
+ NoReturnCalls.push_back(CS.getInstruction());
+ }
+ if (CallInst *CI = dyn_cast<CallInst>(&Inst))
+ maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
+ continue;
+ }
+ ToInstrument.push_back(&Inst);
+ NumInsnsPerBB++;
+ if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) break;
+ }
+ }
+
+ bool UseCalls =
+ (ClInstrumentationWithCallsThreshold >= 0 &&
+ ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold);
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ ObjectSizeOpts ObjSizeOpts;
+ ObjSizeOpts.RoundToAlign = true;
+ ObjectSizeOffsetVisitor ObjSizeVis(DL, TLI, F.getContext(), ObjSizeOpts);
+
+ // Instrument.
+ int NumInstrumented = 0;
+ for (auto Inst : ToInstrument) {
+ if (ClDebugMin < 0 || ClDebugMax < 0 ||
+ (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
+ if (isInterestingMemoryAccess(Inst, &IsWrite, &TypeSize, &Alignment))
+ instrumentMop(ObjSizeVis, Inst, UseCalls,
+ F.getParent()->getDataLayout());
+ else
+ instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
+ }
+ NumInstrumented++;
+ }
+
+ FunctionStackPoisoner FSP(F, *this);
+ bool ChangedStack = FSP.runOnFunction();
+
+ // We must unpoison the stack before NoReturn calls (throw, _exit, etc).
+ // See e.g. https://github.com/google/sanitizers/issues/37
+ for (auto CI : NoReturnCalls) {
+ IRBuilder<> IRB(CI);
+ IRB.CreateCall(AsanHandleNoReturnFunc, {});
+ }
+
+ for (auto Inst : PointerComparisonsOrSubtracts) {
+ instrumentPointerComparisonOrSubtraction(Inst);
+ NumInstrumented++;
+ }
+
+ if (NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty())
+ FunctionModified = true;
+
+ LLVM_DEBUG(dbgs() << "ASAN done instrumenting: " << FunctionModified << " "
+ << F << "\n");
+
+ return FunctionModified;
+}
+
+ // Workaround for bug 11395: we don't want to instrument the stack in
+ // functions with large assembly blobs (32-bit only), otherwise reg alloc may
+ // crash.
+ // FIXME: remove once bug 11395 is fixed.
+bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
+ if (LongSize != 32) return false;
+ CallInst *CI = dyn_cast<CallInst>(I);
+ if (!CI || !CI->isInlineAsm()) return false;
+ if (CI->getNumArgOperands() <= 5) return false;
+ // We have inline assembly with quite a few arguments.
+ return true;
+}
+
+void FunctionStackPoisoner::initializeCallbacks(Module &M) {
+ IRBuilder<> IRB(*C);
+ for (int i = 0; i <= kMaxAsanStackMallocSizeClass; i++) {
+ std::string Suffix = itostr(i);
+ AsanStackMallocFunc[i] = M.getOrInsertFunction(
+ kAsanStackMallocNameTemplate + Suffix, IntptrTy, IntptrTy);
+ AsanStackFreeFunc[i] =
+ M.getOrInsertFunction(kAsanStackFreeNameTemplate + Suffix,
+ IRB.getVoidTy(), IntptrTy, IntptrTy);
+ }
+ if (ASan.UseAfterScope) {
+ AsanPoisonStackMemoryFunc = M.getOrInsertFunction(
+ kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy);
+ AsanUnpoisonStackMemoryFunc = M.getOrInsertFunction(
+ kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy);
+ }
+
+ for (size_t Val : {0x00, 0xf1, 0xf2, 0xf3, 0xf5, 0xf8}) {
+ std::ostringstream Name;
+ Name << kAsanSetShadowPrefix;
+ Name << std::setw(2) << std::setfill('0') << std::hex << Val;
+ AsanSetShadowFunc[Val] =
+ M.getOrInsertFunction(Name.str(), IRB.getVoidTy(), IntptrTy, IntptrTy);
+ }
+
+ AsanAllocaPoisonFunc = M.getOrInsertFunction(
+ kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy);
+ AsanAllocasUnpoisonFunc = M.getOrInsertFunction(
+ kAsanAllocasUnpoison, IRB.getVoidTy(), IntptrTy, IntptrTy);
+}
+
+void FunctionStackPoisoner::copyToShadowInline(ArrayRef<uint8_t> ShadowMask,
+ ArrayRef<uint8_t> ShadowBytes,
+ size_t Begin, size_t End,
+ IRBuilder<> &IRB,
+ Value *ShadowBase) {
+ if (Begin >= End)
+ return;
+
+ const size_t LargestStoreSizeInBytes =
+ std::min<size_t>(sizeof(uint64_t), ASan.LongSize / 8);
+
+ const bool IsLittleEndian = F.getParent()->getDataLayout().isLittleEndian();
+
+ // Poison the given range in shadow using the largest possible store size,
+ // skipping leading and trailing zeros in ShadowMask. Zeros never change, so
+ // they need neither poisoning nor unpoisoning. Still, we don't mind if some
+ // of them end up in the middle of a store.
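+ // For example, on a 64-bit little-endian target, shadow bytes
+ // {0xf1, 0xf1, 0x00, 0xf3} over a 4-byte range become a single 4-byte store
+ // of 0xf300f1f1; the 0x00 in the middle is simply stored as 0x00 again.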
+ for (size_t i = Begin; i < End;) {
+ if (!ShadowMask[i]) {
+ assert(!ShadowBytes[i]);
+ ++i;
+ continue;
+ }
+
+ size_t StoreSizeInBytes = LargestStoreSizeInBytes;
+ // Fit store size into the range.
+ while (StoreSizeInBytes > End - i)
+ StoreSizeInBytes /= 2;
+
+ // Minimize store size by trimming trailing zeros.
+ for (size_t j = StoreSizeInBytes - 1; j && !ShadowMask[i + j]; --j) {
+ while (j <= StoreSizeInBytes / 2)
+ StoreSizeInBytes /= 2;
+ }
+
+ uint64_t Val = 0;
+ for (size_t j = 0; j < StoreSizeInBytes; j++) {
+ if (IsLittleEndian)
+ Val |= (uint64_t)ShadowBytes[i + j] << (8 * j);
+ else
+ Val = (Val << 8) | ShadowBytes[i + j];
+ }
+
+ Value *Ptr = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i));
+ Value *Poison = IRB.getIntN(StoreSizeInBytes * 8, Val);
+ IRB.CreateAlignedStore(
+ Poison, IRB.CreateIntToPtr(Ptr, Poison->getType()->getPointerTo()), 1);
+
+ i += StoreSizeInBytes;
+ }
+}
+
+void FunctionStackPoisoner::copyToShadow(ArrayRef<uint8_t> ShadowMask,
+ ArrayRef<uint8_t> ShadowBytes,
+ IRBuilder<> &IRB, Value *ShadowBase) {
+ copyToShadow(ShadowMask, ShadowBytes, 0, ShadowMask.size(), IRB, ShadowBase);
+}
+
+void FunctionStackPoisoner::copyToShadow(ArrayRef<uint8_t> ShadowMask,
+ ArrayRef<uint8_t> ShadowBytes,
+ size_t Begin, size_t End,
+ IRBuilder<> &IRB, Value *ShadowBase) {
+ assert(ShadowMask.size() == ShadowBytes.size());
+ size_t Done = Begin;
+ for (size_t i = Begin, j = Begin + 1; i < End; i = j++) {
+ if (!ShadowMask[i]) {
+ assert(!ShadowBytes[i]);
+ continue;
+ }
+ uint8_t Val = ShadowBytes[i];
+ if (!AsanSetShadowFunc[Val])
+ continue;
+
+ // Skip same values.
+ for (; j < End && ShadowMask[j] && Val == ShadowBytes[j]; ++j) {
+ }
+
+ if (j - i >= ClMaxInlinePoisoningSize) {
+ copyToShadowInline(ShadowMask, ShadowBytes, Done, i, IRB, ShadowBase);
+ IRB.CreateCall(AsanSetShadowFunc[Val],
+ {IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)),
+ ConstantInt::get(IntptrTy, j - i)});
+ Done = j;
+ }
+ }
+
+ copyToShadowInline(ShadowMask, ShadowBytes, Done, End, IRB, ShadowBase);
+}
+
+// Fake stack allocator (asan_fake_stack.h) has 11 size classes
+// for every power of 2 from kMinStackMallocSize to kMaxAsanStackMallocSizeClass
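+ // Class i covers frames of up to kMinStackMallocSize * 2^i bytes; e.g.,
+ // assuming kMinStackMallocSize is 64, class 0 covers frames up to 64 bytes,
+ // class 1 up to 128 bytes, and so on up to kMaxAsanStackMallocSizeClass.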
+static int StackMallocSizeClass(uint64_t LocalStackSize) {
+ assert(LocalStackSize <= kMaxStackMallocSize);
+ uint64_t MaxSize = kMinStackMallocSize;
+ for (int i = 0;; i++, MaxSize *= 2)
+ if (LocalStackSize <= MaxSize) return i;
+ llvm_unreachable("impossible LocalStackSize");
+}
+
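+ // Copy each byval argument into an explicit local alloca and redirect all
+ // uses to that copy. The byval memory itself lives in the caller's frame and
+ // gets no redzones, so copying it into a regular alloca makes it eligible
+ // for the usual stack instrumentation.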
+void FunctionStackPoisoner::copyArgsPassedByValToAllocas() {
+ Instruction *CopyInsertPoint = &F.front().front();
+ if (CopyInsertPoint == ASan.LocalDynamicShadow) {
+ // Insert after the dynamic shadow location is determined
+ CopyInsertPoint = CopyInsertPoint->getNextNode();
+ assert(CopyInsertPoint);
+ }
+ IRBuilder<> IRB(CopyInsertPoint);
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ for (Argument &Arg : F.args()) {
+ if (Arg.hasByValAttr()) {
+ Type *Ty = Arg.getType()->getPointerElementType();
+ unsigned Alignment = Arg.getParamAlignment();
+ if (Alignment == 0)
+ Alignment = DL.getABITypeAlignment(Ty);
+
+ AllocaInst *AI = IRB.CreateAlloca(
+ Ty, nullptr,
+ (Arg.hasName() ? Arg.getName() : "Arg" + Twine(Arg.getArgNo())) +
+ ".byval");
+ AI->setAlignment(Align(Alignment));
+ Arg.replaceAllUsesWith(AI);
+
+ uint64_t AllocSize = DL.getTypeAllocSize(Ty);
+ IRB.CreateMemCpy(AI, Alignment, &Arg, Alignment, AllocSize);
+ }
+ }
+}
+
+PHINode *FunctionStackPoisoner::createPHI(IRBuilder<> &IRB, Value *Cond,
+ Value *ValueIfTrue,
+ Instruction *ThenTerm,
+ Value *ValueIfFalse) {
+ PHINode *PHI = IRB.CreatePHI(IntptrTy, 2);
+ BasicBlock *CondBlock = cast<Instruction>(Cond)->getParent();
+ PHI->addIncoming(ValueIfFalse, CondBlock);
+ BasicBlock *ThenBlock = ThenTerm->getParent();
+ PHI->addIncoming(ValueIfTrue, ThenBlock);
+ return PHI;
+}
+
+Value *FunctionStackPoisoner::createAllocaForLayout(
+ IRBuilder<> &IRB, const ASanStackFrameLayout &L, bool Dynamic) {
+ AllocaInst *Alloca;
+ if (Dynamic) {
+ Alloca = IRB.CreateAlloca(IRB.getInt8Ty(),
+ ConstantInt::get(IRB.getInt64Ty(), L.FrameSize),
+ "MyAlloca");
+ } else {
+ Alloca = IRB.CreateAlloca(ArrayType::get(IRB.getInt8Ty(), L.FrameSize),
+ nullptr, "MyAlloca");
+ assert(Alloca->isStaticAlloca());
+ }
+ assert((ClRealignStack & (ClRealignStack - 1)) == 0);
+ size_t FrameAlignment = std::max(L.FrameAlignment, (size_t)ClRealignStack);
+ Alloca->setAlignment(MaybeAlign(FrameAlignment));
+ return IRB.CreatePointerCast(Alloca, IntptrTy);
+}
+
+void FunctionStackPoisoner::createDynamicAllocasInitStorage() {
+ BasicBlock &FirstBB = *F.begin();
+ IRBuilder<> IRB(dyn_cast<Instruction>(FirstBB.begin()));
+ DynamicAllocaLayout = IRB.CreateAlloca(IntptrTy, nullptr);
+ IRB.CreateStore(Constant::getNullValue(IntptrTy), DynamicAllocaLayout);
+ DynamicAllocaLayout->setAlignment(Align(32));
+}
+
+void FunctionStackPoisoner::processDynamicAllocas() {
+ if (!ClInstrumentDynamicAllocas || DynamicAllocaVec.empty()) {
+ assert(DynamicAllocaPoisonCallVec.empty());
+ return;
+ }
+
+ // Insert poison calls for lifetime intrinsics for dynamic allocas.
+ for (const auto &APC : DynamicAllocaPoisonCallVec) {
+ assert(APC.InsBefore);
+ assert(APC.AI);
+ assert(ASan.isInterestingAlloca(*APC.AI));
+ assert(!APC.AI->isStaticAlloca());
+
+ IRBuilder<> IRB(APC.InsBefore);
+ poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison);
+ // Dynamic allocas will be unpoisoned unconditionally below in
+ // unpoisonDynamicAllocas.
+ // Flag that we need to unpoison static allocas.
+ }
+
+ // Handle dynamic allocas.
+ createDynamicAllocasInitStorage();
+ for (auto &AI : DynamicAllocaVec)
+ handleDynamicAllocaCall(AI);
+ unpoisonDynamicAllocas();
+}
+
+void FunctionStackPoisoner::processStaticAllocas() {
+ if (AllocaVec.empty()) {
+ assert(StaticAllocaPoisonCallVec.empty());
+ return;
+ }
+
+ int StackMallocIdx = -1;
+ DebugLoc EntryDebugLocation;
+ if (auto SP = F.getSubprogram())
+ EntryDebugLocation = DebugLoc::get(SP->getScopeLine(), 0, SP);
+
+ Instruction *InsBefore = AllocaVec[0];
+ IRBuilder<> IRB(InsBefore);
+ IRB.SetCurrentDebugLocation(EntryDebugLocation);
+
+ // Make sure non-instrumented allocas stay in the entry block. Otherwise,
+ // debug info is broken, because only entry-block allocas are treated as
+ // regular stack slots.
+ auto InsBeforeB = InsBefore->getParent();
+ assert(InsBeforeB == &F.getEntryBlock());
+ for (auto *AI : StaticAllocasToMoveUp)
+ if (AI->getParent() == InsBeforeB)
+ AI->moveBefore(InsBefore);
+
+ // If we have a call to llvm.localescape, keep it in the entry block.
+ if (LocalEscapeCall) LocalEscapeCall->moveBefore(InsBefore);
+
+ SmallVector<ASanStackVariableDescription, 16> SVD;
+ SVD.reserve(AllocaVec.size());
+ for (AllocaInst *AI : AllocaVec) {
+ ASanStackVariableDescription D = {AI->getName().data(),
+ ASan.getAllocaSizeInBytes(*AI),
+ 0,
+ AI->getAlignment(),
+ AI,
+ 0,
+ 0};
+ SVD.push_back(D);
+ }
+
+ // Minimal header size (left redzone) is 4 pointers,
+  // i.e. 32 bytes on 64-bit platforms and 16 bytes on 32-bit platforms.
+ size_t Granularity = 1ULL << Mapping.Scale;
+ size_t MinHeaderSize = std::max((size_t)ASan.LongSize / 2, Granularity);
+ const ASanStackFrameLayout &L =
+ ComputeASanStackFrameLayout(SVD, Granularity, MinHeaderSize);
+
+ // Build AllocaToSVDMap for ASanStackVariableDescription lookup.
+ DenseMap<const AllocaInst *, ASanStackVariableDescription *> AllocaToSVDMap;
+ for (auto &Desc : SVD)
+ AllocaToSVDMap[Desc.AI] = &Desc;
+
+ // Update SVD with information from lifetime intrinsics.
+ for (const auto &APC : StaticAllocaPoisonCallVec) {
+ assert(APC.InsBefore);
+ assert(APC.AI);
+ assert(ASan.isInterestingAlloca(*APC.AI));
+ assert(APC.AI->isStaticAlloca());
+
+ ASanStackVariableDescription &Desc = *AllocaToSVDMap[APC.AI];
+ Desc.LifetimeSize = Desc.Size;
+ if (const DILocation *FnLoc = EntryDebugLocation.get()) {
+ if (const DILocation *LifetimeLoc = APC.InsBefore->getDebugLoc().get()) {
+ if (LifetimeLoc->getFile() == FnLoc->getFile())
+ if (unsigned Line = LifetimeLoc->getLine())
+ Desc.Line = std::min(Desc.Line ? Desc.Line : Line, Line);
+ }
+ }
+ }
+
+ auto DescriptionString = ComputeASanStackFrameDescription(SVD);
+ LLVM_DEBUG(dbgs() << DescriptionString << " --- " << L.FrameSize << "\n");
+ uint64_t LocalStackSize = L.FrameSize;
+ bool DoStackMalloc = ClUseAfterReturn && !ASan.CompileKernel &&
+ LocalStackSize <= kMaxStackMallocSize;
+ bool DoDynamicAlloca = ClDynamicAllocaStack;
+ // Don't do dynamic alloca or stack malloc if:
+ // 1) There is inline asm: too often it makes assumptions on which registers
+ // are available.
+ // 2) There is a returns_twice call (typically setjmp), which is
+ // optimization-hostile, and doesn't play well with introduced indirect
+ // register-relative calculation of local variable addresses.
+ DoDynamicAlloca &= !HasNonEmptyInlineAsm && !HasReturnsTwiceCall;
+ DoStackMalloc &= !HasNonEmptyInlineAsm && !HasReturnsTwiceCall;
+
+ Value *StaticAlloca =
+ DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false);
+
+ Value *FakeStack;
+ Value *LocalStackBase;
+ Value *LocalStackBaseAlloca;
+ uint8_t DIExprFlags = DIExpression::ApplyOffset;
+
+ if (DoStackMalloc) {
+ LocalStackBaseAlloca =
+ IRB.CreateAlloca(IntptrTy, nullptr, "asan_local_stack_base");
+ // void *FakeStack = __asan_option_detect_stack_use_after_return
+ // ? __asan_stack_malloc_N(LocalStackSize)
+ // : nullptr;
+ // void *LocalStackBase = (FakeStack) ? FakeStack : alloca(LocalStackSize);
+ Constant *OptionDetectUseAfterReturn = F.getParent()->getOrInsertGlobal(
+ kAsanOptionDetectUseAfterReturn, IRB.getInt32Ty());
+ Value *UseAfterReturnIsEnabled = IRB.CreateICmpNE(
+ IRB.CreateLoad(IRB.getInt32Ty(), OptionDetectUseAfterReturn),
+ Constant::getNullValue(IRB.getInt32Ty()));
+ Instruction *Term =
+ SplitBlockAndInsertIfThen(UseAfterReturnIsEnabled, InsBefore, false);
+ IRBuilder<> IRBIf(Term);
+ IRBIf.SetCurrentDebugLocation(EntryDebugLocation);
+ StackMallocIdx = StackMallocSizeClass(LocalStackSize);
+ assert(StackMallocIdx <= kMaxAsanStackMallocSizeClass);
+ Value *FakeStackValue =
+ IRBIf.CreateCall(AsanStackMallocFunc[StackMallocIdx],
+ ConstantInt::get(IntptrTy, LocalStackSize));
+ IRB.SetInsertPoint(InsBefore);
+ IRB.SetCurrentDebugLocation(EntryDebugLocation);
+ FakeStack = createPHI(IRB, UseAfterReturnIsEnabled, FakeStackValue, Term,
+ ConstantInt::get(IntptrTy, 0));
+
+ Value *NoFakeStack =
+ IRB.CreateICmpEQ(FakeStack, Constant::getNullValue(IntptrTy));
+ Term = SplitBlockAndInsertIfThen(NoFakeStack, InsBefore, false);
+ IRBIf.SetInsertPoint(Term);
+ IRBIf.SetCurrentDebugLocation(EntryDebugLocation);
+ Value *AllocaValue =
+ DoDynamicAlloca ? createAllocaForLayout(IRBIf, L, true) : StaticAlloca;
+
+ IRB.SetInsertPoint(InsBefore);
+ IRB.SetCurrentDebugLocation(EntryDebugLocation);
+ LocalStackBase = createPHI(IRB, NoFakeStack, AllocaValue, Term, FakeStack);
+ IRB.SetCurrentDebugLocation(EntryDebugLocation);
+ IRB.CreateStore(LocalStackBase, LocalStackBaseAlloca);
+ DIExprFlags |= DIExpression::DerefBefore;
+ } else {
+ // void *FakeStack = nullptr;
+ // void *LocalStackBase = alloca(LocalStackSize);
+ FakeStack = ConstantInt::get(IntptrTy, 0);
+ LocalStackBase =
+ DoDynamicAlloca ? createAllocaForLayout(IRB, L, true) : StaticAlloca;
+ LocalStackBaseAlloca = LocalStackBase;
+ }
+
+ // Replace Alloca instructions with base+offset.
+ for (const auto &Desc : SVD) {
+ AllocaInst *AI = Desc.AI;
+ replaceDbgDeclareForAlloca(AI, LocalStackBaseAlloca, DIB, DIExprFlags,
+ Desc.Offset);
+ Value *NewAllocaPtr = IRB.CreateIntToPtr(
+ IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)),
+ AI->getType());
+ AI->replaceAllUsesWith(NewAllocaPtr);
+ }
+
+ // The left-most redzone has enough space for at least 4 pointers.
+ // Write the Magic value to redzone[0].
+ Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy);
+ IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic),
+ BasePlus0);
+ // Write the frame description constant to redzone[1].
+ Value *BasePlus1 = IRB.CreateIntToPtr(
+ IRB.CreateAdd(LocalStackBase,
+ ConstantInt::get(IntptrTy, ASan.LongSize / 8)),
+ IntptrPtrTy);
+ GlobalVariable *StackDescriptionGlobal =
+ createPrivateGlobalForString(*F.getParent(), DescriptionString,
+ /*AllowMerging*/ true, kAsanGenPrefix);
+ Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy);
+ IRB.CreateStore(Description, BasePlus1);
+ // Write the PC to redzone[2].
+ Value *BasePlus2 = IRB.CreateIntToPtr(
+ IRB.CreateAdd(LocalStackBase,
+ ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)),
+ IntptrPtrTy);
+ IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2);
+
+ const auto &ShadowAfterScope = GetShadowBytesAfterScope(SVD, L);
+
+ // Poison the stack red zones at the entry.
+ Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB);
+  // As the mask we must use the most-poisoned case: red zones plus
+  // after-scope poisoning. As the bytes we can use either the same or just
+  // the red zones.
+ copyToShadow(ShadowAfterScope, ShadowAfterScope, IRB, ShadowBase);
+
+ if (!StaticAllocaPoisonCallVec.empty()) {
+ const auto &ShadowInScope = GetShadowBytes(SVD, L);
+
+ // Poison static allocas near lifetime intrinsics.
+ for (const auto &APC : StaticAllocaPoisonCallVec) {
+ const ASanStackVariableDescription &Desc = *AllocaToSVDMap[APC.AI];
+ assert(Desc.Offset % L.Granularity == 0);
+ size_t Begin = Desc.Offset / L.Granularity;
+ size_t End = Begin + (APC.Size + L.Granularity - 1) / L.Granularity;
+
+ IRBuilder<> IRB(APC.InsBefore);
+ copyToShadow(ShadowAfterScope,
+ APC.DoPoison ? ShadowAfterScope : ShadowInScope, Begin, End,
+ IRB, ShadowBase);
+ }
+ }
+
+ SmallVector<uint8_t, 64> ShadowClean(ShadowAfterScope.size(), 0);
+ SmallVector<uint8_t, 64> ShadowAfterReturn;
+
+ // (Un)poison the stack before all ret instructions.
+ for (auto Ret : RetVec) {
+ IRBuilder<> IRBRet(Ret);
+ // Mark the current frame as retired.
+ IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic),
+ BasePlus0);
+ if (DoStackMalloc) {
+ assert(StackMallocIdx >= 0);
+ // if FakeStack != 0 // LocalStackBase == FakeStack
+ // // In use-after-return mode, poison the whole stack frame.
+ // if StackMallocIdx <= 4
+ // // For small sizes inline the whole thing:
+ // memset(ShadowBase, kAsanStackAfterReturnMagic, ShadowSize);
+ // **SavedFlagPtr(FakeStack) = 0
+ // else
+ // __asan_stack_free_N(FakeStack, LocalStackSize)
+ // else
+ // <This is not a fake stack; unpoison the redzones>
+ Value *Cmp =
+ IRBRet.CreateICmpNE(FakeStack, Constant::getNullValue(IntptrTy));
+ Instruction *ThenTerm, *ElseTerm;
+ SplitBlockAndInsertIfThenElse(Cmp, Ret, &ThenTerm, &ElseTerm);
+
+ IRBuilder<> IRBPoison(ThenTerm);
+ if (StackMallocIdx <= 4) {
+ int ClassSize = kMinStackMallocSize << StackMallocIdx;
+ ShadowAfterReturn.resize(ClassSize / L.Granularity,
+ kAsanStackUseAfterReturnMagic);
+ copyToShadow(ShadowAfterReturn, ShadowAfterReturn, IRBPoison,
+ ShadowBase);
+ Value *SavedFlagPtrPtr = IRBPoison.CreateAdd(
+ FakeStack,
+ ConstantInt::get(IntptrTy, ClassSize - ASan.LongSize / 8));
+ Value *SavedFlagPtr = IRBPoison.CreateLoad(
+ IntptrTy, IRBPoison.CreateIntToPtr(SavedFlagPtrPtr, IntptrPtrTy));
+ IRBPoison.CreateStore(
+ Constant::getNullValue(IRBPoison.getInt8Ty()),
+ IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getInt8PtrTy()));
+ } else {
+ // For larger frames call __asan_stack_free_*.
+ IRBPoison.CreateCall(
+ AsanStackFreeFunc[StackMallocIdx],
+ {FakeStack, ConstantInt::get(IntptrTy, LocalStackSize)});
+ }
+
+ IRBuilder<> IRBElse(ElseTerm);
+ copyToShadow(ShadowAfterScope, ShadowClean, IRBElse, ShadowBase);
+ } else {
+ copyToShadow(ShadowAfterScope, ShadowClean, IRBRet, ShadowBase);
+ }
+ }
+
+ // We are done. Remove the old unused alloca instructions.
+ for (auto AI : AllocaVec) AI->eraseFromParent();
+}
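+
+// A rough illustrative sketch (assuming a 64-bit target where
+// ASan.LongSize / 8 == 8): the three words stored into the left redzone above
+// form a small frame header that the runtime can inspect.
+//
+//   struct FakeFrameHeader {
+//     uintptr_t Magic;   // kCurrentStackFrameMagic, rewritten to the retired
+//                        //   magic before each return
+//     uintptr_t Descr;   // address of the frame description string
+//     uintptr_t PC;      // address of the owning function
+//   };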
+
+void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
+ IRBuilder<> &IRB, bool DoPoison) {
+ // For now just insert the call to ASan runtime.
+ Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy);
+ Value *SizeArg = ConstantInt::get(IntptrTy, Size);
+ IRB.CreateCall(
+ DoPoison ? AsanPoisonStackMemoryFunc : AsanUnpoisonStackMemoryFunc,
+ {AddrArg, SizeArg});
+}
+
+// Handling llvm.lifetime intrinsics for a given %alloca:
+// (1) collect all llvm.lifetime.xxx(%size, %value) describing the alloca.
+// (2) if %size is constant, poison memory for llvm.lifetime.end (to detect
+// invalid accesses) and unpoison it for llvm.lifetime.start (the memory
+// could be poisoned by previous llvm.lifetime.end instruction, as the
+// variable may go in and out of scope several times, e.g. in loops).
+// (3) if we poisoned at least one %alloca in a function,
+// unpoison the whole stack frame at function exit.
+void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
+ IRBuilder<> IRB(AI);
+
+ const unsigned Align = std::max(kAllocaRzSize, AI->getAlignment());
+ const uint64_t AllocaRedzoneMask = kAllocaRzSize - 1;
+
+ Value *Zero = Constant::getNullValue(IntptrTy);
+ Value *AllocaRzSize = ConstantInt::get(IntptrTy, kAllocaRzSize);
+ Value *AllocaRzMask = ConstantInt::get(IntptrTy, AllocaRedzoneMask);
+
+  // We need to extend the alloca with additional memory for the redzones.
+  // The alloca allocates ArraySize elements of ElementSize bytes each, so
+  // the allocated size in bytes (OldSize) is ArraySize * ElementSize.
+ const unsigned ElementSize =
+ F.getParent()->getDataLayout().getTypeAllocSize(AI->getAllocatedType());
+ Value *OldSize =
+ IRB.CreateMul(IRB.CreateIntCast(AI->getArraySize(), IntptrTy, false),
+ ConstantInt::get(IntptrTy, ElementSize));
+
+ // PartialSize = OldSize % 32
+ Value *PartialSize = IRB.CreateAnd(OldSize, AllocaRzMask);
+
+ // Misalign = kAllocaRzSize - PartialSize;
+ Value *Misalign = IRB.CreateSub(AllocaRzSize, PartialSize);
+
+ // PartialPadding = Misalign != kAllocaRzSize ? Misalign : 0;
+ Value *Cond = IRB.CreateICmpNE(Misalign, AllocaRzSize);
+ Value *PartialPadding = IRB.CreateSelect(Cond, Misalign, Zero);
+
+  // AdditionalChunkSize = Align + PartialPadding + kAllocaRzSize
+  // Align is added for the left redzone, PartialPadding for a possible
+  // partial redzone, and kAllocaRzSize for the right redzone.
+ Value *AdditionalChunkSize = IRB.CreateAdd(
+ ConstantInt::get(IntptrTy, Align + kAllocaRzSize), PartialPadding);
+
+ Value *NewSize = IRB.CreateAdd(OldSize, AdditionalChunkSize);
+
+ // Insert new alloca with new NewSize and Align params.
+ AllocaInst *NewAlloca = IRB.CreateAlloca(IRB.getInt8Ty(), NewSize);
+ NewAlloca->setAlignment(MaybeAlign(Align));
+
+ // NewAddress = Address + Align
+ Value *NewAddress = IRB.CreateAdd(IRB.CreatePtrToInt(NewAlloca, IntptrTy),
+ ConstantInt::get(IntptrTy, Align));
+
+  // Insert an __asan_alloca_poison call for the newly created alloca.
+ IRB.CreateCall(AsanAllocaPoisonFunc, {NewAddress, OldSize});
+
+  // Store the last alloca's address to DynamicAllocaLayout. We'll need it
+  // to unpoison the dynamic allocas at function exit.
+ IRB.CreateStore(IRB.CreatePtrToInt(NewAlloca, IntptrTy), DynamicAllocaLayout);
+
+ Value *NewAddressPtr = IRB.CreateIntToPtr(NewAddress, AI->getType());
+
+  // Replace all uses of the address returned by the old alloca with
+  // NewAddressPtr.
+ AI->replaceAllUsesWith(NewAddressPtr);
+
+ // We are done. Erase old alloca from parent.
+ AI->eraseFromParent();
+}
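+
+// Worked example for the sizing above (assuming kAllocaRzSize == 32, as the
+// "% 32" comment implies, and an alloca of 10 x i32 with alignment <= 32):
+//   OldSize        = 10 * 4              = 40
+//   PartialSize    = 40 & 31             = 8
+//   Misalign       = 32 - 8              = 24
+//   PartialPadding = (24 != 32) ? 24 : 0 = 24
+//   NewSize        = 40 + (32 + 32) + 24 = 128  // left RZ, right RZ, padding
+//   NewAddress     = NewAlloca + 32             // user data after the left RZ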
+
+// isSafeAccess returns true if Addr is always inbounds with respect to its
+// base object. For example, it is a field access or an array access with
+// constant inbounds index.
+bool AddressSanitizer::isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis,
+ Value *Addr, uint64_t TypeSize) const {
+ SizeOffsetType SizeOffset = ObjSizeVis.compute(Addr);
+ if (!ObjSizeVis.bothKnown(SizeOffset)) return false;
+ uint64_t Size = SizeOffset.first.getZExtValue();
+ int64_t Offset = SizeOffset.second.getSExtValue();
+ // Three checks are required to ensure safety:
+ // . Offset >= 0 (since the offset is given from the base ptr)
+ // . Size >= Offset (unsigned)
+ // . Size - Offset >= NeededSize (unsigned)
+ return Offset >= 0 && Size >= uint64_t(Offset) &&
+ Size - uint64_t(Offset) >= TypeSize / 8;
+}
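+
+// Worked example for isSafeAccess (illustration only): for an 8-byte access
+// where ObjSizeVis reports Size = 16 and Offset = 12,
+//   Offset >= 0          : 12 >= 0   holds
+//   Size >= Offset       : 16 >= 12  holds
+//   Size - Offset >= 8   : 4 >= 8    fails
+// so the access is not provably in bounds and keeps its runtime check.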
diff --git a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
new file mode 100644
index 000000000000..ae34be986537
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -0,0 +1,248 @@
+//===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetFolder.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "bounds-checking"
+
+static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
+ cl::desc("Use one trap block per function"));
+
+STATISTIC(ChecksAdded, "Bounds checks added");
+STATISTIC(ChecksSkipped, "Bounds checks skipped");
+STATISTIC(ChecksUnable, "Bounds checks unable to add");
+
+using BuilderTy = IRBuilder<TargetFolder>;
+
+/// Gets the conditions under which memory accessing instructions will overflow.
+///
+/// \p Ptr is the pointer that will be read/written, and \p InstVal is either
+/// the result from the load or the value being stored. It is used to determine
+/// the size of memory block that is touched.
+///
+/// Returns the condition under which the access will overflow.
+static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal,
+ const DataLayout &DL, TargetLibraryInfo &TLI,
+ ObjectSizeOffsetEvaluator &ObjSizeEval,
+ BuilderTy &IRB, ScalarEvolution &SE) {
+ uint64_t NeededSize = DL.getTypeStoreSize(InstVal->getType());
+ LLVM_DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
+ << " bytes\n");
+
+ SizeOffsetEvalType SizeOffset = ObjSizeEval.compute(Ptr);
+
+ if (!ObjSizeEval.bothKnown(SizeOffset)) {
+ ++ChecksUnable;
+ return nullptr;
+ }
+
+ Value *Size = SizeOffset.first;
+ Value *Offset = SizeOffset.second;
+ ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
+
+ Type *IntTy = DL.getIntPtrType(Ptr->getType());
+ Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize);
+
+ auto SizeRange = SE.getUnsignedRange(SE.getSCEV(Size));
+ auto OffsetRange = SE.getUnsignedRange(SE.getSCEV(Offset));
+ auto NeededSizeRange = SE.getUnsignedRange(SE.getSCEV(NeededSizeVal));
+
+ // three checks are required to ensure safety:
+ // . Offset >= 0 (since the offset is given from the base ptr)
+ // . Size >= Offset (unsigned)
+ // . Size - Offset >= NeededSize (unsigned)
+ //
+ // optimization: if Size >= 0 (signed), skip 1st check
+  // FIXME: add NSW/NUW here? -- we don't care if the subtraction overflows
+ Value *ObjSize = IRB.CreateSub(Size, Offset);
+ Value *Cmp2 = SizeRange.getUnsignedMin().uge(OffsetRange.getUnsignedMax())
+ ? ConstantInt::getFalse(Ptr->getContext())
+ : IRB.CreateICmpULT(Size, Offset);
+ Value *Cmp3 = SizeRange.sub(OffsetRange)
+ .getUnsignedMin()
+ .uge(NeededSizeRange.getUnsignedMax())
+ ? ConstantInt::getFalse(Ptr->getContext())
+ : IRB.CreateICmpULT(ObjSize, NeededSizeVal);
+ Value *Or = IRB.CreateOr(Cmp2, Cmp3);
+ if ((!SizeCI || SizeCI->getValue().slt(0)) &&
+ !SizeRange.getSignedMin().isNonNegative()) {
+ Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0));
+ Or = IRB.CreateOr(Cmp1, Or);
+ }
+
+ return Or;
+}
+
+/// Adds run-time bounds checks to memory accessing instructions.
+///
+/// \p Or is the condition that should guard the trap.
+///
+/// \p GetTrapBB is a callable that returns the trap BB to use on failure.
+template <typename GetTrapBBT>
+static void insertBoundsCheck(Value *Or, BuilderTy IRB, GetTrapBBT GetTrapBB) {
+  // Check whether the overflow condition folded to a constant.
+ ConstantInt *C = dyn_cast_or_null<ConstantInt>(Or);
+ if (C) {
+ ++ChecksSkipped;
+    // A constant zero means the access never overflows; nothing to do.
+ if (!C->getZExtValue())
+ return;
+ }
+ ++ChecksAdded;
+
+ BasicBlock::iterator SplitI = IRB.GetInsertPoint();
+ BasicBlock *OldBB = SplitI->getParent();
+ BasicBlock *Cont = OldBB->splitBasicBlock(SplitI);
+ OldBB->getTerminator()->eraseFromParent();
+
+ if (C) {
+    // The condition is a constant non-zero value, i.e. the access always
+    // overflows, so branch unconditionally to the trap block.
+    // FIXME: We should really handle this differently, to bypass splitting
+    // the block.
+ BranchInst::Create(GetTrapBB(IRB), OldBB);
+ return;
+ }
+
+ // Create the conditional branch.
+ BranchInst::Create(GetTrapBB(IRB), Cont, Or, OldBB);
+}
+
+static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI,
+ ScalarEvolution &SE) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ ObjectSizeOpts EvalOpts;
+ EvalOpts.RoundToAlign = true;
+ ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, F.getContext(), EvalOpts);
+
+  // Check HANDLE_MEMORY_INST in include/llvm/IR/Instruction.def for the
+  // memory-touching instructions.
+ SmallVector<std::pair<Instruction *, Value *>, 4> TrapInfo;
+ for (Instruction &I : instructions(F)) {
+ Value *Or = nullptr;
+ BuilderTy IRB(I.getParent(), BasicBlock::iterator(&I), TargetFolder(DL));
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ Or = getBoundsCheckCond(LI->getPointerOperand(), LI, DL, TLI,
+ ObjSizeEval, IRB, SE);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ Or = getBoundsCheckCond(SI->getPointerOperand(), SI->getValueOperand(),
+ DL, TLI, ObjSizeEval, IRB, SE);
+ } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
+ Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getCompareOperand(),
+ DL, TLI, ObjSizeEval, IRB, SE);
+ } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
+ Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getValOperand(), DL,
+ TLI, ObjSizeEval, IRB, SE);
+ }
+ if (Or)
+ TrapInfo.push_back(std::make_pair(&I, Or));
+ }
+
+ // Create a trapping basic block on demand using a callback. Depending on
+ // flags, this will either create a single block for the entire function or
+ // will create a fresh block every time it is called.
+ BasicBlock *TrapBB = nullptr;
+ auto GetTrapBB = [&TrapBB](BuilderTy &IRB) {
+ if (TrapBB && SingleTrapBB)
+ return TrapBB;
+
+ Function *Fn = IRB.GetInsertBlock()->getParent();
+ // FIXME: This debug location doesn't make a lot of sense in the
+ // `SingleTrapBB` case.
+ auto DebugLoc = IRB.getCurrentDebugLocation();
+ IRBuilder<>::InsertPointGuard Guard(IRB);
+ TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
+ IRB.SetInsertPoint(TrapBB);
+
+ auto *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap);
+ CallInst *TrapCall = IRB.CreateCall(F, {});
+ TrapCall->setDoesNotReturn();
+ TrapCall->setDoesNotThrow();
+ TrapCall->setDebugLoc(DebugLoc);
+ IRB.CreateUnreachable();
+
+ return TrapBB;
+ };
+
+ // Add the checks.
+ for (const auto &Entry : TrapInfo) {
+ Instruction *Inst = Entry.first;
+ BuilderTy IRB(Inst->getParent(), BasicBlock::iterator(Inst), TargetFolder(DL));
+ insertBoundsCheck(Entry.second, IRB, GetTrapBB);
+ }
+
+ return !TrapInfo.empty();
+}
+
+PreservedAnalyses BoundsCheckingPass::run(Function &F, FunctionAnalysisManager &AM) {
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+
+ if (!addBoundsChecking(F, TLI, SE))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+
+namespace {
+struct BoundsCheckingLegacyPass : public FunctionPass {
+ static char ID;
+
+ BoundsCheckingLegacyPass() : FunctionPass(ID) {
+ initializeBoundsCheckingLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ return addBoundsChecking(F, TLI, SE);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ }
+};
+} // namespace
+
+char BoundsCheckingLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(BoundsCheckingLegacyPass, "bounds-checking",
+ "Run-time bounds checking", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(BoundsCheckingLegacyPass, "bounds-checking",
+ "Run-time bounds checking", false, false)
+
+FunctionPass *llvm::createBoundsCheckingLegacyPass() {
+ return new BoundsCheckingLegacyPass();
+}
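+
+// A minimal illustrative input (hypothetical, not a test from this change)
+// that the pass would instrument: ObjectSizeOffsetEvaluator reports Size = 16
+// and Offset = 20 for the marked load, "Size >= Offset" fails (16 < 20), the
+// folded guard condition is a constant true, and execution reaches the
+// generated "trap" block.
+//
+//   int f() {
+//     int buf[4];
+//     return buf[5];   // 4-byte load at offset 20 into a 16-byte object
+//   }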
diff --git a/llvm/lib/Transforms/Instrumentation/CFGMST.h b/llvm/lib/Transforms/Instrumentation/CFGMST.h
new file mode 100644
index 000000000000..8bb6f47c4846
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/CFGMST.h
@@ -0,0 +1,288 @@
+//===-- CFGMST.h - Minimum Spanning Tree for CFG ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a Union-find algorithm to compute Minimum Spanning Tree
+// for a given CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H
+#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <utility>
+#include <vector>
+
+#define DEBUG_TYPE "cfgmst"
+
+namespace llvm {
+
+/// A union-find based Minimum Spanning Tree for CFG
+///
+/// Implements a Union-find algorithm to compute Minimum Spanning Tree
+/// for a given CFG.
+template <class Edge, class BBInfo> class CFGMST {
+public:
+ Function &F;
+
+ // Store all the edges in CFG. It may contain some stale edges
+ // when Removed is set.
+ std::vector<std::unique_ptr<Edge>> AllEdges;
+
+ // This map records the auxiliary information for each BB.
+ DenseMap<const BasicBlock *, std::unique_ptr<BBInfo>> BBInfos;
+
+  // Whether the function has an exit block with no successors.
+  // (For a function with an infinite loop, this block may be absent.)
+ bool ExitBlockFound = false;
+
+ // Find the root group of the G and compress the path from G to the root.
+ BBInfo *findAndCompressGroup(BBInfo *G) {
+ if (G->Group != G)
+ G->Group = findAndCompressGroup(static_cast<BBInfo *>(G->Group));
+ return static_cast<BBInfo *>(G->Group);
+ }
+
+  // Union BB1 and BB2 into the same group and return true.
+  // Return false if BB1 and BB2 are already in the same group.
+ bool unionGroups(const BasicBlock *BB1, const BasicBlock *BB2) {
+ BBInfo *BB1G = findAndCompressGroup(&getBBInfo(BB1));
+ BBInfo *BB2G = findAndCompressGroup(&getBBInfo(BB2));
+
+ if (BB1G == BB2G)
+ return false;
+
+    // Make the smaller-rank tree a direct child of the root of the
+    // higher-rank tree.
+ if (BB1G->Rank < BB2G->Rank)
+ BB1G->Group = BB2G;
+ else {
+ BB2G->Group = BB1G;
+      // If the ranks are the same, increment the new root's rank by one.
+ if (BB1G->Rank == BB2G->Rank)
+ BB1G->Rank++;
+ }
+ return true;
+ }
+
+  // Given a BB, return the auxiliary information.
+ BBInfo &getBBInfo(const BasicBlock *BB) const {
+ auto It = BBInfos.find(BB);
+ assert(It->second.get() != nullptr);
+ return *It->second.get();
+ }
+
+  // Given a BB, return the auxiliary information if it's available.
+ BBInfo *findBBInfo(const BasicBlock *BB) const {
+ auto It = BBInfos.find(BB);
+ if (It == BBInfos.end())
+ return nullptr;
+ return It->second.get();
+ }
+
+ // Traverse the CFG using a stack. Find all the edges and assign the weight.
+ // Edges with large weight will be put into MST first so they are less likely
+ // to be instrumented.
+ void buildEdges() {
+ LLVM_DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n");
+
+ const BasicBlock *Entry = &(F.getEntryBlock());
+ uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2);
+ Edge *EntryIncoming = nullptr, *EntryOutgoing = nullptr,
+ *ExitOutgoing = nullptr, *ExitIncoming = nullptr;
+ uint64_t MaxEntryOutWeight = 0, MaxExitOutWeight = 0, MaxExitInWeight = 0;
+
+ // Add a fake edge to the entry.
+ EntryIncoming = &addEdge(nullptr, Entry, EntryWeight);
+ LLVM_DEBUG(dbgs() << " Edge: from fake node to " << Entry->getName()
+ << " w = " << EntryWeight << "\n");
+
+ // Special handling for single BB functions.
+ if (succ_empty(Entry)) {
+ addEdge(Entry, nullptr, EntryWeight);
+ return;
+ }
+
+ static const uint32_t CriticalEdgeMultiplier = 1000;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ Instruction *TI = BB->getTerminator();
+ uint64_t BBWeight =
+ (BFI != nullptr ? BFI->getBlockFreq(&*BB).getFrequency() : 2);
+ uint64_t Weight = 2;
+ if (int successors = TI->getNumSuccessors()) {
+ for (int i = 0; i != successors; ++i) {
+ BasicBlock *TargetBB = TI->getSuccessor(i);
+ bool Critical = isCriticalEdge(TI, i);
+ uint64_t scaleFactor = BBWeight;
+ if (Critical) {
+ if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
+ scaleFactor *= CriticalEdgeMultiplier;
+ else
+ scaleFactor = UINT64_MAX;
+ }
+ if (BPI != nullptr)
+ Weight = BPI->getEdgeProbability(&*BB, TargetBB).scale(scaleFactor);
+ auto *E = &addEdge(&*BB, TargetBB, Weight);
+ E->IsCritical = Critical;
+ LLVM_DEBUG(dbgs() << " Edge: from " << BB->getName() << " to "
+ << TargetBB->getName() << " w=" << Weight << "\n");
+
+ // Keep track of entry/exit edges:
+ if (&*BB == Entry) {
+ if (Weight > MaxEntryOutWeight) {
+ MaxEntryOutWeight = Weight;
+ EntryOutgoing = E;
+ }
+ }
+
+ auto *TargetTI = TargetBB->getTerminator();
+ if (TargetTI && !TargetTI->getNumSuccessors()) {
+ if (Weight > MaxExitInWeight) {
+ MaxExitInWeight = Weight;
+ ExitIncoming = E;
+ }
+ }
+ }
+ } else {
+ ExitBlockFound = true;
+ Edge *ExitO = &addEdge(&*BB, nullptr, BBWeight);
+ if (BBWeight > MaxExitOutWeight) {
+ MaxExitOutWeight = BBWeight;
+ ExitOutgoing = ExitO;
+ }
+ LLVM_DEBUG(dbgs() << " Edge: from " << BB->getName() << " to fake exit"
+ << " w = " << BBWeight << "\n");
+ }
+ }
+
+    // Entry/exit edge adjustment heuristic: prefer instrumenting entry edges
+    // over exit edges if possible. Exit edges may never get a chance to
+    // execute (for instance when the program is an event-handling loop)
+    // before the profile is asynchronously dumped.
+    //
+    // If EntryIncoming and ExitOutgoing have similar weights, make sure
+    // ExitOutgoing is selected as the min-edge. Similarly, if EntryOutgoing
+    // and ExitIncoming have similar weights, make sure ExitIncoming becomes
+    // the min-edge.
+ uint64_t EntryInWeight = EntryWeight;
+
+ if (EntryInWeight >= MaxExitOutWeight &&
+ EntryInWeight * 2 < MaxExitOutWeight * 3) {
+ EntryIncoming->Weight = MaxExitOutWeight;
+ ExitOutgoing->Weight = EntryInWeight + 1;
+ }
+
+ if (MaxEntryOutWeight >= MaxExitInWeight &&
+ MaxEntryOutWeight * 2 < MaxExitInWeight * 3) {
+ EntryOutgoing->Weight = MaxExitInWeight;
+ ExitIncoming->Weight = MaxEntryOutWeight + 1;
+ }
+ }
+
+  // Sort CFG edges based on their weights.
+ void sortEdgesByWeight() {
+ llvm::stable_sort(AllEdges, [](const std::unique_ptr<Edge> &Edge1,
+ const std::unique_ptr<Edge> &Edge2) {
+ return Edge1->Weight > Edge2->Weight;
+ });
+ }
+
+ // Traverse all the edges and compute the Minimum Weight Spanning Tree
+ // using union-find algorithm.
+ void computeMinimumSpanningTree() {
+    // First, put all the critical edges whose destination is a landing pad
+    // into the MST. This works around insufficient support for splitting
+    // critical edges whose destination BB is a landing pad.
+ for (auto &Ei : AllEdges) {
+ if (Ei->Removed)
+ continue;
+ if (Ei->IsCritical) {
+ if (Ei->DestBB && Ei->DestBB->isLandingPad()) {
+ if (unionGroups(Ei->SrcBB, Ei->DestBB))
+ Ei->InMST = true;
+ }
+ }
+ }
+
+ for (auto &Ei : AllEdges) {
+ if (Ei->Removed)
+ continue;
+ // If we detect infinite loops, force
+ // instrumenting the entry edge:
+ if (!ExitBlockFound && Ei->SrcBB == nullptr)
+ continue;
+ if (unionGroups(Ei->SrcBB, Ei->DestBB))
+ Ei->InMST = true;
+ }
+ }
+
+ // Dump the Debug information about the instrumentation.
+ void dumpEdges(raw_ostream &OS, const Twine &Message) const {
+ if (!Message.str().empty())
+ OS << Message << "\n";
+ OS << " Number of Basic Blocks: " << BBInfos.size() << "\n";
+ for (auto &BI : BBInfos) {
+ const BasicBlock *BB = BI.first;
+ OS << " BB: " << (BB == nullptr ? "FakeNode" : BB->getName()) << " "
+ << BI.second->infoString() << "\n";
+ }
+
+ OS << " Number of Edges: " << AllEdges.size()
+ << " (*: Instrument, C: CriticalEdge, -: Removed)\n";
+ uint32_t Count = 0;
+ for (auto &EI : AllEdges)
+ OS << " Edge " << Count++ << ": " << getBBInfo(EI->SrcBB).Index << "-->"
+ << getBBInfo(EI->DestBB).Index << EI->infoString() << "\n";
+ }
+
+ // Add an edge to AllEdges with weight W.
+ Edge &addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W) {
+ uint32_t Index = BBInfos.size();
+ auto Iter = BBInfos.end();
+ bool Inserted;
+ std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Src, nullptr));
+ if (Inserted) {
+ // Newly inserted, update the real info.
+      Iter->second = std::make_unique<BBInfo>(Index);
+ Index++;
+ }
+ std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Dest, nullptr));
+ if (Inserted)
+ // Newly inserted, update the real info.
+      Iter->second = std::make_unique<BBInfo>(Index);
+ AllEdges.emplace_back(new Edge(Src, Dest, W));
+ return *AllEdges.back();
+ }
+
+ BranchProbabilityInfo *BPI;
+ BlockFrequencyInfo *BFI;
+
+public:
+ CFGMST(Function &Func, BranchProbabilityInfo *BPI_ = nullptr,
+ BlockFrequencyInfo *BFI_ = nullptr)
+ : F(Func), BPI(BPI_), BFI(BFI_) {
+ buildEdges();
+ sortEdgesByWeight();
+ computeMinimumSpanningTree();
+ }
+};
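+
+// A minimal usage sketch (hypothetical Edge/BBInfo types and helper name):
+// clients build the MST once and instrument only the edges left out of it,
+// since counts for MST edges can be reconstructed from the instrumented ones.
+//
+//   CFGMST<MyEdge, MyBBInfo> MST(F, BPI, BFI);
+//   for (auto &E : MST.AllEdges)
+//     if (!E->InMST && !E->Removed)
+//       instrumentEdge(*E);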
+
+} // end namespace llvm
+
+#undef DEBUG_TYPE // "cfgmst"
+
+#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H
diff --git a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
new file mode 100644
index 000000000000..358abab3cceb
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -0,0 +1,98 @@
+//===-- CGProfile.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/CGProfile.h"
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Transforms/Instrumentation.h"
+
+#include <array>
+
+using namespace llvm;
+
+PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
+ MapVector<std::pair<Function *, Function *>, uint64_t> Counts;
+ FunctionAnalysisManager &FAM =
+ MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ InstrProfSymtab Symtab;
+ auto UpdateCounts = [&](TargetTransformInfo &TTI, Function *F,
+ Function *CalledF, uint64_t NewCount) {
+ if (!CalledF || !TTI.isLoweredToCall(CalledF))
+ return;
+ uint64_t &Count = Counts[std::make_pair(F, CalledF)];
+ Count = SaturatingAdd(Count, NewCount);
+ };
+ // Ignore error here. Indirect calls are ignored if this fails.
+ (void)(bool)Symtab.create(M);
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue;
+ auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+ if (BFI.getEntryFreq() == 0)
+ continue;
+ TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
+ for (auto &BB : F) {
+ Optional<uint64_t> BBCount = BFI.getBlockProfileCount(&BB);
+ if (!BBCount)
+ continue;
+ for (auto &I : BB) {
+ CallSite CS(&I);
+ if (!CS)
+ continue;
+ if (CS.isIndirectCall()) {
+ InstrProfValueData ValueData[8];
+ uint32_t ActualNumValueData;
+ uint64_t TotalC;
+ if (!getValueProfDataFromInst(*CS.getInstruction(),
+ IPVK_IndirectCallTarget, 8, ValueData,
+ ActualNumValueData, TotalC))
+ continue;
+ for (const auto &VD :
+ ArrayRef<InstrProfValueData>(ValueData, ActualNumValueData)) {
+ UpdateCounts(TTI, &F, Symtab.getFunction(VD.Value), VD.Count);
+ }
+ continue;
+ }
+ UpdateCounts(TTI, &F, CS.getCalledFunction(), *BBCount);
+ }
+ }
+ }
+
+ addModuleFlags(M, Counts);
+
+ return PreservedAnalyses::all();
+}
+
+void CGProfilePass::addModuleFlags(
+ Module &M,
+ MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) const {
+ if (Counts.empty())
+ return;
+
+ LLVMContext &Context = M.getContext();
+ MDBuilder MDB(Context);
+ std::vector<Metadata *> Nodes;
+
+ for (auto E : Counts) {
+ Metadata *Vals[] = {ValueAsMetadata::get(E.first.first),
+ ValueAsMetadata::get(E.first.second),
+ MDB.createConstant(ConstantInt::get(
+ Type::getInt64Ty(Context), E.second))};
+ Nodes.push_back(MDNode::get(Context, Vals));
+ }
+
+ M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes));
+}
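+
+// A minimal sketch (assumes a consumer that is not part of this pass) of how
+// the "CG Profile" flag emitted above could be walked; the function values can
+// be extracted as constants because ValueAsMetadata wraps them as
+// ConstantAsMetadata.
+//
+//   if (auto *CGProfile =
+//           dyn_cast_or_null<MDNode>(M.getModuleFlag("CG Profile")))
+//     for (const MDOperand &Op : CGProfile->operands()) {
+//       auto *Entry = cast<MDNode>(Op.get());
+//       auto *From = mdconst::dyn_extract_or_null<Function>(Entry->getOperand(0));
+//       auto *To = mdconst::dyn_extract_or_null<Function>(Entry->getOperand(1));
+//       uint64_t Count =
+//           mdconst::extract<ConstantInt>(Entry->getOperand(2))->getZExtValue();
+//       // ... record the (From, To, Count) call graph edge ...
+//     }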
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
new file mode 100644
index 000000000000..55c64fa4b727
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -0,0 +1,2110 @@
+//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass merges conditional blocks of code and reduces the number of
+// conditional branches in the hot paths based on profiles.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+
+#include <set>
+#include <sstream>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "chr"
+
+#define CHR_DEBUG(X) LLVM_DEBUG(X)
+
+static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
+ cl::desc("Apply CHR for all functions"));
+
+static cl::opt<double> CHRBiasThreshold(
+ "chr-bias-threshold", cl::init(0.99), cl::Hidden,
+ cl::desc("CHR considers a branch bias greater than this ratio as biased"));
+
+static cl::opt<unsigned> CHRMergeThreshold(
+ "chr-merge-threshold", cl::init(2), cl::Hidden,
+ cl::desc("CHR merges a group of N branches/selects where N >= this value"));
+
+static cl::opt<std::string> CHRModuleList(
+ "chr-module-list", cl::init(""), cl::Hidden,
+ cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
+
+static cl::opt<std::string> CHRFunctionList(
+ "chr-function-list", cl::init(""), cl::Hidden,
+ cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
+
+static StringSet<> CHRModules;
+static StringSet<> CHRFunctions;
+
+static void parseCHRFilterFiles() {
+ if (!CHRModuleList.empty()) {
+ auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
+ if (!FileOrErr) {
+ errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
+ std::exit(1);
+ }
+ StringRef Buf = FileOrErr->get()->getBuffer();
+ SmallVector<StringRef, 0> Lines;
+ Buf.split(Lines, '\n');
+ for (StringRef Line : Lines) {
+ Line = Line.trim();
+ if (!Line.empty())
+ CHRModules.insert(Line);
+ }
+ }
+ if (!CHRFunctionList.empty()) {
+ auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
+ if (!FileOrErr) {
+ errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
+ std::exit(1);
+ }
+ StringRef Buf = FileOrErr->get()->getBuffer();
+ SmallVector<StringRef, 0> Lines;
+ Buf.split(Lines, '\n');
+ for (StringRef Line : Lines) {
+ Line = Line.trim();
+ if (!Line.empty())
+ CHRFunctions.insert(Line);
+ }
+ }
+}
+
+namespace {
+class ControlHeightReductionLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ ControlHeightReductionLegacyPass() : FunctionPass(ID) {
+ initializeControlHeightReductionLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ parseCHRFilterFiles();
+ }
+
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<RegionInfoPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+} // end anonymous namespace
+
+char ControlHeightReductionLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
+ "chr",
+ "Reduce control height in the hot paths",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
+INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
+ "chr",
+ "Reduce control height in the hot paths",
+ false, false)
+
+FunctionPass *llvm::createControlHeightReductionLegacyPass() {
+ return new ControlHeightReductionLegacyPass();
+}
+
+namespace {
+
+struct CHRStats {
+ CHRStats() : NumBranches(0), NumBranchesDelta(0),
+ WeightedNumBranchesDelta(0) {}
+ void print(raw_ostream &OS) const {
+ OS << "CHRStats: NumBranches " << NumBranches
+ << " NumBranchesDelta " << NumBranchesDelta
+ << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
+ }
+ uint64_t NumBranches; // The original number of conditional branches /
+ // selects
+ uint64_t NumBranchesDelta; // The decrease of the number of conditional
+ // branches / selects in the hot paths due to CHR.
+ uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
+ // count at the scope entry.
+};
+
+// RegInfo - some properties of a Region.
+struct RegInfo {
+ RegInfo() : R(nullptr), HasBranch(false) {}
+ RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
+ Region *R;
+ bool HasBranch;
+ SmallVector<SelectInst *, 8> Selects;
+};
+
+typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
+
+// CHRScope - a sequence of regions to CHR together. It corresponds to a
+// sequence of conditional blocks. It can have subscopes which correspond to
+// nested conditional blocks. Nested CHRScopes form a tree.
+class CHRScope {
+ public:
+ CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
+ assert(RI.R && "Null RegionIn");
+ RegInfos.push_back(RI);
+ }
+
+ Region *getParentRegion() {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ Region *Parent = RegInfos[0].R->getParent();
+ assert(Parent && "Unexpected to call this on the top-level region");
+ return Parent;
+ }
+
+ BasicBlock *getEntryBlock() {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ return RegInfos.front().R->getEntry();
+ }
+
+ BasicBlock *getExitBlock() {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ return RegInfos.back().R->getExit();
+ }
+
+ bool appendable(CHRScope *Next) {
+ // The next scope is appendable only if this scope is directly connected to
+ // it (which implies it post-dominates this scope) and this scope dominates
+ // it (no edge to the next scope outside this scope).
+ BasicBlock *NextEntry = Next->getEntryBlock();
+ if (getExitBlock() != NextEntry)
+ // Not directly connected.
+ return false;
+ Region *LastRegion = RegInfos.back().R;
+ for (BasicBlock *Pred : predecessors(NextEntry))
+ if (!LastRegion->contains(Pred))
+ // There's an edge going into the entry of the next scope from outside
+ // of this scope.
+ return false;
+ return true;
+ }
+
+ void append(CHRScope *Next) {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
+ assert(getParentRegion() == Next->getParentRegion() &&
+ "Must be siblings");
+ assert(getExitBlock() == Next->getEntryBlock() &&
+ "Must be adjacent");
+ for (RegInfo &RI : Next->RegInfos)
+ RegInfos.push_back(RI);
+ for (CHRScope *Sub : Next->Subs)
+ Subs.push_back(Sub);
+ }
+
+ void addSub(CHRScope *SubIn) {
+#ifndef NDEBUG
+ bool IsChild = false;
+ for (RegInfo &RI : RegInfos)
+ if (RI.R == SubIn->getParentRegion()) {
+ IsChild = true;
+ break;
+ }
+ assert(IsChild && "Must be a child");
+#endif
+ Subs.push_back(SubIn);
+ }
+
+  // Split this scope at the boundary region into two; the regions from the
+  // boundary onward move into the tail, which is returned.
+ CHRScope *split(Region *Boundary) {
+ assert(Boundary && "Boundary null");
+ assert(RegInfos.begin()->R != Boundary &&
+ "Can't be split at beginning");
+ auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(),
+ [&Boundary](const RegInfo& RI) {
+ return Boundary == RI.R;
+ });
+ if (BoundaryIt == RegInfos.end())
+ return nullptr;
+ SmallVector<RegInfo, 8> TailRegInfos;
+ SmallVector<CHRScope *, 8> TailSubs;
+ TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end());
+ RegInfos.resize(BoundaryIt - RegInfos.begin());
+ DenseSet<Region *> TailRegionSet;
+ for (RegInfo &RI : TailRegInfos)
+ TailRegionSet.insert(RI.R);
+ for (auto It = Subs.begin(); It != Subs.end(); ) {
+ CHRScope *Sub = *It;
+ assert(Sub && "null Sub");
+ Region *Parent = Sub->getParentRegion();
+ if (TailRegionSet.count(Parent)) {
+ TailSubs.push_back(Sub);
+ It = Subs.erase(It);
+ } else {
+ assert(std::find_if(RegInfos.begin(), RegInfos.end(),
+ [&Parent](const RegInfo& RI) {
+ return Parent == RI.R;
+ }) != RegInfos.end() &&
+ "Must be in head");
+ ++It;
+ }
+ }
+ assert(HoistStopMap.empty() && "MapHoistStops must be empty");
+ return new CHRScope(TailRegInfos, TailSubs);
+ }
+
+ bool contains(Instruction *I) const {
+ BasicBlock *Parent = I->getParent();
+ for (const RegInfo &RI : RegInfos)
+ if (RI.R->contains(Parent))
+ return true;
+ return false;
+ }
+
+ void print(raw_ostream &OS) const;
+
+ SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
+ SmallVector<CHRScope *, 8> Subs; // Subscopes.
+
+ // The instruction at which to insert the CHR conditional branch (and hoist
+ // the dependent condition values).
+ Instruction *BranchInsertPoint;
+
+ // True-biased and false-biased regions (conditional blocks),
+ // respectively. Used only for the outermost scope and includes regions in
+ // subscopes. The rest are unbiased.
+ DenseSet<Region *> TrueBiasedRegions;
+ DenseSet<Region *> FalseBiasedRegions;
+ // Among the biased regions, the regions that get CHRed.
+ SmallVector<RegInfo, 8> CHRRegions;
+
+ // True-biased and false-biased selects, respectively. Used only for the
+ // outermost scope and includes ones in subscopes.
+ DenseSet<SelectInst *> TrueBiasedSelects;
+ DenseSet<SelectInst *> FalseBiasedSelects;
+
+  // Map from one of the above regions to the instructions at which to stop
+  // hoisting when walking use-def chains.
+ HoistStopMapTy HoistStopMap;
+
+ private:
+ CHRScope(SmallVector<RegInfo, 8> &RegInfosIn,
+ SmallVector<CHRScope *, 8> &SubsIn)
+ : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
+};
+
+class CHR {
+ public:
+ CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
+ ProfileSummaryInfo &PSIin, RegionInfo &RIin,
+ OptimizationRemarkEmitter &OREin)
+ : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
+
+ ~CHR() {
+ for (CHRScope *Scope : Scopes) {
+ delete Scope;
+ }
+ }
+
+ bool run();
+
+ private:
+ // See the comments in CHR::run() for the high level flow of the algorithm and
+ // what the following functions do.
+
+ void findScopes(SmallVectorImpl<CHRScope *> &Output) {
+ Region *R = RI.getTopLevelRegion();
+ CHRScope *Scope = findScopes(R, nullptr, nullptr, Output);
+ if (Scope) {
+ Output.push_back(Scope);
+ }
+ }
+ CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
+ SmallVectorImpl<CHRScope *> &Scopes);
+ CHRScope *findScope(Region *R);
+ void checkScopeHoistable(CHRScope *Scope);
+
+ void splitScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+ SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
+ CHRScope *Outer,
+ DenseSet<Value *> *OuterConditionValues,
+ Instruction *OuterInsertPoint,
+ SmallVectorImpl<CHRScope *> &Output,
+ DenseSet<Instruction *> &Unhoistables);
+
+ void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
+ void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
+
+ void filterScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+
+ void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+ void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
+
+ void sortScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+
+ void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
+ void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
+ void cloneScopeBlocks(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BasicBlock *ExitBlock,
+ Region *LastRegion,
+ ValueToValueMapTy &VMap);
+ BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
+ BasicBlock *EntryBlock,
+ BasicBlock *NewEntryBlock,
+ ValueToValueMapTy &VMap);
+ void fixupBranchesAndSelects(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BranchInst *MergedBR,
+ uint64_t ProfileCount);
+ void fixupBranch(Region *R,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition, BranchProbability &CHRBranchBias);
+ void fixupSelect(SelectInst* SI,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition, BranchProbability &CHRBranchBias);
+ void addToMergedCondition(bool IsTrueBiased, Value *Cond,
+ Instruction *BranchOrSelect,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition);
+
+ Function &F;
+ BlockFrequencyInfo &BFI;
+ DominatorTree &DT;
+ ProfileSummaryInfo &PSI;
+ RegionInfo &RI;
+ OptimizationRemarkEmitter &ORE;
+ CHRStats Stats;
+
+ // All the true-biased regions in the function
+ DenseSet<Region *> TrueBiasedRegionsGlobal;
+ // All the false-biased regions in the function
+ DenseSet<Region *> FalseBiasedRegionsGlobal;
+ // All the true-biased selects in the function
+ DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
+ // All the false-biased selects in the function
+ DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
+ // A map from biased regions to their branch bias
+ DenseMap<Region *, BranchProbability> BranchBiasMap;
+ // A map from biased selects to their branch bias
+ DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
+ // All the scopes.
+ DenseSet<CHRScope *> Scopes;
+};
+
+} // end anonymous namespace
+
+static inline
+raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
+ const CHRStats &Stats) {
+ Stats.print(OS);
+ return OS;
+}
+
+static inline
+raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
+ Scope.print(OS);
+ return OS;
+}
+
+static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
+ if (ForceCHR)
+ return true;
+
+ if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
+ if (CHRModules.count(F.getParent()->getName()))
+ return true;
+ return CHRFunctions.count(F.getName());
+ }
+
+ assert(PSI.hasProfileSummary() && "Empty PSI?");
+ return PSI.isFunctionEntryHot(&F);
+}
+
+static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
+ CHRStats *Stats) {
+ StringRef FuncName = F.getName();
+ StringRef ModuleName = F.getParent()->getName();
+ (void)(FuncName); // Unused in release build.
+ (void)(ModuleName); // Unused in release build.
+ CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
+ << FuncName);
+ if (Stats)
+ CHR_DEBUG(dbgs() << " " << *Stats);
+ CHR_DEBUG(dbgs() << "\n");
+ CHR_DEBUG(F.dump());
+}
+
+void CHRScope::print(raw_ostream &OS) const {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ OS << "CHRScope[";
+ OS << RegInfos.size() << ", Regions[";
+ for (const RegInfo &RI : RegInfos) {
+ OS << RI.R->getNameStr();
+ if (RI.HasBranch)
+ OS << " B";
+ if (RI.Selects.size() > 0)
+ OS << " S" << RI.Selects.size();
+ OS << ", ";
+ }
+ if (RegInfos[0].R->getParent()) {
+ OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
+ } else {
+ // top level region
+ OS << "]";
+ }
+ OS << ", Subs[";
+ for (CHRScope *Sub : Subs) {
+ OS << *Sub << ", ";
+ }
+ OS << "]]";
+}
+
+// Return true if the given instruction type can be hoisted by CHR.
+static bool isHoistableInstructionType(Instruction *I) {
+ return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
+ isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
+ isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
+ isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
+ isa<InsertValueInst>(I);
+}
+
+// Return true if the given instruction can be hoisted by CHR.
+static bool isHoistable(Instruction *I, DominatorTree &DT) {
+ if (!isHoistableInstructionType(I))
+ return false;
+ return isSafeToSpeculativelyExecute(I, nullptr, &DT);
+}
+
+// Recursively traverse the use-def chains of the given value and return a set
+// of the unhoistable base values defined within the scope (excluding the
+// first-region entry block) or the (hoistable or unhoistable) base values that
+// are defined outside (including the first-region entry block) of the
+// scope. The returned set doesn't include constants.
+static std::set<Value *> getBaseValues(
+ Value *V, DominatorTree &DT,
+ DenseMap<Value *, std::set<Value *>> &Visited) {
+ if (Visited.count(V)) {
+ return Visited[V];
+ }
+ std::set<Value *> Result;
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // We don't stop at a block that's not in the Scope because we would miss some
+ // instructions that are based on the same base values if we stop there.
+ if (!isHoistable(I, DT)) {
+ Result.insert(I);
+ Visited.insert(std::make_pair(V, Result));
+ return Result;
+ }
+ // I is hoistable above the Scope.
+ for (Value *Op : I->operands()) {
+ std::set<Value *> OpResult = getBaseValues(Op, DT, Visited);
+ Result.insert(OpResult.begin(), OpResult.end());
+ }
+ Visited.insert(std::make_pair(V, Result));
+ return Result;
+ }
+ if (isa<Argument>(V)) {
+ Result.insert(V);
+ Visited.insert(std::make_pair(V, Result));
+ return Result;
+ }
+ // We don't include others like constants because those won't lead to any
+ // chance of folding of conditions (eg two bit checks merged into one check)
+ // after CHR.
+ Visited.insert(std::make_pair(V, Result));
+ return Result; // empty
+}
+
+// Return true if V is already hoisted or can be hoisted (along with its
+// operands) above the insert point. When it returns true and HoistStops is
+// non-null, the instructions at which to stop hoisting along the use-def
+// chains are inserted into HoistStops.
+static bool
+checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
+ DenseSet<Instruction *> &Unhoistables,
+ DenseSet<Instruction *> *HoistStops,
+ DenseMap<Instruction *, bool> &Visited) {
+ assert(InsertPoint && "Null InsertPoint");
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (Visited.count(I)) {
+ return Visited[I];
+ }
+ assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
+ assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
+ if (Unhoistables.count(I)) {
+ // Don't hoist if they are not to be hoisted.
+ Visited[I] = false;
+ return false;
+ }
+ if (DT.dominates(I, InsertPoint)) {
+ // We are already above the insert point. Stop here.
+ if (HoistStops)
+ HoistStops->insert(I);
+ Visited[I] = true;
+ return true;
+ }
+    // We aren't above the insert point; check whether we can hoist it above
+    // the insert point.
+ if (isHoistable(I, DT)) {
+ // Check operands first.
+ DenseSet<Instruction *> OpsHoistStops;
+ bool AllOpsHoisted = true;
+ for (Value *Op : I->operands()) {
+ if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
+ Visited)) {
+ AllOpsHoisted = false;
+ break;
+ }
+ }
+ if (AllOpsHoisted) {
+ CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
+ if (HoistStops)
+ HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
+ Visited[I] = true;
+ return true;
+ }
+ }
+ Visited[I] = false;
+ return false;
+ }
+ // Non-instructions are considered hoistable.
+ return true;
+}
+
+// Returns true and sets the true probability and false probability of an
+// MD_prof metadata if it's well-formed.
+static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb,
+ BranchProbability &FalseProb) {
+ if (!MD) return false;
+ MDString *MDName = cast<MDString>(MD->getOperand(0));
+ if (MDName->getString() != "branch_weights" ||
+ MD->getNumOperands() != 3)
+ return false;
+ ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1));
+ ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2));
+ if (!TrueWeight || !FalseWeight)
+ return false;
+ uint64_t TrueWt = TrueWeight->getValue().getZExtValue();
+ uint64_t FalseWt = FalseWeight->getValue().getZExtValue();
+ uint64_t SumWt = TrueWt + FalseWt;
+
+ assert(SumWt >= TrueWt && SumWt >= FalseWt &&
+ "Overflow calculating branch probabilities.");
+
+ TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt);
+ FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt);
+ return true;
+}
+
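+// Convert the floating-point CHRBiasThreshold into a BranchProbability with a
+// fixed denominator of 1000000; e.g. a threshold of 0.99 becomes
+// 990000/1000000.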
+static BranchProbability getCHRBiasThreshold() {
+ return BranchProbability::getBranchProbability(
+ static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
+}
+
+// A helper for checkBiasedBranch and checkBiasedSelect. If TrueProb >=
+// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
+// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
+// false.
+template <typename K, typename S, typename M>
+static bool checkBias(K *Key, BranchProbability TrueProb,
+ BranchProbability FalseProb, S &TrueSet, S &FalseSet,
+ M &BiasMap) {
+ BranchProbability Threshold = getCHRBiasThreshold();
+ if (TrueProb >= Threshold) {
+ TrueSet.insert(Key);
+ BiasMap[Key] = TrueProb;
+ return true;
+ } else if (FalseProb >= Threshold) {
+ FalseSet.insert(Key);
+ BiasMap[Key] = FalseProb;
+ return true;
+ }
+ return false;
+}
+
+// Returns true and inserts the region into the matching biased set and the map
+// if the branch of the region is biased.
+static bool checkBiasedBranch(BranchInst *BI, Region *R,
+ DenseSet<Region *> &TrueBiasedRegionsGlobal,
+ DenseSet<Region *> &FalseBiasedRegionsGlobal,
+ DenseMap<Region *, BranchProbability> &BranchBiasMap) {
+ if (!BI->isConditional())
+ return false;
+ BranchProbability ThenProb, ElseProb;
+ if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof),
+ ThenProb, ElseProb))
+ return false;
+ BasicBlock *IfThen = BI->getSuccessor(0);
+ BasicBlock *IfElse = BI->getSuccessor(1);
+ assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
+ IfThen != IfElse &&
+ "Invariant from findScopes");
+ if (IfThen == R->getExit()) {
+ // Swap them so that IfThen/ThenProb means going into the conditional code
+ // and IfElse/ElseProb means skipping it.
+ std::swap(IfThen, IfElse);
+ std::swap(ThenProb, ElseProb);
+ }
+ CHR_DEBUG(dbgs() << "BI " << *BI << " ");
+ CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
+ CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
+ return checkBias(R, ThenProb, ElseProb,
+ TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
+ BranchBiasMap);
+}
+
+// Returns true and inserts the select into the matching biased set and the map
+// if the select is biased.
+static bool checkBiasedSelect(
+ SelectInst *SI, Region *R,
+ DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
+ DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
+ DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
+ BranchProbability TrueProb, FalseProb;
+ if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof),
+ TrueProb, FalseProb))
+ return false;
+ CHR_DEBUG(dbgs() << "SI " << *SI << " ");
+ CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
+ CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
+ return checkBias(SI, TrueProb, FalseProb,
+ TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
+ SelectBiasMap);
+}
+
+// Returns the instruction at which to hoist the dependent condition values and
+// insert the CHR branch for a region. This is the terminator branch in the
+// entry block or the first select in the entry block, if any.
+static Instruction* getBranchInsertPoint(RegInfo &RI) {
+ Region *R = RI.R;
+ BasicBlock *EntryBB = R->getEntry();
+ // The hoist point is by default the terminator of the entry block, which is
+ // the same as the branch instruction if RI.HasBranch is true.
+ Instruction *HoistPoint = EntryBB->getTerminator();
+ for (SelectInst *SI : RI.Selects) {
+ if (SI->getParent() == EntryBB) {
+ // Pick the first select in Selects in the entry block. Note Selects is
+ // sorted in the instruction order within a block (asserted below).
+ HoistPoint = SI;
+ break;
+ }
+ }
+ assert(HoistPoint && "Null HoistPoint");
+#ifndef NDEBUG
+ // Check that HoistPoint is the first one in Selects in the entry block,
+ // if any.
+ DenseSet<Instruction *> EntryBlockSelectSet;
+ for (SelectInst *SI : RI.Selects) {
+ if (SI->getParent() == EntryBB) {
+ EntryBlockSelectSet.insert(SI);
+ }
+ }
+ for (Instruction &I : *EntryBB) {
+ if (EntryBlockSelectSet.count(&I) > 0) {
+ assert(&I == HoistPoint &&
+ "HoistPoint must be the first one in Selects");
+ break;
+ }
+ }
+#endif
+ return HoistPoint;
+}
+
+// Find a CHR scope in the given region.
+CHRScope * CHR::findScope(Region *R) {
+ CHRScope *Result = nullptr;
+ BasicBlock *Entry = R->getEntry();
+ BasicBlock *Exit = R->getExit(); // null if top level.
+ assert(Entry && "Entry must not be null");
+ assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
+ "Only top level region has a null exit");
+ if (Entry)
+ CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
+ else
+ CHR_DEBUG(dbgs() << "Entry null\n");
+ if (Exit)
+ CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
+ else
+ CHR_DEBUG(dbgs() << "Exit null\n");
+ // Exclude cases where Entry is part of a subregion (hence it doesn't belong
+ // to this region).
+ bool EntryInSubregion = RI.getRegionFor(Entry) != R;
+ if (EntryInSubregion)
+ return nullptr;
+ // Exclude loops
+ for (BasicBlock *Pred : predecessors(Entry))
+ if (R->contains(Pred))
+ return nullptr;
+ if (Exit) {
+ // Try to find an if-then block (check if R is an if-then).
+ // if (cond) {
+ // ...
+ // }
+ auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
+ if (BI)
+ CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
+ else
+ CHR_DEBUG(dbgs() << "BI null\n");
+ if (BI && BI->isConditional()) {
+ BasicBlock *S0 = BI->getSuccessor(0);
+ BasicBlock *S1 = BI->getSuccessor(1);
+ CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
+ CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
+ if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
+ RegInfo RI(R);
+ RI.HasBranch = checkBiasedBranch(
+ BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
+ BranchBiasMap);
+ Result = new CHRScope(RI);
+ Scopes.insert(Result);
+ CHR_DEBUG(dbgs() << "Found a region with a branch\n");
+ ++Stats.NumBranches;
+ if (!RI.HasBranch) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI)
+ << "Branch not biased";
+ });
+ }
+ }
+ }
+ }
+ {
+ // Try to look for selects in the direct child blocks (as opposed to in
+ // subregions) of R.
+ // ...
+ // if (..) { // Some subregion
+ // ...
+ // }
+ // if (..) { // Some subregion
+ // ...
+ // }
+ // ...
+ // a = cond ? b : c;
+ // ...
+ SmallVector<SelectInst *, 8> Selects;
+ for (RegionNode *E : R->elements()) {
+ if (E->isSubRegion())
+ continue;
+ // This returns the basic block of E if E is a direct child of R (not a
+ // subregion.)
+ BasicBlock *BB = E->getEntry();
+ // Push them in instruction order to make it easier to find the first Select
+ // later.
+ for (Instruction &I : *BB) {
+ if (auto *SI = dyn_cast<SelectInst>(&I)) {
+ Selects.push_back(SI);
+ ++Stats.NumBranches;
+ }
+ }
+ }
+ if (Selects.size() > 0) {
+ auto AddSelects = [&](RegInfo &RI) {
+ for (auto *SI : Selects)
+ if (checkBiasedSelect(SI, RI.R,
+ TrueBiasedSelectsGlobal,
+ FalseBiasedSelectsGlobal,
+ SelectBiasMap))
+ RI.Selects.push_back(SI);
+ else
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
+ << "Select not biased";
+ });
+ };
+ if (!Result) {
+ CHR_DEBUG(dbgs() << "Found a select-only region\n");
+ RegInfo RI(R);
+ AddSelects(RI);
+ Result = new CHRScope(RI);
+ Scopes.insert(Result);
+ } else {
+ CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
+ AddSelects(Result->RegInfos[0]);
+ }
+ }
+ }
+
+ if (Result) {
+ checkScopeHoistable(Result);
+ }
+ return Result;
+}
+
+// Check whether the branch and the selects in the region can be hoisted above
+// the CHR branch insert point (the most dominating of them, either the branch
+// (at the end of the first block) or the first select in the first block). If
+// the branch can't be hoisted, drop the selects in the first block.
+//
+// For example, for the following scope/region with selects, we want to insert
+// the merged branch right before the first select in the first/entry block by
+// hoisting c1, c2, c3, and c4.
+//
+// // Branch insert point here.
+// a = c1 ? b : c; // Select 1
+// d = c2 ? e : f; // Select 2
+// if (c3) { // Branch
+// ...
+// c4 = foo() // A call.
+// g = c4 ? h : i; // Select 3
+// }
+//
+// But suppose we can't hoist c4 because it's dependent on the preceding
+// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
+// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
+void CHR::checkScopeHoistable(CHRScope *Scope) {
+ RegInfo &RI = Scope->RegInfos[0];
+ Region *R = RI.R;
+ BasicBlock *EntryBB = R->getEntry();
+ auto *Branch = RI.HasBranch ?
+ cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
+ SmallVector<SelectInst *, 8> &Selects = RI.Selects;
+ if (RI.HasBranch || !Selects.empty()) {
+ Instruction *InsertPoint = getBranchInsertPoint(RI);
+ CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+ // Avoid a data dependence from a select or a branch to a(nother)
+ // select. Note that no instruction can data-depend on a branch (a branch
+ // instruction doesn't produce a value).
+ DenseSet<Instruction *> Unhoistables;
+ // Initialize Unhoistables with the selects.
+ for (SelectInst *SI : Selects) {
+ Unhoistables.insert(SI);
+ }
+ // Remove Selects that can't be hoisted.
+ for (auto it = Selects.begin(); it != Selects.end(); ) {
+ SelectInst *SI = *it;
+ if (SI == InsertPoint) {
+ ++it;
+ continue;
+ }
+ DenseMap<Instruction *, bool> Visited;
+ bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
+ DT, Unhoistables, nullptr, Visited);
+ if (!IsHoistable) {
+ CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "DropUnhoistableSelect", SI)
+ << "Dropped unhoistable select";
+ });
+ it = Selects.erase(it);
+ // Since we are dropping the select here, we also drop it from
+ // Unhoistables.
+ Unhoistables.erase(SI);
+ } else
+ ++it;
+ }
+ // Update InsertPoint after potentially removing selects.
+ InsertPoint = getBranchInsertPoint(RI);
+ CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+ if (RI.HasBranch && InsertPoint != Branch) {
+ DenseMap<Instruction *, bool> Visited;
+ bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
+ DT, Unhoistables, nullptr, Visited);
+ if (!IsHoistable) {
+ // If the branch isn't hoistable, drop the selects in the entry
+ // block, preferring the branch, which makes the branch the hoist
+ // point.
+ assert(InsertPoint != Branch && "Branch must not be the hoist point");
+ CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
+ CHR_DEBUG(
+ for (SelectInst *SI : Selects) {
+ dbgs() << "SI " << *SI << "\n";
+ });
+ for (SelectInst *SI : Selects) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "DropSelectUnhoistableBranch", SI)
+ << "Dropped select due to unhoistable branch";
+ });
+ }
+ Selects.erase(std::remove_if(Selects.begin(), Selects.end(),
+ [EntryBB](SelectInst *SI) {
+ return SI->getParent() == EntryBB;
+ }), Selects.end());
+ Unhoistables.clear();
+ InsertPoint = Branch;
+ }
+ }
+ CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+#ifndef NDEBUG
+ if (RI.HasBranch) {
+ assert(!DT.dominates(Branch, InsertPoint) &&
+ "Branch can't be already above the hoist point");
+ DenseMap<Instruction *, bool> Visited;
+ assert(checkHoistValue(Branch->getCondition(), InsertPoint,
+ DT, Unhoistables, nullptr, Visited) &&
+ "checkHoistValue for branch");
+ }
+ for (auto *SI : Selects) {
+ assert(!DT.dominates(SI, InsertPoint) &&
+ "SI can't be already above the hoist point");
+ DenseMap<Instruction *, bool> Visited;
+ assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
+ Unhoistables, nullptr, Visited) &&
+ "checkHoistValue for selects");
+ }
+ CHR_DEBUG(dbgs() << "Result\n");
+ if (RI.HasBranch) {
+ CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
+ }
+ for (auto *SI : Selects) {
+ CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
+ }
+#endif
+ }
+}
+
+// Traverse the region tree, find all nested scopes and merge them if possible.
+CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
+ SmallVectorImpl<CHRScope *> &Scopes) {
+ CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
+ CHRScope *Result = findScope(R);
+ // Visit subscopes.
+ CHRScope *ConsecutiveSubscope = nullptr;
+ SmallVector<CHRScope *, 8> Subscopes;
+ for (auto It = R->begin(); It != R->end(); ++It) {
+ const std::unique_ptr<Region> &SubR = *It;
+ auto NextIt = std::next(It);
+ Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
+ CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
+ << "\n");
+ CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
+ if (SubCHRScope) {
+ CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
+ } else {
+ CHR_DEBUG(dbgs() << "Subregion Scope null\n");
+ }
+ if (SubCHRScope) {
+ if (!ConsecutiveSubscope)
+ ConsecutiveSubscope = SubCHRScope;
+ else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
+ Subscopes.push_back(ConsecutiveSubscope);
+ ConsecutiveSubscope = SubCHRScope;
+ } else
+ ConsecutiveSubscope->append(SubCHRScope);
+ } else {
+ if (ConsecutiveSubscope) {
+ Subscopes.push_back(ConsecutiveSubscope);
+ }
+ ConsecutiveSubscope = nullptr;
+ }
+ }
+ if (ConsecutiveSubscope) {
+ Subscopes.push_back(ConsecutiveSubscope);
+ }
+ for (CHRScope *Sub : Subscopes) {
+ if (Result) {
+ // Combine it with the parent.
+ Result->addSub(Sub);
+ } else {
+ // Push Subscopes as they won't be combined with the parent.
+ Scopes.push_back(Sub);
+ }
+ }
+ return Result;
+}
+
+static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
+ DenseSet<Value *> ConditionValues;
+ if (RI.HasBranch) {
+ auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
+ ConditionValues.insert(BI->getCondition());
+ }
+ for (SelectInst *SI : RI.Selects) {
+ ConditionValues.insert(SI->getCondition());
+ }
+ return ConditionValues;
+}
+
+
+// Determine whether to split a scope depending on the sets of the branch
+// condition values of the previous region and the current region. We split
+// (return true) if 1) the condition values of the inner/lower scope can't be
+// hoisted up to the outer/upper scope, or 2) the two sets of condition values
+// have an empty intersection (because the combined branch conditions probably
+// won't lead to a simpler combined condition).
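+//
+// For illustration, if the previous region branches on (a < 0) and the current
+// one branches on (a > 10), where 'a' is a function argument and both compares
+// are hoistable, the base-value sets are both {a} and the scopes stay merged;
+// if the current region instead branches on (b != 0), the sets {a} and {b}
+// don't intersect and the scope is split.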
+static bool shouldSplit(Instruction *InsertPoint,
+ DenseSet<Value *> &PrevConditionValues,
+ DenseSet<Value *> &ConditionValues,
+ DominatorTree &DT,
+ DenseSet<Instruction *> &Unhoistables) {
+ CHR_DEBUG(
+ dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
+ for (Value *V : PrevConditionValues) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << " ConditionValues ";
+ for (Value *V : ConditionValues) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << "\n");
+ assert(InsertPoint && "Null InsertPoint");
+ // If any of Bases isn't hoistable to the hoist point, split.
+ for (Value *V : ConditionValues) {
+ DenseMap<Instruction *, bool> Visited;
+ if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
+ CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
+ return true; // Not hoistable, split.
+ }
+ }
+ // If PrevConditionValues or ConditionValues is empty, don't split to avoid
+ // unnecessary splits at scopes with no branch/selects. If
+ // PrevConditionValues and ConditionValues don't intersect at all, split.
+ if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
+ // Use std::set as DenseSet doesn't work with set_intersection.
+ std::set<Value *> PrevBases, Bases;
+ DenseMap<Value *, std::set<Value *>> Visited;
+ for (Value *V : PrevConditionValues) {
+ std::set<Value *> BaseValues = getBaseValues(V, DT, Visited);
+ PrevBases.insert(BaseValues.begin(), BaseValues.end());
+ }
+ for (Value *V : ConditionValues) {
+ std::set<Value *> BaseValues = getBaseValues(V, DT, Visited);
+ Bases.insert(BaseValues.begin(), BaseValues.end());
+ }
+ CHR_DEBUG(
+ dbgs() << "PrevBases ";
+ for (Value *V : PrevBases) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << " Bases ";
+ for (Value *V : Bases) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << "\n");
+ std::set<Value *> Intersection;
+ std::set_intersection(PrevBases.begin(), PrevBases.end(),
+ Bases.begin(), Bases.end(),
+ std::inserter(Intersection, Intersection.begin()));
+ if (Intersection.empty()) {
+ // Empty intersection, split.
+ CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
+ return true;
+ }
+ }
+ CHR_DEBUG(dbgs() << "No split\n");
+ return false; // Don't split.
+}
+
+static void getSelectsInScope(CHRScope *Scope,
+ DenseSet<Instruction *> &Output) {
+ for (RegInfo &RI : Scope->RegInfos)
+ for (SelectInst *SI : RI.Selects)
+ Output.insert(SI);
+ for (CHRScope *Sub : Scope->Subs)
+ getSelectsInScope(Sub, Output);
+}
+
+void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ for (CHRScope *Scope : Input) {
+ assert(!Scope->BranchInsertPoint &&
+ "BranchInsertPoint must not be set");
+ DenseSet<Instruction *> Unhoistables;
+ getSelectsInScope(Scope, Unhoistables);
+ splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
+ }
+#ifndef NDEBUG
+ for (CHRScope *Scope : Output) {
+ assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
+ }
+#endif
+}
+
+SmallVector<CHRScope *, 8> CHR::splitScope(
+ CHRScope *Scope,
+ CHRScope *Outer,
+ DenseSet<Value *> *OuterConditionValues,
+ Instruction *OuterInsertPoint,
+ SmallVectorImpl<CHRScope *> &Output,
+ DenseSet<Instruction *> &Unhoistables) {
+ if (Outer) {
+ assert(OuterConditionValues && "Null OuterConditionValues");
+ assert(OuterInsertPoint && "Null OuterInsertPoint");
+ }
+ bool PrevSplitFromOuter = true;
+ DenseSet<Value *> PrevConditionValues;
+ Instruction *PrevInsertPoint = nullptr;
+ SmallVector<CHRScope *, 8> Splits;
+ SmallVector<bool, 8> SplitsSplitFromOuter;
+ SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
+ SmallVector<Instruction *, 8> SplitsInsertPoints;
+ SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
+ for (RegInfo &RI : RegInfos) {
+ Instruction *InsertPoint = getBranchInsertPoint(RI);
+ DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
+ CHR_DEBUG(
+ dbgs() << "ConditionValues ";
+ for (Value *V : ConditionValues) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << "\n");
+ if (RI.R == RegInfos[0].R) {
+ // First iteration. Check to see if we should split from the outer.
+ if (Outer) {
+ CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
+ CHR_DEBUG(dbgs() << "Should split from outer at "
+ << RI.R->getNameStr() << "\n");
+ if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
+ ConditionValues, DT, Unhoistables)) {
+ PrevConditionValues = ConditionValues;
+ PrevInsertPoint = InsertPoint;
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "SplitScopeFromOuter",
+ RI.R->getEntry()->getTerminator())
+ << "Split scope from outer due to unhoistable branch/select "
+ << "and/or lack of common condition values";
+ });
+ } else {
+ // Not splitting from the outer. Use the outer bases and insert
+ // point. Union the bases.
+ PrevSplitFromOuter = false;
+ PrevConditionValues = *OuterConditionValues;
+ PrevConditionValues.insert(ConditionValues.begin(),
+ ConditionValues.end());
+ PrevInsertPoint = OuterInsertPoint;
+ }
+ } else {
+ CHR_DEBUG(dbgs() << "Outer null\n");
+ PrevConditionValues = ConditionValues;
+ PrevInsertPoint = InsertPoint;
+ }
+ } else {
+ CHR_DEBUG(dbgs() << "Should split from prev at "
+ << RI.R->getNameStr() << "\n");
+ if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
+ DT, Unhoistables)) {
+ CHRScope *Tail = Scope->split(RI.R);
+ Scopes.insert(Tail);
+ Splits.push_back(Scope);
+ SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
+ SplitsConditionValues.push_back(PrevConditionValues);
+ SplitsInsertPoints.push_back(PrevInsertPoint);
+ Scope = Tail;
+ PrevConditionValues = ConditionValues;
+ PrevInsertPoint = InsertPoint;
+ PrevSplitFromOuter = true;
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "SplitScopeFromPrev",
+ RI.R->getEntry()->getTerminator())
+ << "Split scope from previous due to unhoistable branch/select "
+ << "and/or lack of common condition values";
+ });
+ } else {
+ // Not splitting. Union the bases. Keep the hoist point.
+ PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
+ }
+ }
+ }
+ Splits.push_back(Scope);
+ SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
+ SplitsConditionValues.push_back(PrevConditionValues);
+ assert(PrevInsertPoint && "Null PrevInsertPoint");
+ SplitsInsertPoints.push_back(PrevInsertPoint);
+ assert(Splits.size() == SplitsConditionValues.size() &&
+ Splits.size() == SplitsSplitFromOuter.size() &&
+ Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
+ for (size_t I = 0; I < Splits.size(); ++I) {
+ CHRScope *Split = Splits[I];
+ DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
+ Instruction *SplitInsertPoint = SplitsInsertPoints[I];
+ SmallVector<CHRScope *, 8> NewSubs;
+ DenseSet<Instruction *> SplitUnhoistables;
+ getSelectsInScope(Split, SplitUnhoistables);
+ for (CHRScope *Sub : Split->Subs) {
+ SmallVector<CHRScope *, 8> SubSplits = splitScope(
+ Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
+ SplitUnhoistables);
+ NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end());
+ }
+ Split->Subs = NewSubs;
+ }
+ SmallVector<CHRScope *, 8> Result;
+ for (size_t I = 0; I < Splits.size(); ++I) {
+ CHRScope *Split = Splits[I];
+ if (SplitsSplitFromOuter[I]) {
+ // Split from the outer.
+ Output.push_back(Split);
+ Split->BranchInsertPoint = SplitsInsertPoints[I];
+ CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
+ << "\n");
+ } else {
+ // Connected to the outer.
+ Result.push_back(Split);
+ }
+ }
+ if (!Outer)
+ assert(Result.empty() &&
+ "If no outer (top-level), must return no nested ones");
+ return Result;
+}
+
+void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
+ for (CHRScope *Scope : Scopes) {
+ assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
+ classifyBiasedScopes(Scope, Scope);
+ CHR_DEBUG(
+ dbgs() << "classifyBiasedScopes " << *Scope << "\n";
+ dbgs() << "TrueBiasedRegions ";
+ for (Region *R : Scope->TrueBiasedRegions) {
+ dbgs() << R->getNameStr() << ", ";
+ }
+ dbgs() << "\n";
+ dbgs() << "FalseBiasedRegions ";
+ for (Region *R : Scope->FalseBiasedRegions) {
+ dbgs() << R->getNameStr() << ", ";
+ }
+ dbgs() << "\n";
+ dbgs() << "TrueBiasedSelects ";
+ for (SelectInst *SI : Scope->TrueBiasedSelects) {
+ dbgs() << *SI << ", ";
+ }
+ dbgs() << "\n";
+ dbgs() << "FalseBiasedSelects ";
+ for (SelectInst *SI : Scope->FalseBiasedSelects) {
+ dbgs() << *SI << ", ";
+ }
+ dbgs() << "\n";);
+ }
+}
+
+void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
+ for (RegInfo &RI : Scope->RegInfos) {
+ if (RI.HasBranch) {
+ Region *R = RI.R;
+ if (TrueBiasedRegionsGlobal.count(R) > 0)
+ OutermostScope->TrueBiasedRegions.insert(R);
+ else if (FalseBiasedRegionsGlobal.count(R) > 0)
+ OutermostScope->FalseBiasedRegions.insert(R);
+ else
+ llvm_unreachable("Must be biased");
+ }
+ for (SelectInst *SI : RI.Selects) {
+ if (TrueBiasedSelectsGlobal.count(SI) > 0)
+ OutermostScope->TrueBiasedSelects.insert(SI);
+ else if (FalseBiasedSelectsGlobal.count(SI) > 0)
+ OutermostScope->FalseBiasedSelects.insert(SI);
+ else
+ llvm_unreachable("Must be biased");
+ }
+ }
+ for (CHRScope *Sub : Scope->Subs) {
+ classifyBiasedScopes(Sub, OutermostScope);
+ }
+}
+
+static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
+ unsigned NumBiased = Scope->TrueBiasedRegions.size() +
+ Scope->FalseBiasedRegions.size() +
+ Scope->TrueBiasedSelects.size() +
+ Scope->FalseBiasedSelects.size();
+ return NumBiased >= CHRMergeThreshold;
+}
+
+void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ for (CHRScope *Scope : Input) {
+ // Filter out the scopes with fewer than CHRMergeThreshold biased
+ // branches/selects.
+ if (!hasAtLeastTwoBiasedBranches(Scope)) {
+ CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
+ << Scope->TrueBiasedRegions.size()
+ << " falsy-regions " << Scope->FalseBiasedRegions.size()
+ << " true-selects " << Scope->TrueBiasedSelects.size()
+ << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(
+ DEBUG_TYPE,
+ "DropScopeWithOneBranchOrSelect",
+ Scope->RegInfos[0].R->getEntry()->getTerminator())
+ << "Drop scope with < "
+ << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
+ << " biased branch(es) or select(s)";
+ });
+ continue;
+ }
+ Output.push_back(Scope);
+ }
+}
+
+void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ for (CHRScope *Scope : Input) {
+ assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
+ "Empty");
+ setCHRRegions(Scope, Scope);
+ Output.push_back(Scope);
+ CHR_DEBUG(
+ dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
+ for (auto pair : Scope->HoistStopMap) {
+ Region *R = pair.first;
+ dbgs() << "Region " << R->getNameStr() << "\n";
+ for (Instruction *I : pair.second) {
+ dbgs() << "HoistStop " << *I << "\n";
+ }
+ }
+ dbgs() << "CHRRegions" << "\n";
+ for (RegInfo &RI : Scope->CHRRegions) {
+ dbgs() << RI.R->getNameStr() << "\n";
+ });
+ }
+}
+
+void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
+ DenseSet<Instruction *> Unhoistables;
+ // Put the biased selects in Unhoistables because they should stay where they
+ // are and be constant-folded after CHR (in case one biased select or a branch
+ // depends on another biased select).
+ for (RegInfo &RI : Scope->RegInfos) {
+ for (SelectInst *SI : RI.Selects) {
+ Unhoistables.insert(SI);
+ }
+ }
+ Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
+ for (RegInfo &RI : Scope->RegInfos) {
+ Region *R = RI.R;
+ DenseSet<Instruction *> HoistStops;
+ bool IsHoisted = false;
+ if (RI.HasBranch) {
+ assert((OutermostScope->TrueBiasedRegions.count(R) > 0 ||
+ OutermostScope->FalseBiasedRegions.count(R) > 0) &&
+ "Must be truthy or falsy");
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ // Note checkHoistValue fills in HoistStops.
+ DenseMap<Instruction *, bool> Visited;
+ bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
+ Unhoistables, &HoistStops, Visited);
+ assert(IsHoistable && "Must be hoistable");
+ (void)(IsHoistable); // Unused in release build
+ IsHoisted = true;
+ }
+ for (SelectInst *SI : RI.Selects) {
+ assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 ||
+ OutermostScope->FalseBiasedSelects.count(SI) > 0) &&
+ "Must be true or false biased");
+ // Note checkHoistValue fills in HoistStops.
+ DenseMap<Instruction *, bool> Visited;
+ bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
+ Unhoistables, &HoistStops, Visited);
+ assert(IsHoistable && "Must be hoistable");
+ (void)(IsHoistable); // Unused in release build
+ IsHoisted = true;
+ }
+ if (IsHoisted) {
+ OutermostScope->CHRRegions.push_back(RI);
+ OutermostScope->HoistStopMap[R] = HoistStops;
+ }
+ }
+ for (CHRScope *Sub : Scope->Subs)
+ setCHRRegions(Sub, OutermostScope);
+}
+
+bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
+ return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
+}
+
+void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ Output.resize(Input.size());
+ llvm::copy(Input, Output.begin());
+ llvm::stable_sort(Output, CHRScopeSorter);
+}
+
+// Hoist V (and recursively its operands) to right before the hoist point,
+// unless V is not an instruction, is already above the hoist point, or is a
+// designated hoist stop.
+static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
+ HoistStopMapTy &HoistStopMap,
+ DenseSet<Instruction *> &HoistedSet,
+ DenseSet<PHINode *> &TrivialPHIs,
+ DominatorTree &DT) {
+ auto IT = HoistStopMap.find(R);
+ assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
+ DenseSet<Instruction *> &HoistStops = IT->second;
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (I == HoistPoint)
+ return;
+ if (HoistStops.count(I))
+ return;
+ if (auto *PN = dyn_cast<PHINode>(I))
+ if (TrivialPHIs.count(PN))
+ // The trivial phi inserted by the previous CHR scope could replace a
+ // non-phi in HoistStops. Note that since this phi is at the exit of a
+ // previous CHR scope, which dominates this scope, it's safe to stop
+ // hoisting there.
+ return;
+ if (HoistedSet.count(I))
+ // Already hoisted, return.
+ return;
+ assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
+ assert(DT.getNode(I->getParent()) && "DT must contain I's block");
+ assert(DT.getNode(HoistPoint->getParent()) &&
+ "DT must contain HoistPoint block");
+ if (DT.dominates(I, HoistPoint))
+ // We are already above the hoist point. Stop here. This may be necessary
+ // when multiple scopes would independently hoist the same
+ // instruction. Since an outer (dominating) scope hoists it to its entry
+ // before an inner (dominated) scope would hoist it to its own entry, the
+ // inner scope may see the instruction already hoisted. In that case it would
+ // be wrong for the inner scope to hoist it again (it could create a
+ // non-dominating def and bad IR), but it is safe to simply skip hoisting it
+ // because it already sits in a block that dominates the inner scope.
+ return;
+ for (Value *Op : I->operands()) {
+ hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
+ }
+ I->moveBefore(HoistPoint);
+ HoistedSet.insert(I);
+ CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
+ }
+}
+
+// Hoist the dependent condition values of the branches and the selects in the
+// scope to the insert point.
+static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
+ DenseSet<PHINode *> &TrivialPHIs,
+ DominatorTree &DT) {
+ DenseSet<Instruction *> HoistedSet;
+ for (const RegInfo &RI : Scope->CHRRegions) {
+ Region *R = RI.R;
+ bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+ bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
+ if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
+ HoistedSet, TrivialPHIs, DT);
+ }
+ for (SelectInst *SI : RI.Selects) {
+ bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+ bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
+ if (!(IsTrueBiased || IsFalseBiased))
+ continue;
+ hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
+ HoistedSet, TrivialPHIs, DT);
+ }
+ }
+}
+
+// Negate the predicate of an ICmp that is used only by branches or selects,
+// compensating by swapping the successors of those branches and the operands of
+// those selects. Returns true on success.
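+//
+// For example, an 'icmp slt i32 %x, %y' feeding only a conditional branch is
+// rewritten to 'icmp sge i32 %x, %y' while the branch successors are swapped,
+// leaving the overall control flow unchanged.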
+static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
+ Instruction *ExcludedUser,
+ CHRScope *Scope) {
+ for (User *U : ICmp->users()) {
+ if (U == ExcludedUser)
+ continue;
+ if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
+ continue;
+ if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
+ continue;
+ return false;
+ }
+ for (User *U : ICmp->users()) {
+ if (U == ExcludedUser)
+ continue;
+ if (auto *BI = dyn_cast<BranchInst>(U)) {
+ assert(BI->isConditional() && "Must be conditional");
+ BI->swapSuccessors();
+ // No need to swap this in terms of
+ // TrueBiasedRegions/FalseBiasedRegions because true-biased/false-biased
+ // mean whether the branch is likely to go into the if-then rather than
+ // successor0/successor1, and because we can tell which edge is the then or
+ // the else one by comparing the destination to the region exit block.
+ continue;
+ }
+ if (auto *SI = dyn_cast<SelectInst>(U)) {
+ // Swap operands
+ SI->swapValues();
+ SI->swapProfMetadata();
+ if (Scope->TrueBiasedSelects.count(SI)) {
+ assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
+ "Must not be already in");
+ Scope->FalseBiasedSelects.insert(SI);
+ } else if (Scope->FalseBiasedSelects.count(SI)) {
+ assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
+ "Must not be already in");
+ Scope->TrueBiasedSelects.insert(SI);
+ }
+ continue;
+ }
+ llvm_unreachable("Must be a branch or a select");
+ }
+ ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
+ return true;
+}
+
+// A helper for transformScopes. Insert a trivial phi at the scope exit block
+// for a value that's defined in the scope but used outside it (meaning it's
+// alive at the exit block).
+static void insertTrivialPHIs(CHRScope *Scope,
+ BasicBlock *EntryBlock, BasicBlock *ExitBlock,
+ DenseSet<PHINode *> &TrivialPHIs) {
+ DenseSet<BasicBlock *> BlocksInScopeSet;
+ SmallVector<BasicBlock *, 8> BlocksInScopeVec;
+ for (RegInfo &RI : Scope->RegInfos) {
+ for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
+ // sub-Scopes.
+ BlocksInScopeSet.insert(BB);
+ BlocksInScopeVec.push_back(BB);
+ }
+ }
+ CHR_DEBUG(
+ dbgs() << "Inserting redundant phis\n";
+ for (BasicBlock *BB : BlocksInScopeVec) {
+ dbgs() << "BlockInScope " << BB->getName() << "\n";
+ });
+ for (BasicBlock *BB : BlocksInScopeVec) {
+ for (Instruction &I : *BB) {
+ SmallVector<Instruction *, 8> Users;
+ for (User *U : I.users()) {
+ if (auto *UI = dyn_cast<Instruction>(U)) {
+ if (BlocksInScopeSet.count(UI->getParent()) == 0 &&
+ // Unless there's already a phi for I at the exit block.
+ !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
+ CHR_DEBUG(dbgs() << "V " << I << "\n");
+ CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
+ Users.push_back(UI);
+ } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
+ // There's a loop backedge from a block that's dominated by this
+ // scope to the entry block.
+ CHR_DEBUG(dbgs() << "V " << I << "\n");
+ CHR_DEBUG(dbgs()
+ << "Used at entry block (for a back edge) by a phi user "
+ << *UI << "\n");
+ Users.push_back(UI);
+ }
+ }
+ }
+ if (Users.size() > 0) {
+ // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
+ // ExitBlock. Replace I with the new phi in UI unless UI is another
+ // phi at ExitBlock.
+ unsigned PredCount = std::distance(pred_begin(ExitBlock),
+ pred_end(ExitBlock));
+ PHINode *PN = PHINode::Create(I.getType(), PredCount, "",
+ &ExitBlock->front());
+ for (BasicBlock *Pred : predecessors(ExitBlock)) {
+ PN->addIncoming(&I, Pred);
+ }
+ TrivialPHIs.insert(PN);
+ CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
+ for (Instruction *UI : Users) {
+ for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
+ if (UI->getOperand(J) == &I) {
+ UI->setOperand(J, PN);
+ }
+ }
+ CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
+ }
+ }
+ }
+ }
+}
+
+// Assert that all the CHR regions of the scope have a biased branch or select.
+static void LLVM_ATTRIBUTE_UNUSED
+assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
+#ifndef NDEBUG
+ auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
+ if (Scope->TrueBiasedRegions.count(RI.R) ||
+ Scope->FalseBiasedRegions.count(RI.R))
+ return true;
+ for (SelectInst *SI : RI.Selects)
+ if (Scope->TrueBiasedSelects.count(SI) ||
+ Scope->FalseBiasedSelects.count(SI))
+ return true;
+ return false;
+ };
+ for (RegInfo &RI : Scope->CHRRegions) {
+ assert(HasBiasedBranchOrSelect(RI, Scope) &&
+ "Must have biased branch or select");
+ }
+#endif
+}
+
+// Assert that all the condition values of the biased branches and selects have
+// been hoisted to the pre-entry block or outside of the scope.
+static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
+ CHRScope *Scope, BasicBlock *PreEntryBlock) {
+ CHR_DEBUG(dbgs() << "Biased regions condition values \n");
+ for (RegInfo &RI : Scope->CHRRegions) {
+ Region *R = RI.R;
+ bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+ bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
+ if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ Value *V = BI->getCondition();
+ CHR_DEBUG(dbgs() << *V << "\n");
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ (void)(I); // Unused in release build.
+ assert((I->getParent() == PreEntryBlock ||
+ !Scope->contains(I)) &&
+ "Must have been hoisted to PreEntryBlock or outside the scope");
+ }
+ }
+ for (SelectInst *SI : RI.Selects) {
+ bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+ bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
+ if (!(IsTrueBiased || IsFalseBiased))
+ continue;
+ Value *V = SI->getCondition();
+ CHR_DEBUG(dbgs() << *V << "\n");
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ (void)(I); // Unused in release build.
+ assert((I->getParent() == PreEntryBlock ||
+ !Scope->contains(I)) &&
+ "Must have been hoisted to PreEntryBlock or outside the scope");
+ }
+ }
+ }
+}
+
+void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
+ CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
+
+ assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
+ Region *FirstRegion = Scope->RegInfos[0].R;
+ BasicBlock *EntryBlock = FirstRegion->getEntry();
+ Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
+ BasicBlock *ExitBlock = LastRegion->getExit();
+ Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
+
+ if (ExitBlock) {
+ // Insert a trivial phi at the exit block (where the CHR hot path and the
+ // cold path merges) for a value that's defined in the scope but used
+ // outside it (meaning it's alive at the exit block). We will add the
+ // incoming values for the CHR cold paths to it below. Without this, we'd
+ // miss updating phi's for such values unless there happens to already be a
+ // phi for that value there.
+ insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
+ }
+
+ // Split the entry block of the first region. The new block becomes the new
+ // entry block of the first region. The old entry block becomes the block to
+ // insert the CHR branch into. Note that DT gets updated by the split, and we
+ // update the entry of the first region after the split. Since a Region only
+ // points to its entry and exit blocks, rather than keeping every block in a
+ // list or set, the block membership and the entry/exit blocks of the region
+ // remain valid after the split.
+ CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
+ << " at " << *Scope->BranchInsertPoint << "\n");
+ BasicBlock *NewEntryBlock =
+ SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
+ assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+ "NewEntryBlock's only pred must be EntryBlock");
+ FirstRegion->replaceEntryRecursive(NewEntryBlock);
+ BasicBlock *PreEntryBlock = EntryBlock;
+
+ ValueToValueMapTy VMap;
+ // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
+ // hot path (originals) and a cold path (clones) and update the PHIs at the
+ // exit block.
+ cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
+
+ // Replace the old (placeholder) branch with the new (merged) conditional
+ // branch.
+ BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
+ NewEntryBlock, VMap);
+
+#ifndef NDEBUG
+ assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
+#endif
+
+ // Hoist the conditional values of the branches/selects.
+ hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
+
+#ifndef NDEBUG
+ assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
+#endif
+
+ // Create the combined branch condition and constant-fold the branches/selects
+ // in the hot path.
+ fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
+ ProfileCount ? ProfileCount.getValue() : 0);
+}
+
+// A helper for transformScopes. Clone the blocks in the scope (excluding the
+// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
+// at the exit block.
+void CHR::cloneScopeBlocks(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BasicBlock *ExitBlock,
+ Region *LastRegion,
+ ValueToValueMapTy &VMap) {
+ // Clone all the blocks. The original blocks will be the hot-path
+ // CHR-optimized code and the cloned blocks will be the original unoptimized
+ // code. This is so that the block pointers from the
+ // CHRScope/Region/RegionInfo can keep pointing to the hot-path code to which
+ // CHR should apply.
+ SmallVector<BasicBlock*, 8> NewBlocks;
+ for (RegInfo &RI : Scope->RegInfos)
+ for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
+ // sub-Scopes.
+ assert(BB != PreEntryBlock && "Don't copy the pre-entry block");
+ BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
+ NewBlocks.push_back(NewBB);
+ VMap[BB] = NewBB;
+ }
+
+ // Place the cloned blocks right after the original blocks (right before the
+ // exit block, if any).
+ if (ExitBlock)
+ F.getBasicBlockList().splice(ExitBlock->getIterator(),
+ F.getBasicBlockList(),
+ NewBlocks[0]->getIterator(), F.end());
+
+ // Update the cloned blocks/instructions to refer to themselves.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
+ for (Instruction &I : *NewBlocks[i])
+ RemapInstruction(&I, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+
+ // Add the cloned blocks as incoming blocks to the PHIs of the exit block.
+ // ExitBlock is null for the top-level region, in which case we don't need to
+ // update any PHIs. The trivial PHIs inserted above will be updated here.
+ if (ExitBlock)
+ for (PHINode &PN : ExitBlock->phis())
+ for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
+ ++I) {
+ BasicBlock *Pred = PN.getIncomingBlock(I);
+ if (LastRegion->contains(Pred)) {
+ Value *V = PN.getIncomingValue(I);
+ auto It = VMap.find(V);
+ if (It != VMap.end()) V = It->second;
+ assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
+ PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
+ }
+ }
+}
+
+// A helper for transformScope. Replace the old (placeholder) branch with the
+// new (merged) conditional branch.
+BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
+ BasicBlock *EntryBlock,
+ BasicBlock *NewEntryBlock,
+ ValueToValueMapTy &VMap) {
+ BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
+ assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
+ "SplitBlock did not work correctly!");
+ assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+ "NewEntryBlock's only pred must be EntryBlock");
+ assert(VMap.find(NewEntryBlock) != VMap.end() &&
+ "NewEntryBlock must have been copied");
+ OldBR->dropAllReferences();
+ OldBR->eraseFromParent();
+ // The constant-true condition is a placeholder. It will be replaced later in
+ // fixupBranchesAndSelects().
+ BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
+ cast<BasicBlock>(VMap[NewEntryBlock]),
+ ConstantInt::getTrue(F.getContext()));
+ PreEntryBlock->getInstList().push_back(NewBR);
+ assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+ "NewEntryBlock's only pred must be EntryBlock");
+ return NewBR;
+}
+
+// A helper for transformScopes. Create the combined branch condition and
+// constant-fold the branches/selects in the hot path.
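+//
+// The merged branch is given !prof branch weights scaled to a denominator of
+// 1000; e.g. if the smallest bias among the merged branches/selects is 99%,
+// the resulting weights are 990:10.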
+void CHR::fixupBranchesAndSelects(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BranchInst *MergedBR,
+ uint64_t ProfileCount) {
+ Value *MergedCondition = ConstantInt::getTrue(F.getContext());
+ BranchProbability CHRBranchBias(1, 1);
+ uint64_t NumCHRedBranches = 0;
+ IRBuilder<> IRB(PreEntryBlock->getTerminator());
+ for (RegInfo &RI : Scope->CHRRegions) {
+ Region *R = RI.R;
+ if (RI.HasBranch) {
+ fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
+ ++NumCHRedBranches;
+ }
+ for (SelectInst *SI : RI.Selects) {
+ fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
+ ++NumCHRedBranches;
+ }
+ }
+ Stats.NumBranchesDelta += NumCHRedBranches - 1;
+ Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE,
+ "CHR",
+ // Refer to the hot (original) path
+ MergedBR->getSuccessor(0)->getTerminator())
+ << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
+ << " branches or selects";
+ });
+ MergedBR->setCondition(MergedCondition);
+ SmallVector<uint32_t, 2> Weights;
+ Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000)));
+ Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)));
+ MDBuilder MDB(F.getContext());
+ MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+ CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
+ << "\n");
+}
+
+// A helper for fixupBranchesAndSelects. Add to the combined branch condition
+// and constant-fold a branch in the hot path.
+void CHR::fixupBranch(Region *R, CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition,
+ BranchProbability &CHRBranchBias) {
+ bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+ assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
+ "Must be truthy or falsy");
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
+ "Must be in the bias map");
+ BranchProbability Bias = BranchBiasMap[R];
+ assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
+ // Take the min.
+ if (CHRBranchBias > Bias)
+ CHRBranchBias = Bias;
+ BasicBlock *IfThen = BI->getSuccessor(1);
+ BasicBlock *IfElse = BI->getSuccessor(0);
+ BasicBlock *RegionExitBlock = R->getExit();
+ assert(RegionExitBlock && "Null ExitBlock");
+ assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
+ IfThen != IfElse && "Invariant from findScopes");
+ if (IfThen == RegionExitBlock) {
+ // Swap them so that IfThen means going into it and IfElse means skipping
+ // it.
+ std::swap(IfThen, IfElse);
+ }
+ CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
+ << " IfElse " << IfElse->getName() << "\n");
+ Value *Cond = BI->getCondition();
+ BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
+ bool ConditionTrue = HotTarget == BI->getSuccessor(0);
+ addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
+ MergedCondition);
+ // Constant-fold the branch at ClonedEntryBlock.
+ assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
+ "The successor shouldn't change");
+ Value *NewCondition = ConditionTrue ?
+ ConstantInt::getTrue(F.getContext()) :
+ ConstantInt::getFalse(F.getContext());
+ BI->setCondition(NewCondition);
+}
+
+// A helper for fixupBranchesAndSelects. Add to the combined branch condition
+// and constant-fold a select in the hot path.
+void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition,
+ BranchProbability &CHRBranchBias) {
+ bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+ assert((IsTrueBiased ||
+ Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
+ assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
+ "Must be in the bias map");
+ BranchProbability Bias = SelectBiasMap[SI];
+ assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
+ // Take the min.
+ if (CHRBranchBias > Bias)
+ CHRBranchBias = Bias;
+ Value *Cond = SI->getCondition();
+ addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
+ MergedCondition);
+ Value *NewCondition = IsTrueBiased ?
+ ConstantInt::getTrue(F.getContext()) :
+ ConstantInt::getFalse(F.getContext());
+ SI->setCondition(NewCondition);
+}
+
+// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
+// condition.
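+//
+// For example, with a true-biased condition c1 and a false-biased condition c2,
+// the merged condition accumulates to (c1 & !c2), where the negation of c2 is
+// done either by inverting its icmp predicate (when possible) or by xor'ing it
+// with true.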
+void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
+ Instruction *BranchOrSelect,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition) {
+ if (IsTrueBiased) {
+ MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
+ } else {
+ // If Cond is an icmp and all of its users except for BranchOrSelect are
+ // branches or selects, negate the icmp predicate and swap the branch targets
+ // (and select operands) to avoid inserting an Xor to negate Cond.
+ bool Done = false;
+ if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
+ if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
+ MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
+ Done = true;
+ }
+ if (!Done) {
+ Value *Negate = IRB.CreateXor(
+ ConstantInt::getTrue(F.getContext()), Cond);
+ MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
+ }
+ }
+}
+
+void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
+ unsigned I = 0;
+ DenseSet<PHINode *> TrivialPHIs;
+ for (CHRScope *Scope : CHRScopes) {
+ transformScopes(Scope, TrivialPHIs);
+ CHR_DEBUG(
+ std::ostringstream oss;
+ oss << " after transformScopes " << I++;
+ dumpIR(F, oss.str().c_str(), nullptr));
+ (void)I;
+ }
+}
+
+static void LLVM_ATTRIBUTE_UNUSED
+dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
+ dbgs() << Label << " " << Scopes.size() << "\n";
+ for (CHRScope *Scope : Scopes) {
+ dbgs() << *Scope << "\n";
+ }
+}
+
+bool CHR::run() {
+ if (!shouldApply(F, PSI))
+ return false;
+
+ CHR_DEBUG(dumpIR(F, "before", nullptr));
+
+ bool Changed = false;
+ {
+ CHR_DEBUG(
+ dbgs() << "RegionInfo:\n";
+ RI.print(dbgs()));
+
+ // Recursively traverse the region tree and find regions that have biased
+ // branches and/or selects and create scopes.
+ SmallVector<CHRScope *, 8> AllScopes;
+ findScopes(AllScopes);
+ CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
+
+ // Split the scopes if 1) the condition values of the biased
+ // branches/selects of the inner/lower scope can't be hoisted up to the
+ // outermost/uppermost scope entry, or 2) the condition values of the biased
+ // branches/selects in a scope (including subscopes) don't share at least
+ // one common value.
+ SmallVector<CHRScope *, 8> SplitScopes;
+ splitScopes(AllScopes, SplitScopes);
+ CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
+
+ // After splitting, set the biased regions and selects of a scope (a tree
+ // root) that include those of the subscopes.
+ classifyBiasedScopes(SplitScopes);
+ CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
+
+ // Filter out the scopes that have only one biased region or select (CHR
+ // isn't useful in such a case).
+ SmallVector<CHRScope *, 8> FilteredScopes;
+ filterScopes(SplitScopes, FilteredScopes);
+ CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
+
+ // Set the regions to be CHR'ed and their hoist stops for each scope.
+ SmallVector<CHRScope *, 8> SetScopes;
+ setCHRRegions(FilteredScopes, SetScopes);
+ CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
+
+ // Sort CHRScopes by depth so that outer CHRScopes come before inner
+ // ones. We need to apply CHR from outer to inner so that we apply CHR only
+ // to the hot path, rather than both hot and cold paths.
+ SmallVector<CHRScope *, 8> SortedScopes;
+ sortScopes(SetScopes, SortedScopes);
+ CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
+
+ CHR_DEBUG(
+ dbgs() << "RegionInfo:\n";
+ RI.print(dbgs()));
+
+ // Apply the CHR transformation.
+ if (!SortedScopes.empty()) {
+ transformScopes(SortedScopes);
+ Changed = true;
+ }
+ }
+
+ if (Changed) {
+ CHR_DEBUG(dumpIR(F, "after", &Stats));
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
+ << ore::NV("Function", &F) << " "
+ << "Reduced the number of branches in hot paths by "
+ << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
+ << " (static) and "
+ << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
+ << " (weighted by PGO count)";
+ });
+ }
+
+ return Changed;
+}
+
+bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
+ BlockFrequencyInfo &BFI =
+ getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ ProfileSummaryInfo &PSI =
+ getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
+ std::unique_ptr<OptimizationRemarkEmitter> OwnedORE =
+ std::make_unique<OptimizationRemarkEmitter>(&F);
+ return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run();
+}
+
+namespace llvm {
+
+ControlHeightReductionPass::ControlHeightReductionPass() {
+ parseCHRFilterFiles();
+}
+
+PreservedAnalyses ControlHeightReductionPass::run(
+ Function &F,
+ FunctionAnalysisManager &FAM) {
+ auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+ auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+ auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+ auto &MAM = MAMProxy.getManager();
+ auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
+ if (!Changed)
+ return PreservedAnalyses::all();
+ auto PA = PreservedAnalyses();
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
+
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
new file mode 100644
index 000000000000..c0353cba0b2f
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -0,0 +1,1778 @@
+//===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
+/// analysis.
+///
+/// Unlike other Sanitizer tools, this tool is not designed to detect a specific
+/// class of bugs on its own. Instead, it provides a generic dynamic data flow
+/// analysis framework to be used by clients to help detect application-specific
+/// issues within their own code.
+///
+/// The analysis is based on automatic propagation of data flow labels (also
+/// known as taint labels) through a program as it performs computation. Each
+/// byte of application memory is backed by two bytes of shadow memory which
+/// hold the label. On Linux/x86_64, memory is laid out as follows:
+///
+/// +--------------------+ 0x800000000000 (top of memory)
+/// | application memory |
+/// +--------------------+ 0x700000008000 (kAppAddr)
+/// | |
+/// | unused |
+/// | |
+/// +--------------------+ 0x200200000000 (kUnusedAddr)
+/// | union table |
+/// +--------------------+ 0x200000000000 (kUnionTableAddr)
+/// | shadow memory |
+/// +--------------------+ 0x000000010000 (kShadowAddr)
+/// | reserved by kernel |
+/// +--------------------+ 0x000000000000
+///
+/// To derive a shadow memory address from an application memory address,
+/// bits 44-46 are cleared to bring the address into the range
+/// [0x000000008000,0x100000000000). Then the address is shifted left by 1 to
+/// account for the double byte representation of shadow labels and move the
+/// address into the shadow memory range. See the function
+/// DataFlowSanitizer::getShadowAddress below.
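+///
+/// For example, the lowest application address kAppAddr = 0x700000008000 maps
+/// to (0x700000008000 & ~0x700000000000) << 1 = 0x8000 << 1 = 0x10000, which is
+/// kShadowAddr, the start of the shadow region.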
+///
+/// For more information, please refer to the design document:
+/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SpecialCaseList.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+// External symbol to be used when generating the shadow address for
+// architectures with multiple VMAs. Instead of using a constant integer,
+// the runtime will set the external mask based on the VMA range.
+static const char *const kDFSanExternShadowPtrMask = "__dfsan_shadow_ptr_mask";
+
+// The -dfsan-preserve-alignment flag controls whether this pass assumes that
+// alignment requirements provided by the input IR are correct. For example,
+// if the input IR contains a load with alignment 8, this flag will cause
+// the shadow load to have alignment 16. This flag is disabled by default as
+// we have unfortunately encountered too much code (including Clang itself;
+// see PR14291) which performs misaligned access.
+static cl::opt<bool> ClPreserveAlignment(
+ "dfsan-preserve-alignment",
+ cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
+ cl::init(false));
+
+// The ABI list files control how shadow parameters are passed. The pass treats
+// every function labelled "uninstrumented" in the ABI list file as conforming
+// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains
+// additional annotations for those functions, a call to one of those functions
+// will produce a warning message, as the labelling behaviour of the function is
+// unknown. The other supported annotations are "functional" and "discard",
+// which are described below under DataFlowSanitizer::WrapperKind.
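+//
+// ABI list files use the SpecialCaseList syntax; function entries take the
+// form "fun:<name>=<category>". For example (illustrative entries only, not
+// taken from any shipped list):
+//
+//   fun:malloc=uninstrumented
+//   fun:malloc=discard
+//   fun:memcpy=uninstrumented
+//   fun:memcpy=custom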
+static cl::list<std::string> ClABIListFiles(
+ "dfsan-abilist",
+ cl::desc("File listing native ABI functions and how the pass treats them"),
+ cl::Hidden);
+
+// Controls whether the pass uses IA_Args or IA_TLS as the ABI for instrumented
+// functions (see DataFlowSanitizer::InstrumentedABI below).
+static cl::opt<bool> ClArgsABI(
+ "dfsan-args-abi",
+ cl::desc("Use the argument ABI rather than the TLS ABI"),
+ cl::Hidden);
+
+// Controls whether the pass includes or ignores the labels of pointers in load
+// instructions.
+static cl::opt<bool> ClCombinePointerLabelsOnLoad(
+ "dfsan-combine-pointer-labels-on-load",
+ cl::desc("Combine the label of the pointer with the label of the data when "
+ "loading from memory."),
+ cl::Hidden, cl::init(true));
+
+// Controls whether the pass includes or ignores the labels of pointers in
+// store instructions.
+static cl::opt<bool> ClCombinePointerLabelsOnStore(
+ "dfsan-combine-pointer-labels-on-store",
+ cl::desc("Combine the label of the pointer with the label of the data when "
+ "storing in memory."),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClDebugNonzeroLabels(
+ "dfsan-debug-nonzero-labels",
+ cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
+ "load or return with a nonzero label"),
+ cl::Hidden);
+
+static StringRef GetGlobalTypeString(const GlobalValue &G) {
+ // Types of GlobalVariables are always pointer types.
+ Type *GType = G.getValueType();
+ // For now we support blacklisting struct types only.
+ if (StructType *SGType = dyn_cast<StructType>(GType)) {
+ if (!SGType->isLiteral())
+ return SGType->getName();
+ }
+ return "<unknown type>";
+}
+
+namespace {
+
+class DFSanABIList {
+ std::unique_ptr<SpecialCaseList> SCL;
+
+ public:
+ DFSanABIList() = default;
+
+ void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); }
+
+  /// Returns whether either this function or its source file is listed in the
+ /// given category.
+ bool isIn(const Function &F, StringRef Category) const {
+ return isIn(*F.getParent(), Category) ||
+ SCL->inSection("dataflow", "fun", F.getName(), Category);
+ }
+
+ /// Returns whether this global alias is listed in the given category.
+ ///
+ /// If GA aliases a function, the alias's name is matched as a function name
+ /// would be. Similarly, aliases of globals are matched like globals.
+ bool isIn(const GlobalAlias &GA, StringRef Category) const {
+ if (isIn(*GA.getParent(), Category))
+ return true;
+
+ if (isa<FunctionType>(GA.getValueType()))
+ return SCL->inSection("dataflow", "fun", GA.getName(), Category);
+
+ return SCL->inSection("dataflow", "global", GA.getName(), Category) ||
+ SCL->inSection("dataflow", "type", GetGlobalTypeString(GA),
+ Category);
+ }
+
+ /// Returns whether this module is listed in the given category.
+ bool isIn(const Module &M, StringRef Category) const {
+ return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category);
+ }
+};
+
+/// TransformedFunction is used to express the result of transforming one
+/// function type into another. This struct is immutable. It holds metadata
+/// useful for updating calls of the old function to the new type.
+struct TransformedFunction {
+ TransformedFunction(FunctionType* OriginalType,
+ FunctionType* TransformedType,
+ std::vector<unsigned> ArgumentIndexMapping)
+ : OriginalType(OriginalType),
+ TransformedType(TransformedType),
+ ArgumentIndexMapping(ArgumentIndexMapping) {}
+
+ // Disallow copies.
+ TransformedFunction(const TransformedFunction&) = delete;
+ TransformedFunction& operator=(const TransformedFunction&) = delete;
+
+ // Allow moves.
+ TransformedFunction(TransformedFunction&&) = default;
+ TransformedFunction& operator=(TransformedFunction&&) = default;
+
+ /// Type of the function before the transformation.
+ FunctionType *OriginalType;
+
+ /// Type of the function after the transformation.
+ FunctionType *TransformedType;
+
+ /// Transforming a function may change the position of arguments. This
+ /// member records the mapping from each argument's old position to its new
+ /// position. Argument positions are zero-indexed. If the transformation
+ /// from F to F' made the first argument of F into the third argument of F',
+ /// then ArgumentIndexMapping[0] will equal 2.
+ std::vector<unsigned> ArgumentIndexMapping;
+};
+
+/// Given function attributes from a call site for the original function,
+/// return function attributes appropriate for a call to the transformed
+/// function.
+AttributeList TransformFunctionAttributes(
+ const TransformedFunction& TransformedFunction,
+ LLVMContext& Ctx, AttributeList CallSiteAttrs) {
+
+ // Construct a vector of AttributeSet for each function argument.
+ std::vector<llvm::AttributeSet> ArgumentAttributes(
+ TransformedFunction.TransformedType->getNumParams());
+
+ // Copy attributes from the parameter of the original function to the
+ // transformed version. 'ArgumentIndexMapping' holds the mapping from
+ // old argument position to new.
+ for (unsigned i=0, ie = TransformedFunction.ArgumentIndexMapping.size();
+ i < ie; ++i) {
+ unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[i];
+ ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttributes(i);
+ }
+
+ // Copy annotations on varargs arguments.
+ for (unsigned i = TransformedFunction.OriginalType->getNumParams(),
+ ie = CallSiteAttrs.getNumAttrSets(); i<ie; ++i) {
+ ArgumentAttributes.push_back(CallSiteAttrs.getParamAttributes(i));
+ }
+
+ return AttributeList::get(
+ Ctx,
+ CallSiteAttrs.getFnAttributes(),
+ CallSiteAttrs.getRetAttributes(),
+ llvm::makeArrayRef(ArgumentAttributes));
+}
+
+class DataFlowSanitizer : public ModulePass {
+ friend struct DFSanFunction;
+ friend class DFSanVisitor;
+
+ enum {
+ ShadowWidth = 16
+ };
+
+ /// Which ABI should be used for instrumented functions?
+ enum InstrumentedABI {
+ /// Argument and return value labels are passed through additional
+ /// arguments and by modifying the return type.
+ IA_Args,
+
+ /// Argument and return value labels are passed through TLS variables
+ /// __dfsan_arg_tls and __dfsan_retval_tls.
+ IA_TLS
+ };
+
+ /// How should calls to uninstrumented functions be handled?
+ enum WrapperKind {
+ /// This function is present in an uninstrumented form but we don't know
+ /// how it should be handled. Print a warning and call the function anyway.
+ /// Don't label the return value.
+ WK_Warning,
+
+ /// This function does not write to (user-accessible) memory, and its return
+ /// value is unlabelled.
+ WK_Discard,
+
+ /// This function does not write to (user-accessible) memory, and the label
+ /// of its return value is the union of the label of its arguments.
+ WK_Functional,
+
+ /// Instead of calling the function, a custom wrapper __dfsw_F is called,
+ /// where F is the name of the function. This function may wrap the
+ /// original function or provide its own implementation. This is similar to
+ /// the IA_Args ABI, except that IA_Args uses a struct return type to
+ /// pass the return value shadow in a register, while WK_Custom uses an
+ /// extra pointer argument to return the shadow. This allows the wrapped
+ /// form of the function type to be expressed in C.
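+    ///
+    /// As an illustration (see the DataFlowSanitizer design document for the
+    /// authoritative wrapper ABI): for a C function "int f(int x)", the custom
+    /// wrapper is expected to look roughly like
+    ///   int __dfsw_f(int x, dfsan_label x_label, dfsan_label *ret_label);
+    /// with one label per argument followed by a pointer through which the
+    /// return value's label is written.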
+ WK_Custom
+ };
+
+ Module *Mod;
+ LLVMContext *Ctx;
+ IntegerType *ShadowTy;
+ PointerType *ShadowPtrTy;
+ IntegerType *IntptrTy;
+ ConstantInt *ZeroShadow;
+ ConstantInt *ShadowPtrMask;
+ ConstantInt *ShadowPtrMul;
+ Constant *ArgTLS;
+ Constant *RetvalTLS;
+ void *(*GetArgTLSPtr)();
+ void *(*GetRetvalTLSPtr)();
+ FunctionType *GetArgTLSTy;
+ FunctionType *GetRetvalTLSTy;
+ Constant *GetArgTLS;
+ Constant *GetRetvalTLS;
+ Constant *ExternalShadowMask;
+ FunctionType *DFSanUnionFnTy;
+ FunctionType *DFSanUnionLoadFnTy;
+ FunctionType *DFSanUnimplementedFnTy;
+ FunctionType *DFSanSetLabelFnTy;
+ FunctionType *DFSanNonzeroLabelFnTy;
+ FunctionType *DFSanVarargWrapperFnTy;
+ FunctionCallee DFSanUnionFn;
+ FunctionCallee DFSanCheckedUnionFn;
+ FunctionCallee DFSanUnionLoadFn;
+ FunctionCallee DFSanUnimplementedFn;
+ FunctionCallee DFSanSetLabelFn;
+ FunctionCallee DFSanNonzeroLabelFn;
+ FunctionCallee DFSanVarargWrapperFn;
+ MDNode *ColdCallWeights;
+ DFSanABIList ABIList;
+ DenseMap<Value *, Function *> UnwrappedFnMap;
+ AttrBuilder ReadOnlyNoneAttrs;
+ bool DFSanRuntimeShadowMask = false;
+
+ Value *getShadowAddress(Value *Addr, Instruction *Pos);
+ bool isInstrumented(const Function *F);
+ bool isInstrumented(const GlobalAlias *GA);
+ FunctionType *getArgsFunctionType(FunctionType *T);
+ FunctionType *getTrampolineFunctionType(FunctionType *T);
+ TransformedFunction getCustomFunctionType(FunctionType *T);
+ InstrumentedABI getInstrumentedABI();
+ WrapperKind getWrapperKind(Function *F);
+ void addGlobalNamePrefix(GlobalValue *GV);
+ Function *buildWrapperFunction(Function *F, StringRef NewFName,
+ GlobalValue::LinkageTypes NewFLink,
+ FunctionType *NewFT);
+ Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName);
+
+public:
+ static char ID;
+
+ DataFlowSanitizer(
+ const std::vector<std::string> &ABIListFiles = std::vector<std::string>(),
+ void *(*getArgTLS)() = nullptr, void *(*getRetValTLS)() = nullptr);
+
+ bool doInitialization(Module &M) override;
+ bool runOnModule(Module &M) override;
+};
+
+struct DFSanFunction {
+ DataFlowSanitizer &DFS;
+ Function *F;
+ DominatorTree DT;
+ DataFlowSanitizer::InstrumentedABI IA;
+ bool IsNativeABI;
+ Value *ArgTLSPtr = nullptr;
+ Value *RetvalTLSPtr = nullptr;
+ AllocaInst *LabelReturnAlloca = nullptr;
+ DenseMap<Value *, Value *> ValShadowMap;
+ DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
+ std::vector<std::pair<PHINode *, PHINode *>> PHIFixups;
+ DenseSet<Instruction *> SkipInsts;
+ std::vector<Value *> NonZeroChecks;
+ bool AvoidNewBlocks;
+
+ struct CachedCombinedShadow {
+ BasicBlock *Block;
+ Value *Shadow;
+ };
+ DenseMap<std::pair<Value *, Value *>, CachedCombinedShadow>
+ CachedCombinedShadows;
+ DenseMap<Value *, std::set<Value *>> ShadowElements;
+
+ DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI)
+ : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI) {
+ DT.recalculate(*F);
+ // FIXME: Need to track down the register allocator issue which causes poor
+ // performance in pathological cases with large numbers of basic blocks.
+ AvoidNewBlocks = F->size() > 1000;
+ }
+
+ Value *getArgTLSPtr();
+ Value *getArgTLS(unsigned Index, Instruction *Pos);
+ Value *getRetvalTLS();
+ Value *getShadow(Value *V);
+ void setShadow(Instruction *I, Value *Shadow);
+ Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
+ Value *combineOperandShadows(Instruction *Inst);
+ Value *loadShadow(Value *ShadowAddr, uint64_t Size, uint64_t Align,
+ Instruction *Pos);
+ void storeShadow(Value *Addr, uint64_t Size, uint64_t Align, Value *Shadow,
+ Instruction *Pos);
+};
+
+class DFSanVisitor : public InstVisitor<DFSanVisitor> {
+public:
+ DFSanFunction &DFSF;
+
+ DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}
+
+ const DataLayout &getDataLayout() const {
+ return DFSF.F->getParent()->getDataLayout();
+ }
+
+ void visitOperandShadowInst(Instruction &I);
+ void visitUnaryOperator(UnaryOperator &UO);
+ void visitBinaryOperator(BinaryOperator &BO);
+ void visitCastInst(CastInst &CI);
+ void visitCmpInst(CmpInst &CI);
+ void visitGetElementPtrInst(GetElementPtrInst &GEPI);
+ void visitLoadInst(LoadInst &LI);
+ void visitStoreInst(StoreInst &SI);
+ void visitReturnInst(ReturnInst &RI);
+ void visitCallSite(CallSite CS);
+ void visitPHINode(PHINode &PN);
+ void visitExtractElementInst(ExtractElementInst &I);
+ void visitInsertElementInst(InsertElementInst &I);
+ void visitShuffleVectorInst(ShuffleVectorInst &I);
+ void visitExtractValueInst(ExtractValueInst &I);
+ void visitInsertValueInst(InsertValueInst &I);
+ void visitAllocaInst(AllocaInst &I);
+ void visitSelectInst(SelectInst &I);
+ void visitMemSetInst(MemSetInst &I);
+ void visitMemTransferInst(MemTransferInst &I);
+};
+
+} // end anonymous namespace
+
+char DataFlowSanitizer::ID;
+
+INITIALIZE_PASS(DataFlowSanitizer, "dfsan",
+ "DataFlowSanitizer: dynamic data flow analysis.", false, false)
+
+ModulePass *
+llvm::createDataFlowSanitizerPass(const std::vector<std::string> &ABIListFiles,
+ void *(*getArgTLS)(),
+ void *(*getRetValTLS)()) {
+ return new DataFlowSanitizer(ABIListFiles, getArgTLS, getRetValTLS);
+}
+
+DataFlowSanitizer::DataFlowSanitizer(
+ const std::vector<std::string> &ABIListFiles, void *(*getArgTLS)(),
+ void *(*getRetValTLS)())
+ : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS) {
+ std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
+ AllABIListFiles.insert(AllABIListFiles.end(), ClABIListFiles.begin(),
+ ClABIListFiles.end());
+ ABIList.set(SpecialCaseList::createOrDie(AllABIListFiles));
+}
+
+FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {
+ SmallVector<Type *, 4> ArgTypes(T->param_begin(), T->param_end());
+ ArgTypes.append(T->getNumParams(), ShadowTy);
+ if (T->isVarArg())
+ ArgTypes.push_back(ShadowPtrTy);
+ Type *RetType = T->getReturnType();
+ if (!RetType->isVoidTy())
+ RetType = StructType::get(RetType, ShadowTy);
+ return FunctionType::get(RetType, ArgTypes, T->isVarArg());
+}
+
+FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
+ assert(!T->isVarArg());
+ SmallVector<Type *, 4> ArgTypes;
+ ArgTypes.push_back(T->getPointerTo());
+ ArgTypes.append(T->param_begin(), T->param_end());
+ ArgTypes.append(T->getNumParams(), ShadowTy);
+ Type *RetType = T->getReturnType();
+ if (!RetType->isVoidTy())
+ ArgTypes.push_back(ShadowPtrTy);
+ return FunctionType::get(T->getReturnType(), ArgTypes, false);
+}
+
+TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
+ SmallVector<Type *, 4> ArgTypes;
+
+ // Some parameters of the custom function being constructed are
+ // parameters of T. Record the mapping from parameters of T to
+ // parameters of the custom function, so that parameter attributes
+ // at call sites can be updated.
+ std::vector<unsigned> ArgumentIndexMapping;
+ for (unsigned i = 0, ie = T->getNumParams(); i != ie; ++i) {
+ Type* param_type = T->getParamType(i);
+ FunctionType *FT;
+ if (isa<PointerType>(param_type) && (FT = dyn_cast<FunctionType>(
+ cast<PointerType>(param_type)->getElementType()))) {
+ ArgumentIndexMapping.push_back(ArgTypes.size());
+ ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo());
+ ArgTypes.push_back(Type::getInt8PtrTy(*Ctx));
+ } else {
+ ArgumentIndexMapping.push_back(ArgTypes.size());
+ ArgTypes.push_back(param_type);
+ }
+ }
+ for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
+ ArgTypes.push_back(ShadowTy);
+ if (T->isVarArg())
+ ArgTypes.push_back(ShadowPtrTy);
+ Type *RetType = T->getReturnType();
+ if (!RetType->isVoidTy())
+ ArgTypes.push_back(ShadowPtrTy);
+ return TransformedFunction(
+ T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()),
+ ArgumentIndexMapping);
+}
+
+bool DataFlowSanitizer::doInitialization(Module &M) {
+ Triple TargetTriple(M.getTargetTriple());
+ bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
+ bool IsMIPS64 = TargetTriple.isMIPS64();
+ bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64 ||
+ TargetTriple.getArch() == Triple::aarch64_be;
+
+ const DataLayout &DL = M.getDataLayout();
+
+ Mod = &M;
+ Ctx = &M.getContext();
+ ShadowTy = IntegerType::get(*Ctx, ShadowWidth);
+ ShadowPtrTy = PointerType::getUnqual(ShadowTy);
+ IntptrTy = DL.getIntPtrType(*Ctx);
+ ZeroShadow = ConstantInt::getSigned(ShadowTy, 0);
+ ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidth / 8);
+ if (IsX86_64)
+ ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL);
+ else if (IsMIPS64)
+ ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0xF000000000LL);
+ // AArch64 supports multiple VMAs and the shadow mask is set at runtime.
+ else if (IsAArch64)
+ DFSanRuntimeShadowMask = true;
+ else
+ report_fatal_error("unsupported triple");
+
+ Type *DFSanUnionArgs[2] = { ShadowTy, ShadowTy };
+ DFSanUnionFnTy =
+ FunctionType::get(ShadowTy, DFSanUnionArgs, /*isVarArg=*/ false);
+ Type *DFSanUnionLoadArgs[2] = { ShadowPtrTy, IntptrTy };
+ DFSanUnionLoadFnTy =
+ FunctionType::get(ShadowTy, DFSanUnionLoadArgs, /*isVarArg=*/ false);
+ DFSanUnimplementedFnTy = FunctionType::get(
+ Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
+ Type *DFSanSetLabelArgs[3] = { ShadowTy, Type::getInt8PtrTy(*Ctx), IntptrTy };
+ DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
+ DFSanSetLabelArgs, /*isVarArg=*/false);
+ DFSanNonzeroLabelFnTy = FunctionType::get(
+ Type::getVoidTy(*Ctx), None, /*isVarArg=*/false);
+ DFSanVarargWrapperFnTy = FunctionType::get(
+ Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
+
+ if (GetArgTLSPtr) {
+ Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
+ ArgTLS = nullptr;
+ GetArgTLSTy = FunctionType::get(PointerType::getUnqual(ArgTLSTy), false);
+ GetArgTLS = ConstantExpr::getIntToPtr(
+ ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)),
+ PointerType::getUnqual(GetArgTLSTy));
+ }
+ if (GetRetvalTLSPtr) {
+ RetvalTLS = nullptr;
+ GetRetvalTLSTy = FunctionType::get(PointerType::getUnqual(ShadowTy), false);
+ GetRetvalTLS = ConstantExpr::getIntToPtr(
+ ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)),
+ PointerType::getUnqual(GetRetvalTLSTy));
+ }
+
+ ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
+ return true;
+}
+
+bool DataFlowSanitizer::isInstrumented(const Function *F) {
+ return !ABIList.isIn(*F, "uninstrumented");
+}
+
+bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
+ return !ABIList.isIn(*GA, "uninstrumented");
+}
+
+DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() {
+ return ClArgsABI ? IA_Args : IA_TLS;
+}
+
+DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
+ if (ABIList.isIn(*F, "functional"))
+ return WK_Functional;
+ if (ABIList.isIn(*F, "discard"))
+ return WK_Discard;
+ if (ABIList.isIn(*F, "custom"))
+ return WK_Custom;
+
+ return WK_Warning;
+}
+
+void DataFlowSanitizer::addGlobalNamePrefix(GlobalValue *GV) {
+ std::string GVName = GV->getName(), Prefix = "dfs$";
+ GV->setName(Prefix + GVName);
+
+ // Try to change the name of the function in module inline asm. We only do
+ // this for specific asm directives, currently only ".symver", to try to avoid
+ // corrupting asm which happens to contain the symbol name as a substring.
+ // Note that the substitution for .symver assumes that the versioned symbol
+ // also has an instrumented name.
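+  // For example, a module-level ".symver f,f@v1" directive is rewritten to
+  // ".symver dfs$f,dfs$f@v1".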
+ std::string Asm = GV->getParent()->getModuleInlineAsm();
+ std::string SearchStr = ".symver " + GVName + ",";
+ size_t Pos = Asm.find(SearchStr);
+ if (Pos != std::string::npos) {
+ Asm.replace(Pos, SearchStr.size(),
+ ".symver " + Prefix + GVName + "," + Prefix);
+ GV->getParent()->setModuleInlineAsm(Asm);
+ }
+}
+
+Function *
+DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
+ GlobalValue::LinkageTypes NewFLink,
+ FunctionType *NewFT) {
+ FunctionType *FT = F->getFunctionType();
+ Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
+ NewFName, F->getParent());
+ NewF->copyAttributesFrom(F);
+ NewF->removeAttributes(
+ AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
+
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
+ if (F->isVarArg()) {
+ NewF->removeAttributes(AttributeList::FunctionIndex,
+ AttrBuilder().addAttribute("split-stack"));
+ CallInst::Create(DFSanVarargWrapperFn,
+ IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
+ BB);
+ new UnreachableInst(*Ctx, BB);
+ } else {
+ std::vector<Value *> Args;
+ unsigned n = FT->getNumParams();
+ for (Function::arg_iterator ai = NewF->arg_begin(); n != 0; ++ai, --n)
+ Args.push_back(&*ai);
+ CallInst *CI = CallInst::Create(F, Args, "", BB);
+ if (FT->getReturnType()->isVoidTy())
+ ReturnInst::Create(*Ctx, BB);
+ else
+ ReturnInst::Create(*Ctx, CI, BB);
+ }
+
+ return NewF;
+}
+
+Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
+ StringRef FName) {
+ FunctionType *FTT = getTrampolineFunctionType(FT);
+ FunctionCallee C = Mod->getOrInsertFunction(FName, FTT);
+ Function *F = dyn_cast<Function>(C.getCallee());
+ if (F && F->isDeclaration()) {
+ F->setLinkage(GlobalValue::LinkOnceODRLinkage);
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
+ std::vector<Value *> Args;
+ Function::arg_iterator AI = F->arg_begin(); ++AI;
+ for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N)
+ Args.push_back(&*AI);
+ CallInst *CI = CallInst::Create(FT, &*F->arg_begin(), Args, "", BB);
+ ReturnInst *RI;
+ if (FT->getReturnType()->isVoidTy())
+ RI = ReturnInst::Create(*Ctx, BB);
+ else
+ RI = ReturnInst::Create(*Ctx, CI, BB);
+
+ DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true);
+ Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI;
+ for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N)
+ DFSF.ValShadowMap[&*ValAI] = &*ShadowAI;
+ DFSanVisitor(DFSF).visitCallInst(*CI);
+ if (!FT->getReturnType()->isVoidTy())
+ new StoreInst(DFSF.getShadow(RI->getReturnValue()),
+ &*std::prev(F->arg_end()), RI);
+ }
+
+ return cast<Constant>(C.getCallee());
+}
+
+bool DataFlowSanitizer::runOnModule(Module &M) {
+ if (ABIList.isIn(M, "skip"))
+ return false;
+
+ if (!GetArgTLSPtr) {
+ Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
+ ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy);
+ if (GlobalVariable *G = dyn_cast<GlobalVariable>(ArgTLS))
+ G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
+ }
+ if (!GetRetvalTLSPtr) {
+ RetvalTLS = Mod->getOrInsertGlobal("__dfsan_retval_tls", ShadowTy);
+ if (GlobalVariable *G = dyn_cast<GlobalVariable>(RetvalTLS))
+ G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
+ }
+
+ ExternalShadowMask =
+ Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy);
+
+ {
+ AttributeList AL;
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::ReadNone);
+ AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
+ Attribute::ZExt);
+ AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
+ AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
+ DFSanUnionFn =
+ Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy, AL);
+ }
+
+ {
+ AttributeList AL;
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::ReadNone);
+ AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
+ Attribute::ZExt);
+ AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
+ AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
+ DFSanCheckedUnionFn =
+ Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy, AL);
+ }
+ {
+ AttributeList AL;
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
+ AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::ReadOnly);
+ AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
+ Attribute::ZExt);
+ DFSanUnionLoadFn =
+ Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
+ }
+ DFSanUnimplementedFn =
+ Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
+ {
+ AttributeList AL;
+ AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
+ DFSanSetLabelFn =
+ Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);
+ }
+ DFSanNonzeroLabelFn =
+ Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
+ DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper",
+ DFSanVarargWrapperFnTy);
+
+ std::vector<Function *> FnsToInstrument;
+ SmallPtrSet<Function *, 2> FnsWithNativeABI;
+ for (Function &i : M) {
+ if (!i.isIntrinsic() &&
+ &i != DFSanUnionFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanCheckedUnionFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanUnionLoadFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanUnimplementedFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanSetLabelFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanNonzeroLabelFn.getCallee()->stripPointerCasts() &&
+ &i != DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
+ FnsToInstrument.push_back(&i);
+ }
+
+ // Give function aliases prefixes when necessary, and build wrappers where the
+ // instrumentedness is inconsistent.
+ for (Module::alias_iterator i = M.alias_begin(), e = M.alias_end(); i != e;) {
+ GlobalAlias *GA = &*i;
+ ++i;
+ // Don't stop on weak. We assume people aren't playing games with the
+ // instrumentedness of overridden weak aliases.
+ if (auto F = dyn_cast<Function>(GA->getBaseObject())) {
+ bool GAInst = isInstrumented(GA), FInst = isInstrumented(F);
+ if (GAInst && FInst) {
+ addGlobalNamePrefix(GA);
+ } else if (GAInst != FInst) {
+ // Non-instrumented alias of an instrumented function, or vice versa.
+ // Replace the alias with a native-ABI wrapper of the aliasee. The pass
+ // below will take care of instrumenting it.
+ Function *NewF =
+ buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType());
+ GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA->getType()));
+ NewF->takeName(GA);
+ GA->eraseFromParent();
+ FnsToInstrument.push_back(NewF);
+ }
+ }
+ }
+
+ ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly)
+ .addAttribute(Attribute::ReadNone);
+
+ // First, change the ABI of every function in the module. ABI-listed
+ // functions keep their original ABI and get a wrapper function.
+ for (std::vector<Function *>::iterator i = FnsToInstrument.begin(),
+ e = FnsToInstrument.end();
+ i != e; ++i) {
+ Function &F = **i;
+ FunctionType *FT = F.getFunctionType();
+
+ bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
+ FT->getReturnType()->isVoidTy());
+
+ if (isInstrumented(&F)) {
+ // Instrumented functions get a 'dfs$' prefix. This allows us to more
+ // easily identify cases of mismatching ABIs.
+ if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) {
+ FunctionType *NewFT = getArgsFunctionType(FT);
+ Function *NewF = Function::Create(NewFT, F.getLinkage(),
+ F.getAddressSpace(), "", &M);
+ NewF->copyAttributesFrom(&F);
+ NewF->removeAttributes(
+ AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
+ for (Function::arg_iterator FArg = F.arg_begin(),
+ NewFArg = NewF->arg_begin(),
+ FArgEnd = F.arg_end();
+ FArg != FArgEnd; ++FArg, ++NewFArg) {
+ FArg->replaceAllUsesWith(&*NewFArg);
+ }
+ NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList());
+
+ for (Function::user_iterator UI = F.user_begin(), UE = F.user_end();
+ UI != UE;) {
+ BlockAddress *BA = dyn_cast<BlockAddress>(*UI);
+ ++UI;
+ if (BA) {
+ BA->replaceAllUsesWith(
+ BlockAddress::get(NewF, BA->getBasicBlock()));
+ delete BA;
+ }
+ }
+ F.replaceAllUsesWith(
+ ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)));
+ NewF->takeName(&F);
+ F.eraseFromParent();
+ *i = NewF;
+ addGlobalNamePrefix(NewF);
+ } else {
+ addGlobalNamePrefix(&F);
+ }
+ } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
+ // Build a wrapper function for F. The wrapper simply calls F, and is
+ // added to FnsToInstrument so that any instrumentation according to its
+ // WrapperKind is done in the second pass below.
+ FunctionType *NewFT = getInstrumentedABI() == IA_Args
+ ? getArgsFunctionType(FT)
+ : FT;
+
+ // If the function being wrapped has local linkage, then preserve the
+ // function's linkage in the wrapper function.
+ GlobalValue::LinkageTypes wrapperLinkage =
+ F.hasLocalLinkage()
+ ? F.getLinkage()
+ : GlobalValue::LinkOnceODRLinkage;
+
+ Function *NewF = buildWrapperFunction(
+ &F, std::string("dfsw$") + std::string(F.getName()),
+ wrapperLinkage, NewFT);
+ if (getInstrumentedABI() == IA_TLS)
+ NewF->removeAttributes(AttributeList::FunctionIndex, ReadOnlyNoneAttrs);
+
+ Value *WrappedFnCst =
+ ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
+ F.replaceAllUsesWith(WrappedFnCst);
+
+ UnwrappedFnMap[WrappedFnCst] = &F;
+ *i = NewF;
+
+ if (!F.isDeclaration()) {
+ // This function is probably defining an interposition of an
+ // uninstrumented function and hence needs to keep the original ABI.
+ // But any functions it may call need to use the instrumented ABI, so
+ // we instrument it in a mode which preserves the original ABI.
+ FnsWithNativeABI.insert(&F);
+
+ // This code needs to rebuild the iterators, as they may be invalidated
+ // by the push_back, taking care that the new range does not include
+ // any functions added by this code.
+ size_t N = i - FnsToInstrument.begin(),
+ Count = e - FnsToInstrument.begin();
+ FnsToInstrument.push_back(&F);
+ i = FnsToInstrument.begin() + N;
+ e = FnsToInstrument.begin() + Count;
+ }
+ // Hopefully, nobody will try to indirectly call a vararg
+ // function... yet.
+ } else if (FT->isVarArg()) {
+ UnwrappedFnMap[&F] = &F;
+ *i = nullptr;
+ }
+ }
+
+ for (Function *i : FnsToInstrument) {
+ if (!i || i->isDeclaration())
+ continue;
+
+ removeUnreachableBlocks(*i);
+
+ DFSanFunction DFSF(*this, i, FnsWithNativeABI.count(i));
+
+ // DFSanVisitor may create new basic blocks, which confuses df_iterator.
+ // Build a copy of the list before iterating over it.
+ SmallVector<BasicBlock *, 4> BBList(depth_first(&i->getEntryBlock()));
+
+ for (BasicBlock *i : BBList) {
+ Instruction *Inst = &i->front();
+ while (true) {
+ // DFSanVisitor may split the current basic block, changing the current
+ // instruction's next pointer and moving the next instruction to the
+ // tail block from which we should continue.
+ Instruction *Next = Inst->getNextNode();
+ // DFSanVisitor may delete Inst, so keep track of whether it was a
+ // terminator.
+ bool IsTerminator = Inst->isTerminator();
+ if (!DFSF.SkipInsts.count(Inst))
+ DFSanVisitor(DFSF).visit(Inst);
+ if (IsTerminator)
+ break;
+ Inst = Next;
+ }
+ }
+
+ // We will not necessarily be able to compute the shadow for every phi node
+ // until we have visited every block. Therefore, the code that handles phi
+ // nodes adds them to the PHIFixups list so that they can be properly
+ // handled here.
+ for (std::vector<std::pair<PHINode *, PHINode *>>::iterator
+ i = DFSF.PHIFixups.begin(),
+ e = DFSF.PHIFixups.end();
+ i != e; ++i) {
+ for (unsigned val = 0, n = i->first->getNumIncomingValues(); val != n;
+ ++val) {
+ i->second->setIncomingValue(
+ val, DFSF.getShadow(i->first->getIncomingValue(val)));
+ }
+ }
+
+ // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
+ // places (i.e. instructions in basic blocks we haven't even begun visiting
+ // yet). To make our life easier, do this work in a pass after the main
+ // instrumentation.
+ if (ClDebugNonzeroLabels) {
+ for (Value *V : DFSF.NonZeroChecks) {
+ Instruction *Pos;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ Pos = I->getNextNode();
+ else
+ Pos = &DFSF.F->getEntryBlock().front();
+ while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
+ Pos = Pos->getNextNode();
+ IRBuilder<> IRB(Pos);
+ Value *Ne = IRB.CreateICmpNE(V, DFSF.DFS.ZeroShadow);
+ BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
+ Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
+ IRBuilder<> ThenIRB(BI);
+ ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});
+ }
+ }
+ }
+
+ return false;
+}
+
+Value *DFSanFunction::getArgTLSPtr() {
+ if (ArgTLSPtr)
+ return ArgTLSPtr;
+ if (DFS.ArgTLS)
+ return ArgTLSPtr = DFS.ArgTLS;
+
+ IRBuilder<> IRB(&F->getEntryBlock().front());
+ return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLSTy, DFS.GetArgTLS, {});
+}
+
+Value *DFSanFunction::getRetvalTLS() {
+ if (RetvalTLSPtr)
+ return RetvalTLSPtr;
+ if (DFS.RetvalTLS)
+ return RetvalTLSPtr = DFS.RetvalTLS;
+
+ IRBuilder<> IRB(&F->getEntryBlock().front());
+ return RetvalTLSPtr =
+ IRB.CreateCall(DFS.GetRetvalTLSTy, DFS.GetRetvalTLS, {});
+}
+
+Value *DFSanFunction::getArgTLS(unsigned Idx, Instruction *Pos) {
+ IRBuilder<> IRB(Pos);
+ return IRB.CreateConstGEP2_64(ArrayType::get(DFS.ShadowTy, 64),
+ getArgTLSPtr(), 0, Idx);
+}
+
+Value *DFSanFunction::getShadow(Value *V) {
+ if (!isa<Argument>(V) && !isa<Instruction>(V))
+ return DFS.ZeroShadow;
+ Value *&Shadow = ValShadowMap[V];
+ if (!Shadow) {
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ if (IsNativeABI)
+ return DFS.ZeroShadow;
+ switch (IA) {
+ case DataFlowSanitizer::IA_TLS: {
+ Value *ArgTLSPtr = getArgTLSPtr();
+ Instruction *ArgTLSPos =
+ DFS.ArgTLS ? &*F->getEntryBlock().begin()
+ : cast<Instruction>(ArgTLSPtr)->getNextNode();
+ IRBuilder<> IRB(ArgTLSPos);
+ Shadow =
+ IRB.CreateLoad(DFS.ShadowTy, getArgTLS(A->getArgNo(), ArgTLSPos));
+ break;
+ }
+ case DataFlowSanitizer::IA_Args: {
+ unsigned ArgIdx = A->getArgNo() + F->arg_size() / 2;
+ Function::arg_iterator i = F->arg_begin();
+ while (ArgIdx--)
+ ++i;
+ Shadow = &*i;
+ assert(Shadow->getType() == DFS.ShadowTy);
+ break;
+ }
+ }
+ NonZeroChecks.push_back(Shadow);
+ } else {
+ Shadow = DFS.ZeroShadow;
+ }
+ }
+ return Shadow;
+}
+
+void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
+ assert(!ValShadowMap.count(I));
+ assert(Shadow->getType() == DFS.ShadowTy);
+ ValShadowMap[I] = Shadow;
+}
+
+Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
+ assert(Addr != RetvalTLS && "Reinstrumenting?");
+ IRBuilder<> IRB(Pos);
+ Value *ShadowPtrMaskValue;
+ if (DFSanRuntimeShadowMask)
+ ShadowPtrMaskValue = IRB.CreateLoad(IntptrTy, ExternalShadowMask);
+ else
+ ShadowPtrMaskValue = ShadowPtrMask;
+ return IRB.CreateIntToPtr(
+ IRB.CreateMul(
+ IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy),
+ IRB.CreatePtrToInt(ShadowPtrMaskValue, IntptrTy)),
+ ShadowPtrMul),
+ ShadowPtrTy);
+}
+
+// Generates IR to compute the union of the two given shadows, inserting it
+// before Pos. Returns the computed union Value.
+Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
+ if (V1 == DFS.ZeroShadow)
+ return V2;
+ if (V2 == DFS.ZeroShadow)
+ return V1;
+ if (V1 == V2)
+ return V1;
+
+ auto V1Elems = ShadowElements.find(V1);
+ auto V2Elems = ShadowElements.find(V2);
+ if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) {
+ if (std::includes(V1Elems->second.begin(), V1Elems->second.end(),
+ V2Elems->second.begin(), V2Elems->second.end())) {
+ return V1;
+ } else if (std::includes(V2Elems->second.begin(), V2Elems->second.end(),
+ V1Elems->second.begin(), V1Elems->second.end())) {
+ return V2;
+ }
+ } else if (V1Elems != ShadowElements.end()) {
+ if (V1Elems->second.count(V2))
+ return V1;
+ } else if (V2Elems != ShadowElements.end()) {
+ if (V2Elems->second.count(V1))
+ return V2;
+ }
+
+ auto Key = std::make_pair(V1, V2);
+ if (V1 > V2)
+ std::swap(Key.first, Key.second);
+ CachedCombinedShadow &CCS = CachedCombinedShadows[Key];
+ if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent()))
+ return CCS.Shadow;
+
+ IRBuilder<> IRB(Pos);
+ if (AvoidNewBlocks) {
+ CallInst *Call = IRB.CreateCall(DFS.DFSanCheckedUnionFn, {V1, V2});
+ Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
+ Call->addParamAttr(0, Attribute::ZExt);
+ Call->addParamAttr(1, Attribute::ZExt);
+
+ CCS.Block = Pos->getParent();
+ CCS.Shadow = Call;
+ } else {
+ BasicBlock *Head = Pos->getParent();
+ Value *Ne = IRB.CreateICmpNE(V1, V2);
+ BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
+ Ne, Pos, /*Unreachable=*/false, DFS.ColdCallWeights, &DT));
+ IRBuilder<> ThenIRB(BI);
+ CallInst *Call = ThenIRB.CreateCall(DFS.DFSanUnionFn, {V1, V2});
+ Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
+ Call->addParamAttr(0, Attribute::ZExt);
+ Call->addParamAttr(1, Attribute::ZExt);
+
+ BasicBlock *Tail = BI->getSuccessor(0);
+ PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front());
+ Phi->addIncoming(Call, Call->getParent());
+ Phi->addIncoming(V1, Head);
+
+ CCS.Block = Tail;
+ CCS.Shadow = Phi;
+ }
+
+ std::set<Value *> UnionElems;
+ if (V1Elems != ShadowElements.end()) {
+ UnionElems = V1Elems->second;
+ } else {
+ UnionElems.insert(V1);
+ }
+ if (V2Elems != ShadowElements.end()) {
+ UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end());
+ } else {
+ UnionElems.insert(V2);
+ }
+ ShadowElements[CCS.Shadow] = std::move(UnionElems);
+
+ return CCS.Shadow;
+}
+
+// A convenience function which folds the shadows of each of the operands
+// of the provided instruction Inst, inserting the IR before Inst. Returns
+// the computed union Value.
+Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
+ if (Inst->getNumOperands() == 0)
+ return DFS.ZeroShadow;
+
+ Value *Shadow = getShadow(Inst->getOperand(0));
+ for (unsigned i = 1, n = Inst->getNumOperands(); i != n; ++i) {
+ Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(i)), Inst);
+ }
+ return Shadow;
+}
+
+void DFSanVisitor::visitOperandShadowInst(Instruction &I) {
+ Value *CombinedShadow = DFSF.combineOperandShadows(&I);
+ DFSF.setShadow(&I, CombinedShadow);
+}
+
+// Generates IR to load shadow corresponding to bytes [Addr, Addr+Size), where
+// Addr has alignment Align, and take the union of each of those shadows.
+Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
+ Instruction *Pos) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
+ const auto i = AllocaShadowMap.find(AI);
+ if (i != AllocaShadowMap.end()) {
+ IRBuilder<> IRB(Pos);
+ return IRB.CreateLoad(DFS.ShadowTy, i->second);
+ }
+ }
+
+ uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8;
+ SmallVector<const Value *, 2> Objs;
+ GetUnderlyingObjects(Addr, Objs, Pos->getModule()->getDataLayout());
+ bool AllConstants = true;
+ for (const Value *Obj : Objs) {
+ if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
+ continue;
+ if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
+ continue;
+
+ AllConstants = false;
+ break;
+ }
+ if (AllConstants)
+ return DFS.ZeroShadow;
+
+ Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
+ switch (Size) {
+ case 0:
+ return DFS.ZeroShadow;
+ case 1: {
+ LoadInst *LI = new LoadInst(DFS.ShadowTy, ShadowAddr, "", Pos);
+ LI->setAlignment(MaybeAlign(ShadowAlign));
+ return LI;
+ }
+ case 2: {
+ IRBuilder<> IRB(Pos);
+ Value *ShadowAddr1 = IRB.CreateGEP(DFS.ShadowTy, ShadowAddr,
+ ConstantInt::get(DFS.IntptrTy, 1));
+ return combineShadows(
+ IRB.CreateAlignedLoad(DFS.ShadowTy, ShadowAddr, ShadowAlign),
+ IRB.CreateAlignedLoad(DFS.ShadowTy, ShadowAddr1, ShadowAlign), Pos);
+ }
+ }
+ if (!AvoidNewBlocks && Size % (64 / DFS.ShadowWidth) == 0) {
+ // Fast path for the common case where each byte has identical shadow: load
+ // shadow 64 bits at a time, fall out to a __dfsan_union_load call if any
+ // shadow is non-equal.
+ BasicBlock *FallbackBB = BasicBlock::Create(*DFS.Ctx, "", F);
+ IRBuilder<> FallbackIRB(FallbackBB);
+ CallInst *FallbackCall = FallbackIRB.CreateCall(
+ DFS.DFSanUnionLoadFn,
+ {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
+ FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
+
+ // Compare each of the shadows stored in the loaded 64 bits to each other,
+ // by computing (WideShadow rotl ShadowWidth) == WideShadow.
+ IRBuilder<> IRB(Pos);
+ Value *WideAddr =
+ IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx));
+ Value *WideShadow =
+ IRB.CreateAlignedLoad(IRB.getInt64Ty(), WideAddr, ShadowAlign);
+ Value *TruncShadow = IRB.CreateTrunc(WideShadow, DFS.ShadowTy);
+ Value *ShlShadow = IRB.CreateShl(WideShadow, DFS.ShadowWidth);
+ Value *ShrShadow = IRB.CreateLShr(WideShadow, 64 - DFS.ShadowWidth);
+ Value *RotShadow = IRB.CreateOr(ShlShadow, ShrShadow);
+ Value *ShadowsEq = IRB.CreateICmpEQ(WideShadow, RotShadow);
+
+ BasicBlock *Head = Pos->getParent();
+ BasicBlock *Tail = Head->splitBasicBlock(Pos->getIterator());
+
+ if (DomTreeNode *OldNode = DT.getNode(Head)) {
+ std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
+
+ DomTreeNode *NewNode = DT.addNewBlock(Tail, Head);
+ for (auto Child : Children)
+ DT.changeImmediateDominator(Child, NewNode);
+ }
+
+ // In the following code LastBr will refer to the previous basic block's
+ // conditional branch instruction, whose true successor is fixed up to point
+ // to the next block during the loop below or to the tail after the final
+ // iteration.
+ BranchInst *LastBr = BranchInst::Create(FallbackBB, FallbackBB, ShadowsEq);
+ ReplaceInstWithInst(Head->getTerminator(), LastBr);
+ DT.addNewBlock(FallbackBB, Head);
+
+ for (uint64_t Ofs = 64 / DFS.ShadowWidth; Ofs != Size;
+ Ofs += 64 / DFS.ShadowWidth) {
+ BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F);
+ DT.addNewBlock(NextBB, LastBr->getParent());
+ IRBuilder<> NextIRB(NextBB);
+ WideAddr = NextIRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr,
+ ConstantInt::get(DFS.IntptrTy, 1));
+ Value *NextWideShadow = NextIRB.CreateAlignedLoad(NextIRB.getInt64Ty(),
+ WideAddr, ShadowAlign);
+ ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow);
+ LastBr->setSuccessor(0, NextBB);
+ LastBr = NextIRB.CreateCondBr(ShadowsEq, FallbackBB, FallbackBB);
+ }
+
+ LastBr->setSuccessor(0, Tail);
+ FallbackIRB.CreateBr(Tail);
+ PHINode *Shadow = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front());
+ Shadow->addIncoming(FallbackCall, FallbackBB);
+ Shadow->addIncoming(TruncShadow, LastBr->getParent());
+ return Shadow;
+ }
+
+ IRBuilder<> IRB(Pos);
+ CallInst *FallbackCall = IRB.CreateCall(
+ DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
+ FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
+ return FallbackCall;
+}
+
+void DFSanVisitor::visitLoadInst(LoadInst &LI) {
+ auto &DL = LI.getModule()->getDataLayout();
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ if (Size == 0) {
+ DFSF.setShadow(&LI, DFSF.DFS.ZeroShadow);
+ return;
+ }
+
+ uint64_t Align;
+ if (ClPreserveAlignment) {
+ Align = LI.getAlignment();
+ if (Align == 0)
+ Align = DL.getABITypeAlignment(LI.getType());
+ } else {
+ Align = 1;
+ }
+ IRBuilder<> IRB(&LI);
+ Value *Shadow = DFSF.loadShadow(LI.getPointerOperand(), Size, Align, &LI);
+ if (ClCombinePointerLabelsOnLoad) {
+ Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
+ Shadow = DFSF.combineShadows(Shadow, PtrShadow, &LI);
+ }
+ if (Shadow != DFSF.DFS.ZeroShadow)
+ DFSF.NonZeroChecks.push_back(Shadow);
+
+ DFSF.setShadow(&LI, Shadow);
+}
+
+void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,
+ Value *Shadow, Instruction *Pos) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
+ const auto i = AllocaShadowMap.find(AI);
+ if (i != AllocaShadowMap.end()) {
+ IRBuilder<> IRB(Pos);
+ IRB.CreateStore(Shadow, i->second);
+ return;
+ }
+ }
+
+ uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8;
+ IRBuilder<> IRB(Pos);
+ Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
+ if (Shadow == DFS.ZeroShadow) {
+ IntegerType *ShadowTy = IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidth);
+ Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
+ Value *ExtShadowAddr =
+ IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy));
+ IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign);
+ return;
+ }
+
+ const unsigned ShadowVecSize = 128 / DFS.ShadowWidth;
+ uint64_t Offset = 0;
+ if (Size >= ShadowVecSize) {
+ VectorType *ShadowVecTy = VectorType::get(DFS.ShadowTy, ShadowVecSize);
+ Value *ShadowVec = UndefValue::get(ShadowVecTy);
+ for (unsigned i = 0; i != ShadowVecSize; ++i) {
+ ShadowVec = IRB.CreateInsertElement(
+ ShadowVec, Shadow, ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), i));
+ }
+ Value *ShadowVecAddr =
+ IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
+ do {
+ Value *CurShadowVecAddr =
+ IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset);
+ IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
+ Size -= ShadowVecSize;
+ ++Offset;
+ } while (Size >= ShadowVecSize);
+ Offset *= ShadowVecSize;
+ }
+ while (Size > 0) {
+ Value *CurShadowAddr =
+ IRB.CreateConstGEP1_32(DFS.ShadowTy, ShadowAddr, Offset);
+ IRB.CreateAlignedStore(Shadow, CurShadowAddr, ShadowAlign);
+ --Size;
+ ++Offset;
+ }
+}
+
+void DFSanVisitor::visitStoreInst(StoreInst &SI) {
+ auto &DL = SI.getModule()->getDataLayout();
+ uint64_t Size = DL.getTypeStoreSize(SI.getValueOperand()->getType());
+ if (Size == 0)
+ return;
+
+ uint64_t Align;
+ if (ClPreserveAlignment) {
+ Align = SI.getAlignment();
+ if (Align == 0)
+ Align = DL.getABITypeAlignment(SI.getValueOperand()->getType());
+ } else {
+ Align = 1;
+ }
+
+ Value* Shadow = DFSF.getShadow(SI.getValueOperand());
+ if (ClCombinePointerLabelsOnStore) {
+ Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
+ Shadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
+ }
+ DFSF.storeShadow(SI.getPointerOperand(), Size, Align, Shadow, &SI);
+}
+
+void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
+ visitOperandShadowInst(UO);
+}
+
+void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
+ visitOperandShadowInst(BO);
+}
+
+void DFSanVisitor::visitCastInst(CastInst &CI) { visitOperandShadowInst(CI); }
+
+void DFSanVisitor::visitCmpInst(CmpInst &CI) { visitOperandShadowInst(CI); }
+
+void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+ visitOperandShadowInst(GEPI);
+}
+
+void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
+ visitOperandShadowInst(I);
+}
+
+void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
+ bool AllLoadsStores = true;
+ for (User *U : I.users()) {
+ if (isa<LoadInst>(U))
+ continue;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getPointerOperand() == &I)
+ continue;
+ }
+
+ AllLoadsStores = false;
+ break;
+ }
+ if (AllLoadsStores) {
+ IRBuilder<> IRB(&I);
+ DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.ShadowTy);
+ }
+ DFSF.setShadow(&I, DFSF.DFS.ZeroShadow);
+}
+
+void DFSanVisitor::visitSelectInst(SelectInst &I) {
+ Value *CondShadow = DFSF.getShadow(I.getCondition());
+ Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
+ Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
+
+ if (isa<VectorType>(I.getCondition()->getType())) {
+ DFSF.setShadow(
+ &I,
+ DFSF.combineShadows(
+ CondShadow, DFSF.combineShadows(TrueShadow, FalseShadow, &I), &I));
+ } else {
+ Value *ShadowSel;
+ if (TrueShadow == FalseShadow) {
+ ShadowSel = TrueShadow;
+ } else {
+ ShadowSel =
+ SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I);
+ }
+ DFSF.setShadow(&I, DFSF.combineShadows(CondShadow, ShadowSel, &I));
+ }
+}
+
+void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *ValShadow = DFSF.getShadow(I.getValue());
+ IRB.CreateCall(DFSF.DFS.DFSanSetLabelFn,
+ {ValShadow, IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(
+ *DFSF.DFS.Ctx)),
+ IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
+}
+
+void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *DestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I);
+ Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I);
+ Value *LenShadow = IRB.CreateMul(
+ I.getLength(),
+ ConstantInt::get(I.getLength()->getType(), DFSF.DFS.ShadowWidth / 8));
+ Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
+ DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr);
+ SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
+ auto *MTI = cast<MemTransferInst>(
+ IRB.CreateCall(I.getFunctionType(), I.getCalledValue(),
+ {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
+ if (ClPreserveAlignment) {
+ MTI->setDestAlignment(I.getDestAlignment() * (DFSF.DFS.ShadowWidth / 8));
+ MTI->setSourceAlignment(I.getSourceAlignment() * (DFSF.DFS.ShadowWidth / 8));
+ } else {
+ MTI->setDestAlignment(DFSF.DFS.ShadowWidth / 8);
+ MTI->setSourceAlignment(DFSF.DFS.ShadowWidth / 8);
+ }
+}
+
+void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
+ if (!DFSF.IsNativeABI && RI.getReturnValue()) {
+ switch (DFSF.IA) {
+ case DataFlowSanitizer::IA_TLS: {
+ Value *S = DFSF.getShadow(RI.getReturnValue());
+ IRBuilder<> IRB(&RI);
+ IRB.CreateStore(S, DFSF.getRetvalTLS());
+ break;
+ }
+ case DataFlowSanitizer::IA_Args: {
+ IRBuilder<> IRB(&RI);
+ Type *RT = DFSF.F->getFunctionType()->getReturnType();
+ Value *InsVal =
+ IRB.CreateInsertValue(UndefValue::get(RT), RI.getReturnValue(), 0);
+ Value *InsShadow =
+ IRB.CreateInsertValue(InsVal, DFSF.getShadow(RI.getReturnValue()), 1);
+ RI.setOperand(0, InsShadow);
+ break;
+ }
+ }
+ }
+}
+
+void DFSanVisitor::visitCallSite(CallSite CS) {
+ Function *F = CS.getCalledFunction();
+ if ((F && F->isIntrinsic()) || isa<InlineAsm>(CS.getCalledValue())) {
+ visitOperandShadowInst(*CS.getInstruction());
+ return;
+ }
+
+ // Calls to this function are synthesized in wrappers, and we shouldn't
+ // instrument them.
+ if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
+ return;
+
+ IRBuilder<> IRB(CS.getInstruction());
+
+ DenseMap<Value *, Function *>::iterator i =
+ DFSF.DFS.UnwrappedFnMap.find(CS.getCalledValue());
+ if (i != DFSF.DFS.UnwrappedFnMap.end()) {
+ Function *F = i->second;
+ switch (DFSF.DFS.getWrapperKind(F)) {
+ case DataFlowSanitizer::WK_Warning:
+ CS.setCalledFunction(F);
+ IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
+ IRB.CreateGlobalStringPtr(F->getName()));
+ DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow);
+ return;
+ case DataFlowSanitizer::WK_Discard:
+ CS.setCalledFunction(F);
+ DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow);
+ return;
+ case DataFlowSanitizer::WK_Functional:
+ CS.setCalledFunction(F);
+ visitOperandShadowInst(*CS.getInstruction());
+ return;
+ case DataFlowSanitizer::WK_Custom:
+ // Don't try to handle invokes of custom functions, it's too complicated.
+ // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
+ // wrapper.
+ if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
+ FunctionType *FT = F->getFunctionType();
+ TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
+ std::string CustomFName = "__dfsw_";
+ CustomFName += F->getName();
+ FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
+ CustomFName, CustomFn.TransformedType);
+ if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
+ CustomFn->copyAttributesFrom(F);
+
+ // Custom functions returning non-void will write to the return label.
+ if (!FT->getReturnType()->isVoidTy()) {
+ CustomFn->removeAttributes(AttributeList::FunctionIndex,
+ DFSF.DFS.ReadOnlyNoneAttrs);
+ }
+ }
+
+ std::vector<Value *> Args;
+
+ CallSite::arg_iterator i = CS.arg_begin();
+ for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) {
+ Type *T = (*i)->getType();
+ FunctionType *ParamFT;
+ if (isa<PointerType>(T) &&
+ (ParamFT = dyn_cast<FunctionType>(
+ cast<PointerType>(T)->getElementType()))) {
+ std::string TName = "dfst";
+ TName += utostr(FT->getNumParams() - n);
+ TName += "$";
+ TName += F->getName();
+ Constant *T = DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName);
+ Args.push_back(T);
+ Args.push_back(
+ IRB.CreateBitCast(*i, Type::getInt8PtrTy(*DFSF.DFS.Ctx)));
+ } else {
+ Args.push_back(*i);
+ }
+ }
+
+ i = CS.arg_begin();
+ const unsigned ShadowArgStart = Args.size();
+ for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
+ Args.push_back(DFSF.getShadow(*i));
+
+ if (FT->isVarArg()) {
+ auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy,
+ CS.arg_size() - FT->getNumParams());
+ auto *LabelVAAlloca = new AllocaInst(
+ LabelVATy, getDataLayout().getAllocaAddrSpace(),
+ "labelva", &DFSF.F->getEntryBlock().front());
+
+ for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) {
+ auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n);
+ IRB.CreateStore(DFSF.getShadow(*i), LabelVAPtr);
+ }
+
+ Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
+ }
+
+ if (!FT->getReturnType()->isVoidTy()) {
+ if (!DFSF.LabelReturnAlloca) {
+ DFSF.LabelReturnAlloca =
+ new AllocaInst(DFSF.DFS.ShadowTy,
+ getDataLayout().getAllocaAddrSpace(),
+ "labelreturn", &DFSF.F->getEntryBlock().front());
+ }
+ Args.push_back(DFSF.LabelReturnAlloca);
+ }
+
+ for (i = CS.arg_begin() + FT->getNumParams(); i != CS.arg_end(); ++i)
+ Args.push_back(*i);
+
+ CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
+ CustomCI->setCallingConv(CI->getCallingConv());
+ CustomCI->setAttributes(TransformFunctionAttributes(CustomFn,
+ CI->getContext(), CI->getAttributes()));
+
+ // Update the parameter attributes of the custom call instruction to
+ // zero extend the shadow parameters. This is required for targets
+ // which consider ShadowTy an illegal type.
+ for (unsigned n = 0; n < FT->getNumParams(); n++) {
+ const unsigned ArgNo = ShadowArgStart + n;
+ if (CustomCI->getArgOperand(ArgNo)->getType() == DFSF.DFS.ShadowTy)
+ CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
+ }
+
+ if (!FT->getReturnType()->isVoidTy()) {
+ LoadInst *LabelLoad =
+ IRB.CreateLoad(DFSF.DFS.ShadowTy, DFSF.LabelReturnAlloca);
+ DFSF.setShadow(CustomCI, LabelLoad);
+ }
+
+ CI->replaceAllUsesWith(CustomCI);
+ CI->eraseFromParent();
+ return;
+ }
+ break;
+ }
+ }
+
+ FunctionType *FT = cast<FunctionType>(
+ CS.getCalledValue()->getType()->getPointerElementType());
+ if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
+ for (unsigned i = 0, n = FT->getNumParams(); i != n; ++i) {
+ IRB.CreateStore(DFSF.getShadow(CS.getArgument(i)),
+ DFSF.getArgTLS(i, CS.getInstruction()));
+ }
+ }
+
+ Instruction *Next = nullptr;
+ if (!CS.getType()->isVoidTy()) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ if (II->getNormalDest()->getSinglePredecessor()) {
+ Next = &II->getNormalDest()->front();
+ } else {
+ BasicBlock *NewBB =
+ SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
+ Next = &NewBB->front();
+ }
+ } else {
+ assert(CS->getIterator() != CS->getParent()->end());
+ Next = CS->getNextNode();
+ }
+
+ if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
+ IRBuilder<> NextIRB(Next);
+ LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.ShadowTy, DFSF.getRetvalTLS());
+ DFSF.SkipInsts.insert(LI);
+ DFSF.setShadow(CS.getInstruction(), LI);
+ DFSF.NonZeroChecks.push_back(LI);
+ }
+ }
+
+ // Do all instrumentation for IA_Args down here to defer tampering with the
+ // CFG in a way that SplitEdge may be able to detect.
+ if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) {
+ FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT);
+ Value *Func =
+ IRB.CreateBitCast(CS.getCalledValue(), PointerType::getUnqual(NewFT));
+ std::vector<Value *> Args;
+
+ CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
+ Args.push_back(*i);
+
+ i = CS.arg_begin();
+ for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
+ Args.push_back(DFSF.getShadow(*i));
+
+ if (FT->isVarArg()) {
+ unsigned VarArgSize = CS.arg_size() - FT->getNumParams();
+ ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize);
+ AllocaInst *VarArgShadow =
+ new AllocaInst(VarArgArrayTy, getDataLayout().getAllocaAddrSpace(),
+ "", &DFSF.F->getEntryBlock().front());
+ Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0));
+ for (unsigned n = 0; i != e; ++i, ++n) {
+ IRB.CreateStore(
+ DFSF.getShadow(*i),
+ IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, n));
+ Args.push_back(*i);
+ }
+ }
+
+ CallSite NewCS;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ NewCS = IRB.CreateInvoke(NewFT, Func, II->getNormalDest(),
+ II->getUnwindDest(), Args);
+ } else {
+ NewCS = IRB.CreateCall(NewFT, Func, Args);
+ }
+ NewCS.setCallingConv(CS.getCallingConv());
+ NewCS.setAttributes(CS.getAttributes().removeAttributes(
+ *DFSF.DFS.Ctx, AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewCS.getInstruction()->getType())));
+
+ if (Next) {
+ ExtractValueInst *ExVal =
+ ExtractValueInst::Create(NewCS.getInstruction(), 0, "", Next);
+ DFSF.SkipInsts.insert(ExVal);
+ ExtractValueInst *ExShadow =
+ ExtractValueInst::Create(NewCS.getInstruction(), 1, "", Next);
+ DFSF.SkipInsts.insert(ExShadow);
+ DFSF.setShadow(ExVal, ExShadow);
+ DFSF.NonZeroChecks.push_back(ExShadow);
+
+ CS.getInstruction()->replaceAllUsesWith(ExVal);
+ }
+
+ CS.getInstruction()->eraseFromParent();
+ }
+}
+
+void DFSanVisitor::visitPHINode(PHINode &PN) {
+ PHINode *ShadowPN =
+ PHINode::Create(DFSF.DFS.ShadowTy, PN.getNumIncomingValues(), "", &PN);
+
+ // Give the shadow phi node valid predecessors to fool SplitEdge into working.
+ Value *UndefShadow = UndefValue::get(DFSF.DFS.ShadowTy);
+ for (PHINode::block_iterator i = PN.block_begin(), e = PN.block_end(); i != e;
+ ++i) {
+ ShadowPN->addIncoming(UndefShadow, *i);
+ }
+
+ DFSF.PHIFixups.push_back(std::make_pair(&PN, ShadowPN));
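+  // The undef incoming values are replaced with the real shadows once all
+  // instructions have been visited and their shadows are available.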
+ DFSF.setShadow(&PN, ShadowPN);
+}
diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
new file mode 100644
index 000000000000..ac6082441eae
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -0,0 +1,1229 @@
+//===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements GCOV-style profiling. When this pass is run it emits
+// "gcno" files next to the existing source, and instruments the code that runs
+// to records the edges between blocks that run and emit a complementary "gcda"
+// file on exit.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <utility>
+using namespace llvm;
+
+#define DEBUG_TYPE "insert-gcov-profiling"
+
+static cl::opt<std::string>
+DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden,
+ cl::ValueRequired);
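+// The default "402*" corresponds to the GCC 4.2 gcov format; the 4-character
+// version string is emitted to the output files in reversed byte order.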
+static cl::opt<bool> DefaultExitBlockBeforeBody("gcov-exit-block-before-body",
+ cl::init(false), cl::Hidden);
+
+GCOVOptions GCOVOptions::getDefault() {
+ GCOVOptions Options;
+ Options.EmitNotes = true;
+ Options.EmitData = true;
+ Options.UseCfgChecksum = false;
+ Options.NoRedZone = false;
+ Options.FunctionNamesInData = true;
+ Options.ExitBlockBeforeBody = DefaultExitBlockBeforeBody;
+
+ if (DefaultGCOVVersion.size() != 4) {
+ llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") +
+ DefaultGCOVVersion);
+ }
+ memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4);
+ return Options;
+}
+
+namespace {
+class GCOVFunction;
+
+class GCOVProfiler {
+public:
+ GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {}
+ GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {
+ assert((Options.EmitNotes || Options.EmitData) &&
+ "GCOVProfiler asked to do nothing?");
+ ReversedVersion[0] = Options.Version[3];
+ ReversedVersion[1] = Options.Version[2];
+ ReversedVersion[2] = Options.Version[1];
+ ReversedVersion[3] = Options.Version[0];
+ ReversedVersion[4] = '\0';
+ }
+ bool
+ runOnModule(Module &M,
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
+
+private:
+ // Create the .gcno files for the Module based on DebugInfo.
+ void emitProfileNotes();
+
+ // Modify the program to track transitions along edges and call into the
+ // profiling runtime to emit .gcda files when run.
+ bool emitProfileArcs();
+
+ bool isFunctionInstrumented(const Function &F);
+ std::vector<Regex> createRegexesFromString(StringRef RegexesStr);
+ static bool doesFilenameMatchARegex(StringRef Filename,
+ std::vector<Regex> &Regexes);
+
+ // Get pointers to the functions in the runtime library.
+ FunctionCallee getStartFileFunc(const TargetLibraryInfo *TLI);
+ FunctionCallee getEmitFunctionFunc(const TargetLibraryInfo *TLI);
+ FunctionCallee getEmitArcsFunc(const TargetLibraryInfo *TLI);
+ FunctionCallee getSummaryInfoFunc();
+ FunctionCallee getEndFileFunc();
+
+ // Add the function to write out all our counters to the global destructor
+ // list.
+ Function *
+ insertCounterWriteout(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
+ Function *insertFlush(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
+
+ void AddFlushBeforeForkAndExec();
+
+ enum class GCovFileType { GCNO, GCDA };
+ std::string mangleName(const DICompileUnit *CU, GCovFileType FileType);
+
+ GCOVOptions Options;
+
+ // Reversed, NUL-terminated copy of Options.Version.
+ char ReversedVersion[5];
+ // Checksum, produced by hash of EdgeDestinations
+ SmallVector<uint32_t, 4> FileChecksums;
+
+ Module *M;
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
+ LLVMContext *Ctx;
+ SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
+ std::vector<Regex> FilterRe;
+ std::vector<Regex> ExcludeRe;
+ StringMap<bool> InstrumentedFiles;
+};
+
+class GCOVProfilerLegacyPass : public ModulePass {
+public:
+ static char ID;
+ GCOVProfilerLegacyPass()
+ : GCOVProfilerLegacyPass(GCOVOptions::getDefault()) {}
+ GCOVProfilerLegacyPass(const GCOVOptions &Opts)
+ : ModulePass(ID), Profiler(Opts) {
+ initializeGCOVProfilerLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+ StringRef getPassName() const override { return "GCOV Profiler"; }
+
+ bool runOnModule(Module &M) override {
+ return Profiler.runOnModule(M, [this](Function &F) -> TargetLibraryInfo & {
+ return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ });
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ }
+
+private:
+ GCOVProfiler Profiler;
+};
+}
+
+char GCOVProfilerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(
+ GCOVProfilerLegacyPass, "insert-gcov-profiling",
+ "Insert instrumentation for GCOV profiling", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(
+ GCOVProfilerLegacyPass, "insert-gcov-profiling",
+ "Insert instrumentation for GCOV profiling", false, false)
+
+ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {
+ return new GCOVProfilerLegacyPass(Options);
+}
+
+static StringRef getFunctionName(const DISubprogram *SP) {
+ if (!SP->getLinkageName().empty())
+ return SP->getLinkageName();
+ return SP->getName();
+}
+
+/// Extract a filename for a DISubprogram.
+///
+/// Prefer relative paths in the coverage notes. Clang also may split
+/// up absolute paths into a directory and filename component. When
+/// the relative path doesn't exist, reconstruct the absolute path.
+static SmallString<128> getFilename(const DISubprogram *SP) {
+ SmallString<128> Path;
+ StringRef RelPath = SP->getFilename();
+ if (sys::fs::exists(RelPath))
+ Path = RelPath;
+ else
+ sys::path::append(Path, SP->getDirectory(), SP->getFilename());
+ return Path;
+}
+
+namespace {
+ class GCOVRecord {
+ protected:
+ static const char *const LinesTag;
+ static const char *const FunctionTag;
+ static const char *const BlockTag;
+ static const char *const EdgeTag;
+
+ GCOVRecord() = default;
+
+ void writeBytes(const char *Bytes, int Size) {
+ os->write(Bytes, Size);
+ }
+
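+    // Writes a 32-bit word in host byte order; gcov readers infer the byte
+    // order from the file magic.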
+ void write(uint32_t i) {
+ writeBytes(reinterpret_cast<char*>(&i), 4);
+ }
+
+ // Returns the length measured in 4-byte blocks that will be used to
+ // represent this string in a GCOV file
+ static unsigned lengthOfGCOVString(StringRef s) {
+      // A GCOV string is a length word, followed by the string bytes, then
+      // 1 to 4 NULs padding out to the next 4-byte word. The length is
+      // measured in 4-byte words including padding, not bytes of actual
+      // string.
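+      // For example, a 5-byte string occupies (5 / 4) + 1 = 2 words: the five
+      // characters plus three bytes of NUL padding.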
+ return (s.size() / 4) + 1;
+ }
+
+ void writeGCOVString(StringRef s) {
+ uint32_t Len = lengthOfGCOVString(s);
+ write(Len);
+ writeBytes(s.data(), s.size());
+
+ // Write 1 to 4 bytes of NUL padding.
+ assert((unsigned)(4 - (s.size() % 4)) > 0);
+ assert((unsigned)(4 - (s.size() % 4)) <= 4);
+ writeBytes("\0\0\0\0", 4 - (s.size() % 4));
+ }
+
+ raw_ostream *os;
+ };
+ const char *const GCOVRecord::LinesTag = "\0\0\x45\x01";
+ const char *const GCOVRecord::FunctionTag = "\0\0\0\1";
+ const char *const GCOVRecord::BlockTag = "\0\0\x41\x01";
+ const char *const GCOVRecord::EdgeTag = "\0\0\x43\x01";
+
+ class GCOVFunction;
+ class GCOVBlock;
+
+ // Constructed only by requesting it from a GCOVBlock, this object stores a
+ // list of line numbers and a single filename, representing lines that belong
+ // to the block.
+ class GCOVLines : public GCOVRecord {
+ public:
+ void addLine(uint32_t Line) {
+ assert(Line != 0 && "Line zero is not a valid real line number.");
+ Lines.push_back(Line);
+ }
+
+ uint32_t length() const {
+ // Here 2 = 1 for string length + 1 for '0' id#.
+ return lengthOfGCOVString(Filename) + 2 + Lines.size();
+ }
+
+ void writeOut() {
+ write(0);
+ writeGCOVString(Filename);
+ for (int i = 0, e = Lines.size(); i != e; ++i)
+ write(Lines[i]);
+ }
+
+ GCOVLines(StringRef F, raw_ostream *os)
+ : Filename(F) {
+ this->os = os;
+ }
+
+ private:
+ std::string Filename;
+ SmallVector<uint32_t, 32> Lines;
+ };
+
+
+  // Represent a basic block in GCOV. Each block has a unique number in the
+  // function, a set of lines belonging to the block, and a set of edges to
+  // other blocks.
+ class GCOVBlock : public GCOVRecord {
+ public:
+ GCOVLines &getFile(StringRef Filename) {
+ return LinesByFile.try_emplace(Filename, Filename, os).first->second;
+ }
+
+ void addEdge(GCOVBlock &Successor) {
+ OutEdges.push_back(&Successor);
+ }
+
+ void writeOut() {
+ uint32_t Len = 3;
+ SmallVector<StringMapEntry<GCOVLines> *, 32> SortedLinesByFile;
+ for (auto &I : LinesByFile) {
+ Len += I.second.length();
+ SortedLinesByFile.push_back(&I);
+ }
+
+ writeBytes(LinesTag, 4);
+ write(Len);
+ write(Number);
+
+ llvm::sort(SortedLinesByFile, [](StringMapEntry<GCOVLines> *LHS,
+ StringMapEntry<GCOVLines> *RHS) {
+ return LHS->getKey() < RHS->getKey();
+ });
+ for (auto &I : SortedLinesByFile)
+ I->getValue().writeOut();
+ write(0);
+ write(0);
+ }
+
+ GCOVBlock(const GCOVBlock &RHS) : GCOVRecord(RHS), Number(RHS.Number) {
+ // Only allow copy before edges and lines have been added. After that,
+ // there are inter-block pointers (eg: edges) that won't take kindly to
+ // blocks being copied or moved around.
+ assert(LinesByFile.empty());
+ assert(OutEdges.empty());
+ }
+
+ private:
+ friend class GCOVFunction;
+
+ GCOVBlock(uint32_t Number, raw_ostream *os)
+ : Number(Number) {
+ this->os = os;
+ }
+
+ uint32_t Number;
+ StringMap<GCOVLines> LinesByFile;
+ SmallVector<GCOVBlock *, 4> OutEdges;
+ };
+
+  // A function has a unique identifier, a checksum, a set of blocks, and a
+  // map of edges between blocks. This is the only GCOV object users can
+  // construct; the blocks and lines will be rooted here.
+ class GCOVFunction : public GCOVRecord {
+ public:
+ GCOVFunction(const DISubprogram *SP, Function *F, raw_ostream *os,
+ uint32_t Ident, bool UseCfgChecksum, bool ExitBlockBeforeBody)
+ : SP(SP), Ident(Ident), UseCfgChecksum(UseCfgChecksum), CfgChecksum(0),
+ ReturnBlock(1, os) {
+ this->os = os;
+
+ LLVM_DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n");
+
+ uint32_t i = 0;
+ for (auto &BB : *F) {
+ // Skip index 1 if it's assigned to the ReturnBlock.
+ if (i == 1 && ExitBlockBeforeBody)
+ ++i;
+ Blocks.insert(std::make_pair(&BB, GCOVBlock(i++, os)));
+ }
+ if (!ExitBlockBeforeBody)
+ ReturnBlock.Number = i;
+
+ std::string FunctionNameAndLine;
+ raw_string_ostream FNLOS(FunctionNameAndLine);
+ FNLOS << getFunctionName(SP) << SP->getLine();
+ FNLOS.flush();
+ FuncChecksum = hash_value(FunctionNameAndLine);
+ }
+
+ GCOVBlock &getBlock(BasicBlock *BB) {
+ return Blocks.find(BB)->second;
+ }
+
+ GCOVBlock &getReturnBlock() {
+ return ReturnBlock;
+ }
+
+ std::string getEdgeDestinations() {
+ std::string EdgeDestinations;
+ raw_string_ostream EDOS(EdgeDestinations);
+ Function *F = Blocks.begin()->first->getParent();
+ for (BasicBlock &I : *F) {
+ GCOVBlock &Block = getBlock(&I);
+ for (int i = 0, e = Block.OutEdges.size(); i != e; ++i)
+ EDOS << Block.OutEdges[i]->Number;
+ }
+ return EdgeDestinations;
+ }
+
+ uint32_t getFuncChecksum() {
+ return FuncChecksum;
+ }
+
+ void setCfgChecksum(uint32_t Checksum) {
+ CfgChecksum = Checksum;
+ }
+
+ void writeOut() {
+ writeBytes(FunctionTag, 4);
+ SmallString<128> Filename = getFilename(SP);
+ uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) +
+ 1 + lengthOfGCOVString(Filename) + 1;
+ if (UseCfgChecksum)
+ ++BlockLen;
+ write(BlockLen);
+ write(Ident);
+ write(FuncChecksum);
+ if (UseCfgChecksum)
+ write(CfgChecksum);
+ writeGCOVString(getFunctionName(SP));
+ writeGCOVString(Filename);
+ write(SP->getLine());
+
+ // Emit count of blocks.
+ writeBytes(BlockTag, 4);
+ write(Blocks.size() + 1);
+ for (int i = 0, e = Blocks.size() + 1; i != e; ++i) {
+ write(0); // No flags on our blocks.
+ }
+ LLVM_DEBUG(dbgs() << Blocks.size() << " blocks.\n");
+
+ // Emit edges between blocks.
+ if (Blocks.empty()) return;
+ Function *F = Blocks.begin()->first->getParent();
+ for (BasicBlock &I : *F) {
+ GCOVBlock &Block = getBlock(&I);
+ if (Block.OutEdges.empty()) continue;
+
+ writeBytes(EdgeTag, 4);
+ write(Block.OutEdges.size() * 2 + 1);
+ write(Block.Number);
+ for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) {
+ LLVM_DEBUG(dbgs() << Block.Number << " -> "
+ << Block.OutEdges[i]->Number << "\n");
+ write(Block.OutEdges[i]->Number);
+ write(0); // no flags
+ }
+ }
+
+ // Emit lines for each block.
+ for (BasicBlock &I : *F)
+ getBlock(&I).writeOut();
+ }
+
+ private:
+ const DISubprogram *SP;
+ uint32_t Ident;
+ uint32_t FuncChecksum;
+ bool UseCfgChecksum;
+ uint32_t CfgChecksum;
+ DenseMap<BasicBlock *, GCOVBlock> Blocks;
+ GCOVBlock ReturnBlock;
+ };
+}
+
+// RegexesStr is a string containing different regexes separated by semicolons.
+// For example "foo\..*$;bar\..*$".
+std::vector<Regex> GCOVProfiler::createRegexesFromString(StringRef RegexesStr) {
+ std::vector<Regex> Regexes;
+ while (!RegexesStr.empty()) {
+ std::pair<StringRef, StringRef> HeadTail = RegexesStr.split(';');
+ if (!HeadTail.first.empty()) {
+ Regex Re(HeadTail.first);
+ std::string Err;
+ if (!Re.isValid(Err)) {
+ Ctx->emitError(Twine("Regex ") + HeadTail.first +
+ " is not valid: " + Err);
+ }
+ Regexes.emplace_back(std::move(Re));
+ }
+ RegexesStr = HeadTail.second;
+ }
+ return Regexes;
+}
+
+bool GCOVProfiler::doesFilenameMatchARegex(StringRef Filename,
+ std::vector<Regex> &Regexes) {
+ for (Regex &Re : Regexes) {
+ if (Re.match(Filename)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool GCOVProfiler::isFunctionInstrumented(const Function &F) {
+ if (FilterRe.empty() && ExcludeRe.empty()) {
+ return true;
+ }
+ SmallString<128> Filename = getFilename(F.getSubprogram());
+ auto It = InstrumentedFiles.find(Filename);
+ if (It != InstrumentedFiles.end()) {
+ return It->second;
+ }
+
+ SmallString<256> RealPath;
+ StringRef RealFilename;
+
+ // Path can be
+  // /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/*.h, so in
+  // such cases we must get the real_path.
+ if (sys::fs::real_path(Filename, RealPath)) {
+    // real_path can fail on a path like "foo.c".
+ RealFilename = Filename;
+ } else {
+ RealFilename = RealPath;
+ }
+
+ bool ShouldInstrument;
+ if (FilterRe.empty()) {
+ ShouldInstrument = !doesFilenameMatchARegex(RealFilename, ExcludeRe);
+ } else if (ExcludeRe.empty()) {
+ ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe);
+ } else {
+ ShouldInstrument = doesFilenameMatchARegex(RealFilename, FilterRe) &&
+ !doesFilenameMatchARegex(RealFilename, ExcludeRe);
+ }
+ InstrumentedFiles[Filename] = ShouldInstrument;
+ return ShouldInstrument;
+}
+
+std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
+ GCovFileType OutputType) {
+ bool Notes = OutputType == GCovFileType::GCNO;
+
+ if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
+ for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
+ MDNode *N = GCov->getOperand(i);
+ bool ThreeElement = N->getNumOperands() == 3;
+ if (!ThreeElement && N->getNumOperands() != 2)
+ continue;
+ if (dyn_cast<MDNode>(N->getOperand(ThreeElement ? 2 : 1)) != CU)
+ continue;
+
+ if (ThreeElement) {
+        // These nodes have no mangling to apply; it's stored mangled in the
+        // bitcode.
+ MDString *NotesFile = dyn_cast<MDString>(N->getOperand(0));
+ MDString *DataFile = dyn_cast<MDString>(N->getOperand(1));
+ if (!NotesFile || !DataFile)
+ continue;
+ return Notes ? NotesFile->getString() : DataFile->getString();
+ }
+
+ MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0));
+ if (!GCovFile)
+ continue;
+
+ SmallString<128> Filename = GCovFile->getString();
+ sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
+ return Filename.str();
+ }
+ }
+
+ SmallString<128> Filename = CU->getFilename();
+ sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
+ StringRef FName = sys::path::filename(Filename);
+ SmallString<128> CurPath;
+ if (sys::fs::current_path(CurPath)) return FName;
+ sys::path::append(CurPath, FName);
+ return CurPath.str();
+}
+
+bool GCOVProfiler::runOnModule(
+ Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
+ this->M = &M;
+ this->GetTLI = std::move(GetTLI);
+ Ctx = &M.getContext();
+
+ AddFlushBeforeForkAndExec();
+
+ FilterRe = createRegexesFromString(Options.Filter);
+ ExcludeRe = createRegexesFromString(Options.Exclude);
+
+ if (Options.EmitNotes) emitProfileNotes();
+ if (Options.EmitData) return emitProfileArcs();
+ return false;
+}
+
+PreservedAnalyses GCOVProfilerPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+
+ GCOVProfiler Profiler(GCOVOpts);
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ if (!Profiler.runOnModule(M, [&](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ }))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+
+static bool functionHasLines(Function &F) {
+ // Check whether this function actually has any source lines. Not only
+ // do these waste space, they also can crash gcov.
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ // Debug intrinsic locations correspond to the location of the
+ // declaration, not necessarily any statements or expressions.
+ if (isa<DbgInfoIntrinsic>(&I)) continue;
+
+ const DebugLoc &Loc = I.getDebugLoc();
+ if (!Loc)
+ continue;
+
+ // Artificial lines such as calls to the global constructors.
+ if (Loc.getLine() == 0) continue;
+
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool isUsingScopeBasedEH(Function &F) {
+ if (!F.hasPersonalityFn()) return false;
+
+ EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
+ return isScopedEHPersonality(Personality);
+}
+
+static bool shouldKeepInEntry(BasicBlock::iterator It) {
+ if (isa<AllocaInst>(*It)) return true;
+ if (isa<DbgInfoIntrinsic>(*It)) return true;
+ if (auto *II = dyn_cast<IntrinsicInst>(It)) {
+ if (II->getIntrinsicID() == llvm::Intrinsic::localescape) return true;
+ }
+
+ return false;
+}
+
+void GCOVProfiler::AddFlushBeforeForkAndExec() {
+ SmallVector<Instruction *, 2> ForkAndExecs;
+ for (auto &F : M->functions()) {
+ auto *TLI = &GetTLI(F);
+ for (auto &I : instructions(F)) {
+ if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ if (Function *Callee = CI->getCalledFunction()) {
+ LibFunc LF;
+ if (TLI->getLibFunc(*Callee, LF) &&
+ (LF == LibFunc_fork || LF == LibFunc_execl ||
+ LF == LibFunc_execle || LF == LibFunc_execlp ||
+ LF == LibFunc_execv || LF == LibFunc_execvp ||
+ LF == LibFunc_execve || LF == LibFunc_execvpe ||
+ LF == LibFunc_execvP)) {
+ ForkAndExecs.push_back(&I);
+ }
+ }
+ }
+ }
+ }
+
+  // We need to split the block after the fork/exec call because otherwise the
+  // counters for the lines after the call would be the same as those before
+  // it.
+ for (auto I : ForkAndExecs) {
+ IRBuilder<> Builder(I);
+ FunctionType *FTy = FunctionType::get(Builder.getVoidTy(), {}, false);
+ FunctionCallee GCOVFlush = M->getOrInsertFunction("__gcov_flush", FTy);
+ Builder.CreateCall(GCOVFlush);
+ I->getParent()->splitBasicBlock(I);
+ }
+}
+
+void GCOVProfiler::emitProfileNotes() {
+ NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+ if (!CU_Nodes) return;
+
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ // Each compile unit gets its own .gcno file. This means that whether we run
+ // this pass over the original .o's as they're produced, or run it after
+ // LTO, we'll generate the same .gcno files.
+
+ auto *CU = cast<DICompileUnit>(CU_Nodes->getOperand(i));
+
+ // Skip module skeleton (and module) CUs.
+ if (CU->getDWOId())
+ continue;
+
+ std::error_code EC;
+ raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC,
+ sys::fs::OF_None);
+ if (EC) {
+ Ctx->emitError(Twine("failed to open coverage notes file for writing: ") +
+ EC.message());
+ continue;
+ }
+
+ std::string EdgeDestinations;
+
+ unsigned FunctionIdent = 0;
+ for (auto &F : M->functions()) {
+ DISubprogram *SP = F.getSubprogram();
+ if (!SP) continue;
+ if (!functionHasLines(F) || !isFunctionInstrumented(F))
+ continue;
+ // TODO: Functions using scope-based EH are currently not supported.
+ if (isUsingScopeBasedEH(F)) continue;
+
+ // gcov expects every function to start with an entry block that has a
+ // single successor, so split the entry block to make sure of that.
+ BasicBlock &EntryBlock = F.getEntryBlock();
+ BasicBlock::iterator It = EntryBlock.begin();
+ while (shouldKeepInEntry(It))
+ ++It;
+ EntryBlock.splitBasicBlock(It);
+
+ Funcs.push_back(std::make_unique<GCOVFunction>(SP, &F, &out, FunctionIdent++,
+ Options.UseCfgChecksum,
+ Options.ExitBlockBeforeBody));
+ GCOVFunction &Func = *Funcs.back();
+
+ // Add the function line number to the lines of the entry block
+ // to have a counter for the function definition.
+ uint32_t Line = SP->getLine();
+ auto Filename = getFilename(SP);
+ Func.getBlock(&EntryBlock).getFile(Filename).addLine(Line);
+
+ for (auto &BB : F) {
+ GCOVBlock &Block = Func.getBlock(&BB);
+ Instruction *TI = BB.getTerminator();
+ if (int successors = TI->getNumSuccessors()) {
+ for (int i = 0; i != successors; ++i) {
+ Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
+ }
+ } else if (isa<ReturnInst>(TI)) {
+ Block.addEdge(Func.getReturnBlock());
+ }
+
+ for (auto &I : BB) {
+ // Debug intrinsic locations correspond to the location of the
+ // declaration, not necessarily any statements or expressions.
+ if (isa<DbgInfoIntrinsic>(&I)) continue;
+
+ const DebugLoc &Loc = I.getDebugLoc();
+ if (!Loc)
+ continue;
+
+ // Artificial lines such as calls to the global constructors.
+ if (Loc.getLine() == 0 || Loc.isImplicitCode())
+ continue;
+
+ if (Line == Loc.getLine()) continue;
+ Line = Loc.getLine();
+ if (SP != getDISubprogram(Loc.getScope()))
+ continue;
+
+ GCOVLines &Lines = Block.getFile(Filename);
+ Lines.addLine(Loc.getLine());
+ }
+ Line = 0;
+ }
+ EdgeDestinations += Func.getEdgeDestinations();
+ }
+
+ FileChecksums.push_back(hash_value(EdgeDestinations));
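+    // "oncg" is the .gcno magic ("gcno" byte-reversed); the version is written
+    // in the same reversed byte order.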
+ out.write("oncg", 4);
+ out.write(ReversedVersion, 4);
+ out.write(reinterpret_cast<char*>(&FileChecksums.back()), 4);
+
+ for (auto &Func : Funcs) {
+ Func->setCfgChecksum(FileChecksums.back());
+ Func->writeOut();
+ }
+
+ out.write("\0\0\0\0\0\0\0\0", 8); // EOF
+ out.close();
+ }
+}
+
+bool GCOVProfiler::emitProfileArcs() {
+ NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+ if (!CU_Nodes) return false;
+
+ bool Result = false;
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
+ for (auto &F : M->functions()) {
+ DISubprogram *SP = F.getSubprogram();
+ if (!SP) continue;
+ if (!functionHasLines(F) || !isFunctionInstrumented(F))
+ continue;
+ // TODO: Functions using scope-based EH are currently not supported.
+ if (isUsingScopeBasedEH(F)) continue;
+ if (!Result) Result = true;
+
+ DenseMap<std::pair<BasicBlock *, BasicBlock *>, unsigned> EdgeToCounter;
+ unsigned Edges = 0;
+ for (auto &BB : F) {
+ Instruction *TI = BB.getTerminator();
+ if (isa<ReturnInst>(TI)) {
+ EdgeToCounter[{&BB, nullptr}] = Edges++;
+ } else {
+ for (BasicBlock *Succ : successors(TI)) {
+ EdgeToCounter[{&BB, Succ}] = Edges++;
+ }
+ }
+ }
+
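+      // Each instrumented function gets a flat array of 64-bit counters, one
+      // slot per edge enumerated above.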
+ ArrayType *CounterTy =
+ ArrayType::get(Type::getInt64Ty(*Ctx), Edges);
+ GlobalVariable *Counters =
+ new GlobalVariable(*M, CounterTy, false,
+ GlobalValue::InternalLinkage,
+ Constant::getNullValue(CounterTy),
+ "__llvm_gcov_ctr");
+ CountersBySP.push_back(std::make_pair(Counters, SP));
+
+ // If a BB has several predecessors, use a PHINode to select
+ // the correct counter.
+ for (auto &BB : F) {
+ const unsigned EdgeCount =
+ std::distance(pred_begin(&BB), pred_end(&BB));
+ if (EdgeCount) {
+          // The phi node must be at the beginning of the BB.
+ IRBuilder<> BuilderForPhi(&*BB.begin());
+ Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
+ PHINode *Phi = BuilderForPhi.CreatePHI(Int64PtrTy, EdgeCount);
+ for (BasicBlock *Pred : predecessors(&BB)) {
+ auto It = EdgeToCounter.find({Pred, &BB});
+ assert(It != EdgeToCounter.end());
+ const unsigned Edge = It->second;
+ Value *EdgeCounter = BuilderForPhi.CreateConstInBoundsGEP2_64(
+ Counters->getValueType(), Counters, 0, Edge);
+ Phi->addIncoming(EdgeCounter, Pred);
+ }
+
+ // Skip phis, landingpads.
+ IRBuilder<> Builder(&*BB.getFirstInsertionPt());
+ Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), Phi);
+ Count = Builder.CreateAdd(Count, Builder.getInt64(1));
+ Builder.CreateStore(Count, Phi);
+
+ Instruction *TI = BB.getTerminator();
+ if (isa<ReturnInst>(TI)) {
+ auto It = EdgeToCounter.find({&BB, nullptr});
+ assert(It != EdgeToCounter.end());
+ const unsigned Edge = It->second;
+ Value *Counter = Builder.CreateConstInBoundsGEP2_64(
+ Counters->getValueType(), Counters, 0, Edge);
+ Value *Count = Builder.CreateLoad(Builder.getInt64Ty(), Counter);
+ Count = Builder.CreateAdd(Count, Builder.getInt64(1));
+ Builder.CreateStore(Count, Counter);
+ }
+ }
+ }
+ }
+
+ Function *WriteoutF = insertCounterWriteout(CountersBySP);
+ Function *FlushF = insertFlush(CountersBySP);
+
+ // Create a small bit of code that registers the "__llvm_gcov_writeout" to
+ // be executed at exit and the "__llvm_gcov_flush" function to be executed
+ // when "__gcov_flush" is called.
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
+ "__llvm_gcov_init", M);
+ F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ F->setLinkage(GlobalValue::InternalLinkage);
+ F->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ F->addFnAttr(Attribute::NoRedZone);
+
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
+ IRBuilder<> Builder(BB);
+
+ FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Type *Params[] = {
+ PointerType::get(FTy, 0),
+ PointerType::get(FTy, 0)
+ };
+ FTy = FunctionType::get(Builder.getVoidTy(), Params, false);
+
+ // Initialize the environment and register the local writeout and flush
+ // functions.
+ FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy);
+ Builder.CreateCall(GCOVInit, {WriteoutF, FlushF});
+ Builder.CreateRetVoid();
+
+ appendToGlobalCtors(*M, F, 0);
+ }
+
+ return Result;
+}
+
+FunctionCallee GCOVProfiler::getStartFileFunc(const TargetLibraryInfo *TLI) {
+ Type *Args[] = {
+ Type::getInt8PtrTy(*Ctx), // const char *orig_filename
+ Type::getInt8PtrTy(*Ctx), // const char version[4]
+ Type::getInt32Ty(*Ctx), // uint32_t checksum
+ };
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
+ AttributeList AL;
+ if (auto AK = TLI->getExtAttrForI32Param(false))
+ AL = AL.addParamAttribute(*Ctx, 2, AK);
+ FunctionCallee Res = M->getOrInsertFunction("llvm_gcda_start_file", FTy, AL);
+ return Res;
+}
+
+FunctionCallee GCOVProfiler::getEmitFunctionFunc(const TargetLibraryInfo *TLI) {
+ Type *Args[] = {
+ Type::getInt32Ty(*Ctx), // uint32_t ident
+ Type::getInt8PtrTy(*Ctx), // const char *function_name
+ Type::getInt32Ty(*Ctx), // uint32_t func_checksum
+ Type::getInt8Ty(*Ctx), // uint8_t use_extra_checksum
+ Type::getInt32Ty(*Ctx), // uint32_t cfg_checksum
+ };
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
+ AttributeList AL;
+ if (auto AK = TLI->getExtAttrForI32Param(false)) {
+ AL = AL.addParamAttribute(*Ctx, 0, AK);
+ AL = AL.addParamAttribute(*Ctx, 2, AK);
+ AL = AL.addParamAttribute(*Ctx, 3, AK);
+ AL = AL.addParamAttribute(*Ctx, 4, AK);
+ }
+ return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
+}
+
+FunctionCallee GCOVProfiler::getEmitArcsFunc(const TargetLibraryInfo *TLI) {
+ Type *Args[] = {
+ Type::getInt32Ty(*Ctx), // uint32_t num_counters
+ Type::getInt64PtrTy(*Ctx), // uint64_t *counters
+ };
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
+ AttributeList AL;
+ if (auto AK = TLI->getExtAttrForI32Param(false))
+ AL = AL.addParamAttribute(*Ctx, 0, AK);
+ return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy, AL);
+}
+
+FunctionCallee GCOVProfiler::getSummaryInfoFunc() {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_gcda_summary_info", FTy);
+}
+
+FunctionCallee GCOVProfiler::getEndFileFunc() {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
+}
+
+Function *GCOVProfiler::insertCounterWriteout(
+ ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
+ FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
+ if (!WriteoutF)
+ WriteoutF = Function::Create(WriteoutFTy, GlobalValue::InternalLinkage,
+ "__llvm_gcov_writeout", M);
+ WriteoutF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ WriteoutF->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ WriteoutF->addFnAttr(Attribute::NoRedZone);
+
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
+ IRBuilder<> Builder(BB);
+
+ auto *TLI = &GetTLI(*WriteoutF);
+
+ FunctionCallee StartFile = getStartFileFunc(TLI);
+ FunctionCallee EmitFunction = getEmitFunctionFunc(TLI);
+ FunctionCallee EmitArcs = getEmitArcsFunc(TLI);
+ FunctionCallee SummaryInfo = getSummaryInfoFunc();
+ FunctionCallee EndFile = getEndFileFunc();
+
+ NamedMDNode *CUNodes = M->getNamedMetadata("llvm.dbg.cu");
+ if (!CUNodes) {
+ Builder.CreateRetVoid();
+ return WriteoutF;
+ }
+
+ // Collect the relevant data into a large constant data structure that we can
+ // walk to write out everything.
+ StructType *StartFileCallArgsTy = StructType::create(
+ {Builder.getInt8PtrTy(), Builder.getInt8PtrTy(), Builder.getInt32Ty()});
+ StructType *EmitFunctionCallArgsTy = StructType::create(
+ {Builder.getInt32Ty(), Builder.getInt8PtrTy(), Builder.getInt32Ty(),
+ Builder.getInt8Ty(), Builder.getInt32Ty()});
+ StructType *EmitArcsCallArgsTy = StructType::create(
+ {Builder.getInt32Ty(), Builder.getInt64Ty()->getPointerTo()});
+ StructType *FileInfoTy =
+ StructType::create({StartFileCallArgsTy, Builder.getInt32Ty(),
+ EmitFunctionCallArgsTy->getPointerTo(),
+ EmitArcsCallArgsTy->getPointerTo()});
+
+ Constant *Zero32 = Builder.getInt32(0);
+ // Build an explicit array of two zeros for use in ConstantExpr GEP building.
+ Constant *TwoZero32s[] = {Zero32, Zero32};
+
+ SmallVector<Constant *, 8> FileInfos;
+ for (int i : llvm::seq<int>(0, CUNodes->getNumOperands())) {
+ auto *CU = cast<DICompileUnit>(CUNodes->getOperand(i));
+
+ // Skip module skeleton (and module) CUs.
+ if (CU->getDWOId())
+ continue;
+
+ std::string FilenameGcda = mangleName(CU, GCovFileType::GCDA);
+ uint32_t CfgChecksum = FileChecksums.empty() ? 0 : FileChecksums[i];
+ auto *StartFileCallArgs = ConstantStruct::get(
+ StartFileCallArgsTy, {Builder.CreateGlobalStringPtr(FilenameGcda),
+ Builder.CreateGlobalStringPtr(ReversedVersion),
+ Builder.getInt32(CfgChecksum)});
+
+ SmallVector<Constant *, 8> EmitFunctionCallArgsArray;
+ SmallVector<Constant *, 8> EmitArcsCallArgsArray;
+ for (int j : llvm::seq<int>(0, CountersBySP.size())) {
+ auto *SP = cast_or_null<DISubprogram>(CountersBySP[j].second);
+ uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum();
+ EmitFunctionCallArgsArray.push_back(ConstantStruct::get(
+ EmitFunctionCallArgsTy,
+ {Builder.getInt32(j),
+ Options.FunctionNamesInData
+ ? Builder.CreateGlobalStringPtr(getFunctionName(SP))
+ : Constant::getNullValue(Builder.getInt8PtrTy()),
+ Builder.getInt32(FuncChecksum),
+ Builder.getInt8(Options.UseCfgChecksum),
+ Builder.getInt32(CfgChecksum)}));
+
+ GlobalVariable *GV = CountersBySP[j].first;
+ unsigned Arcs = cast<ArrayType>(GV->getValueType())->getNumElements();
+ EmitArcsCallArgsArray.push_back(ConstantStruct::get(
+ EmitArcsCallArgsTy,
+ {Builder.getInt32(Arcs), ConstantExpr::getInBoundsGetElementPtr(
+ GV->getValueType(), GV, TwoZero32s)}));
+ }
+ // Create global arrays for the two emit calls.
+ int CountersSize = CountersBySP.size();
+ assert(CountersSize == (int)EmitFunctionCallArgsArray.size() &&
+ "Mismatched array size!");
+ assert(CountersSize == (int)EmitArcsCallArgsArray.size() &&
+ "Mismatched array size!");
+ auto *EmitFunctionCallArgsArrayTy =
+ ArrayType::get(EmitFunctionCallArgsTy, CountersSize);
+ auto *EmitFunctionCallArgsArrayGV = new GlobalVariable(
+ *M, EmitFunctionCallArgsArrayTy, /*isConstant*/ true,
+ GlobalValue::InternalLinkage,
+ ConstantArray::get(EmitFunctionCallArgsArrayTy,
+ EmitFunctionCallArgsArray),
+ Twine("__llvm_internal_gcov_emit_function_args.") + Twine(i));
+ auto *EmitArcsCallArgsArrayTy =
+ ArrayType::get(EmitArcsCallArgsTy, CountersSize);
+ EmitFunctionCallArgsArrayGV->setUnnamedAddr(
+ GlobalValue::UnnamedAddr::Global);
+ auto *EmitArcsCallArgsArrayGV = new GlobalVariable(
+ *M, EmitArcsCallArgsArrayTy, /*isConstant*/ true,
+ GlobalValue::InternalLinkage,
+ ConstantArray::get(EmitArcsCallArgsArrayTy, EmitArcsCallArgsArray),
+ Twine("__llvm_internal_gcov_emit_arcs_args.") + Twine(i));
+ EmitArcsCallArgsArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+
+ FileInfos.push_back(ConstantStruct::get(
+ FileInfoTy,
+ {StartFileCallArgs, Builder.getInt32(CountersSize),
+ ConstantExpr::getInBoundsGetElementPtr(EmitFunctionCallArgsArrayTy,
+ EmitFunctionCallArgsArrayGV,
+ TwoZero32s),
+ ConstantExpr::getInBoundsGetElementPtr(
+ EmitArcsCallArgsArrayTy, EmitArcsCallArgsArrayGV, TwoZero32s)}));
+ }
+
+ // If we didn't find anything to actually emit, bail on out.
+ if (FileInfos.empty()) {
+ Builder.CreateRetVoid();
+ return WriteoutF;
+ }
+
+ // To simplify code, we cap the number of file infos we write out to fit
+ // easily in a 32-bit signed integer. This gives consistent behavior between
+ // 32-bit and 64-bit systems without requiring (potentially very slow) 64-bit
+ // operations on 32-bit systems. It also seems unreasonable to try to handle
+ // more than 2 billion files.
+ if ((int64_t)FileInfos.size() > (int64_t)INT_MAX)
+ FileInfos.resize(INT_MAX);
+
+ // Create a global for the entire data structure so we can walk it more
+ // easily.
+ auto *FileInfoArrayTy = ArrayType::get(FileInfoTy, FileInfos.size());
+ auto *FileInfoArrayGV = new GlobalVariable(
+ *M, FileInfoArrayTy, /*isConstant*/ true, GlobalValue::InternalLinkage,
+ ConstantArray::get(FileInfoArrayTy, FileInfos),
+ "__llvm_internal_gcov_emit_file_info");
+ FileInfoArrayGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+
+ // Create the CFG for walking this data structure.
+ auto *FileLoopHeader =
+ BasicBlock::Create(*Ctx, "file.loop.header", WriteoutF);
+ auto *CounterLoopHeader =
+ BasicBlock::Create(*Ctx, "counter.loop.header", WriteoutF);
+ auto *FileLoopLatch = BasicBlock::Create(*Ctx, "file.loop.latch", WriteoutF);
+ auto *ExitBB = BasicBlock::Create(*Ctx, "exit", WriteoutF);
+
+ // We always have at least one file, so just branch to the header.
+ Builder.CreateBr(FileLoopHeader);
+
+ // The index into the files structure is our loop induction variable.
+ Builder.SetInsertPoint(FileLoopHeader);
+ PHINode *IV =
+ Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2);
+ IV->addIncoming(Builder.getInt32(0), BB);
+ auto *FileInfoPtr = Builder.CreateInBoundsGEP(
+ FileInfoArrayTy, FileInfoArrayGV, {Builder.getInt32(0), IV});
+ auto *StartFileCallArgsPtr =
+ Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 0);
+ auto *StartFileCall = Builder.CreateCall(
+ StartFile,
+ {Builder.CreateLoad(StartFileCallArgsTy->getElementType(0),
+ Builder.CreateStructGEP(StartFileCallArgsTy,
+ StartFileCallArgsPtr, 0)),
+ Builder.CreateLoad(StartFileCallArgsTy->getElementType(1),
+ Builder.CreateStructGEP(StartFileCallArgsTy,
+ StartFileCallArgsPtr, 1)),
+ Builder.CreateLoad(StartFileCallArgsTy->getElementType(2),
+ Builder.CreateStructGEP(StartFileCallArgsTy,
+ StartFileCallArgsPtr, 2))});
+ if (auto AK = TLI->getExtAttrForI32Param(false))
+ StartFileCall->addParamAttr(2, AK);
+ auto *NumCounters =
+ Builder.CreateLoad(FileInfoTy->getElementType(1),
+ Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 1));
+ auto *EmitFunctionCallArgsArray =
+ Builder.CreateLoad(FileInfoTy->getElementType(2),
+ Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 2));
+ auto *EmitArcsCallArgsArray =
+ Builder.CreateLoad(FileInfoTy->getElementType(3),
+ Builder.CreateStructGEP(FileInfoTy, FileInfoPtr, 3));
+ auto *EnterCounterLoopCond =
+ Builder.CreateICmpSLT(Builder.getInt32(0), NumCounters);
+ Builder.CreateCondBr(EnterCounterLoopCond, CounterLoopHeader, FileLoopLatch);
+
+ Builder.SetInsertPoint(CounterLoopHeader);
+ auto *JV = Builder.CreatePHI(Builder.getInt32Ty(), /*NumReservedValues*/ 2);
+ JV->addIncoming(Builder.getInt32(0), FileLoopHeader);
+ auto *EmitFunctionCallArgsPtr = Builder.CreateInBoundsGEP(
+ EmitFunctionCallArgsTy, EmitFunctionCallArgsArray, JV);
+ auto *EmitFunctionCall = Builder.CreateCall(
+ EmitFunction,
+ {Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(0),
+ Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+ EmitFunctionCallArgsPtr, 0)),
+ Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(1),
+ Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+ EmitFunctionCallArgsPtr, 1)),
+ Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(2),
+ Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+ EmitFunctionCallArgsPtr, 2)),
+ Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(3),
+ Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+ EmitFunctionCallArgsPtr, 3)),
+ Builder.CreateLoad(EmitFunctionCallArgsTy->getElementType(4),
+ Builder.CreateStructGEP(EmitFunctionCallArgsTy,
+ EmitFunctionCallArgsPtr,
+ 4))});
+ if (auto AK = TLI->getExtAttrForI32Param(false)) {
+ EmitFunctionCall->addParamAttr(0, AK);
+ EmitFunctionCall->addParamAttr(2, AK);
+ EmitFunctionCall->addParamAttr(3, AK);
+ EmitFunctionCall->addParamAttr(4, AK);
+ }
+ auto *EmitArcsCallArgsPtr =
+ Builder.CreateInBoundsGEP(EmitArcsCallArgsTy, EmitArcsCallArgsArray, JV);
+ auto *EmitArcsCall = Builder.CreateCall(
+ EmitArcs,
+ {Builder.CreateLoad(
+ EmitArcsCallArgsTy->getElementType(0),
+ Builder.CreateStructGEP(EmitArcsCallArgsTy, EmitArcsCallArgsPtr, 0)),
+ Builder.CreateLoad(EmitArcsCallArgsTy->getElementType(1),
+ Builder.CreateStructGEP(EmitArcsCallArgsTy,
+ EmitArcsCallArgsPtr, 1))});
+ if (auto AK = TLI->getExtAttrForI32Param(false))
+ EmitArcsCall->addParamAttr(0, AK);
+ auto *NextJV = Builder.CreateAdd(JV, Builder.getInt32(1));
+ auto *CounterLoopCond = Builder.CreateICmpSLT(NextJV, NumCounters);
+ Builder.CreateCondBr(CounterLoopCond, CounterLoopHeader, FileLoopLatch);
+ JV->addIncoming(NextJV, CounterLoopHeader);
+
+ Builder.SetInsertPoint(FileLoopLatch);
+ Builder.CreateCall(SummaryInfo, {});
+ Builder.CreateCall(EndFile, {});
+ auto *NextIV = Builder.CreateAdd(IV, Builder.getInt32(1));
+ auto *FileLoopCond =
+ Builder.CreateICmpSLT(NextIV, Builder.getInt32(FileInfos.size()));
+ Builder.CreateCondBr(FileLoopCond, FileLoopHeader, ExitBB);
+ IV->addIncoming(NextIV, FileLoopLatch);
+
+ Builder.SetInsertPoint(ExitBB);
+ Builder.CreateRetVoid();
+
+ return WriteoutF;
+}
+
+Function *GCOVProfiler::
+insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *FlushF = M->getFunction("__llvm_gcov_flush");
+ if (!FlushF)
+ FlushF = Function::Create(FTy, GlobalValue::InternalLinkage,
+ "__llvm_gcov_flush", M);
+ else
+ FlushF->setLinkage(GlobalValue::InternalLinkage);
+ FlushF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ FlushF->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ FlushF->addFnAttr(Attribute::NoRedZone);
+
+ BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF);
+
+ // Write out the current counters.
+ Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
+ assert(WriteoutF && "Need to create the writeout function first!");
+
+ IRBuilder<> Builder(Entry);
+ Builder.CreateCall(WriteoutF, {});
+
+ // Zero out the counters.
+ for (const auto &I : CountersBySP) {
+ GlobalVariable *GV = I.first;
+ Constant *Null = Constant::getNullValue(GV->getValueType());
+ Builder.CreateStore(Null, GV);
+ }
+
+ Type *RetTy = FlushF->getReturnType();
+ if (RetTy == Type::getVoidTy(*Ctx))
+ Builder.CreateRetVoid();
+ else if (RetTy->isIntegerTy())
+ // Used if __llvm_gcov_flush was implicitly declared.
+ Builder.CreateRet(ConstantInt::get(RetTy, 0));
+ else
+ report_fatal_error("invalid return type for __llvm_gcov_flush");
+
+ return FlushF;
+}
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
new file mode 100644
index 000000000000..f87132ee4758
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -0,0 +1,1521 @@
+//===- HWAddressSanitizer.cpp - memory error detector ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file is a part of HWAddressSanitizer, an address sanity checker
+/// based on tagged addressing.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include <sstream>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hwasan"
+
+static const char *const kHwasanModuleCtorName = "hwasan.module_ctor";
+static const char *const kHwasanNoteName = "hwasan.note";
+static const char *const kHwasanInitName = "__hwasan_init";
+static const char *const kHwasanPersonalityThunkName =
+ "__hwasan_personality_thunk";
+
+static const char *const kHwasanShadowMemoryDynamicAddress =
+ "__hwasan_shadow_memory_dynamic_address";
+
+// Access sizes are powers of two: 1, 2, 4, 8, 16.
+static const size_t kNumberOfAccessSizes = 5;
+
+static const size_t kDefaultShadowScale = 4;
+static const uint64_t kDynamicShadowSentinel =
+ std::numeric_limits<uint64_t>::max();
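+// The pointer tag occupies the top byte of the address (bits 56-63); on
+// AArch64 this relies on the top-byte-ignore (TBI) feature.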
+static const unsigned kPointerTagShift = 56;
+
+static const unsigned kShadowBaseAlignment = 32;
+
+static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
+ "hwasan-memory-access-callback-prefix",
+ cl::desc("Prefix for memory access callbacks"), cl::Hidden,
+ cl::init("__hwasan_"));
+
+static cl::opt<bool>
+ ClInstrumentWithCalls("hwasan-instrument-with-calls",
+ cl::desc("instrument reads and writes with callbacks"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
+ cl::desc("instrument read instructions"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClInstrumentWrites(
+ "hwasan-instrument-writes", cl::desc("instrument write instructions"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClInstrumentAtomics(
+ "hwasan-instrument-atomics",
+ cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
+ cl::init(true));
+
+static cl::opt<bool> ClRecover(
+ "hwasan-recover",
+ cl::desc("Enable recovery mode (continue-after-error)."),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack",
+ cl::desc("instrument stack (allocas)"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClUARRetagToZero(
+ "hwasan-uar-retag-to-zero",
+ cl::desc("Clear alloca tags before returning from the function to allow "
+ "non-instrumented and instrumented function calls mix. When set "
+ "to false, allocas are retagged before returning from the "
+ "function to detect use after return."),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClGenerateTagsWithCalls(
+ "hwasan-generate-tags-with-calls",
+ cl::desc("generate new tags with runtime library calls"), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<int> ClMatchAllTag(
+ "hwasan-match-all-tag",
+ cl::desc("don't report bad accesses via pointers with this tag"),
+ cl::Hidden, cl::init(-1));
+
+static cl::opt<bool> ClEnableKhwasan(
+ "hwasan-kernel",
+ cl::desc("Enable KernelHWAddressSanitizer instrumentation"),
+ cl::Hidden, cl::init(false));
+
+// These flags allow changing the shadow mapping and control how shadow memory
+// is accessed. The shadow mapping looks like:
+// Shadow = (Mem >> scale) + offset
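+// With the default scale of 4, each 16-byte granule of application memory is
+// described by one shadow byte holding the granule's tag.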
+
+static cl::opt<uint64_t>
+ ClMappingOffset("hwasan-mapping-offset",
+ cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
+ cl::Hidden, cl::init(0));
+
+static cl::opt<bool>
+ ClWithIfunc("hwasan-with-ifunc",
+ cl::desc("Access dynamic shadow through an ifunc global on "
+ "platforms that support this"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClWithTls(
+ "hwasan-with-tls",
+    cl::desc("Access dynamic shadow through a thread-local pointer on "
+ "platforms that support this"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool>
+ ClRecordStackHistory("hwasan-record-stack-history",
+ cl::desc("Record stack frames with tagged allocations "
+ "in a thread-local ring buffer"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool>
+ ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
+ cl::desc("instrument memory intrinsics"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool>
+ ClInstrumentLandingPads("hwasan-instrument-landing-pads",
+ cl::desc("instrument landing pads"), cl::Hidden,
+ cl::init(false), cl::ZeroOrMore);
+
+static cl::opt<bool> ClUseShortGranules(
+ "hwasan-use-short-granules",
+ cl::desc("use short granules in allocas and outlined checks"), cl::Hidden,
+ cl::init(false), cl::ZeroOrMore);
+
+static cl::opt<bool> ClInstrumentPersonalityFunctions(
+ "hwasan-instrument-personality-functions",
+ cl::desc("instrument personality functions"), cl::Hidden, cl::init(false),
+ cl::ZeroOrMore);
+
+static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
+ cl::desc("inline all checks"),
+ cl::Hidden, cl::init(false));
+
+namespace {
+
+/// An instrumentation pass implementing detection of addressability bugs
+/// using tagged pointers.
+class HWAddressSanitizer {
+public:
+ explicit HWAddressSanitizer(Module &M, bool CompileKernel = false,
+ bool Recover = false) : M(M) {
+ this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
+ this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0 ?
+ ClEnableKhwasan : CompileKernel;
+
+ initializeModule();
+ }
+
+ bool sanitizeFunction(Function &F);
+ void initializeModule();
+
+ void initializeCallbacks(Module &M);
+
+ Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
+ Value *getDynamicShadowNonTls(IRBuilder<> &IRB);
+
+ void untagPointerOperand(Instruction *I, Value *Addr);
+ Value *shadowBase();
+ Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
+ void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
+ unsigned AccessSizeIndex,
+ Instruction *InsertBefore);
+ void instrumentMemIntrinsic(MemIntrinsic *MI);
+ bool instrumentMemAccess(Instruction *I);
+ Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
+ uint64_t *TypeSize, unsigned *Alignment,
+ Value **MaybeMask);
+
+ bool isInterestingAlloca(const AllocaInst &AI);
+ bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
+ Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
+ Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
+ bool instrumentStack(
+ SmallVectorImpl<AllocaInst *> &Allocas,
+ DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> &AllocaDeclareMap,
+ SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
+ Value *readRegister(IRBuilder<> &IRB, StringRef Name);
+ bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
+ Value *getNextTagWithCall(IRBuilder<> &IRB);
+ Value *getStackBaseTag(IRBuilder<> &IRB);
+ Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI,
+ unsigned AllocaNo);
+ Value *getUARTag(IRBuilder<> &IRB, Value *StackTag);
+
+ Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
+ void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);
+
+ void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
+ void instrumentGlobals();
+
+ void instrumentPersonalityFunctions();
+
+private:
+ LLVMContext *C;
+ Module &M;
+ Triple TargetTriple;
+ FunctionCallee HWAsanMemmove, HWAsanMemcpy, HWAsanMemset;
+ FunctionCallee HWAsanHandleVfork;
+
+ /// This struct defines the shadow mapping using the rule:
+ /// shadow = (mem >> Scale) + Offset.
+ /// If InGlobal is true, then
+ /// extern char __hwasan_shadow[];
+ /// shadow = (mem >> Scale) + &__hwasan_shadow
+ /// If InTls is true, then
+ /// extern char *__hwasan_tls;
+  ///   shadow = (mem >> Scale) + align_up(__hwasan_tls, 2^kShadowBaseAlignment)
+ struct ShadowMapping {
+ int Scale;
+ uint64_t Offset;
+ bool InGlobal;
+ bool InTls;
+
+ void init(Triple &TargetTriple);
+ unsigned getObjectAlignment() const { return 1U << Scale; }
+ };
+ ShadowMapping Mapping;
+
+ Type *VoidTy = Type::getVoidTy(M.getContext());
+ Type *IntptrTy;
+ Type *Int8PtrTy;
+ Type *Int8Ty;
+ Type *Int32Ty;
+ Type *Int64Ty = Type::getInt64Ty(M.getContext());
+
+ bool CompileKernel;
+ bool Recover;
+ bool UseShortGranules;
+ bool InstrumentLandingPads;
+
+ Function *HwasanCtorFunction;
+
+ FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
+ FunctionCallee HwasanMemoryAccessCallbackSized[2];
+
+ FunctionCallee HwasanTagMemoryFunc;
+ FunctionCallee HwasanGenerateTagFunc;
+ FunctionCallee HwasanThreadEnterFunc;
+
+ Constant *ShadowGlobal;
+
+ Value *LocalDynamicShadow = nullptr;
+ Value *StackBaseTag = nullptr;
+ GlobalValue *ThreadPtrGlobal = nullptr;
+};
+
+class HWAddressSanitizerLegacyPass : public FunctionPass {
+public:
+ // Pass identification, replacement for typeid.
+ static char ID;
+
+ explicit HWAddressSanitizerLegacyPass(bool CompileKernel = false,
+ bool Recover = false)
+ : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover) {}
+
+ StringRef getPassName() const override { return "HWAddressSanitizer"; }
+
+ bool doInitialization(Module &M) override {
+ HWASan = std::make_unique<HWAddressSanitizer>(M, CompileKernel, Recover);
+ return true;
+ }
+
+ bool runOnFunction(Function &F) override {
+ return HWASan->sanitizeFunction(F);
+ }
+
+ bool doFinalization(Module &M) override {
+ HWASan.reset();
+ return false;
+ }
+
+private:
+ std::unique_ptr<HWAddressSanitizer> HWASan;
+ bool CompileKernel;
+ bool Recover;
+};
+
+} // end anonymous namespace
+
+char HWAddressSanitizerLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(
+ HWAddressSanitizerLegacyPass, "hwasan",
+ "HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
+ false)
+INITIALIZE_PASS_END(
+ HWAddressSanitizerLegacyPass, "hwasan",
+ "HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
+ false)
+
+FunctionPass *llvm::createHWAddressSanitizerLegacyPassPass(bool CompileKernel,
+ bool Recover) {
+ assert(!CompileKernel || Recover);
+ return new HWAddressSanitizerLegacyPass(CompileKernel, Recover);
+}
+
+HWAddressSanitizerPass::HWAddressSanitizerPass(bool CompileKernel, bool Recover)
+ : CompileKernel(CompileKernel), Recover(Recover) {}
+
+PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ HWAddressSanitizer HWASan(M, CompileKernel, Recover);
+ bool Modified = false;
+ for (Function &F : M)
+ Modified |= HWASan.sanitizeFunction(F);
+ if (Modified)
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+/// Module-level initialization.
+///
+/// Inserts a call to __hwasan_init into the module's constructor list.
+void HWAddressSanitizer::initializeModule() {
+ LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
+ auto &DL = M.getDataLayout();
+
+ TargetTriple = Triple(M.getTargetTriple());
+
+ Mapping.init(TargetTriple);
+
+ C = &(M.getContext());
+ IRBuilder<> IRB(*C);
+ IntptrTy = IRB.getIntPtrTy(DL);
+ Int8PtrTy = IRB.getInt8PtrTy();
+ Int8Ty = IRB.getInt8Ty();
+ Int32Ty = IRB.getInt32Ty();
+
+ HwasanCtorFunction = nullptr;
+
+ // Older versions of Android do not have the required runtime support for
+ // short granules, global or personality function instrumentation. On other
+ // platforms we currently require using the latest version of the runtime.
+ bool NewRuntime =
+ !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);
+
+ UseShortGranules =
+ ClUseShortGranules.getNumOccurrences() ? ClUseShortGranules : NewRuntime;
+
+ // If we don't have personality function support, fall back to landing pads.
+ InstrumentLandingPads = ClInstrumentLandingPads.getNumOccurrences()
+ ? ClInstrumentLandingPads
+ : !NewRuntime;
+
+ if (!CompileKernel) {
+ std::tie(HwasanCtorFunction, std::ignore) =
+ getOrCreateSanitizerCtorAndInitFunctions(
+ M, kHwasanModuleCtorName, kHwasanInitName,
+ /*InitArgTypes=*/{},
+ /*InitArgs=*/{},
+ // This callback is invoked when the functions are created the first
+ // time. Hook them into the global ctors list in that case:
+ [&](Function *Ctor, FunctionCallee) {
+ Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
+ Ctor->setComdat(CtorComdat);
+ appendToGlobalCtors(M, Ctor, 0, Ctor);
+ });
+
+ bool InstrumentGlobals =
+ ClGlobals.getNumOccurrences() ? ClGlobals : NewRuntime;
+ if (InstrumentGlobals)
+ instrumentGlobals();
+
+ bool InstrumentPersonalityFunctions =
+ ClInstrumentPersonalityFunctions.getNumOccurrences()
+ ? ClInstrumentPersonalityFunctions
+ : NewRuntime;
+ if (InstrumentPersonalityFunctions)
+ instrumentPersonalityFunctions();
+ }
+
+ if (!TargetTriple.isAndroid()) {
+ Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
+ auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
+ GlobalValue::ExternalLinkage, nullptr,
+ "__hwasan_tls", nullptr,
+ GlobalVariable::InitialExecTLSModel);
+ appendToCompilerUsed(M, GV);
+ return GV;
+ });
+ ThreadPtrGlobal = cast<GlobalVariable>(C);
+ }
+}
+
+void HWAddressSanitizer::initializeCallbacks(Module &M) {
+ IRBuilder<> IRB(*C);
+ for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
+ const std::string TypeStr = AccessIsWrite ? "store" : "load";
+ const std::string EndingStr = Recover ? "_noabort" : "";
+
+ HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr,
+ FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false));
+
+ for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
+ AccessSizeIndex++) {
+ HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
+ M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + TypeStr +
+ itostr(1ULL << AccessSizeIndex) + EndingStr,
+ FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false));
+ }
+ }
+
+ HwasanTagMemoryFunc = M.getOrInsertFunction(
+ "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy);
+ HwasanGenerateTagFunc =
+ M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
+
+ ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
+ ArrayType::get(IRB.getInt8Ty(), 0));
+
+ const std::string MemIntrinCallbackPrefix =
+ CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
+ HWAsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy);
+ HWAsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy);
+ HWAsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt32Ty(), IntptrTy);
+
+ HWAsanHandleVfork =
+ M.getOrInsertFunction("__hwasan_handle_vfork", IRB.getVoidTy(), IntptrTy);
+
+ HwasanThreadEnterFunc =
+ M.getOrInsertFunction("__hwasan_thread_enter", IRB.getVoidTy());
+}
+
+Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
+ // An empty inline asm with input reg == output reg.
+ // An opaque no-op cast, basically.
+ InlineAsm *Asm = InlineAsm::get(
+ FunctionType::get(Int8PtrTy, {ShadowGlobal->getType()}, false),
+ StringRef(""), StringRef("=r,0"),
+ /*hasSideEffects=*/false);
+ return IRB.CreateCall(Asm, {ShadowGlobal}, ".hwasan.shadow");
+}
+
+Value *HWAddressSanitizer::getDynamicShadowNonTls(IRBuilder<> &IRB) {
+ // Generate code only when dynamic addressing is needed.
+ if (Mapping.Offset != kDynamicShadowSentinel)
+ return nullptr;
+
+ if (Mapping.InGlobal) {
+ return getDynamicShadowIfunc(IRB);
+ } else {
+ Value *GlobalDynamicAddress =
+ IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
+ kHwasanShadowMemoryDynamicAddress, Int8PtrTy);
+ return IRB.CreateLoad(Int8PtrTy, GlobalDynamicAddress);
+ }
+}
+
+Value *HWAddressSanitizer::isInterestingMemoryAccess(Instruction *I,
+ bool *IsWrite,
+ uint64_t *TypeSize,
+ unsigned *Alignment,
+ Value **MaybeMask) {
+ // Skip memory accesses inserted by other instrumentation passes.
+ if (I->hasMetadata("nosanitize")) return nullptr;
+
+ // Do not instrument the load fetching the dynamic shadow address.
+ if (LocalDynamicShadow == I)
+ return nullptr;
+
+ Value *PtrOperand = nullptr;
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (!ClInstrumentReads) return nullptr;
+ *IsWrite = false;
+ *TypeSize = DL.getTypeStoreSizeInBits(LI->getType());
+ *Alignment = LI->getAlignment();
+ PtrOperand = LI->getPointerOperand();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (!ClInstrumentWrites) return nullptr;
+ *IsWrite = true;
+ *TypeSize = DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType());
+ *Alignment = SI->getAlignment();
+ PtrOperand = SI->getPointerOperand();
+ } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
+ if (!ClInstrumentAtomics) return nullptr;
+ *IsWrite = true;
+ *TypeSize = DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType());
+ *Alignment = 0;
+ PtrOperand = RMW->getPointerOperand();
+ } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
+ if (!ClInstrumentAtomics) return nullptr;
+ *IsWrite = true;
+ *TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType());
+ *Alignment = 0;
+ PtrOperand = XCHG->getPointerOperand();
+ }
+
+ if (PtrOperand) {
+ // Do not instrument accesses from different address spaces; we cannot deal
+ // with them.
+ Type *PtrTy = cast<PointerType>(PtrOperand->getType()->getScalarType());
+ if (PtrTy->getPointerAddressSpace() != 0)
+ return nullptr;
+
+ // Ignore swifterror addresses.
+ // swifterror memory addresses are mem2reg promoted by instruction
+ // selection. As such they cannot have regular uses (such as a call to an
+ // instrumentation function), and it makes no sense to track them as memory.
+ if (PtrOperand->isSwiftError())
+ return nullptr;
+ }
+
+ return PtrOperand;
+}
+
+static unsigned getPointerOperandIndex(Instruction *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->getPointerOperandIndex();
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getPointerOperandIndex();
+ if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I))
+ return RMW->getPointerOperandIndex();
+ if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I))
+ return XCHG->getPointerOperandIndex();
+ report_fatal_error("Unexpected instruction");
+ return -1;
+}
+
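+// Map an access size in bits to the index used for the per-size callbacks and
+// check intrinsics. For example, a 64-bit (8-byte) access maps to index 3,
+// which selects the load8/store8 callbacks created above (e.g. __hwasan_load8
+// with the default callback prefix).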
+static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
+ size_t Res = countTrailingZeros(TypeSize / 8);
+ assert(Res < kNumberOfAccessSizes);
+ return Res;
+}
+
+void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
+ if (TargetTriple.isAArch64())
+ return;
+
+ IRBuilder<> IRB(I);
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+ Value *UntaggedPtr =
+ IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType());
+ I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
+}
+
+Value *HWAddressSanitizer::shadowBase() {
+ if (LocalDynamicShadow)
+ return LocalDynamicShadow;
+ return ConstantExpr::getIntToPtr(ConstantInt::get(IntptrTy, Mapping.Offset),
+ Int8PtrTy);
+}
+
+Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
+ // Mem >> Scale
+ Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
+ if (Mapping.Offset == 0)
+ return IRB.CreateIntToPtr(Shadow, Int8PtrTy);
+ // (Mem >> Scale) + Offset
+ return IRB.CreateGEP(Int8Ty, shadowBase(), Shadow);
+}
+
+void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
+ unsigned AccessSizeIndex,
+ Instruction *InsertBefore) {
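+ // AccessInfo packs the check parameters into one immediate: bit 5 is set for
+ // recoverable checks, bit 4 for writes, and bits 0-3 hold the log2 of the
+ // access size. The value is also embedded in the brk/int3 trap immediates
+ // emitted below so the runtime can recover these parameters.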
+ const int64_t AccessInfo = Recover * 0x20 + IsWrite * 0x10 + AccessSizeIndex;
+ IRBuilder<> IRB(InsertBefore);
+
+ if (!ClInlineAllChecks && TargetTriple.isAArch64() &&
+ TargetTriple.isOSBinFormatELF() && !Recover) {
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
+ IRB.CreateCall(Intrinsic::getDeclaration(
+ M, UseShortGranules
+ ? Intrinsic::hwasan_check_memaccess_shortgranules
+ : Intrinsic::hwasan_check_memaccess),
+ {shadowBase(), Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
+ return;
+ }
+
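+ // Inline check (sketch): compare the pointer's top-byte tag against the
+ // shadow byte of the granule. On a mismatch, a shadow value of 1..15 denotes
+ // a short granule: the access is still valid if it stays within the first
+ // MemTag bytes of the granule and the pointer tag matches the real tag stored
+ // in the granule's last byte; otherwise we trap (or fall through when
+ // recovering).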
+ Value *PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
+ Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, kPointerTagShift),
+ IRB.getInt8Ty());
+ Value *AddrLong = untagPointer(IRB, PtrLong);
+ Value *Shadow = memToShadow(AddrLong, IRB);
+ Value *MemTag = IRB.CreateLoad(Int8Ty, Shadow);
+ Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);
+
+ int matchAllTag = ClMatchAllTag.getNumOccurrences() > 0 ?
+ ClMatchAllTag : (CompileKernel ? 0xFF : -1);
+ if (matchAllTag != -1) {
+ Value *TagNotIgnored = IRB.CreateICmpNE(PtrTag,
+ ConstantInt::get(PtrTag->getType(), matchAllTag));
+ TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
+ }
+
+ Instruction *CheckTerm =
+ SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, false,
+ MDBuilder(*C).createBranchWeights(1, 100000));
+
+ IRB.SetInsertPoint(CheckTerm);
+ Value *OutOfShortGranuleTagRange =
+ IRB.CreateICmpUGT(MemTag, ConstantInt::get(Int8Ty, 15));
+ Instruction *CheckFailTerm =
+ SplitBlockAndInsertIfThen(OutOfShortGranuleTagRange, CheckTerm, !Recover,
+ MDBuilder(*C).createBranchWeights(1, 100000));
+
+ IRB.SetInsertPoint(CheckTerm);
+ Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(PtrLong, 15), Int8Ty);
+ PtrLowBits = IRB.CreateAdd(
+ PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
+ Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, MemTag);
+ SplitBlockAndInsertIfThen(PtrLowBitsOOB, CheckTerm, false,
+ MDBuilder(*C).createBranchWeights(1, 100000),
+ nullptr, nullptr, CheckFailTerm->getParent());
+
+ IRB.SetInsertPoint(CheckTerm);
+ Value *InlineTagAddr = IRB.CreateOr(AddrLong, 15);
+ InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, Int8PtrTy);
+ Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
+ Value *InlineTagMismatch = IRB.CreateICmpNE(PtrTag, InlineTag);
+ SplitBlockAndInsertIfThen(InlineTagMismatch, CheckTerm, false,
+ MDBuilder(*C).createBranchWeights(1, 100000),
+ nullptr, nullptr, CheckFailTerm->getParent());
+
+ IRB.SetInsertPoint(CheckFailTerm);
+ InlineAsm *Asm;
+ switch (TargetTriple.getArch()) {
+ case Triple::x86_64:
+ // The signal handler will find the data address in rdi.
+ Asm = InlineAsm::get(
+ FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
+ "int3\nnopl " + itostr(0x40 + AccessInfo) + "(%rax)",
+ "{rdi}",
+ /*hasSideEffects=*/true);
+ break;
+ case Triple::aarch64:
+ case Triple::aarch64_be:
+ // The signal handler will find the data address in x0.
+ Asm = InlineAsm::get(
+ FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
+ "brk #" + itostr(0x900 + AccessInfo),
+ "{x0}",
+ /*hasSideEffects=*/true);
+ break;
+ default:
+ report_fatal_error("unsupported architecture");
+ }
+ IRB.CreateCall(Asm, PtrLong);
+ if (Recover)
+ cast<BranchInst>(CheckFailTerm)->setSuccessor(0, CheckTerm->getParent());
+}
+
+void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
+ IRBuilder<> IRB(MI);
+ if (isa<MemTransferInst>(MI)) {
+ IRB.CreateCall(
+ isa<MemMoveInst>(MI) ? HWAsanMemmove : HWAsanMemcpy,
+ {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+ } else if (isa<MemSetInst>(MI)) {
+ IRB.CreateCall(
+ HWAsanMemset,
+ {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+ }
+ MI->eraseFromParent();
+}
+
+bool HWAddressSanitizer::instrumentMemAccess(Instruction *I) {
+ LLVM_DEBUG(dbgs() << "Instrumenting: " << *I << "\n");
+ bool IsWrite = false;
+ unsigned Alignment = 0;
+ uint64_t TypeSize = 0;
+ Value *MaybeMask = nullptr;
+
+ if (ClInstrumentMemIntrinsics && isa<MemIntrinsic>(I)) {
+ instrumentMemIntrinsic(cast<MemIntrinsic>(I));
+ return true;
+ }
+
+ Value *Addr =
+ isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment, &MaybeMask);
+
+ if (!Addr)
+ return false;
+
+ if (MaybeMask)
+ return false; //FIXME
+
+ IRBuilder<> IRB(I);
+ if (isPowerOf2_64(TypeSize) &&
+ (TypeSize / 8 <= (1UL << (kNumberOfAccessSizes - 1))) &&
+ (Alignment >= (1UL << Mapping.Scale) || Alignment == 0 ||
+ Alignment >= TypeSize / 8)) {
+ size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
+ if (ClInstrumentWithCalls) {
+ IRB.CreateCall(HwasanMemoryAccessCallback[IsWrite][AccessSizeIndex],
+ IRB.CreatePointerCast(Addr, IntptrTy));
+ } else {
+ instrumentMemAccessInline(Addr, IsWrite, AccessSizeIndex, I);
+ }
+ } else {
+ IRB.CreateCall(HwasanMemoryAccessCallbackSized[IsWrite],
+ {IRB.CreatePointerCast(Addr, IntptrTy),
+ ConstantInt::get(IntptrTy, TypeSize / 8)});
+ }
+ untagPointerOperand(I, Addr);
+
+ return true;
+}
+
+static uint64_t getAllocaSizeInBytes(const AllocaInst &AI) {
+ uint64_t ArraySize = 1;
+ if (AI.isArrayAllocation()) {
+ const ConstantInt *CI = dyn_cast<ConstantInt>(AI.getArraySize());
+ assert(CI && "non-constant array size");
+ ArraySize = CI->getZExtValue();
+ }
+ Type *Ty = AI.getAllocatedType();
+ uint64_t SizeInBytes = AI.getModule()->getDataLayout().getTypeAllocSize(Ty);
+ return SizeInBytes * ArraySize;
+}
+
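+// Tag the shadow for one alloca. Sketch of the short-granule case, assuming
+// the default 16-byte granule: a 20-byte alloca is padded to 32 bytes, the
+// first shadow byte receives the tag, the second (short) granule's shadow byte
+// stores the number of bytes actually used (20 % 16 == 4), and the real tag is
+// written into the last byte of that granule (offset 31) so the inline check
+// above can verify it.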
+bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI,
+ Value *Tag, size_t Size) {
+ size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
+ if (!UseShortGranules)
+ Size = AlignedSize;
+
+ Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty());
+ if (ClInstrumentWithCalls) {
+ IRB.CreateCall(HwasanTagMemoryFunc,
+ {IRB.CreatePointerCast(AI, Int8PtrTy), JustTag,
+ ConstantInt::get(IntptrTy, AlignedSize)});
+ } else {
+ size_t ShadowSize = Size >> Mapping.Scale;
+ Value *ShadowPtr = memToShadow(IRB.CreatePointerCast(AI, IntptrTy), IRB);
+ // If this memset is not inlined, it will be intercepted in the hwasan
+ // runtime library. That's OK, because the interceptor skips the checks if
+ // the address is in the shadow region.
+ // FIXME: the interceptor is not as fast as real memset. Consider lowering
+ // llvm.memset right here into either a sequence of stores, or a call to
+ // hwasan_tag_memory.
+ if (ShadowSize)
+ IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, /*Align=*/1);
+ if (Size != AlignedSize) {
+ IRB.CreateStore(
+ ConstantInt::get(Int8Ty, Size % Mapping.getObjectAlignment()),
+ IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
+ IRB.CreateStore(JustTag, IRB.CreateConstGEP1_32(
+ Int8Ty, IRB.CreateBitCast(AI, Int8PtrTy),
+ AlignedSize - 1));
+ }
+ }
+ return true;
+}
+
+static unsigned RetagMask(unsigned AllocaNo) {
+ // A list of 8-bit numbers that have at most one run of non-zero bits.
+ // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
+ // masks.
+ // The list does not include the value 255, which is used for UAR.
+ //
+ // Because we are more likely to use earlier elements of this list than later
+ // ones, it is sorted in increasing order of probability of collision with a
+ // mask allocated (temporally) nearby. The program that generated this list
+ // can be found at:
+ // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
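+ //
+ // For example, the first three allocas in a frame get masks 0, 128 and 64,
+ // so neighbouring allocas derived from the same stack base tag end up with
+ // distinct tags.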
+ static unsigned FastMasks[] = {0, 128, 64, 192, 32, 96, 224, 112, 240,
+ 48, 16, 120, 248, 56, 24, 8, 124, 252,
+ 60, 28, 12, 4, 126, 254, 62, 30, 14,
+ 6, 2, 127, 63, 31, 15, 7, 3, 1};
+ return FastMasks[AllocaNo % (sizeof(FastMasks) / sizeof(FastMasks[0]))];
+}
+
+Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
+ return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy);
+}
+
+Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
+ if (ClGenerateTagsWithCalls)
+ return getNextTagWithCall(IRB);
+ if (StackBaseTag)
+ return StackBaseTag;
+ // FIXME: use addressofreturnaddress (but implement it in aarch64 backend
+ // first).
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ auto GetStackPointerFn = Intrinsic::getDeclaration(
+ M, Intrinsic::frameaddress,
+ IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
+ Value *StackPointer = IRB.CreateCall(
+ GetStackPointerFn, {Constant::getNullValue(IRB.getInt32Ty())});
+
+ // Extract some entropy from the stack pointer for the tags.
+ // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ
+ // between functions).
+ Value *StackPointerLong = IRB.CreatePointerCast(StackPointer, IntptrTy);
+ Value *StackTag =
+ IRB.CreateXor(StackPointerLong, IRB.CreateLShr(StackPointerLong, 20),
+ "hwasan.stack.base.tag");
+ return StackTag;
+}
+
+Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
+ AllocaInst *AI, unsigned AllocaNo) {
+ if (ClGenerateTagsWithCalls)
+ return getNextTagWithCall(IRB);
+ return IRB.CreateXor(StackTag,
+ ConstantInt::get(IntptrTy, RetagMask(AllocaNo)));
+}
+
+Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB, Value *StackTag) {
+ if (ClUARRetagToZero)
+ return ConstantInt::get(IntptrTy, 0);
+ if (ClGenerateTagsWithCalls)
+ return getNextTagWithCall(IRB);
+ return IRB.CreateXor(StackTag, ConstantInt::get(IntptrTy, 0xFFU));
+}
+
+// Add a tag to an address.
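+// For example, in userspace a tag of 0x2a applied to the pointer
+// 0x00007fff00001000 yields 0x2a007fff00001000, assuming the usual
+// kPointerTagShift of 56 (tag stored in the top byte).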
+Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty,
+ Value *PtrLong, Value *Tag) {
+ Value *TaggedPtrLong;
+ if (CompileKernel) {
+ // Kernel addresses have 0xFF in the most significant byte.
+ Value *ShiftedTag = IRB.CreateOr(
+ IRB.CreateShl(Tag, kPointerTagShift),
+ ConstantInt::get(IntptrTy, (1ULL << kPointerTagShift) - 1));
+ TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag);
+ } else {
+ // Userspace can simply do OR (tag << 56);
+ Value *ShiftedTag = IRB.CreateShl(Tag, kPointerTagShift);
+ TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag);
+ }
+ return IRB.CreateIntToPtr(TaggedPtrLong, Ty);
+}
+
+// Remove tag from an address.
+Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
+ Value *UntaggedPtrLong;
+ if (CompileKernel) {
+ // Kernel addresses have 0xFF in the most significant byte.
+ UntaggedPtrLong = IRB.CreateOr(PtrLong,
+ ConstantInt::get(PtrLong->getType(), 0xFFULL << kPointerTagShift));
+ } else {
+ // Userspace addresses have 0x00.
+ UntaggedPtrLong = IRB.CreateAnd(PtrLong,
+ ConstantInt::get(PtrLong->getType(), ~(0xFFULL << kPointerTagShift)));
+ }
+ return UntaggedPtrLong;
+}
+
+Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) {
+ // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
+ // in Bionic's libc/private/bionic_tls.h.
+ Function *ThreadPointerFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
+ Value *SlotPtr = IRB.CreatePointerCast(
+ IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
+ IRB.CreateCall(ThreadPointerFunc), 0x30),
+ Ty->getPointerTo(0));
+ return SlotPtr;
+ }
+ if (ThreadPtrGlobal)
+ return ThreadPtrGlobal;
+
+ return nullptr;
+}
+
+void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
+ if (!Mapping.InTls) {
+ LocalDynamicShadow = getDynamicShadowNonTls(IRB);
+ return;
+ }
+
+ if (!WithFrameRecord && TargetTriple.isAndroid()) {
+ LocalDynamicShadow = getDynamicShadowIfunc(IRB);
+ return;
+ }
+
+ Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
+ assert(SlotPtr);
+
+ Instruction *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
+
+ Function *F = IRB.GetInsertBlock()->getParent();
+ if (F->getFnAttribute("hwasan-abi").getValueAsString() == "interceptor") {
+ Value *ThreadLongEqZero =
+ IRB.CreateICmpEQ(ThreadLong, ConstantInt::get(IntptrTy, 0));
+ auto *Br = cast<BranchInst>(SplitBlockAndInsertIfThen(
+ ThreadLongEqZero, cast<Instruction>(ThreadLongEqZero)->getNextNode(),
+ false, MDBuilder(*C).createBranchWeights(1, 100000)));
+
+ IRB.SetInsertPoint(Br);
+ // FIXME: This should call a new runtime function with a custom calling
+ // convention to avoid needing to spill all arguments here.
+ IRB.CreateCall(HwasanThreadEnterFunc);
+ LoadInst *ReloadThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
+
+ IRB.SetInsertPoint(&*Br->getSuccessor(0)->begin());
+ PHINode *ThreadLongPhi = IRB.CreatePHI(IntptrTy, 2);
+ ThreadLongPhi->addIncoming(ThreadLong, ThreadLong->getParent());
+ ThreadLongPhi->addIncoming(ReloadThreadLong, ReloadThreadLong->getParent());
+ ThreadLong = ThreadLongPhi;
+ }
+
+ // Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI.
+ Value *ThreadLongMaybeUntagged =
+ TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);
+
+ if (WithFrameRecord) {
+ StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
+
+ // Prepare ring buffer data.
+ Value *PC;
+ if (TargetTriple.getArch() == Triple::aarch64)
+ PC = readRegister(IRB, "pc");
+ else
+ PC = IRB.CreatePtrToInt(F, IntptrTy);
+ Module *M = F->getParent();
+ auto GetStackPointerFn = Intrinsic::getDeclaration(
+ M, Intrinsic::frameaddress,
+ IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
+ Value *SP = IRB.CreatePtrToInt(
+ IRB.CreateCall(GetStackPointerFn,
+ {Constant::getNullValue(IRB.getInt32Ty())}),
+ IntptrTy);
+ // Mix SP and PC.
+ // Assumptions:
+ // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero)
+ // SP is 0xsssssssssssSSSS0 (4 lower bits are zero)
+ // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
+ // 0xSSSSPPPPPPPPPPPP
+ SP = IRB.CreateShl(SP, 44);
+
+ // Store data to ring buffer.
+ Value *RecordPtr =
+ IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IntptrTy->getPointerTo(0));
+ IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr);
+
+ // Update the ring buffer. The top byte of ThreadLong defines the size of the
+ // buffer in pages; it must be a power of two, and the start of the buffer
+ // must be aligned by twice that much. Therefore wrap-around of the ring
+ // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
+ // The use of AShr instead of LShr is due to
+ // https://bugs.llvm.org/show_bug.cgi?id=39030
+ // Runtime library makes sure not to use the highest bit.
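+ // For example, a top byte of 2 selects a 2-page (8 KiB) ring buffer: the
+ // mask becomes ~0x2000, so once the record pointer is advanced past the end
+ // of the buffer the masking clears the carried bit and wraps it back to the
+ // 16 KiB-aligned start.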
+ Value *WrapMask = IRB.CreateXor(
+ IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
+ ConstantInt::get(IntptrTy, (uint64_t)-1));
+ Value *ThreadLongNew = IRB.CreateAnd(
+ IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
+ IRB.CreateStore(ThreadLongNew, SlotPtr);
+ }
+
+ // Get shadow base address by aligning RecordPtr up.
+ // Note: this is not correct if the pointer is already aligned.
+ // Runtime library will make sure this never happens.
+ LocalDynamicShadow = IRB.CreateAdd(
+ IRB.CreateOr(
+ ThreadLongMaybeUntagged,
+ ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
+ ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
+ LocalDynamicShadow = IRB.CreateIntToPtr(LocalDynamicShadow, Int8PtrTy);
+}
+
+Value *HWAddressSanitizer::readRegister(IRBuilder<> &IRB, StringRef Name) {
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ Function *ReadRegister =
+ Intrinsic::getDeclaration(M, Intrinsic::read_register, IntptrTy);
+ MDNode *MD = MDNode::get(*C, {MDString::get(*C, Name)});
+ Value *Args[] = {MetadataAsValue::get(*C, MD)};
+ return IRB.CreateCall(ReadRegister, Args);
+}
+
+bool HWAddressSanitizer::instrumentLandingPads(
+ SmallVectorImpl<Instruction *> &LandingPadVec) {
+ for (auto *LP : LandingPadVec) {
+ IRBuilder<> IRB(LP->getNextNode());
+ IRB.CreateCall(
+ HWAsanHandleVfork,
+ {readRegister(IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp"
+ : "sp")});
+ }
+ return true;
+}
+
+bool HWAddressSanitizer::instrumentStack(
+ SmallVectorImpl<AllocaInst *> &Allocas,
+ DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> &AllocaDeclareMap,
+ SmallVectorImpl<Instruction *> &RetVec, Value *StackTag) {
+ // Ideally, we want to calculate a tagged stack base pointer, and rewrite all
+ // alloca addresses using that. Unfortunately, offsets are not known yet
+ // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
+ // temp, xor it with the per-alloca retag mask and shift-OR the result into
+ // each alloca address. This generates one extra instruction per alloca use.
+ for (unsigned N = 0; N < Allocas.size(); ++N) {
+ auto *AI = Allocas[N];
+ IRBuilder<> IRB(AI->getNextNode());
+
+ // Replace uses of the alloca with tagged address.
+ Value *Tag = getAllocaTag(IRB, StackTag, AI, N);
+ Value *AILong = IRB.CreatePointerCast(AI, IntptrTy);
+ Value *Replacement = tagPointer(IRB, AI->getType(), AILong, Tag);
+ std::string Name =
+ AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
+ Replacement->setName(Name + ".hwasan");
+
+ AI->replaceUsesWithIf(Replacement,
+ [AILong](Use &U) { return U.getUser() != AILong; });
+
+ for (auto *DDI : AllocaDeclareMap.lookup(AI)) {
+ DIExpression *OldExpr = DDI->getExpression();
+ DIExpression *NewExpr = DIExpression::append(
+ OldExpr, {dwarf::DW_OP_LLVM_tag_offset, RetagMask(N)});
+ DDI->setArgOperand(2, MetadataAsValue::get(*C, NewExpr));
+ }
+
+ size_t Size = getAllocaSizeInBytes(*AI);
+ tagAlloca(IRB, AI, Tag, Size);
+
+ for (auto RI : RetVec) {
+ IRB.SetInsertPoint(RI);
+
+ // Re-tag alloca memory with the special UAR tag.
+ Value *Tag = getUARTag(IRB, StackTag);
+ tagAlloca(IRB, AI, Tag, alignTo(Size, Mapping.getObjectAlignment()));
+ }
+ }
+
+ return true;
+}
+
+bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
+ return (AI.getAllocatedType()->isSized() &&
+ // FIXME: instrument dynamic allocas, too
+ AI.isStaticAlloca() &&
+ // alloca() may be called with 0 size, ignore it.
+ getAllocaSizeInBytes(AI) > 0 &&
+ // We are only interested in allocas not promotable to registers.
+ // Promotable allocas are common under -O0.
+ !isAllocaPromotable(&AI) &&
+ // inalloca allocas are not treated as static, and we don't want
+ // dynamic alloca instrumentation for them as well.
+ !AI.isUsedWithInAlloca() &&
+ // swifterror allocas are register promoted by ISel
+ !AI.isSwiftError());
+}
+
+bool HWAddressSanitizer::sanitizeFunction(Function &F) {
+ if (&F == HwasanCtorFunction)
+ return false;
+
+ if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");
+
+ SmallVector<Instruction*, 16> ToInstrument;
+ SmallVector<AllocaInst*, 8> AllocasToInstrument;
+ SmallVector<Instruction*, 8> RetVec;
+ SmallVector<Instruction*, 8> LandingPadVec;
+ DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> AllocaDeclareMap;
+ for (auto &BB : F) {
+ for (auto &Inst : BB) {
+ if (ClInstrumentStack)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
+ if (isInterestingAlloca(*AI))
+ AllocasToInstrument.push_back(AI);
+ continue;
+ }
+
+ if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst) ||
+ isa<CleanupReturnInst>(Inst))
+ RetVec.push_back(&Inst);
+
+ if (auto *DDI = dyn_cast<DbgDeclareInst>(&Inst))
+ if (auto *Alloca = dyn_cast_or_null<AllocaInst>(DDI->getAddress()))
+ AllocaDeclareMap[Alloca].push_back(DDI);
+
+ if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
+ LandingPadVec.push_back(&Inst);
+
+ Value *MaybeMask = nullptr;
+ bool IsWrite;
+ unsigned Alignment;
+ uint64_t TypeSize;
+ Value *Addr = isInterestingMemoryAccess(&Inst, &IsWrite, &TypeSize,
+ &Alignment, &MaybeMask);
+ if (Addr || isa<MemIntrinsic>(Inst))
+ ToInstrument.push_back(&Inst);
+ }
+ }
+
+ initializeCallbacks(*F.getParent());
+
+ if (!LandingPadVec.empty())
+ instrumentLandingPads(LandingPadVec);
+
+ if (AllocasToInstrument.empty() && F.hasPersonalityFn() &&
+ F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
+ // __hwasan_personality_thunk is a no-op for functions without an
+ // instrumented stack, so we can drop it.
+ F.setPersonalityFn(nullptr);
+ }
+
+ if (AllocasToInstrument.empty() && ToInstrument.empty())
+ return false;
+
+ assert(!LocalDynamicShadow);
+
+ Instruction *InsertPt = &*F.getEntryBlock().begin();
+ IRBuilder<> EntryIRB(InsertPt);
+ emitPrologue(EntryIRB,
+ /*WithFrameRecord*/ ClRecordStackHistory &&
+ !AllocasToInstrument.empty());
+
+ bool Changed = false;
+ if (!AllocasToInstrument.empty()) {
+ Value *StackTag =
+ ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
+ Changed |= instrumentStack(AllocasToInstrument, AllocaDeclareMap, RetVec,
+ StackTag);
+ }
+
+ // Pad and align each of the allocas that we instrumented to stop small
+ // uninteresting allocas from hiding in an instrumented alloca's padding and so
+ // that we have enough space to store real tags for short granules.
+ DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap;
+ for (AllocaInst *AI : AllocasToInstrument) {
+ uint64_t Size = getAllocaSizeInBytes(*AI);
+ uint64_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
+ AI->setAlignment(
+ MaybeAlign(std::max(AI->getAlignment(), Mapping.getObjectAlignment())));
+ if (Size != AlignedSize) {
+ Type *AllocatedType = AI->getAllocatedType();
+ if (AI->isArrayAllocation()) {
+ uint64_t ArraySize =
+ cast<ConstantInt>(AI->getArraySize())->getZExtValue();
+ AllocatedType = ArrayType::get(AllocatedType, ArraySize);
+ }
+ Type *TypeWithPadding = StructType::get(
+ AllocatedType, ArrayType::get(Int8Ty, AlignedSize - Size));
+ auto *NewAI = new AllocaInst(
+ TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI);
+ NewAI->takeName(AI);
+ NewAI->setAlignment(MaybeAlign(AI->getAlignment()));
+ NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca());
+ NewAI->setSwiftError(AI->isSwiftError());
+ NewAI->copyMetadata(*AI);
+ auto *Bitcast = new BitCastInst(NewAI, AI->getType(), "", AI);
+ AI->replaceAllUsesWith(Bitcast);
+ AllocaToPaddedAllocaMap[AI] = NewAI;
+ }
+ }
+
+ if (!AllocaToPaddedAllocaMap.empty()) {
+ for (auto &BB : F)
+ for (auto &Inst : BB)
+ if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst))
+ if (auto *AI =
+ dyn_cast_or_null<AllocaInst>(DVI->getVariableLocation()))
+ if (auto *NewAI = AllocaToPaddedAllocaMap.lookup(AI))
+ DVI->setArgOperand(
+ 0, MetadataAsValue::get(*C, LocalAsMetadata::get(NewAI)));
+ for (auto &P : AllocaToPaddedAllocaMap)
+ P.first->eraseFromParent();
+ }
+
+ // If we split the entry block, move any allocas that were originally in the
+ // entry block back into the entry block so that they aren't treated as
+ // dynamic allocas.
+ if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
+ InsertPt = &*F.getEntryBlock().begin();
+ for (auto II = EntryIRB.GetInsertBlock()->begin(),
+ IE = EntryIRB.GetInsertBlock()->end();
+ II != IE;) {
+ Instruction *I = &*II++;
+ if (auto *AI = dyn_cast<AllocaInst>(I))
+ if (isa<ConstantInt>(AI->getArraySize()))
+ I->moveBefore(InsertPt);
+ }
+ }
+
+ for (auto Inst : ToInstrument)
+ Changed |= instrumentMemAccess(Inst);
+
+ LocalDynamicShadow = nullptr;
+ StackBaseTag = nullptr;
+
+ return Changed;
+}
+
+void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
+ Constant *Initializer = GV->getInitializer();
+ uint64_t SizeInBytes =
+ M.getDataLayout().getTypeAllocSize(Initializer->getType());
+ uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment());
+ if (SizeInBytes != NewSize) {
+ // Pad the initializer out to the next multiple of 16 bytes and add the
+ // required short granule tag.
+ std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
+ Init.back() = Tag;
+ Constant *Padding = ConstantDataArray::get(*C, Init);
+ Initializer = ConstantStruct::getAnon({Initializer, Padding});
+ }
+
+ auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
+ GlobalValue::ExternalLinkage, Initializer,
+ GV->getName() + ".hwasan");
+ NewGV->copyAttributesFrom(GV);
+ NewGV->setLinkage(GlobalValue::PrivateLinkage);
+ NewGV->copyMetadata(GV, 0);
+ NewGV->setAlignment(
+ MaybeAlign(std::max(GV->getAlignment(), Mapping.getObjectAlignment())));
+
+ // It is invalid to ICF two globals that have different tags. In the case
+ // where the size of the global is a multiple of the tag granularity the
+ // contents of the globals may be the same but the tags (i.e. symbol values)
+ // may be different, and the symbols are not considered during ICF. In the
+ // case where the size is not a multiple of the granularity, the short granule
+ // tags would discriminate two globals with different tags, but there would
+ // otherwise be nothing stopping such a global from being incorrectly ICF'd
+ // with an uninstrumented (i.e. tag 0) global that happened to have the short
+ // granule tag in the last byte.
+ NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
+
+ // Descriptor format (assuming little-endian):
+ // bytes 0-3: relative address of global
+ // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case
+ // it isn't, we create multiple descriptors)
+ // byte 7: tag
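+ // For example, a 4000-byte global with tag 0x2a gets a single descriptor
+ // whose second word is 0x2a000fa0 (size 0xfa0 in the low 24 bits, tag in the
+ // top byte).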
+ auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty);
+ const uint64_t MaxDescriptorSize = 0xfffff0;
+ for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
+ DescriptorPos += MaxDescriptorSize) {
+ auto *Descriptor =
+ new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
+ nullptr, GV->getName() + ".hwasan.descriptor");
+ auto *GVRelPtr = ConstantExpr::getTrunc(
+ ConstantExpr::getAdd(
+ ConstantExpr::getSub(
+ ConstantExpr::getPtrToInt(NewGV, Int64Ty),
+ ConstantExpr::getPtrToInt(Descriptor, Int64Ty)),
+ ConstantInt::get(Int64Ty, DescriptorPos)),
+ Int32Ty);
+ uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize);
+ auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24));
+ Descriptor->setComdat(NewGV->getComdat());
+ Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag}));
+ Descriptor->setSection("hwasan_globals");
+ Descriptor->setMetadata(LLVMContext::MD_associated,
+ MDNode::get(*C, ValueAsMetadata::get(NewGV)));
+ appendToCompilerUsed(M, Descriptor);
+ }
+
+ Constant *Aliasee = ConstantExpr::getIntToPtr(
+ ConstantExpr::getAdd(
+ ConstantExpr::getPtrToInt(NewGV, Int64Ty),
+ ConstantInt::get(Int64Ty, uint64_t(Tag) << kPointerTagShift)),
+ GV->getType());
+ auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(),
+ GV->getLinkage(), "", Aliasee, &M);
+ Alias->setVisibility(GV->getVisibility());
+ Alias->takeName(GV);
+ GV->replaceAllUsesWith(Alias);
+ GV->eraseFromParent();
+}
+
+void HWAddressSanitizer::instrumentGlobals() {
+ // Start by creating a note that contains pointers to the list of global
+ // descriptors. Adding a note to the output file will cause the linker to
+ // create a PT_NOTE program header pointing to the note that we can use to
+ // find the descriptor list starting from the program headers. A function
+ // provided by the runtime initializes the shadow memory for the globals by
+ // accessing the descriptor list via the note. The dynamic loader needs to
+ // call this function whenever a library is loaded.
+ //
+ // The reason why we use a note for this instead of a more conventional
+ // approach of having a global constructor pass a descriptor list pointer to
+ // the runtime is because of an order of initialization problem. With
+ // constructors we can encounter the following problematic scenario:
+ //
+ // 1) library A depends on library B and also interposes one of B's symbols
+ // 2) B's constructors are called before A's (as required for correctness)
+ // 3) during construction, B accesses one of its "own" globals (actually
+ // interposed by A) and triggers a HWASAN failure due to the initialization
+ // for A not having happened yet
+ //
+ // Even without interposition it is possible to run into similar situations in
+ // cases where two libraries mutually depend on each other.
+ //
+ // We only need one note per binary, so put everything for the note in a
+ // comdat.
+ Comdat *NoteComdat = M.getOrInsertComdat(kHwasanNoteName);
+
+ Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
+ auto Start =
+ new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
+ nullptr, "__start_hwasan_globals");
+ Start->setVisibility(GlobalValue::HiddenVisibility);
+ Start->setDSOLocal(true);
+ auto Stop =
+ new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
+ nullptr, "__stop_hwasan_globals");
+ Stop->setVisibility(GlobalValue::HiddenVisibility);
+ Stop->setDSOLocal(true);
+
+ // Null-terminated so actually 8 bytes, which are required in order to align
+ // the note properly.
+ auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");
+
+ auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
+ Int32Ty, Int32Ty);
+ auto *Note =
+ new GlobalVariable(M, NoteTy, /*isConstantGlobal=*/true,
+ GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
+ Note->setSection(".note.hwasan.globals");
+ Note->setComdat(NoteComdat);
+ Note->setAlignment(Align(4));
+ Note->setDSOLocal(true);
+
+ // The pointers in the note need to be relative so that the note ends up being
+ // placed in rodata, which is the standard location for notes.
+ auto CreateRelPtr = [&](Constant *Ptr) {
+ return ConstantExpr::getTrunc(
+ ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
+ ConstantExpr::getPtrToInt(Note, Int64Ty)),
+ Int32Ty);
+ };
+ Note->setInitializer(ConstantStruct::getAnon(
+ {ConstantInt::get(Int32Ty, 8), // n_namesz
+ ConstantInt::get(Int32Ty, 8), // n_descsz
+ ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
+ Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
+ appendToCompilerUsed(M, Note);
+
+ // Create a zero-length global in hwasan_globals so that the linker will
+ // always create start and stop symbols.
+ auto Dummy = new GlobalVariable(
+ M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
+ Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
+ Dummy->setSection("hwasan_globals");
+ Dummy->setComdat(NoteComdat);
+ Dummy->setMetadata(LLVMContext::MD_associated,
+ MDNode::get(*C, ValueAsMetadata::get(Note)));
+ appendToCompilerUsed(M, Dummy);
+
+ std::vector<GlobalVariable *> Globals;
+ for (GlobalVariable &GV : M.globals()) {
+ if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") ||
+ GV.isThreadLocal())
+ continue;
+
+ // Common symbols can't have aliases point to them, so they can't be tagged.
+ if (GV.hasCommonLinkage())
+ continue;
+
+ // Globals with custom sections may be used in __start_/__stop_ enumeration,
+ // which would be broken both by adding tags and potentially by the extra
+ // padding/alignment that we insert.
+ if (GV.hasSection())
+ continue;
+
+ Globals.push_back(&GV);
+ }
+
+ MD5 Hasher;
+ Hasher.update(M.getSourceFileName());
+ MD5::MD5Result Hash;
+ Hasher.final(Hash);
+ uint8_t Tag = Hash[0];
+
+ for (GlobalVariable *GV : Globals) {
+ // Skip tag 0 in order to avoid collisions with untagged memory.
+ if (Tag == 0)
+ Tag = 1;
+ instrumentGlobal(GV, Tag++);
+ }
+}
+
+void HWAddressSanitizer::instrumentPersonalityFunctions() {
+ // We need to untag stack frames as we unwind past them. That is the job of
+ // the personality function wrapper, which either wraps an existing
+ // personality function or acts as a personality function on its own. Each
+ // function that has a personality function or that can be unwound past has
+ // its personality function changed to a thunk that calls the personality
+ // function wrapper in the runtime.
+ MapVector<Constant *, std::vector<Function *>> PersonalityFns;
+ for (Function &F : M) {
+ if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress))
+ continue;
+
+ if (F.hasPersonalityFn()) {
+ PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F);
+ } else if (!F.hasFnAttribute(Attribute::NoUnwind)) {
+ PersonalityFns[nullptr].push_back(&F);
+ }
+ }
+
+ if (PersonalityFns.empty())
+ return;
+
+ FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction(
+ "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty,
+ Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy);
+ FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy);
+ FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy);
+
+ for (auto &P : PersonalityFns) {
+ std::string ThunkName = kHwasanPersonalityThunkName;
+ if (P.first)
+ ThunkName += ("." + P.first->getName()).str();
+ FunctionType *ThunkFnTy = FunctionType::get(
+ Int32Ty, {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int8PtrTy}, false);
+ bool IsLocal = P.first && (!isa<GlobalValue>(P.first) ||
+ cast<GlobalValue>(P.first)->hasLocalLinkage());
+ auto *ThunkFn = Function::Create(ThunkFnTy,
+ IsLocal ? GlobalValue::InternalLinkage
+ : GlobalValue::LinkOnceODRLinkage,
+ ThunkName, &M);
+ if (!IsLocal) {
+ ThunkFn->setVisibility(GlobalValue::HiddenVisibility);
+ ThunkFn->setComdat(M.getOrInsertComdat(ThunkName));
+ }
+
+ auto *BB = BasicBlock::Create(*C, "entry", ThunkFn);
+ IRBuilder<> IRB(BB);
+ CallInst *WrapperCall = IRB.CreateCall(
+ HwasanPersonalityWrapper,
+ {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2),
+ ThunkFn->getArg(3), ThunkFn->getArg(4),
+ P.first ? IRB.CreateBitCast(P.first, Int8PtrTy)
+ : Constant::getNullValue(Int8PtrTy),
+ IRB.CreateBitCast(UnwindGetGR.getCallee(), Int8PtrTy),
+ IRB.CreateBitCast(UnwindGetCFA.getCallee(), Int8PtrTy)});
+ WrapperCall->setTailCall();
+ IRB.CreateRet(WrapperCall);
+
+ for (Function *F : P.second)
+ F->setPersonalityFn(ThunkFn);
+ }
+}
+
+void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple) {
+ Scale = kDefaultShadowScale;
+ if (ClMappingOffset.getNumOccurrences() > 0) {
+ InGlobal = false;
+ InTls = false;
+ Offset = ClMappingOffset;
+ } else if (ClEnableKhwasan || ClInstrumentWithCalls) {
+ InGlobal = false;
+ InTls = false;
+ Offset = 0;
+ } else if (ClWithIfunc) {
+ InGlobal = true;
+ InTls = false;
+ Offset = kDynamicShadowSentinel;
+ } else if (ClWithTls) {
+ InGlobal = false;
+ InTls = true;
+ Offset = kDynamicShadowSentinel;
+ } else {
+ InGlobal = false;
+ InTls = false;
+ Offset = kDynamicShadowSentinel;
+ }
+}
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
new file mode 100644
index 000000000000..74d6e76eceb6
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -0,0 +1,443 @@
+//===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the transformation that promotes indirect calls to
+// conditional direct calls when the indirect-call value profile metadata is
+// available.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
+#include "llvm/Analysis/IndirectCallVisitor.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pgo-icall-prom"
+
+STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
+STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
+
+// Command-line option to disable indirect-call promotion, defaulting to false.
+// This is for debugging purposes only.
+static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
+ cl::desc("Disable indirect call promotion"));
+
+// Set the cutoff value for the promotion. If the value is non-zero, we stop
+// the transformation once the total number of promotions equals the cutoff
+// value.
+// For debug use only.
+static cl::opt<unsigned>
+ ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Max number of promotions for this compilation"));
+
+// If ICPCSSkip is non-zero, the first ICPCSSkip callsites will be skipped.
+// For debug use only.
+static cl::opt<unsigned>
+ ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Skip Callsite up to this number for this compilation"));
+
+// Set if the pass is called in LTO optimization. The difference in LTO mode
+// is that the pass won't prefix the source module name to internal linkage
+// symbols.
+static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
+ cl::desc("Run indirect-call promotion in LTO "
+ "mode"));
+
+// Set if the pass is called in SamplePGO mode. The difference for SamplePGO
+// mode is that it will add prof metadata to the created direct call.
+static cl::opt<bool>
+ ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
+ cl::desc("Run indirect-call promotion in SamplePGO mode"));
+
+// If the option is set to true, only call instructions will be considered for
+// transformation -- invoke instructions will be ignored.
+static cl::opt<bool>
+ ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
+ cl::desc("Run indirect-call promotion for call instructions "
+ "only"));
+
+// If the option is set to true, only invoke instructions will be considered for
+// transformation -- call instructions will be ignored.
+static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
+ cl::Hidden,
+ cl::desc("Run indirect-call promotion for "
+ "invoke instruction only"));
+
+// Dump the function-level IR if the transformation happened in this
+// function. For debug use only.
+static cl::opt<bool>
+ ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
+ cl::desc("Dump IR after transformation happens"));
+
+namespace {
+
+class PGOIndirectCallPromotionLegacyPass : public ModulePass {
+public:
+ static char ID;
+
+ PGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false)
+ : ModulePass(ID), InLTO(InLTO), SamplePGO(SamplePGO) {
+ initializePGOIndirectCallPromotionLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ }
+
+ StringRef getPassName() const override { return "PGOIndirectCallPromotion"; }
+
+private:
+ bool runOnModule(Module &M) override;
+
+ // If this pass is called in LTO, we need special handling of the PGOFuncName
+ // for static variables due to LTO's internalization.
+ bool InLTO;
+
+ // If this pass is called in SamplePGO, we need to add the prof metadata to
+ // the promoted direct call.
+ bool SamplePGO;
+};
+
+} // end anonymous namespace
+
+char PGOIndirectCallPromotionLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom",
+ "Use PGO instrumentation profile to promote indirect "
+ "calls to direct calls.",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_END(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom",
+ "Use PGO instrumentation profile to promote indirect "
+ "calls to direct calls.",
+ false, false)
+
+ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO,
+ bool SamplePGO) {
+ return new PGOIndirectCallPromotionLegacyPass(InLTO, SamplePGO);
+}
+
+namespace {
+
+// The main class that implements promotion of indirect calls to conditional
+// direct calls.
+class ICallPromotionFunc {
+private:
+ Function &F;
+ Module *M;
+
+ // Symtab that maps indirect call profile values to function names and
+ // definitions.
+ InstrProfSymtab *Symtab;
+
+ bool SamplePGO;
+
+ OptimizationRemarkEmitter &ORE;
+
+ // A struct that records the direct target and its call count.
+ struct PromotionCandidate {
+ Function *TargetFunction;
+ uint64_t Count;
+
+ PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
+ };
+
+ // Check if the indirect-call call site should be promoted and return the
+ // list of promotion candidates. Inst is the candidate indirect call,
+ // ValueDataRef contains the array of value profile data for profiled targets,
+ // TotalCount is the total profiled count of call executions, and
+ // NumCandidates is the number of candidate entries in ValueDataRef.
+ std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
+ Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
+ uint64_t TotalCount, uint32_t NumCandidates);
+
+ // Promote a list of targets for one indirect-call callsite. Return
+ // the number of promotions.
+ uint32_t tryToPromote(Instruction *Inst,
+ const std::vector<PromotionCandidate> &Candidates,
+ uint64_t &TotalCount);
+
+public:
+ ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab,
+ bool SamplePGO, OptimizationRemarkEmitter &ORE)
+ : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {}
+ ICallPromotionFunc(const ICallPromotionFunc &) = delete;
+ ICallPromotionFunc &operator=(const ICallPromotionFunc &) = delete;
+
+ bool processFunction(ProfileSummaryInfo *PSI);
+};
+
+} // end anonymous namespace
+
+// Indirect-call promotion heuristic. The direct targets are sorted based on
+// the count. Stop at the first target that is not promoted.
+std::vector<ICallPromotionFunc::PromotionCandidate>
+ICallPromotionFunc::getPromotionCandidatesForCallSite(
+ Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
+ uint64_t TotalCount, uint32_t NumCandidates) {
+ std::vector<PromotionCandidate> Ret;
+
+ LLVM_DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << *Inst
+ << " Num_targets: " << ValueDataRef.size()
+ << " Num_candidates: " << NumCandidates << "\n");
+ NumOfPGOICallsites++;
+ if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
+ LLVM_DEBUG(dbgs() << " Skip: User options.\n");
+ return Ret;
+ }
+
+ for (uint32_t I = 0; I < NumCandidates; I++) {
+ uint64_t Count = ValueDataRef[I].Count;
+ assert(Count <= TotalCount);
+ uint64_t Target = ValueDataRef[I].Value;
+ LLVM_DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
+ << " Target_func: " << Target << "\n");
+
+ if (ICPInvokeOnly && isa<CallInst>(Inst)) {
+ LLVM_DEBUG(dbgs() << " Not promote: User options.\n");
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst)
+ << " Not promote: User options";
+ });
+ break;
+ }
+ if (ICPCallOnly && isa<InvokeInst>(Inst)) {
+ LLVM_DEBUG(dbgs() << " Not promote: User option.\n");
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst)
+ << " Not promote: User options";
+ });
+ break;
+ }
+ if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
+ LLVM_DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", Inst)
+ << " Not promote: Cutoff reached";
+ });
+ break;
+ }
+
+ Function *TargetFunction = Symtab->getFunction(Target);
+ if (TargetFunction == nullptr) {
+ LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", Inst)
+ << "Cannot promote indirect call: target with md5sum "
+ << ore::NV("target md5sum", Target) << " not found";
+ });
+ break;
+ }
+
+ const char *Reason = nullptr;
+ if (!isLegalToPromote(CallSite(Inst), TargetFunction, &Reason)) {
+ using namespace ore;
+
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", Inst)
+ << "Cannot promote indirect call to "
+ << NV("TargetFunction", TargetFunction) << " with count of "
+ << NV("Count", Count) << ": " << Reason;
+ });
+ break;
+ }
+
+ Ret.push_back(PromotionCandidate(TargetFunction, Count));
+ TotalCount -= Count;
+ }
+ return Ret;
+}
+
+Instruction *llvm::pgo::promoteIndirectCall(Instruction *Inst,
+ Function *DirectCallee,
+ uint64_t Count, uint64_t TotalCount,
+ bool AttachProfToDirectCall,
+ OptimizationRemarkEmitter *ORE) {
+
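+ // Annotate the new conditional branch with weights derived from the value
+ // profile. For example, with Count == 900 and TotalCount == 1000 the direct
+ // call edge gets a weight of 900 and the fallthrough indirect call edge a
+ // weight of 100 (both possibly scaled down to fit the 32-bit weight encoding).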
+ uint64_t ElseCount = TotalCount - Count;
+ uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
+ uint64_t Scale = calculateCountScale(MaxCount);
+ MDBuilder MDB(Inst->getContext());
+ MDNode *BranchWeights = MDB.createBranchWeights(
+ scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
+
+ Instruction *NewInst =
+ promoteCallWithIfThenElse(CallSite(Inst), DirectCallee, BranchWeights);
+
+ if (AttachProfToDirectCall) {
+ MDBuilder MDB(NewInst->getContext());
+ NewInst->setMetadata(
+ LLVMContext::MD_prof,
+ MDB.createBranchWeights({static_cast<uint32_t>(Count)}));
+ }
+
+ using namespace ore;
+
+ if (ORE)
+ ORE->emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "Promoted", Inst)
+ << "Promote indirect call to " << NV("DirectCallee", DirectCallee)
+ << " with count " << NV("Count", Count) << " out of "
+ << NV("TotalCount", TotalCount);
+ });
+ return NewInst;
+}
+
+// Promote indirect-call to conditional direct-call for one callsite.
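+// Conceptually (illustrative sketch, names hypothetical), a profiled callsite
+//   %r = call i32 %fptr(i32 %a)
+// with dominant target @foo becomes
+//   if (%fptr == @foo)  %r1 = call i32 @foo(i32 %a)   ; direct, inlinable
+//   else                %r2 = call i32 %fptr(i32 %a)  ; original indirect call
+// with the two results merged by a phi and the branch weighted by the profile.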
+uint32_t ICallPromotionFunc::tryToPromote(
+ Instruction *Inst, const std::vector<PromotionCandidate> &Candidates,
+ uint64_t &TotalCount) {
+ uint32_t NumPromoted = 0;
+
+ for (auto &C : Candidates) {
+ uint64_t Count = C.Count;
+ pgo::promoteIndirectCall(Inst, C.TargetFunction, Count, TotalCount,
+ SamplePGO, &ORE);
+ assert(TotalCount >= Count);
+ TotalCount -= Count;
+ NumOfPGOICallPromotion++;
+ NumPromoted++;
+ }
+ return NumPromoted;
+}
+
+// Traverse all the indirect-call callsites and use the value profile
+// annotations to perform indirect-call promotion.
+bool ICallPromotionFunc::processFunction(ProfileSummaryInfo *PSI) {
+ bool Changed = false;
+ ICallPromotionAnalysis ICallAnalysis;
+ for (auto &I : findIndirectCalls(F)) {
+ uint32_t NumVals, NumCandidates;
+ uint64_t TotalCount;
+ auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
+ I, NumVals, TotalCount, NumCandidates);
+ if (!NumCandidates ||
+ (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
+ continue;
+ auto PromotionCandidates = getPromotionCandidatesForCallSite(
+ I, ICallProfDataRef, TotalCount, NumCandidates);
+ uint32_t NumPromoted = tryToPromote(I, PromotionCandidates, TotalCount);
+ if (NumPromoted == 0)
+ continue;
+
+ Changed = true;
+ // Adjust the MD.prof metadata. First delete the old one.
+ I->setMetadata(LLVMContext::MD_prof, nullptr);
+ // If all promoted, we don't need the MD.prof metadata.
+ if (TotalCount == 0 || NumPromoted == NumVals)
+ continue;
+ // Otherwise we need to write the un-promoted records back.
+ annotateValueSite(*M, *I, ICallProfDataRef.slice(NumPromoted), TotalCount,
+ IPVK_IndirectCallTarget, NumCandidates);
+ }
+ return Changed;
+}
+
+// A wrapper function that does the actual work.
+static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI,
+ bool InLTO, bool SamplePGO,
+ ModuleAnalysisManager *AM = nullptr) {
+ if (DisableICP)
+ return false;
+ InstrProfSymtab Symtab;
+ if (Error E = Symtab.create(M, InLTO)) {
+ std::string SymtabFailure = toString(std::move(E));
+ LLVM_DEBUG(dbgs() << "Failed to create symtab: " << SymtabFailure << "\n");
+ (void)SymtabFailure;
+ return false;
+ }
+ bool Changed = false;
+ for (auto &F : M) {
+ if (F.isDeclaration() || F.hasOptNone())
+ continue;
+
+ std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
+ OptimizationRemarkEmitter *ORE;
+ if (AM) {
+ auto &FAM =
+ AM->getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ } else {
+ OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
+ ORE = OwnedORE.get();
+ }
+
+ ICallPromotionFunc ICallPromotion(F, &M, &Symtab, SamplePGO, *ORE);
+ bool FuncChanged = ICallPromotion.processFunction(PSI);
+ if (ICPDUMPAFTER && FuncChanged) {
+ LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
+ LLVM_DEBUG(dbgs() << "\n");
+ }
+ Changed |= FuncChanged;
+ if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
+ LLVM_DEBUG(dbgs() << " Stop: Cutoff reached.\n");
+ break;
+ }
+ }
+ return Changed;
+}
+
+bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) {
+ ProfileSummaryInfo *PSI =
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+
+  // The command-line option takes priority for InLTO.
+ return promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
+ SamplePGO | ICPSamplePGOMode);
+}
+
+PreservedAnalyses PGOIndirectCallPromotion::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
+
+ if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
+ SamplePGO | ICPSamplePGOMode, &AM))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
diff --git a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
new file mode 100644
index 000000000000..93d3a8a14d5c
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
@@ -0,0 +1,212 @@
+//===- InstrOrderFile.cpp ---- Late IR instrumentation for order file ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments each function to record the order in which functions
+// are first executed at run time; the data is used to generate an order file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
+#include <fstream>
+#include <map>
+#include <mutex>
+#include <set>
+#include <sstream>
+
+using namespace llvm;
+#define DEBUG_TYPE "instrorderfile"
+
+static cl::opt<std::string> ClOrderFileWriteMapping(
+ "orderfile-write-mapping", cl::init(""),
+ cl::desc(
+ "Dump functions and their MD5 hash to deobfuscate profile data"),
+ cl::Hidden);
+
+namespace {
+
+// We need a global bitmap to tell whether a function has already been
+// executed, and a global buffer to record the order of functions: a
+// fixed-size circular buffer that stores the MD5 hash of each function
+// name. We also need a global variable that holds the index into the buffer.
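+//
+// Conceptually, the emitted per-module data looks like the following sketch
+// (illustrative; the real symbol names and sizes come from the
+// INSTR_PROF_ORDERFILE_* and INSTR_ORDER_FILE_* macros used below):
+//   uint64_t OrderFileBuffer[INSTR_ORDER_FILE_BUFFER_SIZE]; // MD5 hashes
+//   uint32_t BufferIdx;                                     // next free slot
+//   uint8_t  BitMap[NumFunctions];                          // executed flags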
+
+std::mutex MappingMutex;
+
+struct InstrOrderFile {
+private:
+ GlobalVariable *OrderFileBuffer;
+ GlobalVariable *BufferIdx;
+ GlobalVariable *BitMap;
+ ArrayType *BufferTy;
+ ArrayType *MapTy;
+
+public:
+ InstrOrderFile() {}
+
+ void createOrderFileData(Module &M) {
+ LLVMContext &Ctx = M.getContext();
+ int NumFunctions = 0;
+ for (Function &F : M) {
+ if (!F.isDeclaration())
+ NumFunctions++;
+ }
+
+ BufferTy =
+ ArrayType::get(Type::getInt64Ty(Ctx), INSTR_ORDER_FILE_BUFFER_SIZE);
+ Type *IdxTy = Type::getInt32Ty(Ctx);
+ MapTy = ArrayType::get(Type::getInt8Ty(Ctx), NumFunctions);
+
+ // Create the global variables.
+ std::string SymbolName = INSTR_PROF_ORDERFILE_BUFFER_NAME_STR;
+ OrderFileBuffer = new GlobalVariable(M, BufferTy, false, GlobalValue::LinkOnceODRLinkage,
+ Constant::getNullValue(BufferTy), SymbolName);
+ Triple TT = Triple(M.getTargetTriple());
+ OrderFileBuffer->setSection(
+ getInstrProfSectionName(IPSK_orderfile, TT.getObjectFormat()));
+
+ std::string IndexName = INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME_STR;
+ BufferIdx = new GlobalVariable(M, IdxTy, false, GlobalValue::LinkOnceODRLinkage,
+ Constant::getNullValue(IdxTy), IndexName);
+
+ std::string BitMapName = "bitmap_0";
+ BitMap = new GlobalVariable(M, MapTy, false, GlobalValue::PrivateLinkage,
+ Constant::getNullValue(MapTy), BitMapName);
+ }
+
+ // Generate the code sequence in the entry block of each function to
+ // update the buffer.
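+  //
+  // The generated control flow is roughly (illustrative sketch):
+  //   order_file_entry:
+  //     %flag = load i8, BitMap[FuncId]
+  //     store i8 1, BitMap[FuncId]
+  //     br i1 (%flag == 0), label %order_file_set, label %<original entry>
+  //   order_file_set:
+  //     %idx = atomicrmw add BufferIdx, 1
+  //     store i64 MD5(FuncName), OrderFileBuffer[%idx & INSTR_ORDER_FILE_BUFFER_MASK]
+  //     br label %<original entry>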
+ void generateCodeSequence(Module &M, Function &F, int FuncId) {
+ if (!ClOrderFileWriteMapping.empty()) {
+ std::lock_guard<std::mutex> LogLock(MappingMutex);
+ std::error_code EC;
+ llvm::raw_fd_ostream OS(ClOrderFileWriteMapping, EC,
+ llvm::sys::fs::OF_Append);
+ if (EC) {
+ report_fatal_error(Twine("Failed to open ") + ClOrderFileWriteMapping +
+ " to save mapping file for order file instrumentation\n");
+ } else {
+ std::stringstream stream;
+ stream << std::hex << MD5Hash(F.getName());
+ std::string singleLine = "MD5 " + stream.str() + " " +
+ std::string(F.getName()) + '\n';
+ OS << singleLine;
+ }
+ }
+
+ BasicBlock *OrigEntry = &F.getEntryBlock();
+
+ LLVMContext &Ctx = M.getContext();
+ IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
+ IntegerType *Int8Ty = Type::getInt8Ty(Ctx);
+
+ // Create a new entry block for instrumentation. We will check the bitmap
+ // in this basic block.
+ BasicBlock *NewEntry =
+ BasicBlock::Create(M.getContext(), "order_file_entry", &F, OrigEntry);
+ IRBuilder<> entryB(NewEntry);
+ // Create a basic block for updating the circular buffer.
+ BasicBlock *UpdateOrderFileBB =
+ BasicBlock::Create(M.getContext(), "order_file_set", &F, OrigEntry);
+ IRBuilder<> updateB(UpdateOrderFileBB);
+
+    // Check the bitmap; if it is already 1, do nothing.
+    // Otherwise, set the bit, grab the index, and update the buffer.
+ Value *IdxFlags[] = {ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, FuncId)};
+ Value *MapAddr = entryB.CreateGEP(MapTy, BitMap, IdxFlags, "");
+ LoadInst *loadBitMap = entryB.CreateLoad(Int8Ty, MapAddr, "");
+ entryB.CreateStore(ConstantInt::get(Int8Ty, 1), MapAddr);
+ Value *IsNotExecuted =
+ entryB.CreateICmpEQ(loadBitMap, ConstantInt::get(Int8Ty, 0));
+ entryB.CreateCondBr(IsNotExecuted, UpdateOrderFileBB, OrigEntry);
+
+ // Fill up UpdateOrderFileBB: grab the index, update the buffer!
+ Value *IdxVal = updateB.CreateAtomicRMW(
+ AtomicRMWInst::Add, BufferIdx, ConstantInt::get(Int32Ty, 1),
+ AtomicOrdering::SequentiallyConsistent);
+ // We need to wrap around the index to fit it inside the buffer.
+ Value *WrappedIdx = updateB.CreateAnd(
+ IdxVal, ConstantInt::get(Int32Ty, INSTR_ORDER_FILE_BUFFER_MASK));
+ Value *BufferGEPIdx[] = {ConstantInt::get(Int32Ty, 0), WrappedIdx};
+ Value *BufferAddr =
+ updateB.CreateGEP(BufferTy, OrderFileBuffer, BufferGEPIdx, "");
+ updateB.CreateStore(ConstantInt::get(Type::getInt64Ty(Ctx), MD5Hash(F.getName())),
+ BufferAddr);
+ updateB.CreateBr(OrigEntry);
+ }
+
+ bool run(Module &M) {
+ createOrderFileData(M);
+
+ int FuncId = 0;
+ for (Function &F : M) {
+ if (F.isDeclaration())
+ continue;
+ generateCodeSequence(M, F, FuncId);
+ ++FuncId;
+ }
+
+ return true;
+ }
+
+}; // End of InstrOrderFile struct
+
+class InstrOrderFileLegacyPass : public ModulePass {
+public:
+ static char ID;
+
+ InstrOrderFileLegacyPass() : ModulePass(ID) {
+ initializeInstrOrderFileLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+};
+
+} // End anonymous namespace
+
+bool InstrOrderFileLegacyPass::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
+ return InstrOrderFile().run(M);
+}
+
+PreservedAnalyses
+InstrOrderFilePass::run(Module &M, ModuleAnalysisManager &AM) {
+ if (InstrOrderFile().run(M))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+INITIALIZE_PASS_BEGIN(InstrOrderFileLegacyPass, "instrorderfile",
+ "Instrumentation for Order File", false, false)
+INITIALIZE_PASS_END(InstrOrderFileLegacyPass, "instrorderfile",
+ "Instrumentation for Order File", false, false)
+
+char InstrOrderFileLegacyPass::ID = 0;
+
+ModulePass *llvm::createInstrOrderFilePass() {
+ return new InstrOrderFileLegacyPass();
+}
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
new file mode 100644
index 000000000000..1f092a5f3103
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -0,0 +1,1048 @@
+//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
+// It also builds the data structures and initialization code needed for
+// updating execution counts and emitting the profile at runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "instrprof"
+
+// The start and end values of the precise value profile range for memory
+// intrinsic sizes.
+cl::opt<std::string> MemOPSizeRange(
+ "memop-size-range",
+ cl::desc("Set the range of size in memory intrinsic calls to be profiled "
+ "precisely, in a format of <start_val>:<end_val>"),
+ cl::init(""));
+
+// The value that is considered to be a large value in memory intrinsics.
+cl::opt<unsigned> MemOPSizeLarge(
+ "memop-size-large",
+ cl::desc("Set large value thresthold in memory intrinsic size profiling. "
+ "Value of 0 disables the large value profiling."),
+ cl::init(8192));
+
+namespace {
+
+cl::opt<bool> DoNameCompression("enable-name-compression",
+ cl::desc("Enable name string compression"),
+ cl::init(true));
+
+cl::opt<bool> DoHashBasedCounterSplit(
+ "hash-based-counter-split",
+ cl::desc("Rename counter variable of a comdat function based on cfg hash"),
+ cl::init(true));
+
+cl::opt<bool> ValueProfileStaticAlloc(
+ "vp-static-alloc",
+ cl::desc("Do static counter allocation for value profiler"),
+ cl::init(true));
+
+cl::opt<double> NumCountersPerValueSite(
+ "vp-counters-per-site",
+ cl::desc("The average number of profile counters allocated "
+ "per value profiling site."),
+ // This is set to a very small value because in real programs, only
+    // a very small percentage of value sites have non-zero targets, e.g., 1/30.
+ // For those sites with non-zero profile, the average number of targets
+ // is usually smaller than 2.
+ cl::init(1.0));
+
+cl::opt<bool> AtomicCounterUpdateAll(
+ "instrprof-atomic-counter-update-all", cl::ZeroOrMore,
+ cl::desc("Make all profile counter updates atomic (for testing only)"),
+ cl::init(false));
+
+cl::opt<bool> AtomicCounterUpdatePromoted(
+ "atomic-counter-update-promoted", cl::ZeroOrMore,
+ cl::desc("Do counter update using atomic fetch add "
+ " for promoted counters only"),
+ cl::init(false));
+
+// If the option is not specified, whether counter promotion is done by
+// default depends on how the instrumentation lowering pipeline is set up,
+// i.e., the default value of this option does not by itself mean that
+// promotion will be done. Explicitly setting this option overrides the
+// default behavior.
+cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore,
+ cl::desc("Do counter register promotion"),
+ cl::init(false));
+cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
+ cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20),
+ cl::desc("Max number counter promotions per loop to avoid"
+ " increasing register pressure too much"));
+
+// A debug option
+cl::opt<int>
+ MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1),
+ cl::desc("Max number of allowed counter promotions"));
+
+cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
+ cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3),
+ cl::desc("The max number of exiting blocks of a loop to allow "
+ " speculative counter promotion"));
+
+cl::opt<bool> SpeculativeCounterPromotionToLoop(
+ cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false),
+ cl::desc("When the option is false, if the target block is in a loop, "
+ "the promotion will be disallowed unless the promoted counter "
+ " update can be further/iteratively promoted into an acyclic "
+ " region."));
+
+cl::opt<bool> IterativeCounterPromotion(
+ cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true),
+ cl::desc("Allow counter promotion across the whole loop nest."));
+
+class InstrProfilingLegacyPass : public ModulePass {
+ InstrProfiling InstrProf;
+
+public:
+ static char ID;
+
+ InstrProfilingLegacyPass() : ModulePass(ID) {}
+ InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS = false)
+ : ModulePass(ID), InstrProf(Options, IsCS) {}
+
+ StringRef getPassName() const override {
+ return "Frontend instrumentation-based coverage lowering";
+ }
+
+ bool runOnModule(Module &M) override {
+ auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
+ return InstrProf.run(M, GetTLI);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ }
+};
+
+///
+/// A helper class to promote one counter RMW operation in the loop
+/// into a register update.
+///
+/// The RMW update for the counter will be sunk out of the loop after
+/// the transformation.
+///
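+/// For example, the intended effect is (illustrative sketch, not literal IR):
+///   before:  loop: %c = load @__profc_f; %v = add %c, 1; store %v, @__profc_f
+///   after:   loop: %v = add %v.phi, 1           ; counter kept in a register
+///            exit: %o = load @__profc_f; store (add %o, %v.live), @__profc_f
+///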
+class PGOCounterPromoterHelper : public LoadAndStorePromoter {
+public:
+ PGOCounterPromoterHelper(
+ Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
+ BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
+ ArrayRef<Instruction *> InsertPts,
+ DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
+ LoopInfo &LI)
+ : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
+ InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
+ assert(isa<LoadInst>(L));
+ assert(isa<StoreInst>(S));
+ SSA.AddAvailableValue(PH, Init);
+ }
+
+ void doExtraRewritesBeforeFinalDeletion() override {
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+ Instruction *InsertPos = InsertPts[i];
+ // Get LiveIn value into the ExitBlock. If there are multiple
+ // predecessors, the value is defined by a PHI node in this
+ // block.
+ Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+ Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
+ Type *Ty = LiveInValue->getType();
+ IRBuilder<> Builder(InsertPos);
+ if (AtomicCounterUpdatePromoted)
+      // Atomic updates can currently only be promoted across the current
+      // loop, not the whole loop nest.
+ Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
+ AtomicOrdering::SequentiallyConsistent);
+ else {
+ LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted");
+ auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
+ auto *NewStore = Builder.CreateStore(NewVal, Addr);
+
+ // Now update the parent loop's candidate list:
+ if (IterativeCounterPromotion) {
+ auto *TargetLoop = LI.getLoopFor(ExitBlock);
+ if (TargetLoop)
+ LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
+ }
+ }
+ }
+ }
+
+private:
+ Instruction *Store;
+ ArrayRef<BasicBlock *> ExitBlocks;
+ ArrayRef<Instruction *> InsertPts;
+ DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
+ LoopInfo &LI;
+};
+
+/// A helper class to do register promotion for all profile counter
+/// updates in a loop.
+///
+class PGOCounterPromoter {
+public:
+ PGOCounterPromoter(
+ DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
+ Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)
+ : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop),
+ LI(LI), BFI(BFI) {
+
+ SmallVector<BasicBlock *, 8> LoopExitBlocks;
+ SmallPtrSet<BasicBlock *, 8> BlockSet;
+ L.getExitBlocks(LoopExitBlocks);
+
+ for (BasicBlock *ExitBlock : LoopExitBlocks) {
+ if (BlockSet.insert(ExitBlock).second) {
+ ExitBlocks.push_back(ExitBlock);
+ InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
+ }
+ }
+ }
+
+ bool run(int64_t *NumPromoted) {
+ // Skip 'infinite' loops:
+ if (ExitBlocks.size() == 0)
+ return false;
+ unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
+ if (MaxProm == 0)
+ return false;
+
+ unsigned Promoted = 0;
+ for (auto &Cand : LoopToCandidates[&L]) {
+
+ SmallVector<PHINode *, 4> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+ Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
+
+ // If BFI is set, we will use it to guide the promotions.
+ if (BFI) {
+ auto *BB = Cand.first->getParent();
+ auto InstrCount = BFI->getBlockProfileCount(BB);
+ if (!InstrCount)
+ continue;
+ auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader());
+ // If the average loop trip count is not greater than 1.5, we skip
+ // promotion.
+ if (PreheaderCount &&
+ (PreheaderCount.getValue() * 3) >= (InstrCount.getValue() * 2))
+ continue;
+ }
+
+ PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
+ L.getLoopPreheader(), ExitBlocks,
+ InsertPts, LoopToCandidates, LI);
+ Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
+ Promoted++;
+ if (Promoted >= MaxProm)
+ break;
+
+ (*NumPromoted)++;
+ if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
+ break;
+ }
+
+ LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
+ << L.getLoopDepth() << ")\n");
+ return Promoted != 0;
+ }
+
+private:
+ bool allowSpeculativeCounterPromotion(Loop *LP) {
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ L.getExitingBlocks(ExitingBlocks);
+    // Not considered speculative.
+ if (ExitingBlocks.size() == 1)
+ return true;
+ if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
+ return false;
+ return true;
+ }
+
+ // Returns the max number of Counter Promotions for LP.
+ unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
+ // We can't insert into a catchswitch.
+ SmallVector<BasicBlock *, 8> LoopExitBlocks;
+ LP->getExitBlocks(LoopExitBlocks);
+ if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
+ return isa<CatchSwitchInst>(Exit->getTerminator());
+ }))
+ return 0;
+
+ if (!LP->hasDedicatedExits())
+ return 0;
+
+ BasicBlock *PH = LP->getLoopPreheader();
+ if (!PH)
+ return 0;
+
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ LP->getExitingBlocks(ExitingBlocks);
+
+ // If BFI is set, we do more aggressive promotions based on BFI.
+ if (BFI)
+ return (unsigned)-1;
+
+    // Not considered speculative.
+ if (ExitingBlocks.size() == 1)
+ return MaxNumOfPromotionsPerLoop;
+
+ if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
+ return 0;
+
+ // Whether the target block is in a loop does not matter:
+ if (SpeculativeCounterPromotionToLoop)
+ return MaxNumOfPromotionsPerLoop;
+
+ // Now check the target block:
+ unsigned MaxProm = MaxNumOfPromotionsPerLoop;
+ for (auto *TargetBlock : LoopExitBlocks) {
+ auto *TargetLoop = LI.getLoopFor(TargetBlock);
+ if (!TargetLoop)
+ continue;
+ unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
+ unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
+ MaxProm =
+ std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
+ PendingCandsInTarget);
+ }
+ return MaxProm;
+ }
+
+ DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ SmallVector<Instruction *, 8> InsertPts;
+ Loop &L;
+ LoopInfo &LI;
+ BlockFrequencyInfo *BFI;
+};
+
+} // end anonymous namespace
+
+PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+ if (!run(M, GetTLI))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+
+char InstrProfilingLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(
+ InstrProfilingLegacyPass, "instrprof",
+ "Frontend instrumentation-based coverage lowering.", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(
+ InstrProfilingLegacyPass, "instrprof",
+ "Frontend instrumentation-based coverage lowering.", false, false)
+
+ModulePass *
+llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options,
+ bool IsCS) {
+ return new InstrProfilingLegacyPass(Options, IsCS);
+}
+
+static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
+ InstrProfIncrementInst *Inc = dyn_cast<InstrProfIncrementInstStep>(Instr);
+ if (Inc)
+ return Inc;
+ return dyn_cast<InstrProfIncrementInst>(Instr);
+}
+
+bool InstrProfiling::lowerIntrinsics(Function *F) {
+ bool MadeChange = false;
+ PromotionCandidates.clear();
+ for (BasicBlock &BB : *F) {
+ for (auto I = BB.begin(), E = BB.end(); I != E;) {
+ auto Instr = I++;
+ InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
+ if (Inc) {
+ lowerIncrement(Inc);
+ MadeChange = true;
+ } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
+ lowerValueProfileInst(Ind);
+ MadeChange = true;
+ }
+ }
+ }
+
+ if (!MadeChange)
+ return false;
+
+ promoteCounterLoadStores(F);
+ return true;
+}
+
+bool InstrProfiling::isCounterPromotionEnabled() const {
+ if (DoCounterPromotion.getNumOccurrences() > 0)
+ return DoCounterPromotion;
+
+ return Options.DoCounterPromotion;
+}
+
+void InstrProfiling::promoteCounterLoadStores(Function *F) {
+ if (!isCounterPromotionEnabled())
+ return;
+
+ DominatorTree DT(*F);
+ LoopInfo LI(DT);
+ DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
+
+ std::unique_ptr<BlockFrequencyInfo> BFI;
+ if (Options.UseBFIInPromotion) {
+ std::unique_ptr<BranchProbabilityInfo> BPI;
+ BPI.reset(new BranchProbabilityInfo(*F, LI, &GetTLI(*F)));
+ BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI));
+ }
+
+ for (const auto &LoadStore : PromotionCandidates) {
+ auto *CounterLoad = LoadStore.first;
+ auto *CounterStore = LoadStore.second;
+ BasicBlock *BB = CounterLoad->getParent();
+ Loop *ParentLoop = LI.getLoopFor(BB);
+ if (!ParentLoop)
+ continue;
+ LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
+ }
+
+ SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();
+
+ // Do a post-order traversal of the loops so that counter updates can be
+ // iteratively hoisted outside the loop nest.
+ for (auto *Loop : llvm::reverse(Loops)) {
+ PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get());
+ Promoter.run(&TotalCountersPromoted);
+ }
+}
+
+/// Check if the module contains uses of any profiling intrinsics.
+static bool containsProfilingIntrinsics(Module &M) {
+ if (auto *F = M.getFunction(
+ Intrinsic::getName(llvm::Intrinsic::instrprof_increment)))
+ if (!F->use_empty())
+ return true;
+ if (auto *F = M.getFunction(
+ Intrinsic::getName(llvm::Intrinsic::instrprof_increment_step)))
+ if (!F->use_empty())
+ return true;
+ if (auto *F = M.getFunction(
+ Intrinsic::getName(llvm::Intrinsic::instrprof_value_profile)))
+ if (!F->use_empty())
+ return true;
+ return false;
+}
+
+bool InstrProfiling::run(
+ Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI) {
+ this->M = &M;
+ this->GetTLI = std::move(GetTLI);
+ NamesVar = nullptr;
+ NamesSize = 0;
+ ProfileDataMap.clear();
+ UsedVars.clear();
+ getMemOPSizeRangeFromOption(MemOPSizeRange, MemOPSizeRangeStart,
+ MemOPSizeRangeLast);
+ TT = Triple(M.getTargetTriple());
+
+ // Emit the runtime hook even if no counters are present.
+ bool MadeChange = emitRuntimeHook();
+
+ // Improve compile time by avoiding linear scans when there is no work.
+ GlobalVariable *CoverageNamesVar =
+ M.getNamedGlobal(getCoverageUnusedNamesVarName());
+ if (!containsProfilingIntrinsics(M) && !CoverageNamesVar)
+ return MadeChange;
+
+  // We did not know how many value sites there would be inside the
+  // instrumented function when it was instrumented. Count the instrumented
+  // value sites here so the number can be entered as a field in the profile
+  // data variable.
+ for (Function &F : M) {
+ InstrProfIncrementInst *FirstProfIncInst = nullptr;
+ for (BasicBlock &BB : F)
+ for (auto I = BB.begin(), E = BB.end(); I != E; I++)
+ if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
+ computeNumValueSiteCounts(Ind);
+ else if (FirstProfIncInst == nullptr)
+ FirstProfIncInst = dyn_cast<InstrProfIncrementInst>(I);
+
+    // Value profiling intrinsic lowering requires the per-function profile
+    // data variable to be created first.
+ if (FirstProfIncInst != nullptr)
+ static_cast<void>(getOrCreateRegionCounters(FirstProfIncInst));
+ }
+
+ for (Function &F : M)
+ MadeChange |= lowerIntrinsics(&F);
+
+ if (CoverageNamesVar) {
+ lowerCoverageData(CoverageNamesVar);
+ MadeChange = true;
+ }
+
+ if (!MadeChange)
+ return false;
+
+ emitVNodes();
+ emitNameData();
+ emitRegistration();
+ emitUses();
+ emitInitialization();
+ return true;
+}
+
+static FunctionCallee
+getOrInsertValueProfilingCall(Module &M, const TargetLibraryInfo &TLI,
+ bool IsRange = false) {
+ LLVMContext &Ctx = M.getContext();
+ auto *ReturnTy = Type::getVoidTy(M.getContext());
+
+ AttributeList AL;
+ if (auto AK = TLI.getExtAttrForI32Param(false))
+ AL = AL.addParamAttribute(M.getContext(), 2, AK);
+
+ if (!IsRange) {
+ Type *ParamTypes[] = {
+#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
+#include "llvm/ProfileData/InstrProfData.inc"
+ };
+ auto *ValueProfilingCallTy =
+ FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
+ return M.getOrInsertFunction(getInstrProfValueProfFuncName(),
+ ValueProfilingCallTy, AL);
+ } else {
+ Type *RangeParamTypes[] = {
+#define VALUE_RANGE_PROF 1
+#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
+#include "llvm/ProfileData/InstrProfData.inc"
+#undef VALUE_RANGE_PROF
+ };
+ auto *ValueRangeProfilingCallTy =
+ FunctionType::get(ReturnTy, makeArrayRef(RangeParamTypes), false);
+ return M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
+ ValueRangeProfilingCallTy, AL);
+ }
+}
+
+void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
+ GlobalVariable *Name = Ind->getName();
+ uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
+ uint64_t Index = Ind->getIndex()->getZExtValue();
+ auto It = ProfileDataMap.find(Name);
+ if (It == ProfileDataMap.end()) {
+ PerFunctionProfileData PD;
+ PD.NumValueSites[ValueKind] = Index + 1;
+ ProfileDataMap[Name] = PD;
+ } else if (It->second.NumValueSites[ValueKind] <= Index)
+ It->second.NumValueSites[ValueKind] = Index + 1;
+}
+
+void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
+ GlobalVariable *Name = Ind->getName();
+ auto It = ProfileDataMap.find(Name);
+ assert(It != ProfileDataMap.end() && It->second.DataVar &&
+ "value profiling detected in function with no counter incerement");
+
+ GlobalVariable *DataVar = It->second.DataVar;
+ uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
+ uint64_t Index = Ind->getIndex()->getZExtValue();
+ for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
+ Index += It->second.NumValueSites[Kind];
+
+ IRBuilder<> Builder(Ind);
+ bool IsRange = (Ind->getValueKind()->getZExtValue() ==
+ llvm::InstrProfValueKind::IPVK_MemOPSize);
+ CallInst *Call = nullptr;
+ auto *TLI = &GetTLI(*Ind->getFunction());
+ if (!IsRange) {
+ Value *Args[3] = {Ind->getTargetValue(),
+ Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
+ Builder.getInt32(Index)};
+ Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args);
+ } else {
+ Value *Args[6] = {
+ Ind->getTargetValue(),
+ Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
+ Builder.getInt32(Index),
+ Builder.getInt64(MemOPSizeRangeStart),
+ Builder.getInt64(MemOPSizeRangeLast),
+ Builder.getInt64(MemOPSizeLarge == 0 ? INT64_MIN : MemOPSizeLarge)};
+ Call =
+ Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), Args);
+ }
+ if (auto AK = TLI->getExtAttrForI32Param(false))
+ Call->addParamAttr(2, AK);
+ Ind->replaceAllUsesWith(Call);
+ Ind->eraseFromParent();
+}
+
+void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
+ GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
+
+ IRBuilder<> Builder(Inc);
+ uint64_t Index = Inc->getIndex()->getZExtValue();
+ Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters->getValueType(),
+ Counters, 0, Index);
+
+ if (Options.Atomic || AtomicCounterUpdateAll) {
+ Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
+ AtomicOrdering::Monotonic);
+ } else {
+ Value *IncStep = Inc->getStep();
+ Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount");
+ auto *Count = Builder.CreateAdd(Load, Inc->getStep());
+ auto *Store = Builder.CreateStore(Count, Addr);
+ if (isCounterPromotionEnabled())
+ PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
+ }
+ Inc->eraseFromParent();
+}
+
+void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
+ ConstantArray *Names =
+ cast<ConstantArray>(CoverageNamesVar->getInitializer());
+ for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
+ Constant *NC = Names->getOperand(I);
+ Value *V = NC->stripPointerCasts();
+ assert(isa<GlobalVariable>(V) && "Missing reference to function name");
+ GlobalVariable *Name = cast<GlobalVariable>(V);
+
+ Name->setLinkage(GlobalValue::PrivateLinkage);
+ ReferencedNames.push_back(Name);
+ NC->dropAllReferences();
+ }
+ CoverageNamesVar->eraseFromParent();
+}
+
+/// Get the name of a profiling variable for a particular function.
+static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) {
+ StringRef NamePrefix = getInstrProfNameVarPrefix();
+ StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
+ Function *F = Inc->getParent()->getParent();
+ Module *M = F->getParent();
+ if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
+ !canRenameComdatFunc(*F))
+ return (Prefix + Name).str();
+ uint64_t FuncHash = Inc->getHash()->getZExtValue();
+ SmallVector<char, 24> HashPostfix;
+ if (Name.endswith((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
+ return (Prefix + Name).str();
+ return (Prefix + Name + "." + Twine(FuncHash)).str();
+}
+
+static inline bool shouldRecordFunctionAddr(Function *F) {
+ // Check the linkage
+ bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
+ if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
+ !HasAvailableExternallyLinkage)
+ return true;
+
+ // A function marked 'alwaysinline' with available_externally linkage can't
+ // have its address taken. Doing so would create an undefined external ref to
+ // the function, which would fail to link.
+ if (HasAvailableExternallyLinkage &&
+ F->hasFnAttribute(Attribute::AlwaysInline))
+ return false;
+
+ // Prohibit function address recording if the function is both internal and
+ // COMDAT. This avoids the profile data variable referencing internal symbols
+ // in COMDAT.
+ if (F->hasLocalLinkage() && F->hasComdat())
+ return false;
+
+  // Check whether this function has uses other than direct calls or invokes.
+  // Inline virtual functions have linkonce_odr linkage. When a key method
+  // exists, the vtable will only be emitted in the TU where the key method
+  // is defined. In a TU where the vtable is not available, the function won't
+  // be 'addresstaken'. If its address is not recorded here, the profile data
+  // with the missing address may be picked by the linker, leading to missing
+  // indirect call target info.
+ return F->hasAddressTaken() || F->hasLinkOnceLinkage();
+}
+
+static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
+ // Don't do this for Darwin. compiler-rt uses linker magic.
+ if (TT.isOSDarwin())
+ return false;
+ // Use linker script magic to get data/cnts/name start/end.
+ if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() ||
+ TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS4CPU() ||
+ TT.isOSWindows())
+ return false;
+
+ return true;
+}
+
+GlobalVariable *
+InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
+ GlobalVariable *NamePtr = Inc->getName();
+ auto It = ProfileDataMap.find(NamePtr);
+ PerFunctionProfileData PD;
+ if (It != ProfileDataMap.end()) {
+ if (It->second.RegionCounters)
+ return It->second.RegionCounters;
+ PD = It->second;
+ }
+
+ // Match the linkage and visibility of the name global. COFF supports using
+ // comdats with internal symbols, so do that if we can.
+ Function *Fn = Inc->getParent()->getParent();
+ GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
+ GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
+ if (TT.isOSBinFormatCOFF()) {
+ Linkage = GlobalValue::InternalLinkage;
+ Visibility = GlobalValue::DefaultVisibility;
+ }
+
+ // Move the name variable to the right section. Place them in a COMDAT group
+ // if the associated function is a COMDAT. This will make sure that only one
+ // copy of counters of the COMDAT function will be emitted after linking. Keep
+ // in mind that this pass may run before the inliner, so we need to create a
+ // new comdat group for the counters and profiling data. If we use the comdat
+ // of the parent function, that will result in relocations against discarded
+ // sections.
+ bool NeedComdat = needsComdatForCounter(*Fn, *M);
+ if (NeedComdat) {
+ if (TT.isOSBinFormatCOFF()) {
+ // For COFF, put the counters, data, and values each into their own
+ // comdats. We can't use a group because the Visual C++ linker will
+ // report duplicate symbol errors if there are multiple external symbols
+ // with the same name marked IMAGE_COMDAT_SELECT_ASSOCIATIVE.
+ Linkage = GlobalValue::LinkOnceODRLinkage;
+ Visibility = GlobalValue::HiddenVisibility;
+ }
+ }
+ auto MaybeSetComdat = [=](GlobalVariable *GV) {
+ if (NeedComdat)
+ GV->setComdat(M->getOrInsertComdat(GV->getName()));
+ };
+
+ uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
+ LLVMContext &Ctx = M->getContext();
+ ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
+
+ // Create the counters variable.
+ auto *CounterPtr =
+ new GlobalVariable(*M, CounterTy, false, Linkage,
+ Constant::getNullValue(CounterTy),
+ getVarName(Inc, getInstrProfCountersVarPrefix()));
+ CounterPtr->setVisibility(Visibility);
+ CounterPtr->setSection(
+ getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
+ CounterPtr->setAlignment(Align(8));
+ MaybeSetComdat(CounterPtr);
+ CounterPtr->setLinkage(Linkage);
+
+ auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
+  // Statically allocate the array of pointers to value profile nodes for
+  // the current function.
+ Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
+ if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(TT)) {
+ uint64_t NS = 0;
+ for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+ NS += PD.NumValueSites[Kind];
+ if (NS) {
+ ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
+
+ auto *ValuesVar =
+ new GlobalVariable(*M, ValuesTy, false, Linkage,
+ Constant::getNullValue(ValuesTy),
+ getVarName(Inc, getInstrProfValuesVarPrefix()));
+ ValuesVar->setVisibility(Visibility);
+ ValuesVar->setSection(
+ getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
+ ValuesVar->setAlignment(Align(8));
+ MaybeSetComdat(ValuesVar);
+ ValuesPtrExpr =
+ ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
+ }
+ }
+
+ // Create data variable.
+ auto *Int16Ty = Type::getInt16Ty(Ctx);
+ auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
+ Type *DataTypes[] = {
+#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
+#include "llvm/ProfileData/InstrProfData.inc"
+ };
+ auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes));
+
+ Constant *FunctionAddr = shouldRecordFunctionAddr(Fn)
+ ? ConstantExpr::getBitCast(Fn, Int8PtrTy)
+ : ConstantPointerNull::get(Int8PtrTy);
+
+ Constant *Int16ArrayVals[IPVK_Last + 1];
+ for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+ Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
+
+ Constant *DataVals[] = {
+#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
+#include "llvm/ProfileData/InstrProfData.inc"
+ };
+ auto *Data = new GlobalVariable(*M, DataTy, false, Linkage,
+ ConstantStruct::get(DataTy, DataVals),
+ getVarName(Inc, getInstrProfDataVarPrefix()));
+ Data->setVisibility(Visibility);
+ Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
+ Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
+ MaybeSetComdat(Data);
+ Data->setLinkage(Linkage);
+
+ PD.RegionCounters = CounterPtr;
+ PD.DataVar = Data;
+ ProfileDataMap[NamePtr] = PD;
+
+ // Mark the data variable as used so that it isn't stripped out.
+ UsedVars.push_back(Data);
+  // Now that the linkage set by the FE has been passed to the data and counter
+  // variables, reset the Name variable's linkage and visibility to private so
+  // that it can be removed later by the compiler.
+ NamePtr->setLinkage(GlobalValue::PrivateLinkage);
+ // Collect the referenced names to be used by emitNameData.
+ ReferencedNames.push_back(NamePtr);
+
+ return CounterPtr;
+}
+
+void InstrProfiling::emitVNodes() {
+ if (!ValueProfileStaticAlloc)
+ return;
+
+ // For now only support this on platforms that do
+ // not require runtime registration to discover
+ // named section start/end.
+ if (needsRuntimeRegistrationOfSectionRange(TT))
+ return;
+
+ size_t TotalNS = 0;
+ for (auto &PD : ProfileDataMap) {
+ for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+ TotalNS += PD.second.NumValueSites[Kind];
+ }
+
+ if (!TotalNS)
+ return;
+
+ uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
+// Heuristic for small programs with very few total value sites.
+// The default value of vp-counters-per-site is chosen based on
+// the observation that large apps usually have a low percentage
+// of value sites that actually have any profile data, and thus
+// the average number of counters per site is low. For small
+// apps with very few sites, this may not be true. Bump up the
+// number of counters in this case.
+#define INSTR_PROF_MIN_VAL_COUNTS 10
+ if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
+ NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2);
+
+ auto &Ctx = M->getContext();
+ Type *VNodeTypes[] = {
+#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
+#include "llvm/ProfileData/InstrProfData.inc"
+ };
+ auto *VNodeTy = StructType::get(Ctx, makeArrayRef(VNodeTypes));
+
+ ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);
+ auto *VNodesVar = new GlobalVariable(
+ *M, VNodesTy, false, GlobalValue::PrivateLinkage,
+ Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
+ VNodesVar->setSection(
+ getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
+ UsedVars.push_back(VNodesVar);
+}
+
+void InstrProfiling::emitNameData() {
+ std::string UncompressedData;
+
+ if (ReferencedNames.empty())
+ return;
+
+ std::string CompressedNameStr;
+ if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
+ DoNameCompression)) {
+ report_fatal_error(toString(std::move(E)), false);
+ }
+
+ auto &Ctx = M->getContext();
+ auto *NamesVal = ConstantDataArray::getString(
+ Ctx, StringRef(CompressedNameStr), false);
+ NamesVar = new GlobalVariable(*M, NamesVal->getType(), true,
+ GlobalValue::PrivateLinkage, NamesVal,
+ getInstrProfNamesVarName());
+ NamesSize = CompressedNameStr.size();
+ NamesVar->setSection(
+ getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
+ // On COFF, it's important to reduce the alignment down to 1 to prevent the
+ // linker from inserting padding before the start of the names section or
+ // between names entries.
+ NamesVar->setAlignment(Align::None());
+ UsedVars.push_back(NamesVar);
+
+ for (auto *NamePtr : ReferencedNames)
+ NamePtr->eraseFromParent();
+}
+
+void InstrProfiling::emitRegistration() {
+ if (!needsRuntimeRegistrationOfSectionRange(TT))
+ return;
+
+ // Construct the function.
+ auto *VoidTy = Type::getVoidTy(M->getContext());
+ auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext());
+ auto *Int64Ty = Type::getInt64Ty(M->getContext());
+ auto *RegisterFTy = FunctionType::get(VoidTy, false);
+ auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
+ getInstrProfRegFuncsName(), M);
+ RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ if (Options.NoRedZone)
+ RegisterF->addFnAttr(Attribute::NoRedZone);
+
+ auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
+ auto *RuntimeRegisterF =
+ Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
+ getInstrProfRegFuncName(), M);
+
+ IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
+ for (Value *Data : UsedVars)
+ if (Data != NamesVar && !isa<Function>(Data))
+ IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
+
+ if (NamesVar) {
+ Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
+ auto *NamesRegisterTy =
+ FunctionType::get(VoidTy, makeArrayRef(ParamTypes), false);
+ auto *NamesRegisterF =
+ Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
+ getInstrProfNamesRegFuncName(), M);
+ IRB.CreateCall(NamesRegisterF, {IRB.CreateBitCast(NamesVar, VoidPtrTy),
+ IRB.getInt64(NamesSize)});
+ }
+
+ IRB.CreateRetVoid();
+}
+
+bool InstrProfiling::emitRuntimeHook() {
+  // We expect the linker to be invoked with the -u<hook_var> flag on Linux,
+  // in which case there is no need to emit the user function.
+ if (TT.isOSLinux())
+ return false;
+
+ // If the module's provided its own runtime, we don't need to do anything.
+ if (M->getGlobalVariable(getInstrProfRuntimeHookVarName()))
+ return false;
+
+ // Declare an external variable that will pull in the runtime initialization.
+ auto *Int32Ty = Type::getInt32Ty(M->getContext());
+ auto *Var =
+ new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
+ nullptr, getInstrProfRuntimeHookVarName());
+
+ // Make a function that uses it.
+ auto *User = Function::Create(FunctionType::get(Int32Ty, false),
+ GlobalValue::LinkOnceODRLinkage,
+ getInstrProfRuntimeHookVarUseFuncName(), M);
+ User->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ User->addFnAttr(Attribute::NoRedZone);
+ User->setVisibility(GlobalValue::HiddenVisibility);
+ if (TT.supportsCOMDAT())
+ User->setComdat(M->getOrInsertComdat(User->getName()));
+
+ IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
+ auto *Load = IRB.CreateLoad(Int32Ty, Var);
+ IRB.CreateRet(Load);
+
+ // Mark the user variable as used so that it isn't stripped out.
+ UsedVars.push_back(User);
+ return true;
+}
+
+void InstrProfiling::emitUses() {
+ if (!UsedVars.empty())
+ appendToUsed(*M, UsedVars);
+}
+
+void InstrProfiling::emitInitialization() {
+  // Create the ProfileFileName variable. Don't do this for the
+  // context-sensitive instrumentation lowering: that lowering runs after
+  // LTO/ThinLTO linking, and the PGOInstrumentationGenCreateVar pass should
+  // have already created the variable before LTO/ThinLTO linking.
+ if (!IsCS)
+ createProfileFileNameVar(*M, Options.InstrProfileOutput);
+ Function *RegisterF = M->getFunction(getInstrProfRegFuncsName());
+ if (!RegisterF)
+ return;
+
+ // Create the initialization function.
+ auto *VoidTy = Type::getVoidTy(M->getContext());
+ auto *F = Function::Create(FunctionType::get(VoidTy, false),
+ GlobalValue::InternalLinkage,
+ getInstrProfInitFuncName(), M);
+ F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ F->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ F->addFnAttr(Attribute::NoRedZone);
+
+ // Add the basic block and the necessary calls.
+ IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
+ IRB.CreateCall(RegisterF, {});
+ IRB.CreateRetVoid();
+
+ appendToGlobalCtors(*M, F, 0);
+}
diff --git a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
new file mode 100644
index 000000000000..a6c2c9b464b6
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -0,0 +1,128 @@
+//===-- Instrumentation.cpp - Instrumentation Infrastructure --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the common initialization infrastructure for the
+// Instrumentation library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
+
+using namespace llvm;
+
+/// Moves I before IP. Returns new insert point.
+static BasicBlock::iterator moveBeforeInsertPoint(BasicBlock::iterator I, BasicBlock::iterator IP) {
+ // If I is IP, move the insert point down.
+ if (I == IP) {
+ ++IP;
+ } else {
+ // Otherwise, move I before IP and return IP.
+ I->moveBefore(&*IP);
+ }
+ return IP;
+}
+
+/// Instrumentation passes often insert conditional checks into entry blocks.
+/// Call this function before splitting the entry block to move instructions
+/// that must remain in the entry block up before the split point. Static
+/// allocas and llvm.localescape calls, for example, must remain in the entry
+/// block.
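+///
+/// Typical use before splitting the entry block (illustrative sketch;
+/// SplitBlock is declared in Transforms/Utils/BasicBlockUtils.h):
+///   IP = PrepareToSplitEntryBlock(BB, IP);
+///   SplitBlock(&BB, &*IP);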
+BasicBlock::iterator llvm::PrepareToSplitEntryBlock(BasicBlock &BB,
+ BasicBlock::iterator IP) {
+ assert(&BB.getParent()->getEntryBlock() == &BB);
+ for (auto I = IP, E = BB.end(); I != E; ++I) {
+ bool KeepInEntry = false;
+ if (auto *AI = dyn_cast<AllocaInst>(I)) {
+ if (AI->isStaticAlloca())
+ KeepInEntry = true;
+ } else if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == llvm::Intrinsic::localescape)
+ KeepInEntry = true;
+ }
+ if (KeepInEntry)
+ IP = moveBeforeInsertPoint(I, IP);
+ }
+ return IP;
+}
+
+// Create a constant for Str so that we can pass it to the run-time lib.
+GlobalVariable *llvm::createPrivateGlobalForString(Module &M, StringRef Str,
+ bool AllowMerging,
+ const char *NamePrefix) {
+ Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
+ // We use private linkage for module-local strings. If they can be merged
+ // with another one, we set the unnamed_addr attribute.
+ GlobalVariable *GV =
+ new GlobalVariable(M, StrConst->getType(), true,
+ GlobalValue::PrivateLinkage, StrConst, NamePrefix);
+ if (AllowMerging)
+ GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ GV->setAlignment(Align::None()); // Strings may not be merged w/o setting
+ // alignment explicitly.
+ return GV;
+}
+
+Comdat *llvm::GetOrCreateFunctionComdat(Function &F, Triple &T,
+ const std::string &ModuleId) {
+ if (auto Comdat = F.getComdat()) return Comdat;
+ assert(F.hasName());
+ Module *M = F.getParent();
+ std::string Name = F.getName();
+
+ // Make a unique comdat name for internal linkage things on ELF. On COFF, the
+ // name of the comdat group identifies the leader symbol of the comdat group.
+ // The linkage of the leader symbol is considered during comdat resolution,
+ // and internal symbols with the same name from different objects will not be
+ // merged.
+ if (T.isOSBinFormatELF() && F.hasLocalLinkage()) {
+ if (ModuleId.empty())
+ return nullptr;
+ Name += ModuleId;
+ }
+
+ // Make a new comdat for the function. Use the "no duplicates" selection kind
+ // for non-weak symbols if the object file format supports it.
+ Comdat *C = M->getOrInsertComdat(Name);
+ if (T.isOSBinFormatCOFF() && !F.isWeakForLinker())
+ C->setSelectionKind(Comdat::NoDuplicates);
+ F.setComdat(C);
+ return C;
+}
+
+/// initializeInstrumentation - Initialize all passes in the Instrumentation
+/// library.
+void llvm::initializeInstrumentation(PassRegistry &Registry) {
+ initializeAddressSanitizerLegacyPassPass(Registry);
+ initializeModuleAddressSanitizerLegacyPassPass(Registry);
+ initializeBoundsCheckingLegacyPassPass(Registry);
+ initializeControlHeightReductionLegacyPassPass(Registry);
+ initializeGCOVProfilerLegacyPassPass(Registry);
+ initializePGOInstrumentationGenLegacyPassPass(Registry);
+ initializePGOInstrumentationUseLegacyPassPass(Registry);
+ initializePGOIndirectCallPromotionLegacyPassPass(Registry);
+ initializePGOMemOPSizeOptLegacyPassPass(Registry);
+ initializeInstrOrderFileLegacyPassPass(Registry);
+ initializeInstrProfilingLegacyPassPass(Registry);
+ initializeMemorySanitizerLegacyPassPass(Registry);
+ initializeHWAddressSanitizerLegacyPassPass(Registry);
+ initializeThreadSanitizerLegacyPassPass(Registry);
+ initializeModuleSanitizerCoverageLegacyPassPass(Registry);
+ initializeDataFlowSanitizerPass(Registry);
+}
+
+/// LLVMInitializeInstrumentation - C binding for
+/// initializeInstrumentation.
+void LLVMInitializeInstrumentation(LLVMPassRegistryRef R) {
+ initializeInstrumentation(*unwrap(R));
+}
diff --git a/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h
new file mode 100644
index 000000000000..892a6a26da91
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -0,0 +1,109 @@
+//===- llvm/Analysis/MaximumSpanningTree.h - Interface ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This module provides means for calculating a maximum spanning tree for a
+// given set of weighted edges. The type parameter T is the type of a node.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H
+#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H
+
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/IR/BasicBlock.h"
+#include <algorithm>
+#include <vector>
+
+namespace llvm {
+
+  /// MaximumSpanningTree - An MST implementation.
+ /// The type parameter T determines the type of the nodes of the graph.
+ template <typename T>
+ class MaximumSpanningTree {
+ public:
+ typedef std::pair<const T*, const T*> Edge;
+ typedef std::pair<Edge, double> EdgeWeight;
+ typedef std::vector<EdgeWeight> EdgeWeights;
+ protected:
+ typedef std::vector<Edge> MaxSpanTree;
+
+ MaxSpanTree MST;
+
+ private:
+    // A comparison class for weighted edges.
+ struct EdgeWeightCompare {
+      static size_t getBlockSize(const T *X) {
+ const BasicBlock *BB = dyn_cast_or_null<BasicBlock>(X);
+ return BB ? BB->size() : 0;
+ }
+
+ bool operator()(EdgeWeight X, EdgeWeight Y) const {
+ if (X.second > Y.second) return true;
+ if (X.second < Y.second) return false;
+
+ // Equal edge weights: break ties by comparing block sizes.
+ size_t XSizeA = getBlockSize(X.first.first);
+ size_t YSizeA = getBlockSize(Y.first.first);
+ if (XSizeA > YSizeA) return true;
+ if (XSizeA < YSizeA) return false;
+
+ size_t XSizeB = getBlockSize(X.first.second);
+ size_t YSizeB = getBlockSize(Y.first.second);
+ if (XSizeB > YSizeB) return true;
+ if (XSizeB < YSizeB) return false;
+
+ return false;
+ }
+ };
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+
+ /// MaximumSpanningTree() - Takes a vector of weighted edges and returns a
+ /// spanning tree.
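+    ///
+    /// Example (illustrative; assumes two existing BasicBlocks EntryBB/ExitBB):
+    ///   MaximumSpanningTree<BasicBlock>::EdgeWeights Edges;
+    ///   Edges.push_back(std::make_pair(std::make_pair(&EntryBB, &ExitBB), 10.0));
+    ///   MaximumSpanningTree<BasicBlock> MST(Edges);
+    ///   for (const auto &E : MST) { /* E.first -> E.second is a tree edge */ }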
+ MaximumSpanningTree(EdgeWeights &EdgeVector) {
+ llvm::stable_sort(EdgeVector, EdgeWeightCompare());
+
+      // Create the spanning tree. Forest contains a special data structure
+      // that makes checking whether two nodes are already in a common
+      // (sub-)tree fast and cheap.
+ EquivalenceClasses<const T*> Forest;
+ for (typename EdgeWeights::iterator EWi = EdgeVector.begin(),
+ EWe = EdgeVector.end(); EWi != EWe; ++EWi) {
+ Edge e = (*EWi).first;
+
+ Forest.insert(e.first);
+ Forest.insert(e.second);
+ }
+
+ // Iterate over the sorted edges, biggest first.
+ for (typename EdgeWeights::iterator EWi = EdgeVector.begin(),
+ EWe = EdgeVector.end(); EWi != EWe; ++EWi) {
+ Edge e = (*EWi).first;
+
+ if (Forest.findLeader(e.first) != Forest.findLeader(e.second)) {
+ Forest.unionSets(e.first, e.second);
+          // We now know that the edge is not already in a subtree, so push
+          // the edge onto the MST.
+ MST.push_back(e);
+ }
+ }
+ }
+
+ typename MaxSpanTree::iterator begin() {
+ return MST.begin();
+ }
+
+ typename MaxSpanTree::iterator end() {
+ return MST.end();
+ }
+ };
+
+} // End llvm namespace
+
+#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
new file mode 100644
index 000000000000..69c9020e060b
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -0,0 +1,4602 @@
+//===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file is a part of MemorySanitizer, a detector of uninitialized
+/// reads.
+///
+/// The algorithm of the tool is similar to Memcheck
+/// (http://goo.gl/QKbem). We associate a few shadow bits with every
+/// byte of the application memory, poison the shadow of the malloc-ed
+/// or alloca-ed memory, load the shadow bits on every memory read,
+/// propagate the shadow bits through some of the arithmetic
+/// instruction (including MOV), store the shadow bits on every memory
+/// write, report a bug on some other instructions (e.g. JMP) if the
+/// associated shadow is poisoned.
+///
+/// But there are differences too. The first and the major one:
+/// compiler instrumentation instead of binary instrumentation. This
+/// gives us much better register allocation, possible compiler
+/// optimizations and a fast start-up. But this brings the major issue
+/// as well: msan needs to see all program events, including system
+/// calls and reads/writes in system libraries, so we either need to
+/// compile *everything* with msan or use a binary translation
+/// component (e.g. DynamoRIO) to instrument pre-built libraries.
+/// Another difference from Memcheck is that we use 8 shadow bits per
+/// byte of application memory and use a direct shadow mapping. This
+/// greatly simplifies the instrumentation code and avoids races on
+/// shadow updates (Memcheck is single-threaded so races are not a
+/// concern there. Memcheck uses 2 shadow bits per byte with a slow
+/// path storage that uses 8 bits per byte).
+///
+/// The default value of shadow is 0, which means "clean" (not poisoned).
+///
+/// Every module initializer should call __msan_init to ensure that the
+/// shadow memory is ready. On error, __msan_warning is called. Since
+/// parameters and return values may be passed via registers, we have a
+/// specialized thread-local shadow for return values
+/// (__msan_retval_tls) and parameters (__msan_param_tls).
+///
+/// Origin tracking.
+///
+/// MemorySanitizer can track origins (allocation points) of all uninitialized
+/// values. This behavior is controlled with a flag (msan-track-origins) and is
+/// disabled by default.
+///
+/// Origins are 4-byte values created and interpreted by the runtime library.
+/// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
+/// of application memory. Propagation of origins is basically a bunch of
+/// "select" instructions that pick the origin of a dirty argument, if an
+/// instruction has one.
+///
+/// Every 4 aligned, consecutive bytes of application memory have one origin
+/// value associated with them. If these bytes contain uninitialized data
+/// coming from 2 different allocations, the last store wins. Because of this,
+/// MemorySanitizer reports can show unrelated origins, but this is unlikely in
+/// practice.
+///
+/// Origins are meaningless for fully initialized values, so MemorySanitizer
+/// avoids storing origin to memory when a fully initialized value is stored.
+/// This way it avoids needlessly overwriting the origin of the 4-byte region
+/// on a short (i.e. 1-byte) clean store, and it is also good for performance.
+///
+/// Atomic handling.
+///
+/// Ideally, every atomic store of application value should update the
+/// corresponding shadow location in an atomic way. Unfortunately, atomic store
+/// of two disjoint locations cannot be done without severe slowdown.
+///
+/// Therefore, we implement an approximation that may err on the safe side.
+/// In this implementation, every atomically accessed location in the program
+/// may only change from (partially) uninitialized to fully initialized, but
+/// not the other way around. We load the shadow _after_ the application load,
+/// and we store the shadow _before_ the app store. Also, we always store clean
+/// shadow (if the application store is atomic). This way, if the store-load
+/// pair constitutes a happens-before arc, shadow store and load are correctly
+/// ordered such that the load will get either the value that was stored, or
+/// some later value (which is always clean).
+///
+/// This does not work very well with Compare-And-Swap (CAS) and
+/// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
+/// must store the new shadow before the app operation, and load the shadow
+/// after the app operation. Computers don't work this way. Current
+/// implementation ignores the load aspect of CAS/RMW, always returning a clean
+/// value. It implements the store part as a simple atomic store by storing a
+/// clean shadow.
+///
+/// Instrumenting inline assembly.
+///
+/// For inline assembly code LLVM has little idea about which memory locations
+/// become initialized depending on the arguments. It may be possible to figure
+/// out which arguments are meant to point to inputs and outputs, but the
+/// actual semantics may only be visible at runtime. In the Linux kernel it's
+/// also possible that the arguments only indicate the offset for a base taken
+/// from a segment register, so it's dangerous to treat any asm() arguments as
+/// pointers. We take a conservative approach and generate calls to
+///   __msan_instrument_asm_store(ptr, size),
+/// which defer the memory unpoisoning to the runtime library.
+/// The latter can perform more complex address checks to figure out whether
+/// it's safe to touch the shadow memory.
+/// Like with atomic operations, we call __msan_instrument_asm_store() before
+/// the assembly call, so that changes to the shadow memory will be seen by
+/// other threads together with main memory initialization.
+///
+/// KernelMemorySanitizer (KMSAN) implementation.
+///
+/// The major differences between KMSAN and MSan instrumentation are:
+/// - KMSAN always tracks the origins and implies msan-keep-going=true;
+/// - KMSAN allocates shadow and origin memory for each page separately, so
+/// there are no explicit accesses to shadow and origin in the
+/// instrumentation.
+/// Shadow and origin values for a particular X-byte memory location
+/// (X=1,2,4,8) are accessed through pointers obtained via the
+/// __msan_metadata_ptr_for_load_X(ptr)
+/// __msan_metadata_ptr_for_store_X(ptr)
+/// functions. The corresponding functions check that the X-byte accesses
+/// are possible and return the pointers to shadow and origin memory.
+/// Arbitrary sized accesses are handled with:
+/// __msan_metadata_ptr_for_load_n(ptr, size)
+/// __msan_metadata_ptr_for_store_n(ptr, size);
+/// - TLS variables are stored in a single per-task struct. A call to a
+/// function __msan_get_context_state() returning a pointer to that struct
+/// is inserted into every instrumented function before the entry block;
+/// - __msan_warning() takes a 32-bit origin parameter;
+/// - local variables are poisoned with __msan_poison_alloca() upon function
+/// entry and unpoisoned with __msan_unpoison_alloca() before leaving the
+/// function;
+/// - the pass doesn't declare any global variables or add global constructors
+/// to the translation unit.
+///
+/// Also, KMSAN currently ignores uninitialized memory passed into inline asm
+/// calls, making sure we're on the safe side wrt. possible false positives.
+///
+/// KernelMemorySanitizer only supports X86_64 at the moment.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <tuple>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "msan"
+
+static const unsigned kOriginSize = 4;
+static const unsigned kMinOriginAlignment = 4;
+static const unsigned kShadowTLSAlignment = 8;
+
+// These constants must be kept in sync with the ones in msan.h.
+static const unsigned kParamTLSSize = 800;
+static const unsigned kRetvalTLSSize = 800;
+
+// Access sizes are powers of two: 1, 2, 4, 8.
+static const size_t kNumberOfAccessSizes = 4;
+
+/// Track origins of uninitialized values.
+///
+/// Adds a section to MemorySanitizer report that points to the allocation
+/// (stack or heap) the uninitialized bits came from originally.
+static cl::opt<int> ClTrackOrigins("msan-track-origins",
+ cl::desc("Track origins (allocation sites) of poisoned memory"),
+ cl::Hidden, cl::init(0));
+
+static cl::opt<bool> ClKeepGoing("msan-keep-going",
+ cl::desc("keep going after reporting a UMR"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClPoisonStack("msan-poison-stack",
+ cl::desc("poison uninitialized stack variables"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
+ cl::desc("poison uninitialized stack variables with a call"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
+ cl::desc("poison uninitialized stack variables with the given pattern"),
+ cl::Hidden, cl::init(0xff));
+
+static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
+ cl::desc("poison undef temps"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
+ cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
+ cl::desc("exact handling of relational integer ICmp"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClHandleLifetimeIntrinsics(
+ "msan-handle-lifetime-intrinsics",
+ cl::desc(
+ "when possible, poison scoped variables at the beginning of the scope "
+ "(slower, but more precise)"),
+ cl::Hidden, cl::init(true));
+
+// When compiling the Linux kernel, we sometimes see false positives related to
+// MSan being unable to understand that inline assembly calls may initialize
+// local variables.
+// This flag makes the compiler conservatively unpoison every memory location
+// passed into an assembly call. Note that this may cause false negatives.
+// Because it's impossible to figure out the array sizes, we can only unpoison
+// the first sizeof(type) bytes for each type* pointer.
+// The instrumentation is only enabled in KMSAN builds, and only if
+// -msan-handle-asm-conservative is on. This is done because we may want to
+// quickly disable assembly instrumentation when it breaks.
+static cl::opt<bool> ClHandleAsmConservative(
+ "msan-handle-asm-conservative",
+ cl::desc("conservative handling of inline assembly"), cl::Hidden,
+ cl::init(true));
+
+// This flag controls whether we check the shadow of the address
+// operand of a load or store. Such bugs are very rare, since a load from
+// a garbage address typically results in SEGV, but they still happen
+// (e.g. only the lower bits of the address are garbage, or the access
+// happens early at program startup where malloc-ed memory is more likely
+// to be zeroed). As of 2012-08-28 this flag adds 20% slowdown.
+static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address",
+ cl::desc("report accesses through a pointer which has poisoned shadow"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions",
+ cl::desc("print out instructions with default strict semantics"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<int> ClInstrumentationWithCallThreshold(
+ "msan-instrumentation-with-call-threshold",
+ cl::desc(
+ "If the function being instrumented requires more than "
+ "this number of checks and origin stores, use callbacks instead of "
+ "inline checks (-1 means never use callbacks)."),
+ cl::Hidden, cl::init(3500));
+
+static cl::opt<bool>
+ ClEnableKmsan("msan-kernel",
+ cl::desc("Enable KernelMemorySanitizer instrumentation"),
+ cl::Hidden, cl::init(false));
+
+// This is an experiment to enable handling of cases where shadow is a non-zero
+// compile-time constant. For some inexplicable reason such cases were silently
+// ignored by the instrumentation.
+static cl::opt<bool> ClCheckConstantShadow("msan-check-constant-shadow",
+ cl::desc("Insert checks for constant shadow values"),
+ cl::Hidden, cl::init(false));
+
+// This is off by default because of a bug in gold:
+// https://sourceware.org/bugzilla/show_bug.cgi?id=19002
+static cl::opt<bool> ClWithComdat("msan-with-comdat",
+ cl::desc("Place MSan constructors in comdat sections"),
+ cl::Hidden, cl::init(false));
+
+// These options allow specifying custom memory map parameters.
+// See MemoryMapParams for details.
+static cl::opt<uint64_t> ClAndMask("msan-and-mask",
+ cl::desc("Define custom MSan AndMask"),
+ cl::Hidden, cl::init(0));
+
+static cl::opt<uint64_t> ClXorMask("msan-xor-mask",
+ cl::desc("Define custom MSan XorMask"),
+ cl::Hidden, cl::init(0));
+
+static cl::opt<uint64_t> ClShadowBase("msan-shadow-base",
+ cl::desc("Define custom MSan ShadowBase"),
+ cl::Hidden, cl::init(0));
+
+static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
+ cl::desc("Define custom MSan OriginBase"),
+ cl::Hidden, cl::init(0));
+
+static const char *const kMsanModuleCtorName = "msan.module_ctor";
+static const char *const kMsanInitName = "__msan_init";
+
+namespace {
+
+// Memory map parameters used in application-to-shadow address calculation.
+// Offset = (Addr & ~AndMask) ^ XorMask
+// Shadow = ShadowBase + Offset
+// Origin = OriginBase + Offset
+struct MemoryMapParams {
+ uint64_t AndMask;
+ uint64_t XorMask;
+ uint64_t ShadowBase;
+ uint64_t OriginBase;
+};
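[Editorial note, not part of the patch] A minimal sketch of the address arithmetic
described in the comment above, in plain C++. The parameter values are the
(non-legacy) x86_64 Linux ones defined further down; the application address is
made up for the example, and the final "& ~3" mirrors the 4-byte origin-cell
alignment described later for getShadowOriginPtrUserspace().

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Assumed example parameters (x86_64 Linux, non-legacy mapping).
      const uint64_t AndMask = 0, XorMask = 0x500000000000;
      const uint64_t ShadowBase = 0, OriginBase = 0x100000000000;

      uint64_t Addr = 0x7fff12345678;                   // hypothetical app address
      uint64_t Offset = (Addr & ~AndMask) ^ XorMask;    // Offset = (Addr & ~AndMask) ^ XorMask
      uint64_t Shadow = ShadowBase + Offset;            // Shadow = ShadowBase + Offset
      uint64_t Origin = (OriginBase + Offset) & ~3ULL;  // origin cells are 4-byte aligned
      std::printf("shadow=0x%llx origin=0x%llx\n",
                  (unsigned long long)Shadow, (unsigned long long)Origin);
      return 0;
    }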
+
+struct PlatformMemoryMapParams {
+ const MemoryMapParams *bits32;
+ const MemoryMapParams *bits64;
+};
+
+} // end anonymous namespace
+
+// i386 Linux
+static const MemoryMapParams Linux_I386_MemoryMapParams = {
+ 0x000080000000, // AndMask
+ 0, // XorMask (not used)
+ 0, // ShadowBase (not used)
+ 0x000040000000, // OriginBase
+};
+
+// x86_64 Linux
+static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
+#ifdef MSAN_LINUX_X86_64_OLD_MAPPING
+ 0x400000000000, // AndMask
+ 0, // XorMask (not used)
+ 0, // ShadowBase (not used)
+ 0x200000000000, // OriginBase
+#else
+ 0, // AndMask (not used)
+ 0x500000000000, // XorMask
+ 0, // ShadowBase (not used)
+ 0x100000000000, // OriginBase
+#endif
+};
+
+// mips64 Linux
+static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
+ 0, // AndMask (not used)
+ 0x008000000000, // XorMask
+ 0, // ShadowBase (not used)
+ 0x002000000000, // OriginBase
+};
+
+// ppc64 Linux
+static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
+ 0xE00000000000, // AndMask
+ 0x100000000000, // XorMask
+ 0x080000000000, // ShadowBase
+ 0x1C0000000000, // OriginBase
+};
+
+// aarch64 Linux
+static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
+ 0, // AndMask (not used)
+ 0x06000000000, // XorMask
+ 0, // ShadowBase (not used)
+ 0x01000000000, // OriginBase
+};
+
+// i386 FreeBSD
+static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
+ 0x000180000000, // AndMask
+ 0x000040000000, // XorMask
+ 0x000020000000, // ShadowBase
+ 0x000700000000, // OriginBase
+};
+
+// x86_64 FreeBSD
+static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
+ 0xc00000000000, // AndMask
+ 0x200000000000, // XorMask
+ 0x100000000000, // ShadowBase
+ 0x380000000000, // OriginBase
+};
+
+// x86_64 NetBSD
+static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = {
+ 0, // AndMask
+ 0x500000000000, // XorMask
+ 0, // ShadowBase
+ 0x100000000000, // OriginBase
+};
+
+static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
+ &Linux_I386_MemoryMapParams,
+ &Linux_X86_64_MemoryMapParams,
+};
+
+static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
+ nullptr,
+ &Linux_MIPS64_MemoryMapParams,
+};
+
+static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
+ nullptr,
+ &Linux_PowerPC64_MemoryMapParams,
+};
+
+static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
+ nullptr,
+ &Linux_AArch64_MemoryMapParams,
+};
+
+static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
+ &FreeBSD_I386_MemoryMapParams,
+ &FreeBSD_X86_64_MemoryMapParams,
+};
+
+static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
+ nullptr,
+ &NetBSD_X86_64_MemoryMapParams,
+};
+
+namespace {
+
+/// Instrument functions of a module to detect uninitialized reads.
+///
+/// Instantiating MemorySanitizer inserts the msan runtime library API function
+/// declarations into the module if they don't exist already. Instantiating
+/// ensures the __msan_init function is in the list of global constructors for
+/// the module.
+class MemorySanitizer {
+public:
+ MemorySanitizer(Module &M, MemorySanitizerOptions Options)
+ : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins),
+ Recover(Options.Recover) {
+ initializeModule(M);
+ }
+
+ // MSan cannot be moved or copied because of MapParams.
+ MemorySanitizer(MemorySanitizer &&) = delete;
+ MemorySanitizer &operator=(MemorySanitizer &&) = delete;
+ MemorySanitizer(const MemorySanitizer &) = delete;
+ MemorySanitizer &operator=(const MemorySanitizer &) = delete;
+
+ bool sanitizeFunction(Function &F, TargetLibraryInfo &TLI);
+
+private:
+ friend struct MemorySanitizerVisitor;
+ friend struct VarArgAMD64Helper;
+ friend struct VarArgMIPS64Helper;
+ friend struct VarArgAArch64Helper;
+ friend struct VarArgPowerPC64Helper;
+
+ void initializeModule(Module &M);
+ void initializeCallbacks(Module &M);
+ void createKernelApi(Module &M);
+ void createUserspaceApi(Module &M);
+
+ /// True if we're compiling the Linux kernel.
+ bool CompileKernel;
+ /// Track origins (allocation points) of uninitialized values.
+ int TrackOrigins;
+ bool Recover;
+
+ LLVMContext *C;
+ Type *IntptrTy;
+ Type *OriginTy;
+
+ // XxxTLS variables represent the per-thread state in MSan and per-task state
+ // in KMSAN.
+  // In userspace these point to thread-local globals. In the kernel they
+  // point to the members of a per-task struct obtained via a call to
+  // __msan_get_context_state().
+
+ /// Thread-local shadow storage for function parameters.
+ Value *ParamTLS;
+
+ /// Thread-local origin storage for function parameters.
+ Value *ParamOriginTLS;
+
+ /// Thread-local shadow storage for function return value.
+ Value *RetvalTLS;
+
+ /// Thread-local origin storage for function return value.
+ Value *RetvalOriginTLS;
+
+ /// Thread-local shadow storage for in-register va_arg function
+ /// parameters (x86_64-specific).
+ Value *VAArgTLS;
+
+ /// Thread-local shadow storage for in-register va_arg function
+ /// parameters (x86_64-specific).
+ Value *VAArgOriginTLS;
+
+ /// Thread-local shadow storage for va_arg overflow area
+ /// (x86_64-specific).
+ Value *VAArgOverflowSizeTLS;
+
+ /// Thread-local space used to pass origin value to the UMR reporting
+ /// function.
+ Value *OriginTLS;
+
+ /// Are the instrumentation callbacks set up?
+ bool CallbacksInitialized = false;
+
+ /// The run-time callback to print a warning.
+ FunctionCallee WarningFn;
+
+ // These arrays are indexed by log2(AccessSize).
+ FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
+ FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
+
+ /// Run-time helper that generates a new origin value for a stack
+ /// allocation.
+ FunctionCallee MsanSetAllocaOrigin4Fn;
+
+ /// Run-time helper that poisons stack on function entry.
+ FunctionCallee MsanPoisonStackFn;
+
+ /// Run-time helper that records a store (or any event) of an
+ /// uninitialized value and returns an updated origin id encoding this info.
+ FunctionCallee MsanChainOriginFn;
+
+ /// MSan runtime replacements for memmove, memcpy and memset.
+ FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
+
+  /// KMSAN: the per-task context state type and the callback that
+  /// returns a pointer to it.
+ StructType *MsanContextStateTy;
+ FunctionCallee MsanGetContextStateFn;
+
+ /// Functions for poisoning/unpoisoning local variables
+ FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;
+
+ /// Each of the MsanMetadataPtrXxx functions returns a pair of shadow/origin
+ /// pointers.
+ FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
+ FunctionCallee MsanMetadataPtrForLoad_1_8[4];
+ FunctionCallee MsanMetadataPtrForStore_1_8[4];
+ FunctionCallee MsanInstrumentAsmStoreFn;
+
+ /// Helper to choose between different MsanMetadataPtrXxx().
+ FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);
+
+ /// Memory map parameters used in application-to-shadow calculation.
+ const MemoryMapParams *MapParams;
+
+ /// Custom memory map parameters used when -msan-shadow-base or
+  /// -msan-origin-base is provided.
+ MemoryMapParams CustomMapParams;
+
+ MDNode *ColdCallWeights;
+
+ /// Branch weights for origin store.
+ MDNode *OriginStoreWeights;
+
+ /// An empty volatile inline asm that prevents callback merge.
+ InlineAsm *EmptyAsm;
+};
+
+void insertModuleCtor(Module &M) {
+ getOrCreateSanitizerCtorAndInitFunctions(
+ M, kMsanModuleCtorName, kMsanInitName,
+ /*InitArgTypes=*/{},
+ /*InitArgs=*/{},
+ // This callback is invoked when the functions are created the first
+ // time. Hook them into the global ctors list in that case:
+ [&](Function *Ctor, FunctionCallee) {
+ if (!ClWithComdat) {
+ appendToGlobalCtors(M, Ctor, 0);
+ return;
+ }
+ Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
+ Ctor->setComdat(MsanCtorComdat);
+ appendToGlobalCtors(M, Ctor, 0, Ctor);
+ });
+}
+
+/// A legacy function pass for msan instrumentation.
+///
+/// Instruments functions to detect uninitialized reads.
+struct MemorySanitizerLegacyPass : public FunctionPass {
+ // Pass identification, replacement for typeid.
+ static char ID;
+
+ MemorySanitizerLegacyPass(MemorySanitizerOptions Options = {})
+ : FunctionPass(ID), Options(Options) {}
+ StringRef getPassName() const override { return "MemorySanitizerLegacyPass"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override {
+ return MSan->sanitizeFunction(
+ F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F));
+ }
+ bool doInitialization(Module &M) override;
+
+ Optional<MemorySanitizer> MSan;
+ MemorySanitizerOptions Options;
+};
+
+template <class T> T getOptOrDefault(const cl::opt<T> &Opt, T Default) {
+ return (Opt.getNumOccurrences() > 0) ? Opt : Default;
+}
+
+} // end anonymous namespace
+
+MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K)
+ : Kernel(getOptOrDefault(ClEnableKmsan, K)),
+ TrackOrigins(getOptOrDefault(ClTrackOrigins, Kernel ? 2 : TO)),
+ Recover(getOptOrDefault(ClKeepGoing, Kernel || R)) {}
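[Editorial note, not part of the patch] A brief illustration of how the constructor
above resolves the options when none of the -msan-* flags appear on the command
line (any flag occurrence takes precedence via getOptOrDefault):

    // MemorySanitizerOptions(/*TO=*/2, /*R=*/false, /*K=*/false)
    //   -> Kernel = false, TrackOrigins = 2, Recover = false
    // MemorySanitizerOptions(/*TO=*/0, /*R=*/false, /*K=*/true)
    //   -> Kernel = true, TrackOrigins = 2 (kernel implies origin tracking),
    //      Recover = true (kernel implies keep-going)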
+
+PreservedAnalyses MemorySanitizerPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ MemorySanitizer Msan(*F.getParent(), Options);
+ if (Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+PreservedAnalyses MemorySanitizerPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (Options.Kernel)
+ return PreservedAnalyses::all();
+ insertModuleCtor(M);
+ return PreservedAnalyses::none();
+}
+
+char MemorySanitizerLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan",
+ "MemorySanitizer: detects uninitialized reads.", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(MemorySanitizerLegacyPass, "msan",
+ "MemorySanitizer: detects uninitialized reads.", false,
+ false)
+
+FunctionPass *
+llvm::createMemorySanitizerLegacyPassPass(MemorySanitizerOptions Options) {
+ return new MemorySanitizerLegacyPass(Options);
+}
+
+/// Create a non-const global initialized with the given string.
+///
+/// Creates a writable global for Str so that we can pass it to the
+/// run-time lib. The runtime uses the first 4 bytes of the string to store
+/// the frame ID, so the string needs to be mutable.
+static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
+ StringRef Str) {
+ Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
+ return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false,
+ GlobalValue::PrivateLinkage, StrConst, "");
+}
+
+/// Create KMSAN API callbacks.
+void MemorySanitizer::createKernelApi(Module &M) {
+ IRBuilder<> IRB(*C);
+
+ // These will be initialized in insertKmsanPrologue().
+ RetvalTLS = nullptr;
+ RetvalOriginTLS = nullptr;
+ ParamTLS = nullptr;
+ ParamOriginTLS = nullptr;
+ VAArgTLS = nullptr;
+ VAArgOriginTLS = nullptr;
+ VAArgOverflowSizeTLS = nullptr;
+ // OriginTLS is unused in the kernel.
+ OriginTLS = nullptr;
+
+ // __msan_warning() in the kernel takes an origin.
+ WarningFn = M.getOrInsertFunction("__msan_warning", IRB.getVoidTy(),
+ IRB.getInt32Ty());
+ // Requests the per-task context state (kmsan_context_state*) from the
+ // runtime library.
+ MsanContextStateTy = StructType::get(
+ ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
+ ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
+ ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
+ ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), /* va_arg_origin */
+ IRB.getInt64Ty(), ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
+ OriginTy);
+ MsanGetContextStateFn = M.getOrInsertFunction(
+ "__msan_get_context_state", PointerType::get(MsanContextStateTy, 0));
+
+ Type *RetTy = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
+ PointerType::get(IRB.getInt32Ty(), 0));
+
+ for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) {
+ std::string name_load =
+ "__msan_metadata_ptr_for_load_" + std::to_string(size);
+ std::string name_store =
+ "__msan_metadata_ptr_for_store_" + std::to_string(size);
+ MsanMetadataPtrForLoad_1_8[ind] = M.getOrInsertFunction(
+ name_load, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
+ MsanMetadataPtrForStore_1_8[ind] = M.getOrInsertFunction(
+ name_store, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
+ }
+
+ MsanMetadataPtrForLoadN = M.getOrInsertFunction(
+ "__msan_metadata_ptr_for_load_n", RetTy,
+ PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
+ MsanMetadataPtrForStoreN = M.getOrInsertFunction(
+ "__msan_metadata_ptr_for_store_n", RetTy,
+ PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
+
+ // Functions for poisoning and unpoisoning memory.
+ MsanPoisonAllocaFn =
+ M.getOrInsertFunction("__msan_poison_alloca", IRB.getVoidTy(),
+ IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy());
+ MsanUnpoisonAllocaFn = M.getOrInsertFunction(
+ "__msan_unpoison_alloca", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
+}
+
+static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) {
+ return M.getOrInsertGlobal(Name, Ty, [&] {
+ return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
+ nullptr, Name, nullptr,
+ GlobalVariable::InitialExecTLSModel);
+ });
+}
+
+/// Insert declarations for userspace-specific functions and globals.
+void MemorySanitizer::createUserspaceApi(Module &M) {
+ IRBuilder<> IRB(*C);
+ // Create the callback.
+ // FIXME: this function should have "Cold" calling conv,
+ // which is not yet implemented.
+ StringRef WarningFnName = Recover ? "__msan_warning"
+ : "__msan_warning_noreturn";
+ WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy());
+
+ // Create the global TLS variables.
+ RetvalTLS =
+ getOrInsertGlobal(M, "__msan_retval_tls",
+ ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8));
+
+ RetvalOriginTLS = getOrInsertGlobal(M, "__msan_retval_origin_tls", OriginTy);
+
+ ParamTLS =
+ getOrInsertGlobal(M, "__msan_param_tls",
+ ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
+
+ ParamOriginTLS =
+ getOrInsertGlobal(M, "__msan_param_origin_tls",
+ ArrayType::get(OriginTy, kParamTLSSize / 4));
+
+ VAArgTLS =
+ getOrInsertGlobal(M, "__msan_va_arg_tls",
+ ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
+
+ VAArgOriginTLS =
+ getOrInsertGlobal(M, "__msan_va_arg_origin_tls",
+ ArrayType::get(OriginTy, kParamTLSSize / 4));
+
+ VAArgOverflowSizeTLS =
+ getOrInsertGlobal(M, "__msan_va_arg_overflow_size_tls", IRB.getInt64Ty());
+ OriginTLS = getOrInsertGlobal(M, "__msan_origin_tls", IRB.getInt32Ty());
+
+ for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
+ AccessSizeIndex++) {
+ unsigned AccessSize = 1 << AccessSizeIndex;
+ std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
+ MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
+ FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8),
+ IRB.getInt32Ty());
+
+ FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
+ MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
+ FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8),
+ IRB.getInt8PtrTy(), IRB.getInt32Ty());
+ }
+
+ MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
+ "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
+ IRB.getInt8PtrTy(), IntptrTy);
+ MsanPoisonStackFn =
+ M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(),
+ IRB.getInt8PtrTy(), IntptrTy);
+}
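[Editorial note, not part of the patch] For reference, the loop above declares one
pair of helpers per access size N in {1, 2, 4, 8}; in IR terms:

    //   void @__msan_maybe_warning_N(i<8*N> %shadow, i32 %origin)
    //   void @__msan_maybe_store_origin_N(i<8*N> %shadow, i8* %addr, i32 %origin)
    // e.g. __msan_maybe_warning_4 takes an i32 shadow and an i32 origin.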
+
+/// Insert extern declarations of runtime-provided functions and globals.
+void MemorySanitizer::initializeCallbacks(Module &M) {
+ // Only do this once.
+ if (CallbacksInitialized)
+ return;
+
+ IRBuilder<> IRB(*C);
+ // Initialize callbacks that are common for kernel and userspace
+ // instrumentation.
+ MsanChainOriginFn = M.getOrInsertFunction(
+ "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty());
+ MemmoveFn = M.getOrInsertFunction(
+ "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IntptrTy);
+ MemcpyFn = M.getOrInsertFunction(
+ "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ IntptrTy);
+ MemsetFn = M.getOrInsertFunction(
+ "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
+ IntptrTy);
+ // We insert an empty inline asm after __msan_report* to avoid callback merge.
+ EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
+ StringRef(""), StringRef(""),
+ /*hasSideEffects=*/true);
+
+ MsanInstrumentAsmStoreFn =
+ M.getOrInsertFunction("__msan_instrument_asm_store", IRB.getVoidTy(),
+ PointerType::get(IRB.getInt8Ty(), 0), IntptrTy);
+
+ if (CompileKernel) {
+ createKernelApi(M);
+ } else {
+ createUserspaceApi(M);
+ }
+ CallbacksInitialized = true;
+}
+
+FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
+ int size) {
+ FunctionCallee *Fns =
+ isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
+ switch (size) {
+ case 1:
+ return Fns[0];
+ case 2:
+ return Fns[1];
+ case 4:
+ return Fns[2];
+ case 8:
+ return Fns[3];
+ default:
+ return nullptr;
+ }
+}
+
+/// Module-level initialization.
+///
+/// Selects the memory map parameters for the target and initializes the
+/// per-module state (types, branch weights, and the __msan_track_origins /
+/// __msan_keep_going globals).
+void MemorySanitizer::initializeModule(Module &M) {
+ auto &DL = M.getDataLayout();
+
+ bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0;
+ bool OriginPassed = ClOriginBase.getNumOccurrences() > 0;
+ // Check the overrides first
+ if (ShadowPassed || OriginPassed) {
+ CustomMapParams.AndMask = ClAndMask;
+ CustomMapParams.XorMask = ClXorMask;
+ CustomMapParams.ShadowBase = ClShadowBase;
+ CustomMapParams.OriginBase = ClOriginBase;
+ MapParams = &CustomMapParams;
+ } else {
+ Triple TargetTriple(M.getTargetTriple());
+ switch (TargetTriple.getOS()) {
+ case Triple::FreeBSD:
+ switch (TargetTriple.getArch()) {
+ case Triple::x86_64:
+ MapParams = FreeBSD_X86_MemoryMapParams.bits64;
+ break;
+ case Triple::x86:
+ MapParams = FreeBSD_X86_MemoryMapParams.bits32;
+ break;
+ default:
+ report_fatal_error("unsupported architecture");
+ }
+ break;
+ case Triple::NetBSD:
+ switch (TargetTriple.getArch()) {
+ case Triple::x86_64:
+ MapParams = NetBSD_X86_MemoryMapParams.bits64;
+ break;
+ default:
+ report_fatal_error("unsupported architecture");
+ }
+ break;
+ case Triple::Linux:
+ switch (TargetTriple.getArch()) {
+ case Triple::x86_64:
+ MapParams = Linux_X86_MemoryMapParams.bits64;
+ break;
+ case Triple::x86:
+ MapParams = Linux_X86_MemoryMapParams.bits32;
+ break;
+ case Triple::mips64:
+ case Triple::mips64el:
+ MapParams = Linux_MIPS_MemoryMapParams.bits64;
+ break;
+ case Triple::ppc64:
+ case Triple::ppc64le:
+ MapParams = Linux_PowerPC_MemoryMapParams.bits64;
+ break;
+ case Triple::aarch64:
+ case Triple::aarch64_be:
+ MapParams = Linux_ARM_MemoryMapParams.bits64;
+ break;
+ default:
+ report_fatal_error("unsupported architecture");
+ }
+ break;
+ default:
+ report_fatal_error("unsupported operating system");
+ }
+ }
+
+ C = &(M.getContext());
+ IRBuilder<> IRB(*C);
+ IntptrTy = IRB.getIntPtrTy(DL);
+ OriginTy = IRB.getInt32Ty();
+
+ ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
+ OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
+
+ if (!CompileKernel) {
+ if (TrackOrigins)
+ M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
+ return new GlobalVariable(
+ M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
+ IRB.getInt32(TrackOrigins), "__msan_track_origins");
+ });
+
+ if (Recover)
+ M.getOrInsertGlobal("__msan_keep_going", IRB.getInt32Ty(), [&] {
+ return new GlobalVariable(M, IRB.getInt32Ty(), true,
+ GlobalValue::WeakODRLinkage,
+ IRB.getInt32(Recover), "__msan_keep_going");
+ });
+  }
+}
+
+bool MemorySanitizerLegacyPass::doInitialization(Module &M) {
+ if (!Options.Kernel)
+ insertModuleCtor(M);
+ MSan.emplace(M, Options);
+ return true;
+}
+
+namespace {
+
+/// A helper class that handles instrumentation of VarArg
+/// functions on a particular platform.
+///
+/// Implementations are expected to insert the instrumentation
+/// necessary to propagate argument shadow through VarArg function
+/// calls. Visit* methods are called during an InstVisitor pass over
+/// the function, and should avoid creating new basic blocks. A new
+/// instance of this class is created for each instrumented function.
+struct VarArgHelper {
+ virtual ~VarArgHelper() = default;
+
+ /// Visit a CallSite.
+ virtual void visitCallSite(CallSite &CS, IRBuilder<> &IRB) = 0;
+
+ /// Visit a va_start call.
+ virtual void visitVAStartInst(VAStartInst &I) = 0;
+
+ /// Visit a va_copy call.
+ virtual void visitVACopyInst(VACopyInst &I) = 0;
+
+ /// Finalize function instrumentation.
+ ///
+ /// This method is called after visiting all interesting (see above)
+ /// instructions in a function.
+ virtual void finalizeInstrumentation() = 0;
+};
+
+struct MemorySanitizerVisitor;
+
+} // end anonymous namespace
+
+static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
+ MemorySanitizerVisitor &Visitor);
+
+static unsigned TypeSizeToSizeIndex(unsigned TypeSize) {
+ if (TypeSize <= 8) return 0;
+ return Log2_32_Ceil((TypeSize + 7) / 8);
+}
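[Editorial note, not part of the patch] TypeSizeToSizeIndex maps a type size in
bits to an index into the per-size callback arrays (MaybeWarningFn and
MaybeStoreOriginFn):

    //    1..8  bits -> 0 (1-byte access)     9..16 bits -> 1 (2-byte access)
    //   17..32 bits -> 2 (4-byte access)    33..64 bits -> 3 (8-byte access)
    // Wider types produce an index >= kNumberOfAccessSizes, in which case the
    // callers fall back to the inline check / origin-store path.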
+
+namespace {
+
+/// This class does all the work for a given function. Store and Load
+/// instructions store and load corresponding shadow and origin
+/// values. Most instructions propagate shadow from arguments to their
+/// return values. Certain instructions (most importantly, BranchInst)
+/// test their argument shadow and print reports (with a runtime call) if it's
+/// non-zero.
+struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
+ Function &F;
+ MemorySanitizer &MS;
+ SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
+ ValueMap<Value*, Value*> ShadowMap, OriginMap;
+ std::unique_ptr<VarArgHelper> VAHelper;
+ const TargetLibraryInfo *TLI;
+ BasicBlock *ActualFnStart;
+
+ // The following flags disable parts of MSan instrumentation based on
+ // blacklist contents and command-line options.
+ bool InsertChecks;
+ bool PropagateShadow;
+ bool PoisonStack;
+ bool PoisonUndef;
+ bool CheckReturnValue;
+
+ struct ShadowOriginAndInsertPoint {
+ Value *Shadow;
+ Value *Origin;
+ Instruction *OrigIns;
+
+ ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
+ : Shadow(S), Origin(O), OrigIns(I) {}
+ };
+ SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
+ bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics;
+ SmallSet<AllocaInst *, 16> AllocaSet;
+ SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
+ SmallVector<StoreInst *, 16> StoreList;
+
+ MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
+ const TargetLibraryInfo &TLI)
+ : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)), TLI(&TLI) {
+ bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeMemory);
+ InsertChecks = SanitizeFunction;
+ PropagateShadow = SanitizeFunction;
+ PoisonStack = SanitizeFunction && ClPoisonStack;
+ PoisonUndef = SanitizeFunction && ClPoisonUndef;
+ // FIXME: Consider using SpecialCaseList to specify a list of functions that
+ // must always return fully initialized values. For now, we hardcode "main".
+ CheckReturnValue = SanitizeFunction && (F.getName() == "main");
+
+ MS.initializeCallbacks(*F.getParent());
+ if (MS.CompileKernel)
+ ActualFnStart = insertKmsanPrologue(F);
+ else
+ ActualFnStart = &F.getEntryBlock();
+
+ LLVM_DEBUG(if (!InsertChecks) dbgs()
+ << "MemorySanitizer is not inserting checks into '"
+ << F.getName() << "'\n");
+ }
+
+ Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
+ if (MS.TrackOrigins <= 1) return V;
+ return IRB.CreateCall(MS.MsanChainOriginFn, V);
+ }
+
+ Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
+ if (IntptrSize == kOriginSize) return Origin;
+ assert(IntptrSize == kOriginSize * 2);
+ Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false);
+ return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8));
+ }
+
+ /// Fill memory range with the given origin value.
+ void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
+ unsigned Size, unsigned Alignment) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned IntptrAlignment = DL.getABITypeAlignment(MS.IntptrTy);
+ unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
+ assert(IntptrAlignment >= kMinOriginAlignment);
+ assert(IntptrSize >= kOriginSize);
+
+ unsigned Ofs = 0;
+ unsigned CurrentAlignment = Alignment;
+ if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
+ Value *IntptrOrigin = originToIntptr(IRB, Origin);
+ Value *IntptrOriginPtr =
+ IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0));
+ for (unsigned i = 0; i < Size / IntptrSize; ++i) {
+ Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
+ : IntptrOriginPtr;
+ IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
+ Ofs += IntptrSize / kOriginSize;
+ CurrentAlignment = IntptrAlignment;
+ }
+ }
+
+ for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
+ Value *GEP =
+ i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr;
+ IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
+ CurrentAlignment = kMinOriginAlignment;
+ }
+ }
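[Editorial note, not part of the patch] A worked example of paintOrigin() on a
64-bit target, where IntptrSize == 8 and kOriginSize == 4:

    //  Size == 12, Alignment >= 8: one 8-byte store of the doubled origin
    //  (covering origin slots 0 and 1), then one 4-byte store for slot 2.
    //  Size == 16, Alignment == 4: the wide-store path is skipped, so four
    //  4-byte stores of Origin are emitted.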
+
+ void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
+ Value *OriginPtr, unsigned Alignment, bool AsCall) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
+ unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
+ if (Shadow->getType()->isAggregateType()) {
+ paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
+ OriginAlignment);
+ } else {
+ Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
+ Constant *ConstantShadow = dyn_cast_or_null<Constant>(ConvertedShadow);
+ if (ConstantShadow) {
+ if (ClCheckConstantShadow && !ConstantShadow->isZeroValue())
+ paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
+ OriginAlignment);
+ return;
+ }
+
+ unsigned TypeSizeInBits =
+ DL.getTypeSizeInBits(ConvertedShadow->getType());
+ unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
+ if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
+ FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
+ Value *ConvertedShadow2 = IRB.CreateZExt(
+ ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
+ IRB.CreateCall(Fn, {ConvertedShadow2,
+ IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
+ Origin});
+ } else {
+ Value *Cmp = IRB.CreateICmpNE(
+ ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp");
+ Instruction *CheckTerm = SplitBlockAndInsertIfThen(
+ Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
+ IRBuilder<> IRBNew(CheckTerm);
+ paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize,
+ OriginAlignment);
+ }
+ }
+ }
+
+ void materializeStores(bool InstrumentWithCalls) {
+ for (StoreInst *SI : StoreList) {
+ IRBuilder<> IRB(SI);
+ Value *Val = SI->getValueOperand();
+ Value *Addr = SI->getPointerOperand();
+ Value *Shadow = SI->isAtomic() ? getCleanShadow(Val) : getShadow(Val);
+ Value *ShadowPtr, *OriginPtr;
+ Type *ShadowTy = Shadow->getType();
+ unsigned Alignment = SI->getAlignment();
+ unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
+ std::tie(ShadowPtr, OriginPtr) =
+ getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true);
+
+ StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment);
+ LLVM_DEBUG(dbgs() << " STORE: " << *NewSI << "\n");
+ (void)NewSI;
+
+ if (SI->isAtomic())
+ SI->setOrdering(addReleaseOrdering(SI->getOrdering()));
+
+ if (MS.TrackOrigins && !SI->isAtomic())
+ storeOrigin(IRB, Addr, Shadow, getOrigin(Val), OriginPtr,
+ OriginAlignment, InstrumentWithCalls);
+ }
+ }
+
+ /// Helper function to insert a warning at IRB's current insert point.
+ void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
+ if (!Origin)
+ Origin = (Value *)IRB.getInt32(0);
+ if (MS.CompileKernel) {
+ IRB.CreateCall(MS.WarningFn, Origin);
+ } else {
+ if (MS.TrackOrigins) {
+ IRB.CreateStore(Origin, MS.OriginTLS);
+ }
+ IRB.CreateCall(MS.WarningFn, {});
+ }
+ IRB.CreateCall(MS.EmptyAsm, {});
+ // FIXME: Insert UnreachableInst if !MS.Recover?
+ // This may invalidate some of the following checks and needs to be done
+ // at the very end.
+ }
+
+ void materializeOneCheck(Instruction *OrigIns, Value *Shadow, Value *Origin,
+ bool AsCall) {
+ IRBuilder<> IRB(OrigIns);
+ LLVM_DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n");
+ Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
+ LLVM_DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n");
+
+ Constant *ConstantShadow = dyn_cast_or_null<Constant>(ConvertedShadow);
+ if (ConstantShadow) {
+ if (ClCheckConstantShadow && !ConstantShadow->isZeroValue()) {
+ insertWarningFn(IRB, Origin);
+ }
+ return;
+ }
+
+ const DataLayout &DL = OrigIns->getModule()->getDataLayout();
+
+ unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
+ unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
+ if (AsCall && SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
+ FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
+ Value *ConvertedShadow2 =
+ IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
+ IRB.CreateCall(Fn, {ConvertedShadow2, MS.TrackOrigins && Origin
+ ? Origin
+ : (Value *)IRB.getInt32(0)});
+ } else {
+ Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
+ getCleanShadow(ConvertedShadow), "_mscmp");
+ Instruction *CheckTerm = SplitBlockAndInsertIfThen(
+ Cmp, OrigIns,
+ /* Unreachable */ !MS.Recover, MS.ColdCallWeights);
+
+ IRB.SetInsertPoint(CheckTerm);
+ insertWarningFn(IRB, Origin);
+ LLVM_DEBUG(dbgs() << " CHECK: " << *Cmp << "\n");
+ }
+ }
+
+ void materializeChecks(bool InstrumentWithCalls) {
+ for (const auto &ShadowData : InstrumentationList) {
+ Instruction *OrigIns = ShadowData.OrigIns;
+ Value *Shadow = ShadowData.Shadow;
+ Value *Origin = ShadowData.Origin;
+ materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls);
+ }
+ LLVM_DEBUG(dbgs() << "DONE:\n" << F);
+ }
+
+ BasicBlock *insertKmsanPrologue(Function &F) {
+ BasicBlock *ret =
+ SplitBlock(&F.getEntryBlock(), F.getEntryBlock().getFirstNonPHI());
+ IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+ Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
+ Constant *Zero = IRB.getInt32(0);
+ MS.ParamTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(0)}, "param_shadow");
+ MS.RetvalTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(1)}, "retval_shadow");
+ MS.VAArgTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(2)}, "va_arg_shadow");
+ MS.VAArgOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(3)}, "va_arg_origin");
+ MS.VAArgOverflowSizeTLS =
+ IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
+ MS.ParamOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(5)}, "param_origin");
+ MS.RetvalOriginTLS =
+ IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
+ {Zero, IRB.getInt32(6)}, "retval_origin");
+ return ret;
+ }
+
+ /// Add MemorySanitizer instrumentation to a function.
+ bool runOnFunction() {
+ // In the presence of unreachable blocks, we may see Phi nodes with
+ // incoming nodes from such blocks. Since InstVisitor skips unreachable
+ // blocks, such nodes will not have any shadow value associated with them.
+ // It's easier to remove unreachable blocks than deal with missing shadow.
+ removeUnreachableBlocks(F);
+
+ // Iterate all BBs in depth-first order and create shadow instructions
+ // for all instructions (where applicable).
+ // For PHI nodes we create dummy shadow PHIs which will be finalized later.
+ for (BasicBlock *BB : depth_first(ActualFnStart))
+ visit(*BB);
+
+ // Finalize PHI nodes.
+ for (PHINode *PN : ShadowPHINodes) {
+ PHINode *PNS = cast<PHINode>(getShadow(PN));
+ PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr;
+ size_t NumValues = PN->getNumIncomingValues();
+ for (size_t v = 0; v < NumValues; v++) {
+ PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
+ if (PNO) PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
+ }
+ }
+
+ VAHelper->finalizeInstrumentation();
+
+    // Poison each alloca at its llvm.lifetime.start, unless we have fallen
+    // back to instrumenting only the allocas themselves.
+ if (InstrumentLifetimeStart) {
+ for (auto Item : LifetimeStartList) {
+ instrumentAlloca(*Item.second, Item.first);
+ AllocaSet.erase(Item.second);
+ }
+ }
+ // Poison the allocas for which we didn't instrument the corresponding
+ // lifetime intrinsics.
+ for (AllocaInst *AI : AllocaSet)
+ instrumentAlloca(*AI);
+
+ bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 &&
+ InstrumentationList.size() + StoreList.size() >
+ (unsigned)ClInstrumentationWithCallThreshold;
+
+ // Insert shadow value checks.
+ materializeChecks(InstrumentWithCalls);
+
+ // Delayed instrumentation of StoreInst.
+ // This may not add new address checks.
+ materializeStores(InstrumentWithCalls);
+
+ return true;
+ }
+
+ /// Compute the shadow type that corresponds to a given Value.
+ Type *getShadowTy(Value *V) {
+ return getShadowTy(V->getType());
+ }
+
+ /// Compute the shadow type that corresponds to a given Type.
+ Type *getShadowTy(Type *OrigTy) {
+ if (!OrigTy->isSized()) {
+ return nullptr;
+ }
+ // For integer type, shadow is the same as the original type.
+ // This may return weird-sized types like i1.
+ if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
+ return IT;
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
+ uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
+ return VectorType::get(IntegerType::get(*MS.C, EltSize),
+ VT->getNumElements());
+ }
+ if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) {
+ return ArrayType::get(getShadowTy(AT->getElementType()),
+ AT->getNumElements());
+ }
+ if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
+ SmallVector<Type*, 4> Elements;
+ for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
+ Elements.push_back(getShadowTy(ST->getElementType(i)));
+ StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
+ LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
+ return Res;
+ }
+ uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
+ return IntegerType::get(*MS.C, TypeSize);
+ }
+
+ /// Flatten a vector type.
+ Type *getShadowTyNoVec(Type *ty) {
+ if (VectorType *vt = dyn_cast<VectorType>(ty))
+ return IntegerType::get(*MS.C, vt->getBitWidth());
+ return ty;
+ }
+
+  /// Convert a shadow value to its flattened variant.
+ Value *convertToShadowTyNoVec(Value *V, IRBuilder<> &IRB) {
+ Type *Ty = V->getType();
+ Type *NoVecTy = getShadowTyNoVec(Ty);
+ if (Ty == NoVecTy) return V;
+ return IRB.CreateBitCast(V, NoVecTy);
+ }
+
+ /// Compute the integer shadow offset that corresponds to a given
+ /// application address.
+ ///
+ /// Offset = (Addr & ~AndMask) ^ XorMask
+ Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
+ Value *OffsetLong = IRB.CreatePointerCast(Addr, MS.IntptrTy);
+
+ uint64_t AndMask = MS.MapParams->AndMask;
+ if (AndMask)
+ OffsetLong =
+ IRB.CreateAnd(OffsetLong, ConstantInt::get(MS.IntptrTy, ~AndMask));
+
+ uint64_t XorMask = MS.MapParams->XorMask;
+ if (XorMask)
+ OffsetLong =
+ IRB.CreateXor(OffsetLong, ConstantInt::get(MS.IntptrTy, XorMask));
+ return OffsetLong;
+ }
+
+ /// Compute the shadow and origin addresses corresponding to a given
+ /// application address.
+ ///
+ /// Shadow = ShadowBase + Offset
+ /// Origin = (OriginBase + Offset) & ~3ULL
+ std::pair<Value *, Value *> getShadowOriginPtrUserspace(Value *Addr,
+ IRBuilder<> &IRB,
+ Type *ShadowTy,
+ unsigned Alignment) {
+ Value *ShadowOffset = getShadowPtrOffset(Addr, IRB);
+ Value *ShadowLong = ShadowOffset;
+ uint64_t ShadowBase = MS.MapParams->ShadowBase;
+ if (ShadowBase != 0) {
+ ShadowLong =
+ IRB.CreateAdd(ShadowLong,
+ ConstantInt::get(MS.IntptrTy, ShadowBase));
+ }
+ Value *ShadowPtr =
+ IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
+ Value *OriginPtr = nullptr;
+ if (MS.TrackOrigins) {
+ Value *OriginLong = ShadowOffset;
+ uint64_t OriginBase = MS.MapParams->OriginBase;
+ if (OriginBase != 0)
+ OriginLong = IRB.CreateAdd(OriginLong,
+ ConstantInt::get(MS.IntptrTy, OriginBase));
+ if (Alignment < kMinOriginAlignment) {
+ uint64_t Mask = kMinOriginAlignment - 1;
+ OriginLong =
+ IRB.CreateAnd(OriginLong, ConstantInt::get(MS.IntptrTy, ~Mask));
+ }
+ OriginPtr =
+ IRB.CreateIntToPtr(OriginLong, PointerType::get(MS.OriginTy, 0));
+ }
+ return std::make_pair(ShadowPtr, OriginPtr);
+ }
+
+ std::pair<Value *, Value *>
+ getShadowOriginPtrKernel(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
+ unsigned Alignment, bool isStore) {
+ Value *ShadowOriginPtrs;
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ int Size = DL.getTypeStoreSize(ShadowTy);
+
+ FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
+ Value *AddrCast =
+ IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0));
+ if (Getter) {
+ ShadowOriginPtrs = IRB.CreateCall(Getter, AddrCast);
+ } else {
+ Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
+ ShadowOriginPtrs = IRB.CreateCall(isStore ? MS.MsanMetadataPtrForStoreN
+ : MS.MsanMetadataPtrForLoadN,
+ {AddrCast, SizeVal});
+ }
+ Value *ShadowPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 0);
+ ShadowPtr = IRB.CreatePointerCast(ShadowPtr, PointerType::get(ShadowTy, 0));
+ Value *OriginPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 1);
+
+ return std::make_pair(ShadowPtr, OriginPtr);
+ }
+
+ std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
+ Type *ShadowTy,
+ unsigned Alignment,
+ bool isStore) {
+ std::pair<Value *, Value *> ret;
+ if (MS.CompileKernel)
+ ret = getShadowOriginPtrKernel(Addr, IRB, ShadowTy, Alignment, isStore);
+ else
+ ret = getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
+ return ret;
+ }
+
+ /// Compute the shadow address for a given function argument.
+ ///
+ /// Shadow = ParamTLS+ArgOffset.
+ Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB,
+ int ArgOffset) {
+ Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
+ if (ArgOffset)
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
+ "_msarg");
+ }
+
+ /// Compute the origin address for a given function argument.
+ Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
+ int ArgOffset) {
+ if (!MS.TrackOrigins)
+ return nullptr;
+ Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
+ if (ArgOffset)
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
+ "_msarg_o");
+ }
+
+ /// Compute the shadow address for a retval.
+ Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
+ return IRB.CreatePointerCast(MS.RetvalTLS,
+ PointerType::get(getShadowTy(A), 0),
+ "_msret");
+ }
+
+ /// Compute the origin address for a retval.
+ Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
+ // We keep a single origin for the entire retval. Might be too optimistic.
+ return MS.RetvalOriginTLS;
+ }
+
+ /// Set SV to be the shadow value for V.
+ void setShadow(Value *V, Value *SV) {
+ assert(!ShadowMap.count(V) && "Values may only have one shadow");
+ ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V);
+ }
+
+ /// Set Origin to be the origin value for V.
+ void setOrigin(Value *V, Value *Origin) {
+ if (!MS.TrackOrigins) return;
+ assert(!OriginMap.count(V) && "Values may only have one origin");
+ LLVM_DEBUG(dbgs() << "ORIGIN: " << *V << " ==> " << *Origin << "\n");
+ OriginMap[V] = Origin;
+ }
+
+ Constant *getCleanShadow(Type *OrigTy) {
+ Type *ShadowTy = getShadowTy(OrigTy);
+ if (!ShadowTy)
+ return nullptr;
+ return Constant::getNullValue(ShadowTy);
+ }
+
+ /// Create a clean shadow value for a given value.
+ ///
+ /// Clean shadow (all zeroes) means all bits of the value are defined
+ /// (initialized).
+ Constant *getCleanShadow(Value *V) {
+ return getCleanShadow(V->getType());
+ }
+
+ /// Create a dirty shadow of a given shadow type.
+ Constant *getPoisonedShadow(Type *ShadowTy) {
+ assert(ShadowTy);
+ if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
+ return Constant::getAllOnesValue(ShadowTy);
+ if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy)) {
+ SmallVector<Constant *, 4> Vals(AT->getNumElements(),
+ getPoisonedShadow(AT->getElementType()));
+ return ConstantArray::get(AT, Vals);
+ }
+ if (StructType *ST = dyn_cast<StructType>(ShadowTy)) {
+ SmallVector<Constant *, 4> Vals;
+ for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
+ Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
+ return ConstantStruct::get(ST, Vals);
+ }
+ llvm_unreachable("Unexpected shadow type");
+ }
+
+ /// Create a dirty shadow for a given value.
+ Constant *getPoisonedShadow(Value *V) {
+ Type *ShadowTy = getShadowTy(V);
+ if (!ShadowTy)
+ return nullptr;
+ return getPoisonedShadow(ShadowTy);
+ }
+
+ /// Create a clean (zero) origin.
+ Value *getCleanOrigin() {
+ return Constant::getNullValue(MS.OriginTy);
+ }
+
+ /// Get the shadow value for a given Value.
+ ///
+ /// This function either returns the value set earlier with setShadow,
+  /// or extracts it from ParamTLS (for function arguments).
+ Value *getShadow(Value *V) {
+ if (!PropagateShadow) return getCleanShadow(V);
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (I->getMetadata("nosanitize"))
+ return getCleanShadow(V);
+ // For instructions the shadow is already stored in the map.
+ Value *Shadow = ShadowMap[V];
+ if (!Shadow) {
+ LLVM_DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
+ (void)I;
+ assert(Shadow && "No shadow for a value");
+ }
+ return Shadow;
+ }
+ if (UndefValue *U = dyn_cast<UndefValue>(V)) {
+ Value *AllOnes = PoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V);
+ LLVM_DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
+ (void)U;
+ return AllOnes;
+ }
+ if (Argument *A = dyn_cast<Argument>(V)) {
+ // For arguments we compute the shadow on demand and store it in the map.
+ Value **ShadowPtr = &ShadowMap[V];
+ if (*ShadowPtr)
+ return *ShadowPtr;
+ Function *F = A->getParent();
+ IRBuilder<> EntryIRB(ActualFnStart->getFirstNonPHI());
+ unsigned ArgOffset = 0;
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ for (auto &FArg : F->args()) {
+ if (!FArg.getType()->isSized()) {
+ LLVM_DEBUG(dbgs() << "Arg is not sized\n");
+ continue;
+ }
+ unsigned Size =
+ FArg.hasByValAttr()
+ ? DL.getTypeAllocSize(FArg.getType()->getPointerElementType())
+ : DL.getTypeAllocSize(FArg.getType());
+ if (A == &FArg) {
+ bool Overflow = ArgOffset + Size > kParamTLSSize;
+ Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
+ if (FArg.hasByValAttr()) {
+ // ByVal pointer itself has clean shadow. We copy the actual
+ // argument shadow to the underlying memory.
+ // Figure out maximal valid memcpy alignment.
+ unsigned ArgAlign = FArg.getParamAlignment();
+ if (ArgAlign == 0) {
+ Type *EltType = A->getType()->getPointerElementType();
+ ArgAlign = DL.getABITypeAlignment(EltType);
+ }
+ Value *CpShadowPtr =
+ getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
+ /*isStore*/ true)
+ .first;
+ // TODO(glider): need to copy origins.
+ if (Overflow) {
+ // ParamTLS overflow.
+ EntryIRB.CreateMemSet(
+ CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()),
+ Size, ArgAlign);
+ } else {
+ unsigned CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
+ Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base,
+ CopyAlign, Size);
+ LLVM_DEBUG(dbgs() << " ByValCpy: " << *Cpy << "\n");
+ (void)Cpy;
+ }
+ *ShadowPtr = getCleanShadow(V);
+ } else {
+ if (Overflow) {
+ // ParamTLS overflow.
+ *ShadowPtr = getCleanShadow(V);
+ } else {
+ *ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
+ kShadowTLSAlignment);
+ }
+ }
+ LLVM_DEBUG(dbgs()
+ << " ARG: " << FArg << " ==> " << **ShadowPtr << "\n");
+ if (MS.TrackOrigins && !Overflow) {
+ Value *OriginPtr =
+ getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
+ setOrigin(A, EntryIRB.CreateLoad(MS.OriginTy, OriginPtr));
+ } else {
+ setOrigin(A, getCleanOrigin());
+ }
+ }
+ ArgOffset += alignTo(Size, kShadowTLSAlignment);
+ }
+ assert(*ShadowPtr && "Could not find shadow for an argument");
+ return *ShadowPtr;
+ }
+ // For everything else the shadow is zero.
+ return getCleanShadow(V);
+ }
+
+ /// Get the shadow for i-th argument of the instruction I.
+ Value *getShadow(Instruction *I, int i) {
+ return getShadow(I->getOperand(i));
+ }
+
+ /// Get the origin for a value.
+ Value *getOrigin(Value *V) {
+ if (!MS.TrackOrigins) return nullptr;
+ if (!PropagateShadow) return getCleanOrigin();
+ if (isa<Constant>(V)) return getCleanOrigin();
+ assert((isa<Instruction>(V) || isa<Argument>(V)) &&
+ "Unexpected value type in getOrigin()");
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (I->getMetadata("nosanitize"))
+ return getCleanOrigin();
+ }
+ Value *Origin = OriginMap[V];
+ assert(Origin && "Missing origin");
+ return Origin;
+ }
+
+ /// Get the origin for i-th argument of the instruction I.
+ Value *getOrigin(Instruction *I, int i) {
+ return getOrigin(I->getOperand(i));
+ }
+
+ /// Remember the place where a shadow check should be inserted.
+ ///
+  /// This location will later be instrumented with a check that will print a
+  /// UMR warning at runtime if the shadow value is not 0.
+ void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) {
+ assert(Shadow);
+ if (!InsertChecks) return;
+#ifndef NDEBUG
+ Type *ShadowTy = Shadow->getType();
+ assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) &&
+ "Can only insert checks for integer and vector shadow types");
+#endif
+ InstrumentationList.push_back(
+ ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
+ }
+
+ /// Remember the place where a shadow check should be inserted.
+ ///
+  /// This location will later be instrumented with a check that will print a
+  /// UMR warning at runtime if the value is not fully defined.
+ void insertShadowCheck(Value *Val, Instruction *OrigIns) {
+ assert(Val);
+ Value *Shadow, *Origin;
+ if (ClCheckConstantShadow) {
+ Shadow = getShadow(Val);
+ if (!Shadow) return;
+ Origin = getOrigin(Val);
+ } else {
+ Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
+ if (!Shadow) return;
+ Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
+ }
+ insertShadowCheck(Shadow, Origin, OrigIns);
+ }
+
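+  // Note on the two helpers below: the instrumentation adds shadow accesses
+  // next to atomic operations (the shadow store in handleCASOrRMW, the shadow
+  // load in visitLoadInst), and the orderings of those atomics are strengthened
+  // to at least Release (RMW, cmpxchg) or Acquire (atomic loads), presumably so
+  // that the extra shadow accesses stay ordered with the atomic access itself.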
+ AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
+ switch (a) {
+ case AtomicOrdering::NotAtomic:
+ return AtomicOrdering::NotAtomic;
+ case AtomicOrdering::Unordered:
+ case AtomicOrdering::Monotonic:
+ case AtomicOrdering::Release:
+ return AtomicOrdering::Release;
+ case AtomicOrdering::Acquire:
+ case AtomicOrdering::AcquireRelease:
+ return AtomicOrdering::AcquireRelease;
+ case AtomicOrdering::SequentiallyConsistent:
+ return AtomicOrdering::SequentiallyConsistent;
+ }
+ llvm_unreachable("Unknown ordering");
+ }
+
+ AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
+ switch (a) {
+ case AtomicOrdering::NotAtomic:
+ return AtomicOrdering::NotAtomic;
+ case AtomicOrdering::Unordered:
+ case AtomicOrdering::Monotonic:
+ case AtomicOrdering::Acquire:
+ return AtomicOrdering::Acquire;
+ case AtomicOrdering::Release:
+ case AtomicOrdering::AcquireRelease:
+ return AtomicOrdering::AcquireRelease;
+ case AtomicOrdering::SequentiallyConsistent:
+ return AtomicOrdering::SequentiallyConsistent;
+ }
+ llvm_unreachable("Unknown ordering");
+ }
+
+ // ------------------- Visitors.
+ using InstVisitor<MemorySanitizerVisitor>::visit;
+ void visit(Instruction &I) {
+ if (!I.getMetadata("nosanitize"))
+ InstVisitor<MemorySanitizerVisitor>::visit(I);
+ }
+
+ /// Instrument LoadInst
+ ///
+ /// Loads the corresponding shadow and (optionally) origin.
+ /// Optionally, checks that the load address is fully defined.
+ void visitLoadInst(LoadInst &I) {
+ assert(I.getType()->isSized() && "Load type must have size");
+ assert(!I.getMetadata("nosanitize"));
+ IRBuilder<> IRB(I.getNextNode());
+ Type *ShadowTy = getShadowTy(&I);
+ Value *Addr = I.getPointerOperand();
+ Value *ShadowPtr, *OriginPtr;
+ unsigned Alignment = I.getAlignment();
+ if (PropagateShadow) {
+ std::tie(ShadowPtr, OriginPtr) =
+ getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
+ setShadow(&I,
+ IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
+ } else {
+ setShadow(&I, getCleanShadow(&I));
+ }
+
+ if (ClCheckAccessAddress)
+ insertShadowCheck(I.getPointerOperand(), &I);
+
+ if (I.isAtomic())
+ I.setOrdering(addAcquireOrdering(I.getOrdering()));
+
+ if (MS.TrackOrigins) {
+ if (PropagateShadow) {
+ unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
+ setOrigin(
+ &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment));
+ } else {
+ setOrigin(&I, getCleanOrigin());
+ }
+ }
+ }
+
+ /// Instrument StoreInst
+ ///
+ /// Stores the corresponding shadow and (optionally) origin.
+ /// Optionally, checks that the store address is fully defined.
+ void visitStoreInst(StoreInst &I) {
+ StoreList.push_back(&I);
+ if (ClCheckAccessAddress)
+ insertShadowCheck(I.getPointerOperand(), &I);
+ }
+
+ void handleCASOrRMW(Instruction &I) {
+ assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
+
+ IRBuilder<> IRB(&I);
+ Value *Addr = I.getOperand(0);
+ Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, I.getType(),
+ /*Alignment*/ 1, /*isStore*/ true)
+ .first;
+
+ if (ClCheckAccessAddress)
+ insertShadowCheck(Addr, &I);
+
+ // Only test the conditional argument of cmpxchg instruction.
+ // The other argument can potentially be uninitialized, but we can not
+ // detect this situation reliably without possible false positives.
+ if (isa<AtomicCmpXchgInst>(I))
+ insertShadowCheck(I.getOperand(1), &I);
+
+ IRB.CreateStore(getCleanShadow(&I), ShadowPtr);
+
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ }
+
+ void visitAtomicRMWInst(AtomicRMWInst &I) {
+ handleCASOrRMW(I);
+ I.setOrdering(addReleaseOrdering(I.getOrdering()));
+ }
+
+ void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
+ handleCASOrRMW(I);
+ I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
+ }
+
+ // Vector manipulation.
+ void visitExtractElementInst(ExtractElementInst &I) {
+ insertShadowCheck(I.getOperand(1), &I);
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
+ "_msprop"));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitInsertElementInst(InsertElementInst &I) {
+ insertShadowCheck(I.getOperand(2), &I);
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
+ I.getOperand(2), "_msprop"));
+ setOriginForNaryOp(I);
+ }
+
+ void visitShuffleVectorInst(ShuffleVectorInst &I) {
+ insertShadowCheck(I.getOperand(2), &I);
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
+ I.getOperand(2), "_msprop"));
+ setOriginForNaryOp(I);
+ }
+
+ // Casts.
+ void visitSExtInst(SExtInst &I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitZExtInst(ZExtInst &I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitTruncInst(TruncInst &I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitBitCastInst(BitCastInst &I) {
+ // Special case: if this is the bitcast (there is exactly 1 allowed) between
+ // a musttail call and a ret, don't instrument. New instructions are not
+ // allowed after a musttail call.
+ if (auto *CI = dyn_cast<CallInst>(I.getOperand(0)))
+ if (CI->isMustTailCall())
+ return;
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitPtrToIntInst(PtrToIntInst &I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
+ "_msprop_ptrtoint"));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitIntToPtrInst(IntToPtrInst &I) {
+ IRBuilder<> IRB(&I);
+ setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
+ "_msprop_inttoptr"));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitFPToSIInst(CastInst& I) { handleShadowOr(I); }
+ void visitFPToUIInst(CastInst& I) { handleShadowOr(I); }
+ void visitSIToFPInst(CastInst& I) { handleShadowOr(I); }
+ void visitUIToFPInst(CastInst& I) { handleShadowOr(I); }
+ void visitFPExtInst(CastInst& I) { handleShadowOr(I); }
+ void visitFPTruncInst(CastInst& I) { handleShadowOr(I); }
+
+ /// Propagate shadow for bitwise AND.
+ ///
+  /// This code is exact, i.e. if, for example, a bit in the left argument
+  /// is defined and 0, then neither the value nor the definedness of the
+  /// corresponding bit in the right argument affects the resulting shadow.
+ void visitAnd(BinaryOperator &I) {
+ IRBuilder<> IRB(&I);
+ // "And" of 0 and a poisoned value results in unpoisoned value.
+ // 1&1 => 1; 0&1 => 0; p&1 => p;
+ // 1&0 => 0; 0&0 => 0; p&0 => 0;
+ // 1&p => p; 0&p => 0; p&p => p;
+ // S = (S1 & S2) | (V1 & S2) | (S1 & V2)
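+    // For example, a fully defined 0 operand (V1 == 0, S1 == 0) zeroes all
+    // three terms, so the result is reported as fully defined no matter how
+    // poisoned the other operand is (the 0&p => 0 row of the table above).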
+ Value *S1 = getShadow(&I, 0);
+ Value *S2 = getShadow(&I, 1);
+ Value *V1 = I.getOperand(0);
+ Value *V2 = I.getOperand(1);
+ if (V1->getType() != S1->getType()) {
+ V1 = IRB.CreateIntCast(V1, S1->getType(), false);
+ V2 = IRB.CreateIntCast(V2, S2->getType(), false);
+ }
+ Value *S1S2 = IRB.CreateAnd(S1, S2);
+ Value *V1S2 = IRB.CreateAnd(V1, S2);
+ Value *S1V2 = IRB.CreateAnd(S1, V2);
+ setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
+ setOriginForNaryOp(I);
+ }
+
+ void visitOr(BinaryOperator &I) {
+ IRBuilder<> IRB(&I);
+ // "Or" of 1 and a poisoned value results in unpoisoned value.
+ // 1|1 => 1; 0|1 => 1; p|1 => 1;
+ // 1|0 => 1; 0|0 => 0; p|0 => p;
+ // 1|p => 1; 0|p => p; p|p => p;
+ // S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
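+    // For example, a fully defined all-ones first operand has S1 == 0 and a
+    // negation V1 == 0 below, so all three terms vanish and 1|p => 1 is
+    // reported as fully defined.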
+ Value *S1 = getShadow(&I, 0);
+ Value *S2 = getShadow(&I, 1);
+ Value *V1 = IRB.CreateNot(I.getOperand(0));
+ Value *V2 = IRB.CreateNot(I.getOperand(1));
+ if (V1->getType() != S1->getType()) {
+ V1 = IRB.CreateIntCast(V1, S1->getType(), false);
+ V2 = IRB.CreateIntCast(V2, S2->getType(), false);
+ }
+ Value *S1S2 = IRB.CreateAnd(S1, S2);
+ Value *V1S2 = IRB.CreateAnd(V1, S2);
+ Value *S1V2 = IRB.CreateAnd(S1, V2);
+ setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
+ setOriginForNaryOp(I);
+ }
+
+ /// Default propagation of shadow and/or origin.
+ ///
+ /// This class implements the general case of shadow propagation, used in all
+ /// cases where we don't know and/or don't care about what the operation
+ /// actually does. It converts all input shadow values to a common type
+ /// (extending or truncating as necessary), and bitwise OR's them.
+ ///
+ /// This is much cheaper than inserting checks (i.e. requiring inputs to be
+ /// fully initialized), and less prone to false positives.
+ ///
+  /// This class also implements the general case of origin propagation. For an
+  /// N-ary operation, the result origin is set to the origin of an argument
+  /// that is not entirely initialized. If there is more than one such
+  /// argument, the rightmost of them is picked. It does not matter which one
+  /// is picked if all arguments are initialized.
+ template <bool CombineShadow>
+ class Combiner {
+ Value *Shadow = nullptr;
+ Value *Origin = nullptr;
+ IRBuilder<> &IRB;
+ MemorySanitizerVisitor *MSV;
+
+ public:
+ Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB)
+ : IRB(IRB), MSV(MSV) {}
+
+ /// Add a pair of shadow and origin values to the mix.
+ Combiner &Add(Value *OpShadow, Value *OpOrigin) {
+ if (CombineShadow) {
+ assert(OpShadow);
+ if (!Shadow)
+ Shadow = OpShadow;
+ else {
+ OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType());
+ Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop");
+ }
+ }
+
+ if (MSV->MS.TrackOrigins) {
+ assert(OpOrigin);
+ if (!Origin) {
+ Origin = OpOrigin;
+ } else {
+ Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin);
+ // No point in adding something that might result in 0 origin value.
+ if (!ConstOrigin || !ConstOrigin->isNullValue()) {
+ Value *FlatShadow = MSV->convertToShadowTyNoVec(OpShadow, IRB);
+ Value *Cond =
+ IRB.CreateICmpNE(FlatShadow, MSV->getCleanShadow(FlatShadow));
+ Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
+ }
+ }
+ }
+ return *this;
+ }
+
+ /// Add an application value to the mix.
+ Combiner &Add(Value *V) {
+ Value *OpShadow = MSV->getShadow(V);
+ Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
+ return Add(OpShadow, OpOrigin);
+ }
+
+ /// Set the current combined values as the given instruction's shadow
+ /// and origin.
+ void Done(Instruction *I) {
+ if (CombineShadow) {
+ assert(Shadow);
+ Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I));
+ MSV->setShadow(I, Shadow);
+ }
+ if (MSV->MS.TrackOrigins) {
+ assert(Origin);
+ MSV->setOrigin(I, Origin);
+ }
+ }
+ };
+
+ using ShadowAndOriginCombiner = Combiner<true>;
+ using OriginCombiner = Combiner<false>;
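+  // handleShadowOr() and setOriginForNaryOp() below are the typical users:
+  // they Add() every operand of an instruction and then call Done() on it.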
+
+ /// Propagate origin for arbitrary operation.
+ void setOriginForNaryOp(Instruction &I) {
+ if (!MS.TrackOrigins) return;
+ IRBuilder<> IRB(&I);
+ OriginCombiner OC(this, IRB);
+ for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
+ OC.Add(OI->get());
+ OC.Done(&I);
+ }
+
+ size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
+ assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
+ "Vector of pointers is not a valid shadow type");
+ return Ty->isVectorTy() ?
+ Ty->getVectorNumElements() * Ty->getScalarSizeInBits() :
+ Ty->getPrimitiveSizeInBits();
+ }
+
+ /// Cast between two shadow types, extending or truncating as
+ /// necessary.
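+  /// For example, a <4 x i8> shadow cast to i64 is first bitcast to i32 and
+  /// then zero- or sign-extended (depending on \p Signed) to i64.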
+ Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
+ bool Signed = false) {
+ Type *srcTy = V->getType();
+ size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
+ size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
+ if (srcSizeInBits > 1 && dstSizeInBits == 1)
+ return IRB.CreateICmpNE(V, getCleanShadow(V));
+
+ if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
+ return IRB.CreateIntCast(V, dstTy, Signed);
+ if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
+ dstTy->getVectorNumElements() == srcTy->getVectorNumElements())
+ return IRB.CreateIntCast(V, dstTy, Signed);
+ Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
+ Value *V2 =
+ IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed);
+ return IRB.CreateBitCast(V2, dstTy);
+ // TODO: handle struct types.
+ }
+
+ /// Cast an application value to the type of its own shadow.
+ Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) {
+ Type *ShadowTy = getShadowTy(V);
+ if (V->getType() == ShadowTy)
+ return V;
+ if (V->getType()->isPtrOrPtrVectorTy())
+ return IRB.CreatePtrToInt(V, ShadowTy);
+ else
+ return IRB.CreateBitCast(V, ShadowTy);
+ }
+
+ /// Propagate shadow for arbitrary operation.
+ void handleShadowOr(Instruction &I) {
+ IRBuilder<> IRB(&I);
+ ShadowAndOriginCombiner SC(this, IRB);
+ for (Instruction::op_iterator OI = I.op_begin(); OI != I.op_end(); ++OI)
+ SC.Add(OI->get());
+ SC.Done(&I);
+ }
+
+ void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
+
+ // Handle multiplication by constant.
+ //
+  // Handle a special case of multiplication by a constant that may have one
+  // or more zeros in the lower bits. This makes the corresponding number of
+  // lower bits of the result zero as well. We model it by shifting the other
+  // operand's shadow left by the required number of bits. Effectively, we
+  // transform (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as
+  // (Sx << B).
+ // We use multiplication by 2**N instead of shift to cover the case of
+ // multiplication by 0, which may occur in some elements of a vector operand.
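+  // For example, for X * 24 == X * (3 * 2**3), countTrailingZeros() == 3 and
+  // ShadowMul == 8, so the shadow of X is multiplied by 8 and the three low
+  // bits of the result, which are guaranteed to be zero, get a clean shadow.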
+ void handleMulByConstant(BinaryOperator &I, Constant *ConstArg,
+ Value *OtherArg) {
+ Constant *ShadowMul;
+ Type *Ty = ConstArg->getType();
+ if (Ty->isVectorTy()) {
+ unsigned NumElements = Ty->getVectorNumElements();
+ Type *EltTy = Ty->getSequentialElementType();
+ SmallVector<Constant *, 16> Elements;
+ for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
+ if (ConstantInt *Elt =
+ dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) {
+ const APInt &V = Elt->getValue();
+ APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
+ Elements.push_back(ConstantInt::get(EltTy, V2));
+ } else {
+ Elements.push_back(ConstantInt::get(EltTy, 1));
+ }
+ }
+ ShadowMul = ConstantVector::get(Elements);
+ } else {
+ if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) {
+ const APInt &V = Elt->getValue();
+ APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
+ ShadowMul = ConstantInt::get(Ty, V2);
+ } else {
+ ShadowMul = ConstantInt::get(Ty, 1);
+ }
+ }
+
+ IRBuilder<> IRB(&I);
+ setShadow(&I,
+ IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst"));
+ setOrigin(&I, getOrigin(OtherArg));
+ }
+
+ void visitMul(BinaryOperator &I) {
+ Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
+ Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
+ if (constOp0 && !constOp1)
+ handleMulByConstant(I, constOp0, I.getOperand(1));
+ else if (constOp1 && !constOp0)
+ handleMulByConstant(I, constOp1, I.getOperand(0));
+ else
+ handleShadowOr(I);
+ }
+
+ void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
+ void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
+ void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
+ void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
+ void visitSub(BinaryOperator &I) { handleShadowOr(I); }
+ void visitXor(BinaryOperator &I) { handleShadowOr(I); }
+
+ void handleIntegerDiv(Instruction &I) {
+ IRBuilder<> IRB(&I);
+ // Strict on the second argument.
+ insertShadowCheck(I.getOperand(1), &I);
+ setShadow(&I, getShadow(&I, 0));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void visitUDiv(BinaryOperator &I) { handleIntegerDiv(I); }
+ void visitSDiv(BinaryOperator &I) { handleIntegerDiv(I); }
+ void visitURem(BinaryOperator &I) { handleIntegerDiv(I); }
+ void visitSRem(BinaryOperator &I) { handleIntegerDiv(I); }
+
+ // Floating point division is side-effect free. We can not require that the
+ // divisor is fully initialized and must propagate shadow. See PR37523.
+ void visitFDiv(BinaryOperator &I) { handleShadowOr(I); }
+ void visitFRem(BinaryOperator &I) { handleShadowOr(I); }
+
+ /// Instrument == and != comparisons.
+ ///
+ /// Sometimes the comparison result is known even if some of the bits of the
+ /// arguments are not.
+ void handleEqualityComparison(ICmpInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *A = I.getOperand(0);
+ Value *B = I.getOperand(1);
+ Value *Sa = getShadow(A);
+ Value *Sb = getShadow(B);
+
+ // Get rid of pointers and vectors of pointers.
+ // For ints (and vectors of ints), types of A and Sa match,
+ // and this is a no-op.
+ A = IRB.CreatePointerCast(A, Sa->getType());
+ B = IRB.CreatePointerCast(B, Sb->getType());
+
+ // A == B <==> (C = A^B) == 0
+ // A != B <==> (C = A^B) != 0
+ // Sc = Sa | Sb
+ Value *C = IRB.CreateXor(A, B);
+ Value *Sc = IRB.CreateOr(Sa, Sb);
+    // We are now dealing with the comparison i = (C == 0) (or C != 0; it does
+    // not matter which at this point).
+    // The result is defined if one of the following is true:
+ // * there is a defined 1 bit in C
+ // * C is fully defined
+ // Si = !(C & ~Sc) && Sc
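+    // For example, when comparing A == 16 and bit 0 of A is a defined 1 while
+    // its high bits are poisoned, C = A ^ 16 has a defined 1 in bit 0, so Si
+    // is false and the (necessarily false) result is reported as defined.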
+ Value *Zero = Constant::getNullValue(Sc->getType());
+ Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
+ Value *Si =
+ IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero),
+ IRB.CreateICmpEQ(
+ IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero));
+ Si->setName("_msprop_icmp");
+ setShadow(&I, Si);
+ setOriginForNaryOp(I);
+ }
+
+ /// Build the lowest possible value of V, taking into account V's
+ /// uninitialized bits.
+ Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
+ bool isSigned) {
+ if (isSigned) {
+ // Split shadow into sign bit and other bits.
+ Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
+ Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
+ // Maximise the undefined shadow bit, minimize other undefined bits.
+ return
+ IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)), SaSignBit);
+ } else {
+ // Minimize undefined bits.
+ return IRB.CreateAnd(A, IRB.CreateNot(Sa));
+ }
+ }
+
+ /// Build the highest possible value of V, taking into account V's
+ /// uninitialized bits.
+ Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
+ bool isSigned) {
+ if (isSigned) {
+ // Split shadow into sign bit and other bits.
+ Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
+ Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
+ // Minimise the undefined shadow bit, maximise other undefined bits.
+ return
+ IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)), SaOtherBits);
+ } else {
+ // Maximize undefined bits.
+ return IRB.CreateOr(A, Sa);
+ }
+ }
+
+ /// Instrument relational comparisons.
+ ///
+ /// This function does exact shadow propagation for all relational
+ /// comparisons of integers, pointers and vectors of those.
+ /// FIXME: output seems suboptimal when one of the operands is a constant
+ void handleRelationalComparisonExact(ICmpInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *A = I.getOperand(0);
+ Value *B = I.getOperand(1);
+ Value *Sa = getShadow(A);
+ Value *Sb = getShadow(B);
+
+ // Get rid of pointers and vectors of pointers.
+ // For ints (and vectors of ints), types of A and Sa match,
+ // and this is a no-op.
+ A = IRB.CreatePointerCast(A, Sa->getType());
+ B = IRB.CreatePointerCast(B, Sb->getType());
+
+ // Let [a0, a1] be the interval of possible values of A, taking into account
+ // its undefined bits. Let [b0, b1] be the interval of possible values of B.
+ // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
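+    // For an unsigned A with value 0b1010 and shadow Sa == 0b0101, for
+    // instance, that interval is [A & ~Sa, A | Sa] == [0b1010, 0b1111].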
+ bool IsSigned = I.isSigned();
+ Value *S1 = IRB.CreateICmp(I.getPredicate(),
+ getLowestPossibleValue(IRB, A, Sa, IsSigned),
+ getHighestPossibleValue(IRB, B, Sb, IsSigned));
+ Value *S2 = IRB.CreateICmp(I.getPredicate(),
+ getHighestPossibleValue(IRB, A, Sa, IsSigned),
+ getLowestPossibleValue(IRB, B, Sb, IsSigned));
+ Value *Si = IRB.CreateXor(S1, S2);
+ setShadow(&I, Si);
+ setOriginForNaryOp(I);
+ }
+
+ /// Instrument signed relational comparisons.
+ ///
+ /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
+ /// bit of the shadow. Everything else is delegated to handleShadowOr().
+ void handleSignedRelationalComparison(ICmpInst &I) {
+ Constant *constOp;
+ Value *op = nullptr;
+ CmpInst::Predicate pre;
+ if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) {
+ op = I.getOperand(0);
+ pre = I.getPredicate();
+ } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) {
+ op = I.getOperand(1);
+ pre = I.getSwappedPredicate();
+ } else {
+ handleShadowOr(I);
+ return;
+ }
+
+ if ((constOp->isNullValue() &&
+ (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
+ (constOp->isAllOnesValue() &&
+ (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
+ IRBuilder<> IRB(&I);
+ Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op),
+ "_msprop_icmp_s");
+ setShadow(&I, Shadow);
+ setOrigin(&I, getOrigin(op));
+ } else {
+ handleShadowOr(I);
+ }
+ }
+
+ void visitICmpInst(ICmpInst &I) {
+ if (!ClHandleICmp) {
+ handleShadowOr(I);
+ return;
+ }
+ if (I.isEquality()) {
+ handleEqualityComparison(I);
+ return;
+ }
+
+ assert(I.isRelational());
+ if (ClHandleICmpExact) {
+ handleRelationalComparisonExact(I);
+ return;
+ }
+ if (I.isSigned()) {
+ handleSignedRelationalComparison(I);
+ return;
+ }
+
+ assert(I.isUnsigned());
+ if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) {
+ handleRelationalComparisonExact(I);
+ return;
+ }
+
+ handleShadowOr(I);
+ }
+
+ void visitFCmpInst(FCmpInst &I) {
+ handleShadowOr(I);
+ }
+
+ void handleShift(BinaryOperator &I) {
+ IRBuilder<> IRB(&I);
+ // If any of the S2 bits are poisoned, the whole thing is poisoned.
+ // Otherwise perform the same shift on S1.
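+    // E.g. for "shl i32 %x, 3" the shift amount is a constant with a clean
+    // shadow, so S2Conv is zero and the result shadow is simply S1 << 3,
+    // mirroring which bits of the value survive the shift.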
+ Value *S1 = getShadow(&I, 0);
+ Value *S2 = getShadow(&I, 1);
+ Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)),
+ S2->getType());
+ Value *V2 = I.getOperand(1);
+ Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
+ setShadow(&I, IRB.CreateOr(Shift, S2Conv));
+ setOriginForNaryOp(I);
+ }
+
+ void visitShl(BinaryOperator &I) { handleShift(I); }
+ void visitAShr(BinaryOperator &I) { handleShift(I); }
+ void visitLShr(BinaryOperator &I) { handleShift(I); }
+
+ /// Instrument llvm.memmove
+ ///
+ /// At this point we don't know if llvm.memmove will be inlined or not.
+ /// If we don't instrument it and it gets inlined,
+ /// our interceptor will not kick in and we will lose the memmove.
+ /// If we instrument the call here, but it does not get inlined,
+  /// we will memmove the shadow twice, which is bad in the case
+  /// of overlapping regions. So, we simply lower the intrinsic to a call.
+  ///
+  /// A similar situation exists for memcpy and memset.
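+  ///
+  /// The callees MS.MemmoveFn / MS.MemcpyFn / MS.MemsetFn are expected to be
+  /// the sanitizer runtime's own mem* wrappers (e.g. __msan_memmove), which
+  /// update shadow and origin themselves.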
+ void visitMemMoveInst(MemMoveInst &I) {
+ IRBuilder<> IRB(&I);
+ IRB.CreateCall(
+ MS.MemmoveFn,
+ {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
+ I.eraseFromParent();
+ }
+
+ // Similar to memmove: avoid copying shadow twice.
+  // This is somewhat unfortunate as it may slow down small constant memcpys.
+ // FIXME: consider doing manual inline for small constant sizes and proper
+ // alignment.
+ void visitMemCpyInst(MemCpyInst &I) {
+ IRBuilder<> IRB(&I);
+ IRB.CreateCall(
+ MS.MemcpyFn,
+ {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
+ I.eraseFromParent();
+ }
+
+ // Same as memcpy.
+ void visitMemSetInst(MemSetInst &I) {
+ IRBuilder<> IRB(&I);
+ IRB.CreateCall(
+ MS.MemsetFn,
+ {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
+ I.eraseFromParent();
+ }
+
+ void visitVAStartInst(VAStartInst &I) {
+ VAHelper->visitVAStartInst(I);
+ }
+
+ void visitVACopyInst(VACopyInst &I) {
+ VAHelper->visitVACopyInst(I);
+ }
+
+ /// Handle vector store-like intrinsics.
+ ///
+ /// Instrument intrinsics that look like a simple SIMD store: writes memory,
+ /// has 1 pointer argument and 1 vector argument, returns void.
+ bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value* Addr = I.getArgOperand(0);
+ Value *Shadow = getShadow(&I, 1);
+ Value *ShadowPtr, *OriginPtr;
+
+ // We don't know the pointer alignment (could be unaligned SSE store!).
+    // Have to assume the worst case.
+ std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
+ Addr, IRB, Shadow->getType(), /*Alignment*/ 1, /*isStore*/ true);
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, 1);
+
+ if (ClCheckAccessAddress)
+ insertShadowCheck(Addr, &I);
+
+ // FIXME: factor out common code from materializeStores
+ if (MS.TrackOrigins) IRB.CreateStore(getOrigin(&I, 1), OriginPtr);
+ return true;
+ }
+
+ /// Handle vector load-like intrinsics.
+ ///
+ /// Instrument intrinsics that look like a simple SIMD load: reads memory,
+ /// has 1 pointer argument, returns a vector.
+ bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *Addr = I.getArgOperand(0);
+
+ Type *ShadowTy = getShadowTy(&I);
+ Value *ShadowPtr, *OriginPtr;
+ if (PropagateShadow) {
+ // We don't know the pointer alignment (could be unaligned SSE load!).
+      // Have to assume the worst case.
+ unsigned Alignment = 1;
+ std::tie(ShadowPtr, OriginPtr) =
+ getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
+ setShadow(&I,
+ IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
+ } else {
+ setShadow(&I, getCleanShadow(&I));
+ }
+
+ if (ClCheckAccessAddress)
+ insertShadowCheck(Addr, &I);
+
+ if (MS.TrackOrigins) {
+ if (PropagateShadow)
+ setOrigin(&I, IRB.CreateLoad(MS.OriginTy, OriginPtr));
+ else
+ setOrigin(&I, getCleanOrigin());
+ }
+ return true;
+ }
+
+ /// Handle (SIMD arithmetic)-like intrinsics.
+ ///
+ /// Instrument intrinsics with any number of arguments of the same type,
+ /// equal to the return type. The type should be simple (no aggregates or
+ /// pointers; vectors are fine).
+ /// Caller guarantees that this intrinsic does not access memory.
+ bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
+ Type *RetTy = I.getType();
+ if (!(RetTy->isIntOrIntVectorTy() ||
+ RetTy->isFPOrFPVectorTy() ||
+ RetTy->isX86_MMXTy()))
+ return false;
+
+ unsigned NumArgOperands = I.getNumArgOperands();
+
+ for (unsigned i = 0; i < NumArgOperands; ++i) {
+ Type *Ty = I.getArgOperand(i)->getType();
+ if (Ty != RetTy)
+ return false;
+ }
+
+ IRBuilder<> IRB(&I);
+ ShadowAndOriginCombiner SC(this, IRB);
+ for (unsigned i = 0; i < NumArgOperands; ++i)
+ SC.Add(I.getArgOperand(i));
+ SC.Done(&I);
+
+ return true;
+ }
+
+ /// Heuristically instrument unknown intrinsics.
+ ///
+ /// The main purpose of this code is to do something reasonable with all
+  /// random intrinsics we might encounter, most importantly SIMD intrinsics.
+  /// We recognize several classes of intrinsics by their argument types and
+  /// ModRefBehaviour and apply special instrumentation when we are reasonably
+ /// sure that we know what the intrinsic does.
+ ///
+ /// We special-case intrinsics where this approach fails. See llvm.bswap
+ /// handling as an example of that.
+ bool handleUnknownIntrinsic(IntrinsicInst &I) {
+ unsigned NumArgOperands = I.getNumArgOperands();
+ if (NumArgOperands == 0)
+ return false;
+
+ if (NumArgOperands == 2 &&
+ I.getArgOperand(0)->getType()->isPointerTy() &&
+ I.getArgOperand(1)->getType()->isVectorTy() &&
+ I.getType()->isVoidTy() &&
+ !I.onlyReadsMemory()) {
+ // This looks like a vector store.
+ return handleVectorStoreIntrinsic(I);
+ }
+
+ if (NumArgOperands == 1 &&
+ I.getArgOperand(0)->getType()->isPointerTy() &&
+ I.getType()->isVectorTy() &&
+ I.onlyReadsMemory()) {
+ // This looks like a vector load.
+ return handleVectorLoadIntrinsic(I);
+ }
+
+ if (I.doesNotAccessMemory())
+ if (maybeHandleSimpleNomemIntrinsic(I))
+ return true;
+
+ // FIXME: detect and handle SSE maskstore/maskload
+ return false;
+ }
+
+ void handleInvariantGroup(IntrinsicInst &I) {
+ setShadow(&I, getShadow(&I, 0));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
+ void handleLifetimeStart(IntrinsicInst &I) {
+ if (!PoisonStack)
+ return;
+ DenseMap<Value *, AllocaInst *> AllocaForValue;
+ AllocaInst *AI =
+ llvm::findAllocaForValue(I.getArgOperand(1), AllocaForValue);
+ if (!AI)
+ InstrumentLifetimeStart = false;
+ LifetimeStartList.push_back(std::make_pair(&I, AI));
+ }
+
+ void handleBswap(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *Op = I.getArgOperand(0);
+ Type *OpType = Op->getType();
+ Function *BswapFunc = Intrinsic::getDeclaration(
+ F.getParent(), Intrinsic::bswap, makeArrayRef(&OpType, 1));
+ setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
+ setOrigin(&I, getOrigin(Op));
+ }
+
+  // Instrument vector convert intrinsic.
+ //
+ // This function instruments intrinsics like cvtsi2ss:
+ // %Out = int_xxx_cvtyyy(%ConvertOp)
+ // or
+ // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
+  // The intrinsic converts \p NumUsedElements elements of \p ConvertOp to the
+  // same number of \p Out elements, and (if it has 2 arguments) copies the
+  // rest of the elements from \p CopyOp.
+ // In most cases conversion involves floating-point value which may trigger a
+ // hardware exception when not fully initialized. For this reason we require
+ // \p ConvertOp[0:NumUsedElements] to be fully initialized and trap otherwise.
+ // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
+ // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
+ // return a fully initialized value.
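+  // For example, for x86_sse2_cvtsd2si (a single <2 x double> argument and an
+  // i32 result, NumUsedElements == 1) the shadow of element 0 of the argument
+  // is checked and the result is marked as fully initialized.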
+ void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements) {
+ IRBuilder<> IRB(&I);
+ Value *CopyOp, *ConvertOp;
+
+ switch (I.getNumArgOperands()) {
+ case 3:
+ assert(isa<ConstantInt>(I.getArgOperand(2)) && "Invalid rounding mode");
+ LLVM_FALLTHROUGH;
+ case 2:
+ CopyOp = I.getArgOperand(0);
+ ConvertOp = I.getArgOperand(1);
+ break;
+ case 1:
+ ConvertOp = I.getArgOperand(0);
+ CopyOp = nullptr;
+ break;
+ default:
+ llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
+ }
+
+ // The first *NumUsedElements* elements of ConvertOp are converted to the
+ // same number of output elements. The rest of the output is copied from
+ // CopyOp, or (if not available) filled with zeroes.
+ // Combine shadow for elements of ConvertOp that are used in this operation,
+ // and insert a check.
+ // FIXME: consider propagating shadow of ConvertOp, at least in the case of
+ // int->any conversion.
+ Value *ConvertShadow = getShadow(ConvertOp);
+ Value *AggShadow = nullptr;
+ if (ConvertOp->getType()->isVectorTy()) {
+ AggShadow = IRB.CreateExtractElement(
+ ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
+ for (int i = 1; i < NumUsedElements; ++i) {
+ Value *MoreShadow = IRB.CreateExtractElement(
+ ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i));
+ AggShadow = IRB.CreateOr(AggShadow, MoreShadow);
+ }
+ } else {
+ AggShadow = ConvertShadow;
+ }
+ assert(AggShadow->getType()->isIntegerTy());
+ insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I);
+
+ // Build result shadow by zero-filling parts of CopyOp shadow that come from
+ // ConvertOp.
+ if (CopyOp) {
+ assert(CopyOp->getType() == I.getType());
+ assert(CopyOp->getType()->isVectorTy());
+ Value *ResultShadow = getShadow(CopyOp);
+ Type *EltTy = ResultShadow->getType()->getVectorElementType();
+ for (int i = 0; i < NumUsedElements; ++i) {
+ ResultShadow = IRB.CreateInsertElement(
+ ResultShadow, ConstantInt::getNullValue(EltTy),
+ ConstantInt::get(IRB.getInt32Ty(), i));
+ }
+ setShadow(&I, ResultShadow);
+ setOrigin(&I, getOrigin(CopyOp));
+ } else {
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ }
+ }
+
+ // Given a scalar or vector, extract lower 64 bits (or less), and return all
+ // zeroes if it is zero, and all ones otherwise.
+ Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
+ if (S->getType()->isVectorTy())
+ S = CreateShadowCast(IRB, S, IRB.getInt64Ty(), /* Signed */ true);
+ assert(S->getType()->getPrimitiveSizeInBits() <= 64);
+ Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
+ return CreateShadowCast(IRB, S2, T, /* Signed */ true);
+ }
+
+ // Given a vector, extract its first element, and return all
+ // zeroes if it is zero, and all ones otherwise.
+ Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
+ Value *S1 = IRB.CreateExtractElement(S, (uint64_t)0);
+ Value *S2 = IRB.CreateICmpNE(S1, getCleanShadow(S1));
+ return CreateShadowCast(IRB, S2, T, /* Signed */ true);
+ }
+
+ Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
+ Type *T = S->getType();
+ assert(T->isVectorTy());
+ Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
+ return IRB.CreateSExt(S2, T);
+ }
+
+  // Instrument vector shift intrinsic.
+ //
+ // This function instruments intrinsics like int_x86_avx2_psll_w.
+ // Intrinsic shifts %In by %ShiftSize bits.
+ // %ShiftSize may be a vector. In that case the lower 64 bits determine shift
+ // size, and the rest is ignored. Behavior is defined even if shift size is
+ // greater than register (or field) width.
+ void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
+ assert(I.getNumArgOperands() == 2);
+ IRBuilder<> IRB(&I);
+ // If any of the S2 bits are poisoned, the whole thing is poisoned.
+ // Otherwise perform the same shift on S1.
+ Value *S1 = getShadow(&I, 0);
+ Value *S2 = getShadow(&I, 1);
+ Value *S2Conv = Variable ? VariableShadowExtend(IRB, S2)
+ : Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
+ Value *V1 = I.getOperand(0);
+ Value *V2 = I.getOperand(1);
+ Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledValue(),
+ {IRB.CreateBitCast(S1, V1->getType()), V2});
+ Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
+ setShadow(&I, IRB.CreateOr(Shift, S2Conv));
+ setOriginForNaryOp(I);
+ }
+
+ // Get an X86_MMX-sized vector type.
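+  // E.g. getMMXVectorTy(16) yields <4 x i16>.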
+ Type *getMMXVectorTy(unsigned EltSizeInBits) {
+ const unsigned X86_MMXSizeInBits = 64;
+ assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
+ "Illegal MMX vector element size");
+ return VectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
+ X86_MMXSizeInBits / EltSizeInBits);
+ }
+
+ // Returns a signed counterpart for an (un)signed-saturate-and-pack
+ // intrinsic.
+ Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {
+ switch (id) {
+ case Intrinsic::x86_sse2_packsswb_128:
+ case Intrinsic::x86_sse2_packuswb_128:
+ return Intrinsic::x86_sse2_packsswb_128;
+
+ case Intrinsic::x86_sse2_packssdw_128:
+ case Intrinsic::x86_sse41_packusdw:
+ return Intrinsic::x86_sse2_packssdw_128;
+
+ case Intrinsic::x86_avx2_packsswb:
+ case Intrinsic::x86_avx2_packuswb:
+ return Intrinsic::x86_avx2_packsswb;
+
+ case Intrinsic::x86_avx2_packssdw:
+ case Intrinsic::x86_avx2_packusdw:
+ return Intrinsic::x86_avx2_packssdw;
+
+ case Intrinsic::x86_mmx_packsswb:
+ case Intrinsic::x86_mmx_packuswb:
+ return Intrinsic::x86_mmx_packsswb;
+
+ case Intrinsic::x86_mmx_packssdw:
+ return Intrinsic::x86_mmx_packssdw;
+ default:
+ llvm_unreachable("unexpected intrinsic id");
+ }
+ }
+
+  // Instrument vector pack intrinsic.
+ //
+  // This function instruments intrinsics like x86_mmx_packsswb, which
+  // pack the elements of 2 input vectors into elements with half as many
+  // bits, with saturation.
+ // Shadow is propagated with the signed variant of the same intrinsic applied
+ // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
+ // EltSizeInBits is used only for x86mmx arguments.
+ void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) {
+ assert(I.getNumArgOperands() == 2);
+ bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
+ IRBuilder<> IRB(&I);
+ Value *S1 = getShadow(&I, 0);
+ Value *S2 = getShadow(&I, 1);
+ assert(isX86_MMX || S1->getType()->isVectorTy());
+
+ // SExt and ICmpNE below must apply to individual elements of input vectors.
+ // In case of x86mmx arguments, cast them to appropriate vector types and
+ // back.
+ Type *T = isX86_MMX ? getMMXVectorTy(EltSizeInBits) : S1->getType();
+ if (isX86_MMX) {
+ S1 = IRB.CreateBitCast(S1, T);
+ S2 = IRB.CreateBitCast(S2, T);
+ }
+ Value *S1_ext = IRB.CreateSExt(
+ IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T);
+ Value *S2_ext = IRB.CreateSExt(
+ IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T);
+ if (isX86_MMX) {
+ Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C);
+ S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy);
+ S2_ext = IRB.CreateBitCast(S2_ext, X86_MMXTy);
+ }
+
+ Function *ShadowFn = Intrinsic::getDeclaration(
+ F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID()));
+
+ Value *S =
+ IRB.CreateCall(ShadowFn, {S1_ext, S2_ext}, "_msprop_vector_pack");
+ if (isX86_MMX) S = IRB.CreateBitCast(S, getShadowTy(&I));
+ setShadow(&I, S);
+ setOriginForNaryOp(I);
+ }
+
+  // Instrument sum-of-absolute-differences intrinsic.
+ void handleVectorSadIntrinsic(IntrinsicInst &I) {
+ const unsigned SignificantBitsPerResultElement = 16;
+ bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
+ Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType();
+ unsigned ZeroBitsPerResultElement =
+ ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement;
+
+ IRBuilder<> IRB(&I);
+ Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
+ S = IRB.CreateBitCast(S, ResTy);
+ S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
+ ResTy);
+ S = IRB.CreateLShr(S, ZeroBitsPerResultElement);
+ S = IRB.CreateBitCast(S, getShadowTy(&I));
+ setShadow(&I, S);
+ setOriginForNaryOp(I);
+ }
+
+ // Instrument multiply-add intrinsic.
+ void handleVectorPmaddIntrinsic(IntrinsicInst &I,
+ unsigned EltSizeInBits = 0) {
+ bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
+ Type *ResTy = isX86_MMX ? getMMXVectorTy(EltSizeInBits * 2) : I.getType();
+ IRBuilder<> IRB(&I);
+ Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
+ S = IRB.CreateBitCast(S, ResTy);
+ S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
+ ResTy);
+ S = IRB.CreateBitCast(S, getShadowTy(&I));
+ setShadow(&I, S);
+ setOriginForNaryOp(I);
+ }
+
+ // Instrument compare-packed intrinsic.
+ // Basically, an or followed by sext(icmp ne 0) to end up with all-zeros or
+ // all-ones shadow.
+ void handleVectorComparePackedIntrinsic(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Type *ResTy = getShadowTy(&I);
+ Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
+ Value *S = IRB.CreateSExt(
+ IRB.CreateICmpNE(S0, Constant::getNullValue(ResTy)), ResTy);
+ setShadow(&I, S);
+ setOriginForNaryOp(I);
+ }
+
+ // Instrument compare-scalar intrinsic.
+ // This handles both cmp* intrinsics which return the result in the first
+ // element of a vector, and comi* which return the result as i32.
+ void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *S0 = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
+ Value *S = LowerElementShadowExtend(IRB, S0, getShadowTy(&I));
+ setShadow(&I, S);
+ setOriginForNaryOp(I);
+ }
+
+ void handleStmxcsr(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value* Addr = I.getArgOperand(0);
+ Type *Ty = IRB.getInt32Ty();
+ Value *ShadowPtr =
+ getShadowOriginPtr(Addr, IRB, Ty, /*Alignment*/ 1, /*isStore*/ true)
+ .first;
+
+ IRB.CreateStore(getCleanShadow(Ty),
+ IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo()));
+
+ if (ClCheckAccessAddress)
+ insertShadowCheck(Addr, &I);
+ }
+
+ void handleLdmxcsr(IntrinsicInst &I) {
+ if (!InsertChecks) return;
+
+ IRBuilder<> IRB(&I);
+ Value *Addr = I.getArgOperand(0);
+ Type *Ty = IRB.getInt32Ty();
+ unsigned Alignment = 1;
+ Value *ShadowPtr, *OriginPtr;
+ std::tie(ShadowPtr, OriginPtr) =
+ getShadowOriginPtr(Addr, IRB, Ty, Alignment, /*isStore*/ false);
+
+ if (ClCheckAccessAddress)
+ insertShadowCheck(Addr, &I);
+
+ Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr");
+ Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr)
+ : getCleanOrigin();
+ insertShadowCheck(Shadow, Origin, &I);
+ }
+
+ void handleMaskedStore(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *V = I.getArgOperand(0);
+ Value *Addr = I.getArgOperand(1);
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
+ Value *Mask = I.getArgOperand(3);
+ Value *Shadow = getShadow(V);
+
+ Value *ShadowPtr;
+ Value *OriginPtr;
+ std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
+ Addr, IRB, Shadow->getType(), Align, /*isStore*/ true);
+
+ if (ClCheckAccessAddress) {
+ insertShadowCheck(Addr, &I);
+ // Uninitialized mask is kind of like uninitialized address, but not as
+ // scary.
+ insertShadowCheck(Mask, &I);
+ }
+
+ IRB.CreateMaskedStore(Shadow, ShadowPtr, Align, Mask);
+
+ if (MS.TrackOrigins) {
+ auto &DL = F.getParent()->getDataLayout();
+ paintOrigin(IRB, getOrigin(V), OriginPtr,
+ DL.getTypeStoreSize(Shadow->getType()),
+ std::max(Align, kMinOriginAlignment));
+ }
+ }
+
+ bool handleMaskedLoad(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *Addr = I.getArgOperand(0);
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ Value *Mask = I.getArgOperand(2);
+ Value *PassThru = I.getArgOperand(3);
+
+ Type *ShadowTy = getShadowTy(&I);
+ Value *ShadowPtr, *OriginPtr;
+ if (PropagateShadow) {
+ std::tie(ShadowPtr, OriginPtr) =
+ getShadowOriginPtr(Addr, IRB, ShadowTy, Align, /*isStore*/ false);
+ setShadow(&I, IRB.CreateMaskedLoad(ShadowPtr, Align, Mask,
+ getShadow(PassThru), "_msmaskedld"));
+ } else {
+ setShadow(&I, getCleanShadow(&I));
+ }
+
+ if (ClCheckAccessAddress) {
+ insertShadowCheck(Addr, &I);
+ insertShadowCheck(Mask, &I);
+ }
+
+ if (MS.TrackOrigins) {
+ if (PropagateShadow) {
+ // Choose between PassThru's and the loaded value's origins.
+ Value *MaskedPassThruShadow = IRB.CreateAnd(
+ getShadow(PassThru), IRB.CreateSExt(IRB.CreateNeg(Mask), ShadowTy));
+
+ Value *Acc = IRB.CreateExtractElement(
+ MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
+ for (int i = 1, N = PassThru->getType()->getVectorNumElements(); i < N;
+ ++i) {
+ Value *More = IRB.CreateExtractElement(
+ MaskedPassThruShadow, ConstantInt::get(IRB.getInt32Ty(), i));
+ Acc = IRB.CreateOr(Acc, More);
+ }
+
+ Value *Origin = IRB.CreateSelect(
+ IRB.CreateICmpNE(Acc, Constant::getNullValue(Acc->getType())),
+ getOrigin(PassThru), IRB.CreateLoad(MS.OriginTy, OriginPtr));
+
+ setOrigin(&I, Origin);
+ } else {
+ setOrigin(&I, getCleanOrigin());
+ }
+ }
+ return true;
+ }
+
+ // Instrument BMI / BMI2 intrinsics.
+ // All of these intrinsics are Z = I(X, Y)
+ // where the types of all operands and the result match, and are either i32 or i64.
+ // The following instrumentation happens to work for all of them:
+ // Sz = I(Sx, Y) | (sext (Sy != 0))
+ void handleBmiIntrinsic(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Type *ShadowTy = getShadowTy(&I);
+
+ // If any bit of the mask operand is poisoned, then the whole thing is.
+ Value *SMask = getShadow(&I, 1);
+ SMask = IRB.CreateSExt(IRB.CreateICmpNE(SMask, getCleanShadow(ShadowTy)),
+ ShadowTy);
+ // Apply the same intrinsic to the shadow of the first operand.
+ Value *S = IRB.CreateCall(I.getCalledFunction(),
+ {getShadow(&I, 0), I.getOperand(1)});
+ S = IRB.CreateOr(SMask, S);
+ setShadow(&I, S);
+ setOriginForNaryOp(I);
+ }
+
+ void visitIntrinsicInst(IntrinsicInst &I) {
+ switch (I.getIntrinsicID()) {
+ case Intrinsic::lifetime_start:
+ handleLifetimeStart(I);
+ break;
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group:
+ handleInvariantGroup(I);
+ break;
+ case Intrinsic::bswap:
+ handleBswap(I);
+ break;
+ case Intrinsic::masked_store:
+ handleMaskedStore(I);
+ break;
+ case Intrinsic::masked_load:
+ handleMaskedLoad(I);
+ break;
+ case Intrinsic::x86_sse_stmxcsr:
+ handleStmxcsr(I);
+ break;
+ case Intrinsic::x86_sse_ldmxcsr:
+ handleLdmxcsr(I);
+ break;
+ case Intrinsic::x86_avx512_vcvtsd2usi64:
+ case Intrinsic::x86_avx512_vcvtsd2usi32:
+ case Intrinsic::x86_avx512_vcvtss2usi64:
+ case Intrinsic::x86_avx512_vcvtss2usi32:
+ case Intrinsic::x86_avx512_cvttss2usi64:
+ case Intrinsic::x86_avx512_cvttss2usi:
+ case Intrinsic::x86_avx512_cvttsd2usi64:
+ case Intrinsic::x86_avx512_cvttsd2usi:
+ case Intrinsic::x86_avx512_cvtusi2ss:
+ case Intrinsic::x86_avx512_cvtusi642sd:
+ case Intrinsic::x86_avx512_cvtusi642ss:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2ss:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse_cvttss2si:
+ handleVectorConvertIntrinsic(I, 1);
+ break;
+ case Intrinsic::x86_sse_cvtps2pi:
+ case Intrinsic::x86_sse_cvttps2pi:
+ handleVectorConvertIntrinsic(I, 2);
+ break;
+
+ case Intrinsic::x86_avx512_psll_w_512:
+ case Intrinsic::x86_avx512_psll_d_512:
+ case Intrinsic::x86_avx512_psll_q_512:
+ case Intrinsic::x86_avx512_pslli_w_512:
+ case Intrinsic::x86_avx512_pslli_d_512:
+ case Intrinsic::x86_avx512_pslli_q_512:
+ case Intrinsic::x86_avx512_psrl_w_512:
+ case Intrinsic::x86_avx512_psrl_d_512:
+ case Intrinsic::x86_avx512_psrl_q_512:
+ case Intrinsic::x86_avx512_psra_w_512:
+ case Intrinsic::x86_avx512_psra_d_512:
+ case Intrinsic::x86_avx512_psra_q_512:
+ case Intrinsic::x86_avx512_psrli_w_512:
+ case Intrinsic::x86_avx512_psrli_d_512:
+ case Intrinsic::x86_avx512_psrli_q_512:
+ case Intrinsic::x86_avx512_psrai_w_512:
+ case Intrinsic::x86_avx512_psrai_d_512:
+ case Intrinsic::x86_avx512_psrai_q_512:
+ case Intrinsic::x86_avx512_psra_q_256:
+ case Intrinsic::x86_avx512_psra_q_128:
+ case Intrinsic::x86_avx512_psrai_q_256:
+ case Intrinsic::x86_avx512_psrai_q_128:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx2_psra_d:
+ case Intrinsic::x86_avx2_psrli_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_mmx_psll_w:
+ case Intrinsic::x86_mmx_psll_d:
+ case Intrinsic::x86_mmx_psll_q:
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrl_w:
+ case Intrinsic::x86_mmx_psrl_d:
+ case Intrinsic::x86_mmx_psrl_q:
+ case Intrinsic::x86_mmx_psra_w:
+ case Intrinsic::x86_mmx_psra_d:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d:
+ handleVectorShiftIntrinsic(I, /* Variable */ false);
+ break;
+ case Intrinsic::x86_avx2_psllv_d:
+ case Intrinsic::x86_avx2_psllv_d_256:
+ case Intrinsic::x86_avx512_psllv_d_512:
+ case Intrinsic::x86_avx2_psllv_q:
+ case Intrinsic::x86_avx2_psllv_q_256:
+ case Intrinsic::x86_avx512_psllv_q_512:
+ case Intrinsic::x86_avx2_psrlv_d:
+ case Intrinsic::x86_avx2_psrlv_d_256:
+ case Intrinsic::x86_avx512_psrlv_d_512:
+ case Intrinsic::x86_avx2_psrlv_q:
+ case Intrinsic::x86_avx2_psrlv_q_256:
+ case Intrinsic::x86_avx512_psrlv_q_512:
+ case Intrinsic::x86_avx2_psrav_d:
+ case Intrinsic::x86_avx2_psrav_d_256:
+ case Intrinsic::x86_avx512_psrav_d_512:
+ case Intrinsic::x86_avx512_psrav_q_128:
+ case Intrinsic::x86_avx512_psrav_q_256:
+ case Intrinsic::x86_avx512_psrav_q_512:
+ handleVectorShiftIntrinsic(I, /* Variable */ true);
+ break;
+
+ case Intrinsic::x86_sse2_packsswb_128:
+ case Intrinsic::x86_sse2_packssdw_128:
+ case Intrinsic::x86_sse2_packuswb_128:
+ case Intrinsic::x86_sse41_packusdw:
+ case Intrinsic::x86_avx2_packsswb:
+ case Intrinsic::x86_avx2_packssdw:
+ case Intrinsic::x86_avx2_packuswb:
+ case Intrinsic::x86_avx2_packusdw:
+ handleVectorPackIntrinsic(I);
+ break;
+
+ case Intrinsic::x86_mmx_packsswb:
+ case Intrinsic::x86_mmx_packuswb:
+ handleVectorPackIntrinsic(I, 16);
+ break;
+
+ case Intrinsic::x86_mmx_packssdw:
+ handleVectorPackIntrinsic(I, 32);
+ break;
+
+ case Intrinsic::x86_mmx_psad_bw:
+ case Intrinsic::x86_sse2_psad_bw:
+ case Intrinsic::x86_avx2_psad_bw:
+ handleVectorSadIntrinsic(I);
+ break;
+
+ case Intrinsic::x86_sse2_pmadd_wd:
+ case Intrinsic::x86_avx2_pmadd_wd:
+ case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
+ case Intrinsic::x86_avx2_pmadd_ub_sw:
+ handleVectorPmaddIntrinsic(I);
+ break;
+
+ case Intrinsic::x86_ssse3_pmadd_ub_sw:
+ handleVectorPmaddIntrinsic(I, 8);
+ break;
+
+ case Intrinsic::x86_mmx_pmadd_wd:
+ handleVectorPmaddIntrinsic(I, 16);
+ break;
+
+ case Intrinsic::x86_sse_cmp_ss:
+ case Intrinsic::x86_sse2_cmp_sd:
+ case Intrinsic::x86_sse_comieq_ss:
+ case Intrinsic::x86_sse_comilt_ss:
+ case Intrinsic::x86_sse_comile_ss:
+ case Intrinsic::x86_sse_comigt_ss:
+ case Intrinsic::x86_sse_comige_ss:
+ case Intrinsic::x86_sse_comineq_ss:
+ case Intrinsic::x86_sse_ucomieq_ss:
+ case Intrinsic::x86_sse_ucomilt_ss:
+ case Intrinsic::x86_sse_ucomile_ss:
+ case Intrinsic::x86_sse_ucomigt_ss:
+ case Intrinsic::x86_sse_ucomige_ss:
+ case Intrinsic::x86_sse_ucomineq_ss:
+ case Intrinsic::x86_sse2_comieq_sd:
+ case Intrinsic::x86_sse2_comilt_sd:
+ case Intrinsic::x86_sse2_comile_sd:
+ case Intrinsic::x86_sse2_comigt_sd:
+ case Intrinsic::x86_sse2_comige_sd:
+ case Intrinsic::x86_sse2_comineq_sd:
+ case Intrinsic::x86_sse2_ucomieq_sd:
+ case Intrinsic::x86_sse2_ucomilt_sd:
+ case Intrinsic::x86_sse2_ucomile_sd:
+ case Intrinsic::x86_sse2_ucomigt_sd:
+ case Intrinsic::x86_sse2_ucomige_sd:
+ case Intrinsic::x86_sse2_ucomineq_sd:
+ handleVectorCompareScalarIntrinsic(I);
+ break;
+
+ case Intrinsic::x86_sse_cmp_ps:
+ case Intrinsic::x86_sse2_cmp_pd:
+ // FIXME: For x86_avx_cmp_pd_256 and x86_avx_cmp_ps_256 this function
+ // generates reasonably looking IR that fails in the backend with "Do not
+ // know how to split the result of this operator!".
+ handleVectorComparePackedIntrinsic(I);
+ break;
+
+ case Intrinsic::x86_bmi_bextr_32:
+ case Intrinsic::x86_bmi_bextr_64:
+ case Intrinsic::x86_bmi_bzhi_32:
+ case Intrinsic::x86_bmi_bzhi_64:
+ case Intrinsic::x86_bmi_pdep_32:
+ case Intrinsic::x86_bmi_pdep_64:
+ case Intrinsic::x86_bmi_pext_32:
+ case Intrinsic::x86_bmi_pext_64:
+ handleBmiIntrinsic(I);
+ break;
+
+ case Intrinsic::is_constant:
+ // The result of llvm.is.constant() is always defined.
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ break;
+
+ default:
+ if (!handleUnknownIntrinsic(I))
+ visitInstruction(I);
+ break;
+ }
+ }
+
+ void visitCallSite(CallSite CS) {
+ Instruction &I = *CS.getInstruction();
+ assert(!I.getMetadata("nosanitize"));
+ assert((CS.isCall() || CS.isInvoke() || CS.isCallBr()) &&
+ "Unknown type of CallSite");
+ if (CS.isCallBr() || (CS.isCall() && cast<CallInst>(&I)->isInlineAsm())) {
+ // For inline asm (either a call to asm function, or callbr instruction),
+ // do the usual thing: check argument shadow and mark all outputs as
+ // clean. Note that any side effects of the inline asm that are not
+ // immediately visible in its constraints are not handled.
+ if (ClHandleAsmConservative && MS.CompileKernel)
+ visitAsmInstruction(I);
+ else
+ visitInstruction(I);
+ return;
+ }
+ if (CS.isCall()) {
+ CallInst *Call = cast<CallInst>(&I);
+ assert(!isa<IntrinsicInst>(&I) && "intrinsics are handled elsewhere");
+
+ // We are going to insert code that relies on the fact that the callee
+ // will become a non-readonly function after it is instrumented by us. To
+ // prevent this code from being optimized out, mark that function
+ // non-readonly in advance.
+ if (Function *Func = Call->getCalledFunction()) {
+ // Clear out readonly/readnone attributes.
+ AttrBuilder B;
+ B.addAttribute(Attribute::ReadOnly)
+ .addAttribute(Attribute::ReadNone);
+ Func->removeAttributes(AttributeList::FunctionIndex, B);
+ }
+
+ maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
+ }
+ IRBuilder<> IRB(&I);
+
+ unsigned ArgOffset = 0;
+ LLVM_DEBUG(dbgs() << " CallSite: " << I << "\n");
+ for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
+ ArgIt != End; ++ArgIt) {
+ Value *A = *ArgIt;
+ unsigned i = ArgIt - CS.arg_begin();
+ if (!A->getType()->isSized()) {
+ LLVM_DEBUG(dbgs() << "Arg " << i << " is not sized: " << I << "\n");
+ continue;
+ }
+ unsigned Size = 0;
+ Value *Store = nullptr;
+ // Compute the Shadow for arg even if it is ByVal, because
+ // in that case getShadow() will copy the actual arg shadow to
+ // __msan_param_tls.
+ Value *ArgShadow = getShadow(A);
+ Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
+ LLVM_DEBUG(dbgs() << " Arg#" << i << ": " << *A
+ << " Shadow: " << *ArgShadow << "\n");
+ bool ArgIsInitialized = false;
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ if (CS.paramHasAttr(i, Attribute::ByVal)) {
+ assert(A->getType()->isPointerTy() &&
+ "ByVal argument is not a pointer!");
+ Size = DL.getTypeAllocSize(A->getType()->getPointerElementType());
+ if (ArgOffset + Size > kParamTLSSize) break;
+ unsigned ParamAlignment = CS.getParamAlignment(i);
+ unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment);
+ Value *AShadowPtr =
+ getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
+ /*isStore*/ false)
+ .first;
+
+ Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
+ Alignment, Size);
+ // TODO(glider): need to copy origins.
+ } else {
+ Size = DL.getTypeAllocSize(A->getType());
+ if (ArgOffset + Size > kParamTLSSize) break;
+ Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
+ kShadowTLSAlignment);
+ Constant *Cst = dyn_cast<Constant>(ArgShadow);
+ if (Cst && Cst->isNullValue()) ArgIsInitialized = true;
+ }
+ if (MS.TrackOrigins && !ArgIsInitialized)
+ IRB.CreateStore(getOrigin(A),
+ getOriginPtrForArgument(A, IRB, ArgOffset));
+ (void)Store;
+ assert(Size != 0 && Store != nullptr);
+ LLVM_DEBUG(dbgs() << " Param:" << *Store << "\n");
+ ArgOffset += alignTo(Size, 8);
+ }
+ LLVM_DEBUG(dbgs() << " done with call args\n");
+
+ FunctionType *FT = CS.getFunctionType();
+ if (FT->isVarArg()) {
+ VAHelper->visitCallSite(CS, IRB);
+ }
+
+ // Now, get the shadow for the RetVal.
+ if (!I.getType()->isSized()) return;
+ // Don't emit the epilogue for musttail call returns.
+ if (CS.isCall() && cast<CallInst>(&I)->isMustTailCall()) return;
+ IRBuilder<> IRBBefore(&I);
+ // Until we have full dynamic coverage, make sure the retval shadow is 0.
+ Value *Base = getShadowPtrForRetval(&I, IRBBefore);
+ IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment);
+ BasicBlock::iterator NextInsn;
+ if (CS.isCall()) {
+ NextInsn = ++I.getIterator();
+ assert(NextInsn != I.getParent()->end());
+ } else {
+ BasicBlock *NormalDest = cast<InvokeInst>(&I)->getNormalDest();
+ if (!NormalDest->getSinglePredecessor()) {
+ // FIXME: this case is tricky, so we are just conservative here.
+ // Perhaps we need to split the edge between this BB and NormalDest,
+ // but a naive attempt to use SplitEdge leads to a crash.
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ return;
+ }
+ // FIXME: NextInsn is likely in a basic block that has not been visited yet.
+ // Anything inserted there will be instrumented by MSan later!
+ NextInsn = NormalDest->getFirstInsertionPt();
+ assert(NextInsn != NormalDest->end() &&
+ "Could not find insertion point for retval shadow load");
+ }
+ IRBuilder<> IRBAfter(&*NextInsn);
+ Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
+ getShadowTy(&I), getShadowPtrForRetval(&I, IRBAfter),
+ kShadowTLSAlignment, "_msret");
+ setShadow(&I, RetvalShadow);
+ if (MS.TrackOrigins)
+ setOrigin(&I, IRBAfter.CreateLoad(MS.OriginTy,
+ getOriginPtrForRetval(IRBAfter)));
+ }
+
+ bool isAMustTailRetVal(Value *RetVal) {
+ if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
+ RetVal = I->getOperand(0);
+ }
+ if (auto *I = dyn_cast<CallInst>(RetVal)) {
+ return I->isMustTailCall();
+ }
+ return false;
+ }
+
+ void visitReturnInst(ReturnInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *RetVal = I.getReturnValue();
+ if (!RetVal) return;
+ // Don't emit the epilogue for musttail call returns.
+ if (isAMustTailRetVal(RetVal)) return;
+ Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
+ if (CheckReturnValue) {
+ insertShadowCheck(RetVal, &I);
+ Value *Shadow = getCleanShadow(RetVal);
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
+ } else {
+ Value *Shadow = getShadow(RetVal);
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
+ if (MS.TrackOrigins)
+ IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
+ }
+ }
+
+ void visitPHINode(PHINode &I) {
+ IRBuilder<> IRB(&I);
+ if (!PropagateShadow) {
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ return;
+ }
+
+ ShadowPHINodes.push_back(&I);
+ setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
+ "_msphi_s"));
+ if (MS.TrackOrigins)
+ setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(),
+ "_msphi_o"));
+ }
+
+ Value *getLocalVarDescription(AllocaInst &I) {
+ SmallString<2048> StackDescriptionStorage;
+ raw_svector_ostream StackDescription(StackDescriptionStorage);
+ // We create a string with a description of the stack allocation and
+ // pass it into __msan_set_alloca_origin.
+ // It will be printed by the run-time if stack-originated UMR is found.
+ // The first 4 bytes of the string are set to '----' and will be replaced
+ // by __msan_va_arg_overflow_size_tls at the first call.
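+    // For example, an alloca named %buf in function main() produces the
+    // string "----buf@main".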
+ StackDescription << "----" << I.getName() << "@" << F.getName();
+ return createPrivateNonConstGlobalForString(*F.getParent(),
+ StackDescription.str());
+ }
+
+ void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
+ if (PoisonStack && ClPoisonStackWithCall) {
+ IRB.CreateCall(MS.MsanPoisonStackFn,
+ {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
+ } else {
+ Value *ShadowBase, *OriginBase;
+ std::tie(ShadowBase, OriginBase) =
+ getShadowOriginPtr(&I, IRB, IRB.getInt8Ty(), 1, /*isStore*/ true);
+
+ Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
+ IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlignment());
+ }
+
+ if (PoisonStack && MS.TrackOrigins) {
+ Value *Descr = getLocalVarDescription(I);
+ IRB.CreateCall(MS.MsanSetAllocaOrigin4Fn,
+ {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
+ IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(&F, MS.IntptrTy)});
+ }
+ }
+
+ void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
+ Value *Descr = getLocalVarDescription(I);
+ if (PoisonStack) {
+ IRB.CreateCall(MS.MsanPoisonAllocaFn,
+ {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
+ IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())});
+ } else {
+ IRB.CreateCall(MS.MsanUnpoisonAllocaFn,
+ {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
+ }
+ }
+
+ void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) {
+ if (!InsPoint)
+ InsPoint = &I;
+ IRBuilder<> IRB(InsPoint->getNextNode());
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
+ Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
+ if (I.isArrayAllocation())
+ Len = IRB.CreateMul(Len, I.getArraySize());
+
+ if (MS.CompileKernel)
+ poisonAllocaKmsan(I, IRB, Len);
+ else
+ poisonAllocaUserspace(I, IRB, Len);
+ }
+
+ void visitAllocaInst(AllocaInst &I) {
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ // We'll get to this alloca later unless it's poisoned at the corresponding
+ // llvm.lifetime.start.
+ AllocaSet.insert(&I);
+ }
+
+ void visitSelectInst(SelectInst& I) {
+ IRBuilder<> IRB(&I);
+ // a = select b, c, d
+ Value *B = I.getCondition();
+ Value *C = I.getTrueValue();
+ Value *D = I.getFalseValue();
+ Value *Sb = getShadow(B);
+ Value *Sc = getShadow(C);
+ Value *Sd = getShadow(D);
+
+ // Result shadow if condition shadow is 0.
+ Value *Sa0 = IRB.CreateSelect(B, Sc, Sd);
+ Value *Sa1;
+ if (I.getType()->isAggregateType()) {
+ // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
+ // an extra "select". This results in much more compact IR.
+ // Sa = select Sb, poisoned, (select b, Sc, Sd)
+ Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
+ } else {
+ // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
+ // If Sb (condition is poisoned), look for bits in c and d that are equal
+ // and both unpoisoned.
+ // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd.
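+      // For example, with fully defined c = 0b1100 and d = 0b1010 and a
+      // poisoned condition, Sa1 = c^d = 0b0110: only the bits on which c
+      // and d agree are reported as defined.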
+
+ // Cast arguments to shadow-compatible type.
+ C = CreateAppToShadowCast(IRB, C);
+ D = CreateAppToShadowCast(IRB, D);
+
+ // Result shadow if condition shadow is 1.
+ Sa1 = IRB.CreateOr({IRB.CreateXor(C, D), Sc, Sd});
+ }
+ Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
+ setShadow(&I, Sa);
+ if (MS.TrackOrigins) {
+ // Origins are always i32, so any vector conditions must be flattened.
+ // FIXME: consider tracking vector origins for app vectors?
+ if (B->getType()->isVectorTy()) {
+ Type *FlatTy = getShadowTyNoVec(B->getType());
+ B = IRB.CreateICmpNE(IRB.CreateBitCast(B, FlatTy),
+ ConstantInt::getNullValue(FlatTy));
+ Sb = IRB.CreateICmpNE(IRB.CreateBitCast(Sb, FlatTy),
+ ConstantInt::getNullValue(FlatTy));
+ }
+ // a = select b, c, d
+ // Oa = Sb ? Ob : (b ? Oc : Od)
+ setOrigin(
+ &I, IRB.CreateSelect(Sb, getOrigin(I.getCondition()),
+ IRB.CreateSelect(B, getOrigin(I.getTrueValue()),
+ getOrigin(I.getFalseValue()))));
+ }
+ }
+
+ void visitLandingPadInst(LandingPadInst &I) {
+ // Do nothing.
+ // See https://github.com/google/sanitizers/issues/504
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ }
+
+ void visitCatchSwitchInst(CatchSwitchInst &I) {
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ }
+
+ void visitFuncletPadInst(FuncletPadInst &I) {
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ }
+
+ void visitGetElementPtrInst(GetElementPtrInst &I) {
+ handleShadowOr(I);
+ }
+
+ void visitExtractValueInst(ExtractValueInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *Agg = I.getAggregateOperand();
+ LLVM_DEBUG(dbgs() << "ExtractValue: " << I << "\n");
+ Value *AggShadow = getShadow(Agg);
+ LLVM_DEBUG(dbgs() << " AggShadow: " << *AggShadow << "\n");
+ Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
+ LLVM_DEBUG(dbgs() << " ResShadow: " << *ResShadow << "\n");
+ setShadow(&I, ResShadow);
+ setOriginForNaryOp(I);
+ }
+
+ void visitInsertValueInst(InsertValueInst &I) {
+ IRBuilder<> IRB(&I);
+ LLVM_DEBUG(dbgs() << "InsertValue: " << I << "\n");
+ Value *AggShadow = getShadow(I.getAggregateOperand());
+ Value *InsShadow = getShadow(I.getInsertedValueOperand());
+ LLVM_DEBUG(dbgs() << " AggShadow: " << *AggShadow << "\n");
+ LLVM_DEBUG(dbgs() << " InsShadow: " << *InsShadow << "\n");
+ Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
+ LLVM_DEBUG(dbgs() << " Res: " << *Res << "\n");
+ setShadow(&I, Res);
+ setOriginForNaryOp(I);
+ }
+
+ void dumpInst(Instruction &I) {
+ if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+ errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
+ } else {
+ errs() << "ZZZ " << I.getOpcodeName() << "\n";
+ }
+ errs() << "QQQ " << I << "\n";
+ }
+
+ void visitResumeInst(ResumeInst &I) {
+ LLVM_DEBUG(dbgs() << "Resume: " << I << "\n");
+ // Nothing to do here.
+ }
+
+ void visitCleanupReturnInst(CleanupReturnInst &CRI) {
+ LLVM_DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
+ // Nothing to do here.
+ }
+
+ void visitCatchReturnInst(CatchReturnInst &CRI) {
+ LLVM_DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
+ // Nothing to do here.
+ }
+
+ void instrumentAsmArgument(Value *Operand, Instruction &I, IRBuilder<> &IRB,
+ const DataLayout &DL, bool isOutput) {
+ // For each assembly argument, we check its value for being initialized.
+ // If the argument is a pointer, we assume it points to a single element
+    // of the corresponding type (or to an 8-byte word, if the type is unsized).
+ // Each such pointer is instrumented with a call to the runtime library.
+ Type *OpType = Operand->getType();
+ // Check the operand value itself.
+ insertShadowCheck(Operand, &I);
+ if (!OpType->isPointerTy() || !isOutput) {
+ assert(!isOutput);
+ return;
+ }
+ Type *ElType = OpType->getPointerElementType();
+ if (!ElType->isSized())
+ return;
+ int Size = DL.getTypeStoreSize(ElType);
+ Value *Ptr = IRB.CreatePointerCast(Operand, IRB.getInt8PtrTy());
+ Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
+ IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Ptr, SizeVal});
+ }
+
+ /// Get the number of output arguments returned by pointers.
+ int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
+ int NumRetOutputs = 0;
+ int NumOutputs = 0;
+ Type *RetTy = cast<Value>(CB)->getType();
+ if (!RetTy->isVoidTy()) {
+ // Register outputs are returned via the CallInst return value.
+ auto *ST = dyn_cast<StructType>(RetTy);
+ if (ST)
+ NumRetOutputs = ST->getNumElements();
+ else
+ NumRetOutputs = 1;
+ }
+ InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
+ for (size_t i = 0, n = Constraints.size(); i < n; i++) {
+ InlineAsm::ConstraintInfo Info = Constraints[i];
+ switch (Info.Type) {
+ case InlineAsm::isOutput:
+ NumOutputs++;
+ break;
+ default:
+ break;
+ }
+ }
+ return NumOutputs - NumRetOutputs;
+ }
+
+ void visitAsmInstruction(Instruction &I) {
+ // Conservative inline assembly handling: check for poisoned shadow of
+ // asm() arguments, then unpoison the result and all the memory locations
+ // pointed to by those arguments.
+ // An inline asm() statement in C++ contains lists of input and output
+ // arguments used by the assembly code. These are mapped to operands of the
+ // CallInst as follows:
+    //  - nR register outputs ("=r") are returned by value in a single structure
+ // (SSA value of the CallInst);
+ // - nO other outputs ("=m" and others) are returned by pointer as first
+ // nO operands of the CallInst;
+ // - nI inputs ("r", "m" and others) are passed to CallInst as the
+ // remaining nI operands.
+ // The total number of asm() arguments in the source is nR+nO+nI, and the
+ // corresponding CallInst has nO+nI+1 operands (the last operand is the
+ // function to be called).
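+    // For instance, asm("..." : "=r"(a), "=m"(b) : "r"(c)) has nR = 1 (a is
+    // the SSA value of the CallInst), nO = 1 (the address of b is the first
+    // operand) and nI = 1 (c is the next operand), so the CallInst has
+    // nO + nI + 1 = 3 operands, the last one being the asm callee.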
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ CallBase *CB = cast<CallBase>(&I);
+ IRBuilder<> IRB(&I);
+ InlineAsm *IA = cast<InlineAsm>(CB->getCalledValue());
+ int OutputArgs = getNumOutputArgs(IA, CB);
+ // The last operand of a CallInst is the function itself.
+ int NumOperands = CB->getNumOperands() - 1;
+
+ // Check input arguments. Doing so before unpoisoning output arguments, so
+ // that we won't overwrite uninit values before checking them.
+ for (int i = OutputArgs; i < NumOperands; i++) {
+ Value *Operand = CB->getOperand(i);
+ instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ false);
+ }
+ // Unpoison output arguments. This must happen before the actual InlineAsm
+ // call, so that the shadow for memory published in the asm() statement
+ // remains valid.
+ for (int i = 0; i < OutputArgs; i++) {
+ Value *Operand = CB->getOperand(i);
+ instrumentAsmArgument(Operand, I, IRB, DL, /*isOutput*/ true);
+ }
+
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ }
+
+ void visitInstruction(Instruction &I) {
+ // Everything else: stop propagating and check for poisoned shadow.
+ if (ClDumpStrictInstructions)
+ dumpInst(I);
+ LLVM_DEBUG(dbgs() << "DEFAULT: " << I << "\n");
+ for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
+ Value *Operand = I.getOperand(i);
+ if (Operand->getType()->isSized())
+ insertShadowCheck(Operand, &I);
+ }
+ setShadow(&I, getCleanShadow(&I));
+ setOrigin(&I, getCleanOrigin());
+ }
+};
+
+/// AMD64-specific implementation of VarArgHelper.
+struct VarArgAMD64Helper : public VarArgHelper {
+ // An unfortunate workaround for asymmetric lowering of va_arg stuff.
+ // See a comment in visitCallSite for more details.
+ static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
+ static const unsigned AMD64FpEndOffsetSSE = 176;
+ // If SSE is disabled, fp_offset in va_list is zero.
+ static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset;
+
+ unsigned AMD64FpEndOffset;
+ Function &F;
+ MemorySanitizer &MS;
+ MemorySanitizerVisitor &MSV;
+ Value *VAArgTLSCopy = nullptr;
+ Value *VAArgTLSOriginCopy = nullptr;
+ Value *VAArgOverflowSize = nullptr;
+
+ SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+ enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
+
+ VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
+ MemorySanitizerVisitor &MSV)
+ : F(F), MS(MS), MSV(MSV) {
+ AMD64FpEndOffset = AMD64FpEndOffsetSSE;
+ for (const auto &Attr : F.getAttributes().getFnAttributes()) {
+ if (Attr.isStringAttribute() &&
+ (Attr.getKindAsString() == "target-features")) {
+ if (Attr.getValueAsString().contains("-sse"))
+ AMD64FpEndOffset = AMD64FpEndOffsetNoSSE;
+ break;
+ }
+ }
+ }
+
+ ArgKind classifyArgument(Value* arg) {
+ // A very rough approximation of X86_64 argument classification rules.
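+    // E.g. double and <4 x float> are classified as AK_FloatingPoint, i64
+    // and pointers as AK_GeneralPurpose, while i128 and aggregates fall
+    // back to AK_Memory.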
+ Type *T = arg->getType();
+ if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
+ return AK_FloatingPoint;
+ if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
+ return AK_GeneralPurpose;
+ if (T->isPointerTy())
+ return AK_GeneralPurpose;
+ return AK_Memory;
+ }
+
+ // For VarArg functions, store the argument shadow in an ABI-specific format
+ // that corresponds to va_list layout.
+ // We do this because Clang lowers va_arg in the frontend, and this pass
+ // only sees the low level code that deals with va_list internals.
+ // A much easier alternative (provided that Clang emits va_arg instructions)
+ // would have been to associate each live instance of va_list with a copy of
+ // MSanParamTLS, and extract shadow on va_arg() call in the argument list
+ // order.
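+  // Resulting layout of __msan_va_arg_tls as used by this helper: bytes
+  // [0, 48) hold the shadow of the six integer register arguments (rdi, rsi,
+  // rdx, rcx, r8, r9), [48, 176) the shadow of the eight SSE register
+  // arguments (16 bytes each, when SSE is enabled), and [176, ...) the
+  // shadow of arguments passed in the overflow (stack) area.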
+ void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
+ unsigned GpOffset = 0;
+ unsigned FpOffset = AMD64GpEndOffset;
+ unsigned OverflowOffset = AMD64FpEndOffset;
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
+ ArgIt != End; ++ArgIt) {
+ Value *A = *ArgIt;
+ unsigned ArgNo = CS.getArgumentNo(ArgIt);
+ bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams();
+ bool IsByVal = CS.paramHasAttr(ArgNo, Attribute::ByVal);
+ if (IsByVal) {
+ // ByVal arguments always go to the overflow area.
+ // Fixed arguments passed through the overflow area will be stepped
+ // over by va_start, so don't count them towards the offset.
+ if (IsFixed)
+ continue;
+ assert(A->getType()->isPointerTy());
+ Type *RealTy = A->getType()->getPointerElementType();
+ uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
+ Value *ShadowBase = getShadowPtrForVAArgument(
+ RealTy, IRB, OverflowOffset, alignTo(ArgSize, 8));
+ Value *OriginBase = nullptr;
+ if (MS.TrackOrigins)
+ OriginBase = getOriginPtrForVAArgument(RealTy, IRB, OverflowOffset);
+ OverflowOffset += alignTo(ArgSize, 8);
+ if (!ShadowBase)
+ continue;
+ Value *ShadowPtr, *OriginPtr;
+ std::tie(ShadowPtr, OriginPtr) =
+ MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment,
+ /*isStore*/ false);
+
+ IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr,
+ kShadowTLSAlignment, ArgSize);
+ if (MS.TrackOrigins)
+ IRB.CreateMemCpy(OriginBase, kShadowTLSAlignment, OriginPtr,
+ kShadowTLSAlignment, ArgSize);
+ } else {
+ ArgKind AK = classifyArgument(A);
+ if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
+ AK = AK_Memory;
+ if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
+ AK = AK_Memory;
+ Value *ShadowBase, *OriginBase = nullptr;
+ switch (AK) {
+ case AK_GeneralPurpose:
+ ShadowBase =
+ getShadowPtrForVAArgument(A->getType(), IRB, GpOffset, 8);
+ if (MS.TrackOrigins)
+ OriginBase =
+ getOriginPtrForVAArgument(A->getType(), IRB, GpOffset);
+ GpOffset += 8;
+ break;
+ case AK_FloatingPoint:
+ ShadowBase =
+ getShadowPtrForVAArgument(A->getType(), IRB, FpOffset, 16);
+ if (MS.TrackOrigins)
+ OriginBase =
+ getOriginPtrForVAArgument(A->getType(), IRB, FpOffset);
+ FpOffset += 16;
+ break;
+ case AK_Memory:
+ if (IsFixed)
+ continue;
+ uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+ ShadowBase =
+ getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset, 8);
+ if (MS.TrackOrigins)
+ OriginBase =
+ getOriginPtrForVAArgument(A->getType(), IRB, OverflowOffset);
+ OverflowOffset += alignTo(ArgSize, 8);
+ }
+ // Take fixed arguments into account for GpOffset and FpOffset,
+ // but don't actually store shadows for them.
+ // TODO(glider): don't call get*PtrForVAArgument() for them.
+ if (IsFixed)
+ continue;
+ if (!ShadowBase)
+ continue;
+ Value *Shadow = MSV.getShadow(A);
+ IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment);
+ if (MS.TrackOrigins) {
+ Value *Origin = MSV.getOrigin(A);
+ unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
+ MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
+ std::max(kShadowTLSAlignment, kMinOriginAlignment));
+ }
+ }
+ }
+ Constant *OverflowSize =
+ ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
+ IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
+ }
+
+ /// Compute the shadow address for a given va_arg.
+ Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+ unsigned ArgOffset, unsigned ArgSize) {
+ // Make sure we don't overflow __msan_va_arg_tls.
+ if (ArgOffset + ArgSize > kParamTLSSize)
+ return nullptr;
+ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+ "_msarg_va_s");
+ }
+
+ /// Compute the origin address for a given va_arg.
+ Value *getOriginPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, int ArgOffset) {
+ Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
+ // getOriginPtrForVAArgument() is always called after
+ // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
+ // overflow.
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
+ "_msarg_va_o");
+ }
+
+ void unpoisonVAListTagForInst(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr, *OriginPtr;
+ unsigned Alignment = 8;
+ std::tie(ShadowPtr, OriginPtr) =
+ MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
+ /*isStore*/ true);
+
+ // Unpoison the whole __va_list_tag.
+ // FIXME: magic ABI constants.
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */ 24, Alignment, false);
+ // We shouldn't need to zero out the origins, as they're only checked for
+ // nonzero shadow.
+ }
+
+ void visitVAStartInst(VAStartInst &I) override {
+ if (F.getCallingConv() == CallingConv::Win64)
+ return;
+ VAStartInstrumentationList.push_back(&I);
+ unpoisonVAListTagForInst(I);
+ }
+
+ void visitVACopyInst(VACopyInst &I) override {
+ if (F.getCallingConv() == CallingConv::Win64) return;
+ unpoisonVAListTagForInst(I);
+ }
+
+ void finalizeInstrumentation() override {
+ assert(!VAArgOverflowSize && !VAArgTLSCopy &&
+ "finalizeInstrumentation called twice");
+ if (!VAStartInstrumentationList.empty()) {
+ // If there is a va_start in this function, make a backup copy of
+ // va_arg_tls somewhere in the function entry block.
+ IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
+ VAArgOverflowSize =
+ IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
+ Value *CopySize =
+ IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
+ VAArgOverflowSize);
+ VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+ IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
+ if (MS.TrackOrigins) {
+ VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+ IRB.CreateMemCpy(VAArgTLSOriginCopy, 8, MS.VAArgOriginTLS, 8, CopySize);
+ }
+ }
+
+ // Instrument va_start.
+ // Copy va_list shadow from the backup copy of the TLS contents.
+ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+ CallInst *OrigInst = VAStartInstrumentationList[i];
+ IRBuilder<> IRB(OrigInst->getNextNode());
+ Value *VAListTag = OrigInst->getArgOperand(0);
+
+ Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+ Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
+ IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+ ConstantInt::get(MS.IntptrTy, 16)),
+ PointerType::get(RegSaveAreaPtrTy, 0));
+ Value *RegSaveAreaPtr =
+ IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
+ Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
+ unsigned Alignment = 16;
+ std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
+ MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
+ Alignment, /*isStore*/ true);
+ IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
+ AMD64FpEndOffset);
+ if (MS.TrackOrigins)
+ IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
+ Alignment, AMD64FpEndOffset);
+ Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+ Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
+ IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+ ConstantInt::get(MS.IntptrTy, 8)),
+ PointerType::get(OverflowArgAreaPtrTy, 0));
+ Value *OverflowArgAreaPtr =
+ IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
+ Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
+ std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
+ MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
+ Alignment, /*isStore*/ true);
+ Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
+ AMD64FpEndOffset);
+ IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
+ VAArgOverflowSize);
+ if (MS.TrackOrigins) {
+ SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
+ AMD64FpEndOffset);
+ IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
+ VAArgOverflowSize);
+ }
+ }
+ }
+};
+
+/// MIPS64-specific implementation of VarArgHelper.
+struct VarArgMIPS64Helper : public VarArgHelper {
+ Function &F;
+ MemorySanitizer &MS;
+ MemorySanitizerVisitor &MSV;
+ Value *VAArgTLSCopy = nullptr;
+ Value *VAArgSize = nullptr;
+
+ SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+ VarArgMIPS64Helper(Function &F, MemorySanitizer &MS,
+ MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
+
+ void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
+ unsigned VAArgOffset = 0;
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ for (CallSite::arg_iterator ArgIt = CS.arg_begin() +
+ CS.getFunctionType()->getNumParams(), End = CS.arg_end();
+ ArgIt != End; ++ArgIt) {
+ Triple TargetTriple(F.getParent()->getTargetTriple());
+ Value *A = *ArgIt;
+ Value *Base;
+ uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+ if (TargetTriple.getArch() == Triple::mips64) {
+        // Adjust the shadow for arguments with size < 8 to match the
+        // placement of bits in a big-endian system.
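+        // E.g. a 4-byte argument occupies the higher-addressed half of its
+        // 8-byte slot on big-endian MIPS64, so its shadow offset is advanced
+        // by 4 as well.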
+ if (ArgSize < 8)
+ VAArgOffset += (8 - ArgSize);
+ }
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize);
+ VAArgOffset += ArgSize;
+ VAArgOffset = alignTo(VAArgOffset, 8);
+ if (!Base)
+ continue;
+ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+ }
+
+ Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
+    // Here we use VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creating a
+    // new class member, i.e. it holds the total size of all VarArgs.
+ IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
+ }
+
+ /// Compute the shadow address for a given va_arg.
+ Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+ unsigned ArgOffset, unsigned ArgSize) {
+ // Make sure we don't overflow __msan_va_arg_tls.
+ if (ArgOffset + ArgSize > kParamTLSSize)
+ return nullptr;
+ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+ "_msarg");
+ }
+
+ void visitVAStartInst(VAStartInst &I) override {
+ IRBuilder<> IRB(&I);
+ VAStartInstrumentationList.push_back(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr, *OriginPtr;
+ unsigned Alignment = 8;
+ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
+ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */ 8, Alignment, false);
+ }
+
+ void visitVACopyInst(VACopyInst &I) override {
+ IRBuilder<> IRB(&I);
+ VAStartInstrumentationList.push_back(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr, *OriginPtr;
+ unsigned Alignment = 8;
+ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
+ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */ 8, Alignment, false);
+ }
+
+ void finalizeInstrumentation() override {
+ assert(!VAArgSize && !VAArgTLSCopy &&
+ "finalizeInstrumentation called twice");
+ IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
+ VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
+ Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
+ VAArgSize);
+
+ if (!VAStartInstrumentationList.empty()) {
+ // If there is a va_start in this function, make a backup copy of
+ // va_arg_tls somewhere in the function entry block.
+ VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+ IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
+ }
+
+ // Instrument va_start.
+ // Copy va_list shadow from the backup copy of the TLS contents.
+ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+ CallInst *OrigInst = VAStartInstrumentationList[i];
+ IRBuilder<> IRB(OrigInst->getNextNode());
+ Value *VAListTag = OrigInst->getArgOperand(0);
+ Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+ Value *RegSaveAreaPtrPtr =
+ IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+ PointerType::get(RegSaveAreaPtrTy, 0));
+ Value *RegSaveAreaPtr =
+ IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
+ Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
+ unsigned Alignment = 8;
+ std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
+ MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
+ Alignment, /*isStore*/ true);
+ IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
+ CopySize);
+ }
+ }
+};
+
+/// AArch64-specific implementation of VarArgHelper.
+struct VarArgAArch64Helper : public VarArgHelper {
+ static const unsigned kAArch64GrArgSize = 64;
+ static const unsigned kAArch64VrArgSize = 128;
+
+ static const unsigned AArch64GrBegOffset = 0;
+ static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
+ // Make VR space aligned to 16 bytes.
+ static const unsigned AArch64VrBegOffset = AArch64GrEndOffset;
+ static const unsigned AArch64VrEndOffset = AArch64VrBegOffset
+ + kAArch64VrArgSize;
+ static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
+
+ Function &F;
+ MemorySanitizer &MS;
+ MemorySanitizerVisitor &MSV;
+ Value *VAArgTLSCopy = nullptr;
+ Value *VAArgOverflowSize = nullptr;
+
+ SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+ enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
+
+ VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
+ MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
+
+ ArgKind classifyArgument(Value* arg) {
+ Type *T = arg->getType();
+ if (T->isFPOrFPVectorTy())
+ return AK_FloatingPoint;
+ if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
+ || (T->isPointerTy()))
+ return AK_GeneralPurpose;
+ return AK_Memory;
+ }
+
+  // The instrumentation stores the argument shadow in a non-ABI-specific
+  // format because it does not know which arguments are named (since Clang,
+  // as in the x86_64 case, lowers va_arg in the frontend and this pass only
+  // sees the low-level code that deals with va_list internals).
+  // The first eight GR registers are saved in the first 64 bytes of the
+  // va_arg TLS array, followed by the first 8 FP/SIMD registers, and then
+  // the remaining arguments.
+  // Using constant offsets within the va_arg TLS array allows fast copying
+  // in the finalize instrumentation.
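+  // Resulting layout of __msan_va_arg_tls as used by this helper: bytes
+  // [0, 64) hold the shadow of the general registers (x0-x7), [64, 192) the
+  // shadow of the FP/SIMD registers (v0-v7, 16 bytes each), and [192, ...)
+  // the shadow of arguments passed on the stack.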
+ void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
+ unsigned GrOffset = AArch64GrBegOffset;
+ unsigned VrOffset = AArch64VrBegOffset;
+ unsigned OverflowOffset = AArch64VAEndOffset;
+
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
+ ArgIt != End; ++ArgIt) {
+ Value *A = *ArgIt;
+ unsigned ArgNo = CS.getArgumentNo(ArgIt);
+ bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams();
+ ArgKind AK = classifyArgument(A);
+ if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
+ AK = AK_Memory;
+ if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
+ AK = AK_Memory;
+ Value *Base;
+ switch (AK) {
+ case AK_GeneralPurpose:
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset, 8);
+ GrOffset += 8;
+ break;
+ case AK_FloatingPoint:
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset, 8);
+ VrOffset += 16;
+ break;
+ case AK_Memory:
+ // Don't count fixed arguments in the overflow area - va_start will
+ // skip right over them.
+ if (IsFixed)
+ continue;
+ uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+ Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset,
+ alignTo(ArgSize, 8));
+ OverflowOffset += alignTo(ArgSize, 8);
+ break;
+ }
+ // Count Gp/Vr fixed arguments to their respective offsets, but don't
+ // bother to actually store a shadow.
+ if (IsFixed)
+ continue;
+ if (!Base)
+ continue;
+ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+ }
+ Constant *OverflowSize =
+ ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
+ IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
+ }
+
+ /// Compute the shadow address for a given va_arg.
+ Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+ unsigned ArgOffset, unsigned ArgSize) {
+ // Make sure we don't overflow __msan_va_arg_tls.
+ if (ArgOffset + ArgSize > kParamTLSSize)
+ return nullptr;
+ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+ "_msarg");
+ }
+
+ void visitVAStartInst(VAStartInst &I) override {
+ IRBuilder<> IRB(&I);
+ VAStartInstrumentationList.push_back(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr, *OriginPtr;
+ unsigned Alignment = 8;
+ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
+ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */ 32, Alignment, false);
+ }
+
+ void visitVACopyInst(VACopyInst &I) override {
+ IRBuilder<> IRB(&I);
+ VAStartInstrumentationList.push_back(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr, *OriginPtr;
+ unsigned Alignment = 8;
+ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
+ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */ 32, Alignment, false);
+ }
+
+ // Retrieve a va_list field of 'void*' size.
+ Value* getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
+ Value *SaveAreaPtrPtr =
+ IRB.CreateIntToPtr(
+ IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+ ConstantInt::get(MS.IntptrTy, offset)),
+ Type::getInt64PtrTy(*MS.C));
+ return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
+ }
+
+ // Retrieve a va_list field of 'int' size.
+ Value* getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
+ Value *SaveAreaPtr =
+ IRB.CreateIntToPtr(
+ IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+ ConstantInt::get(MS.IntptrTy, offset)),
+ Type::getInt32PtrTy(*MS.C));
+ Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
+ return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
+ }
+
+ void finalizeInstrumentation() override {
+ assert(!VAArgOverflowSize && !VAArgTLSCopy &&
+ "finalizeInstrumentation called twice");
+ if (!VAStartInstrumentationList.empty()) {
+ // If there is a va_start in this function, make a backup copy of
+ // va_arg_tls somewhere in the function entry block.
+ IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
+ VAArgOverflowSize =
+ IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
+ Value *CopySize =
+ IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
+ VAArgOverflowSize);
+ VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+ IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
+ }
+
+ Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
+ Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);
+
+ // Instrument va_start, copy va_list shadow from the backup copy of
+ // the TLS contents.
+ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+ CallInst *OrigInst = VAStartInstrumentationList[i];
+ IRBuilder<> IRB(OrigInst->getNextNode());
+
+ Value *VAListTag = OrigInst->getArgOperand(0);
+
+ // The variadic ABI for AArch64 creates two areas to save the incoming
+      // argument registers (one for the 64-bit general registers x0-x7 and
+      // another for the 128-bit FP/SIMD registers v0-v7).
+      // We then need to propagate the shadow arguments to both regions,
+      // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
+      // The remaining arguments' shadow is saved for 'va::stack'.
+      // One caveat is that only the unnamed (variadic) arguments need to be
+      // propagated, but the call site instrumentation saved the shadow of
+      // 'all' the arguments. So when copying the shadow values from the
+      // va_arg TLS array we need to adjust the offsets for both the GR and
+      // VR regions based on the __{gr,vr}_offs values (which are set
+      // according to the incoming named arguments).
+
+ // Read the stack pointer from the va_list.
+ Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0);
+
+ // Read both the __gr_top and __gr_off and add them up.
+ Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
+ Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);
+
+ Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea);
+
+ // Read both the __vr_top and __vr_off and add them up.
+ Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
+ Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);
+
+ Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea);
+
+      // The instrumentation does not know how many named arguments are being
+      // used, and at the call site the shadow of all the arguments was saved.
+      // Since __gr_offs is defined as '0 - ((8 - named_gr) * 8)', the idea is
+      // to propagate only the variadic arguments by skipping the bytes of
+      // shadow that belong to named arguments.
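+      // For example, with three named GP arguments __gr_offs == -40, so
+      // 64 + (-40) == 24 bytes of shadow (the x0-x2 slots) are skipped and
+      // only the shadow of the remaining five register slots is copied.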
+ Value *GrRegSaveAreaShadowPtrOff =
+ IRB.CreateAdd(GrArgSize, GrOffSaveArea);
+
+ Value *GrRegSaveAreaShadowPtr =
+ MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
+ /*Alignment*/ 8, /*isStore*/ true)
+ .first;
+
+ Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+ GrRegSaveAreaShadowPtrOff);
+ Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
+
+ IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, 8, GrSrcPtr, 8, GrCopySize);
+
+ // Again, but for FP/SIMD values.
+ Value *VrRegSaveAreaShadowPtrOff =
+ IRB.CreateAdd(VrArgSize, VrOffSaveArea);
+
+ Value *VrRegSaveAreaShadowPtr =
+ MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
+ /*Alignment*/ 8, /*isStore*/ true)
+ .first;
+
+ Value *VrSrcPtr = IRB.CreateInBoundsGEP(
+ IRB.getInt8Ty(),
+ IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+ IRB.getInt32(AArch64VrBegOffset)),
+ VrRegSaveAreaShadowPtrOff);
+ Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
+
+ IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, 8, VrSrcPtr, 8, VrCopySize);
+
+ // And finally for remaining arguments.
+ Value *StackSaveAreaShadowPtr =
+ MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(),
+ /*Alignment*/ 16, /*isStore*/ true)
+ .first;
+
+ Value *StackSrcPtr =
+ IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+ IRB.getInt32(AArch64VAEndOffset));
+
+ IRB.CreateMemCpy(StackSaveAreaShadowPtr, 16, StackSrcPtr, 16,
+ VAArgOverflowSize);
+ }
+ }
+};
+
+/// PowerPC64-specific implementation of VarArgHelper.
+struct VarArgPowerPC64Helper : public VarArgHelper {
+ Function &F;
+ MemorySanitizer &MS;
+ MemorySanitizerVisitor &MSV;
+ Value *VAArgTLSCopy = nullptr;
+ Value *VAArgSize = nullptr;
+
+ SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+ VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
+ MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
+
+ void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
+ // For PowerPC, we need to deal with alignment of stack arguments -
+ // they are mostly aligned to 8 bytes, but vectors and i128 arrays
+ // are aligned to 16 bytes, byvals can be aligned to 8 or 16 bytes,
+ // and QPX vectors are aligned to 32 bytes. For that reason, we
+ // compute current offset from stack pointer (which is always properly
+ // aligned), and offset for the first vararg, then subtract them.
+ unsigned VAArgBase;
+ Triple TargetTriple(F.getParent()->getTargetTriple());
+ // Parameter save area starts at 48 bytes from frame pointer for ABIv1,
+ // and 32 bytes for ABIv2. This is usually determined by target
+    // endianness, but in theory could be overridden by a function attribute.
+ // For simplicity, we ignore it here (it'd only matter for QPX vectors).
+ if (TargetTriple.getArch() == Triple::ppc64)
+ VAArgBase = 48;
+ else
+ VAArgBase = 32;
+ unsigned VAArgOffset = VAArgBase;
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
+ ArgIt != End; ++ArgIt) {
+ Value *A = *ArgIt;
+ unsigned ArgNo = CS.getArgumentNo(ArgIt);
+ bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams();
+ bool IsByVal = CS.paramHasAttr(ArgNo, Attribute::ByVal);
+ if (IsByVal) {
+ assert(A->getType()->isPointerTy());
+ Type *RealTy = A->getType()->getPointerElementType();
+ uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
+ uint64_t ArgAlign = CS.getParamAlignment(ArgNo);
+ if (ArgAlign < 8)
+ ArgAlign = 8;
+ VAArgOffset = alignTo(VAArgOffset, ArgAlign);
+ if (!IsFixed) {
+ Value *Base = getShadowPtrForVAArgument(
+ RealTy, IRB, VAArgOffset - VAArgBase, ArgSize);
+ if (Base) {
+ Value *AShadowPtr, *AOriginPtr;
+ std::tie(AShadowPtr, AOriginPtr) =
+ MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(),
+ kShadowTLSAlignment, /*isStore*/ false);
+
+ IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
+ kShadowTLSAlignment, ArgSize);
+ }
+ }
+ VAArgOffset += alignTo(ArgSize, 8);
+ } else {
+ Value *Base;
+ uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+ uint64_t ArgAlign = 8;
+ if (A->getType()->isArrayTy()) {
+ // Arrays are aligned to element size, except for long double
+ // arrays, which are aligned to 8 bytes.
+ Type *ElementTy = A->getType()->getArrayElementType();
+ if (!ElementTy->isPPC_FP128Ty())
+ ArgAlign = DL.getTypeAllocSize(ElementTy);
+ } else if (A->getType()->isVectorTy()) {
+ // Vectors are naturally aligned.
+ ArgAlign = DL.getTypeAllocSize(A->getType());
+ }
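+        // E.g. a [4 x i32] array gets ArgAlign = 4 here (raised to the
+        // 8-byte minimum below), while a <4 x i32> vector gets ArgAlign = 16.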
+ if (ArgAlign < 8)
+ ArgAlign = 8;
+ VAArgOffset = alignTo(VAArgOffset, ArgAlign);
+ if (DL.isBigEndian()) {
+        // Adjust the shadow for arguments with size < 8 to match the
+        // placement of bits in a big-endian system.
+ if (ArgSize < 8)
+ VAArgOffset += (8 - ArgSize);
+ }
+ if (!IsFixed) {
+ Base = getShadowPtrForVAArgument(A->getType(), IRB,
+ VAArgOffset - VAArgBase, ArgSize);
+ if (Base)
+ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+ }
+ VAArgOffset += ArgSize;
+ VAArgOffset = alignTo(VAArgOffset, 8);
+ }
+ if (IsFixed)
+ VAArgBase = VAArgOffset;
+ }
+
+ Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(),
+ VAArgOffset - VAArgBase);
+    // Here we use VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creating a
+    // new class member, i.e. it holds the total size of all VarArgs.
+ IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
+ }
+
+ /// Compute the shadow address for a given va_arg.
+ Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+ unsigned ArgOffset, unsigned ArgSize) {
+ // Make sure we don't overflow __msan_va_arg_tls.
+ if (ArgOffset + ArgSize > kParamTLSSize)
+ return nullptr;
+ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+ "_msarg");
+ }
+
+ void visitVAStartInst(VAStartInst &I) override {
+ IRBuilder<> IRB(&I);
+ VAStartInstrumentationList.push_back(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr, *OriginPtr;
+ unsigned Alignment = 8;
+ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
+ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */ 8, Alignment, false);
+ }
+
+ void visitVACopyInst(VACopyInst &I) override {
+ IRBuilder<> IRB(&I);
+ Value *VAListTag = I.getArgOperand(0);
+ Value *ShadowPtr, *OriginPtr;
+ unsigned Alignment = 8;
+ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
+ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
+ // Unpoison the whole __va_list_tag.
+ // FIXME: magic ABI constants.
+ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+ /* size */ 8, Alignment, false);
+ }
+
+ void finalizeInstrumentation() override {
+ assert(!VAArgSize && !VAArgTLSCopy &&
+ "finalizeInstrumentation called twice");
+ IRBuilder<> IRB(MSV.ActualFnStart->getFirstNonPHI());
+ VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
+ Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
+ VAArgSize);
+
+ if (!VAStartInstrumentationList.empty()) {
+ // If there is a va_start in this function, make a backup copy of
+ // va_arg_tls somewhere in the function entry block.
+ VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+ IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize);
+ }
+
+ // Instrument va_start.
+ // Copy va_list shadow from the backup copy of the TLS contents.
+ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+ CallInst *OrigInst = VAStartInstrumentationList[i];
+ IRBuilder<> IRB(OrigInst->getNextNode());
+ Value *VAListTag = OrigInst->getArgOperand(0);
+ Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+ Value *RegSaveAreaPtrPtr =
+ IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+ PointerType::get(RegSaveAreaPtrTy, 0));
+ Value *RegSaveAreaPtr =
+ IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
+ Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
+ unsigned Alignment = 8;
+ std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
+ MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
+ Alignment, /*isStore*/ true);
+ IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
+ CopySize);
+ }
+ }
+};
+
+/// A no-op implementation of VarArgHelper.
+struct VarArgNoOpHelper : public VarArgHelper {
+ VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
+ MemorySanitizerVisitor &MSV) {}
+
+ void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {}
+
+ void visitVAStartInst(VAStartInst &I) override {}
+
+ void visitVACopyInst(VACopyInst &I) override {}
+
+ void finalizeInstrumentation() override {}
+};
+
+} // end anonymous namespace
+
+static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
+ MemorySanitizerVisitor &Visitor) {
+  // VarArg handling is implemented only for AMD64, MIPS64, AArch64 and
+  // PowerPC64. False positives are possible on other platforms.
+ Triple TargetTriple(Func.getParent()->getTargetTriple());
+ if (TargetTriple.getArch() == Triple::x86_64)
+ return new VarArgAMD64Helper(Func, Msan, Visitor);
+ else if (TargetTriple.isMIPS64())
+ return new VarArgMIPS64Helper(Func, Msan, Visitor);
+ else if (TargetTriple.getArch() == Triple::aarch64)
+ return new VarArgAArch64Helper(Func, Msan, Visitor);
+ else if (TargetTriple.getArch() == Triple::ppc64 ||
+ TargetTriple.getArch() == Triple::ppc64le)
+ return new VarArgPowerPC64Helper(Func, Msan, Visitor);
+ else
+ return new VarArgNoOpHelper(Func, Msan, Visitor);
+}
+
+bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
+ if (!CompileKernel && F.getName() == kMsanModuleCtorName)
+ return false;
+
+ MemorySanitizerVisitor Visitor(F, *this, TLI);
+
+ // Clear out readonly/readnone attributes.
+ AttrBuilder B;
+ B.addAttribute(Attribute::ReadOnly)
+ .addAttribute(Attribute::ReadNone);
+ F.removeAttributes(AttributeList::FunctionIndex, B);
+
+ return Visitor.runOnFunction();
+}
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
new file mode 100644
index 000000000000..ca1bb62389e9
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -0,0 +1,1814 @@
+//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements PGO instrumentation using a minimum spanning tree based
+// on the following paper:
+// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
+// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
+// Issue 3, pp 313-322
+// The idea of the algorithm is based on the fact that for each node (except
+// for the entry and exit), the sum of incoming edge counts equals the sum of
+// outgoing edge counts. The count of an edge on the spanning tree can be
+// derived from the counts of the edges not on the spanning tree. Knuth proves
+// that this method instruments the minimum number of edges.
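+// For example, if a basic block has incoming edges e1 and e2 and a single
+// outgoing edge e3, then count(e1) + count(e2) == count(e3), so instrumenting
+// e1 and e3 is enough to derive count(e2).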
+//
+// The minimal spanning tree here is actually a maximum weight tree -- on-tree
+// edges have higher frequencies (more likely to execute). The idea is to
+// instrument those less frequently executed edges to reduce the runtime
+// overhead of instrumented binaries.
+//
+// This file contains two passes:
+// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
+// count profile, and generates the instrumentation for indirect call
+// profiling.
+// (2) Pass PGOInstrumentationUse which reads the edge count profile and
+// annotates the branch weights. It also reads the indirect call value
+//     profiling records and annotates the indirect call instructions.
+//
+// To get precise counter information, these two passes need to be invoked at
+// the same compilation point (so they see the same IR). For pass
+// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
+// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc, and
+// the profile is opened at module level and passed to each PGOUseFunc instance.
+// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
+// in class FuncPGOInstrumentation.
+//
+// Class PGOEdge represents a CFG edge and some auxiliary information. Class
+// BBInfo contains auxiliary information for each BB. These two classes are used
+// in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are derived
+// from PGOEdge and BBInfo, respectively. They contain extra data structures
+// used in populating profile counters.
+// The MST implementation is in Class CFGMST (CFGMST.h).
+//
+//===----------------------------------------------------------------------===//
+
+#include "CFGMST.h"
+#include "ValueProfileCollector.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfileSummary.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CRC.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/MisExpect.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <numeric>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+using ProfileCount = Function::ProfileCount;
+using VPCandidateInfo = ValueProfileCollector::CandidateInfo;
+
+#define DEBUG_TYPE "pgo-instrumentation"
+
+STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
+STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
+STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
+STATISTIC(NumOfPGOEdge, "Number of edges.");
+STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
+STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
+STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
+STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
+STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
+STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
+STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
+STATISTIC(NumOfCSPGOSelectInsts,
+ "Number of select instruction instrumented in CSPGO.");
+STATISTIC(NumOfCSPGOMemIntrinsics,
+ "Number of mem intrinsics instrumented in CSPGO.");
+STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
+STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
+STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
+STATISTIC(NumOfCSPGOFunc,
+ "Number of functions having valid profile counts in CSPGO.");
+STATISTIC(NumOfCSPGOMismatch,
+ "Number of functions having mismatch profile in CSPGO.");
+STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
+
+// Command line option to specify the file to read profile from. This is
+// mainly used for testing.
+static cl::opt<std::string>
+ PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
+ cl::value_desc("filename"),
+                       cl::desc("Specify the path of profile data file. This is "
+                                "mainly for test purpose."));
+static cl::opt<std::string> PGOTestProfileRemappingFile(
+    "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
+    cl::value_desc("filename"),
+    cl::desc("Specify the path of the profile remapping file. This is mainly "
+             "for test purposes."));
+
+// Command line option to disable value profiling. The default is false,
+// i.e. value profiling is enabled by default. This is for debugging purposes.
+static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
+ cl::Hidden,
+ cl::desc("Disable Value Profiling"));
+
+// Command line option to set the maximum number of VP annotations to write to
+// the metadata for a single indirect call callsite.
+static cl::opt<unsigned> MaxNumAnnotations(
+ "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Max number of annotations for a single indirect "
+ "call callsite"));
+
+// Command line option to set the maximum number of value annotations
+// to write to the metadata for a single memop intrinsic.
+static cl::opt<unsigned> MaxNumMemOPAnnotations(
+    "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore,
+    cl::desc("Max number of precise value annotations for a single memop "
+             "intrinsic"));
+
+// Command line option to control appending FunctionHash to the name of a COMDAT
+// function. This is to avoid the hash mismatch caused by the preinliner.
+static cl::opt<bool> DoComdatRenaming(
+ "do-comdat-renaming", cl::init(false), cl::Hidden,
+ cl::desc("Append function hash to the name of COMDAT function to avoid "
+ "function hash mismatch due to the preinliner"));
+
+// Command line option to enable/disable the warning about missing profile
+// information.
+static cl::opt<bool>
+ PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden,
+ cl::desc("Use this option to turn on/off "
+ "warnings about missing profile data for "
+ "functions."));
+
+// Command line option to enable/disable the warning about a hash mismatch in
+// the profile data.
+static cl::opt<bool>
+ NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
+ cl::desc("Use this option to turn off/on "
+ "warnings about profile cfg mismatch."));
+
+// Command line option to enable/disable the warning about a hash mismatch in
+// the profile data for Comdat functions, which often turns out to be a false
+// positive due to pre-instrumentation inlining.
+static cl::opt<bool>
+ NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true),
+ cl::Hidden,
+ cl::desc("The option is used to turn on/off "
+ "warnings about hash mismatch for comdat "
+ "functions."));
+
+// Command line option to enable/disable select instruction instrumentation.
+static cl::opt<bool>
+ PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
+ cl::desc("Use this option to turn on/off SELECT "
+ "instruction instrumentation. "));
+
+// Command line option to turn on CFG dot or text dump of raw profile counts
+static cl::opt<PGOViewCountsType> PGOViewRawCounts(
+ "pgo-view-raw-counts", cl::Hidden,
+ cl::desc("A boolean option to show CFG dag or text "
+ "with raw profile counts from "
+ "profile data. See also option "
+ "-pgo-view-counts. To limit graph "
+ "display to only one function, use "
+ "filtering option -view-bfi-func-name."),
+ cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
+ clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
+ clEnumValN(PGOVCT_Text, "text", "show in text.")));
+
+// Command line option to enable/disable memop intrinsic call size profiling.
+static cl::opt<bool>
+ PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
+ cl::desc("Use this option to turn on/off "
+ "memory intrinsic size profiling."));
+
+// Emit branch probability as optimization remarks.
+static cl::opt<bool>
+ EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
+ cl::desc("When this option is on, the annotated "
+ "branch probability will be emitted as "
+ "optimization remarks: -{Rpass|"
+ "pass-remarks}=pgo-instrumentation"));
+
+// Command line option to turn on CFG dot dump after profile annotation.
+// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
+extern cl::opt<PGOViewCountsType> PGOViewCounts;
+
+// Command line option to specify the name of the function for CFG dump
+// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
+extern cl::opt<std::string> ViewBlockFreqFuncName;
+
+// Return a string describing the branch condition that can be
+// used in static branch probability heuristics:
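+// For example, a conditional branch on "icmp eq i32 %x, 0" yields a string
+// like "eq_i32_Zero".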
+static std::string getBranchCondString(Instruction *TI) {
+ BranchInst *BI = dyn_cast<BranchInst>(TI);
+ if (!BI || !BI->isConditional())
+ return std::string();
+
+ Value *Cond = BI->getCondition();
+ ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
+ if (!CI)
+ return std::string();
+
+ std::string result;
+ raw_string_ostream OS(result);
+ OS << CmpInst::getPredicateName(CI->getPredicate()) << "_";
+ CI->getOperand(0)->getType()->print(OS, true);
+
+ Value *RHS = CI->getOperand(1);
+ ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
+ if (CV) {
+ if (CV->isZero())
+ OS << "_Zero";
+ else if (CV->isOne())
+ OS << "_One";
+ else if (CV->isMinusOne())
+ OS << "_MinusOne";
+ else
+ OS << "_Const";
+ }
+ OS.flush();
+ return result;
+}
+
+static const char *ValueProfKindDescr[] = {
+#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
+#include "llvm/ProfileData/InstrProfData.inc"
+};
+
+namespace {
+
+/// The select instruction visitor plays three roles specified
+/// by the mode. In \c VM_counting mode, it simply counts the number of
+/// select instructions. In \c VM_instrument mode, it inserts code to count
+/// the number of times the TrueValue of a select is taken. In \c VM_annotate
+/// mode, it reads the profile data and annotates the select instructions with
+/// metadata.
+enum VisitMode { VM_counting, VM_instrument, VM_annotate };
+class PGOUseFunc;
+
+/// Instruction Visitor class to visit select instructions.
+struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
+ Function &F;
+ unsigned NSIs = 0; // Number of select instructions instrumented.
+ VisitMode Mode = VM_counting; // Visiting mode.
+ unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
+ unsigned TotalNumCtrs = 0; // Total number of counters
+ GlobalVariable *FuncNameVar = nullptr;
+ uint64_t FuncHash = 0;
+ PGOUseFunc *UseFunc = nullptr;
+
+ SelectInstVisitor(Function &Func) : F(Func) {}
+
+ void countSelects(Function &Func) {
+ NSIs = 0;
+ Mode = VM_counting;
+ visit(Func);
+ }
+
+ // Visit the IR stream and instrument all select instructions. \p
+ // Ind is a pointer to the counter index variable; \p TotalNC
+ // is the total number of counters; \p FNV is the pointer to the
+ // PGO function name var; \p FHash is the function hash.
+ void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC,
+ GlobalVariable *FNV, uint64_t FHash) {
+ Mode = VM_instrument;
+ CurCtrIdx = Ind;
+ TotalNumCtrs = TotalNC;
+ FuncHash = FHash;
+ FuncNameVar = FNV;
+ visit(Func);
+ }
+
+ // Visit the IR stream and annotate all select instructions.
+ void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) {
+ Mode = VM_annotate;
+ UseFunc = UF;
+ CurCtrIdx = Ind;
+ visit(Func);
+ }
+
+ void instrumentOneSelectInst(SelectInst &SI);
+ void annotateOneSelectInst(SelectInst &SI);
+
+ // Visit \p SI instruction and perform tasks according to visit mode.
+ void visitSelectInst(SelectInst &SI);
+
+  // Return the number of select instructions. This needs to be called after
+  // countSelects().
+ unsigned getNumOfSelectInsts() const { return NSIs; }
+};
+
+class PGOInstrumentationGenLegacyPass : public ModulePass {
+public:
+ static char ID;
+
+ PGOInstrumentationGenLegacyPass(bool IsCS = false)
+ : ModulePass(ID), IsCS(IsCS) {
+ initializePGOInstrumentationGenLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "PGOInstrumentationGenPass"; }
+
+private:
+  // Whether this is context-sensitive instrumentation.
+ bool IsCS;
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ }
+};
+
+class PGOInstrumentationUseLegacyPass : public ModulePass {
+public:
+ static char ID;
+
+ // Provide the profile filename as the parameter.
+ PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false)
+ : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) {
+ if (!PGOTestProfileFile.empty())
+ ProfileFileName = PGOTestProfileFile;
+ initializePGOInstrumentationUseLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "PGOInstrumentationUsePass"; }
+
+private:
+ std::string ProfileFileName;
+  // Whether this is a context-sensitive instrumentation use.
+ bool IsCS;
+
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ }
+};
+
+class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass {
+public:
+ static char ID;
+ StringRef getPassName() const override {
+ return "PGOInstrumentationGenCreateVarPass";
+ }
+ PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "")
+ : ModulePass(ID), InstrProfileOutput(CSInstrName) {
+ initializePGOInstrumentationGenCreateVarLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+private:
+ bool runOnModule(Module &M) override {
+ createProfileFileNameVar(M, InstrProfileOutput);
+ createIRLevelProfileFlagVar(M, true);
+ return false;
+ }
+ std::string InstrProfileOutput;
+};
+
+} // end anonymous namespace
+
+char PGOInstrumentationGenLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
+ "PGO instrumentation.", false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
+ "PGO instrumentation.", false, false)
+
+ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) {
+ return new PGOInstrumentationGenLegacyPass(IsCS);
+}
+
+char PGOInstrumentationUseLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
+ "Read PGO instrumentation profile.", false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
+ "Read PGO instrumentation profile.", false, false)
+
+ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename,
+ bool IsCS) {
+ return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS);
+}
+
+char PGOInstrumentationGenCreateVarLegacyPass::ID = 0;
+
+INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass,
+ "pgo-instr-gen-create-var",
+ "Create PGO instrumentation version variable for CSPGO.", false,
+ false)
+
+ModulePass *
+llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) {
+ return new PGOInstrumentationGenCreateVarLegacyPass(CSInstrName);
+}
+
+namespace {
+
+/// An MST based instrumentation for PGO
+///
+/// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO
+/// at the function level.
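+///
+/// Only the edges that are not in the MST are instrumented; the counts of MST
+/// edges can be derived later from the instrumented edges using flow
+/// conservation, which minimizes the number of counters that are inserted.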
+struct PGOEdge {
+  // This struct represents a CFG edge. Note the CFG can be a multi-graph,
+  // so there might be multiple edges with the same SrcBB and DestBB.
+ const BasicBlock *SrcBB;
+ const BasicBlock *DestBB;
+ uint64_t Weight;
+ bool InMST = false;
+ bool Removed = false;
+ bool IsCritical = false;
+
+ PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
+ : SrcBB(Src), DestBB(Dest), Weight(W) {}
+
+ // Return the information string of an edge.
+ const std::string infoString() const {
+ return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
+ (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str();
+ }
+};
+
+// This class stores the auxiliary information for each BB.
+struct BBInfo {
+ BBInfo *Group;
+ uint32_t Index;
+ uint32_t Rank = 0;
+
+ BBInfo(unsigned IX) : Group(this), Index(IX) {}
+
+ // Return the information string of this object.
+ const std::string infoString() const {
+ return (Twine("Index=") + Twine(Index)).str();
+ }
+
+ // Empty function -- only applicable to UseBBInfo.
+ void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
+
+ // Empty function -- only applicable to UseBBInfo.
+ void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
+};
+
+// This class implements the MST-based instrumentation for a function. It is
+// shared by the profile-gen and profile-use compilations.
+template <class Edge, class BBInfo> class FuncPGOInstrumentation {
+private:
+ Function &F;
+
+  // Whether this is context-sensitive instrumentation.
+ bool IsCS;
+
+ // A map that stores the Comdat group in function F.
+ std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
+
+ ValueProfileCollector VPC;
+
+ void computeCFGHash();
+ void renameComdatFunction();
+
+public:
+ std::vector<std::vector<VPCandidateInfo>> ValueSites;
+ SelectInstVisitor SIVisitor;
+ std::string FuncName;
+ GlobalVariable *FuncNameVar;
+
+ // CFG hash value for this function.
+ uint64_t FunctionHash = 0;
+
+ // The Minimum Spanning Tree of function CFG.
+ CFGMST<Edge, BBInfo> MST;
+
+ // Collect all the BBs that will be instrumented, and store them in
+ // InstrumentBBs.
+ void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
+
+  // Given an edge, find the BB that will be instrumented.
+ // Return nullptr if there is no BB to be instrumented.
+ BasicBlock *getInstrBB(Edge *E);
+
+ // Return the auxiliary BB information.
+ BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
+
+ // Return the auxiliary BB information if available.
+ BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
+
+ // Dump edges and BB information.
+ void dumpInfo(std::string Str = "") const {
+ MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
+ Twine(FunctionHash) + "\t" + Str);
+ }
+
+ FuncPGOInstrumentation(
+ Function &Func,
+ std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
+ bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
+ BlockFrequencyInfo *BFI = nullptr, bool IsCS = false)
+ : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func),
+ ValueSites(IPVK_Last + 1), SIVisitor(Func), MST(F, BPI, BFI) {
+ // This should be done before CFG hash computation.
+ SIVisitor.countSelects(Func);
+ ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
+ if (!IsCS) {
+ NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
+ NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
+ NumOfPGOBB += MST.BBInfos.size();
+ ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
+ } else {
+ NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
+ NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
+ NumOfCSPGOBB += MST.BBInfos.size();
+ }
+
+ FuncName = getPGOFuncName(F);
+ computeCFGHash();
+ if (!ComdatMembers.empty())
+ renameComdatFunction();
+ LLVM_DEBUG(dumpInfo("after CFGMST"));
+
+ for (auto &E : MST.AllEdges) {
+ if (E->Removed)
+ continue;
+ IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
+ if (!E->InMST)
+ IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
+ }
+
+ if (CreateGlobalVar)
+ FuncNameVar = createPGOFuncNameVar(F, FuncName);
+ }
+};
+
+} // end anonymous namespace
+
+// Compute the hash value for the CFG: the lower 32 bits are the CRC32 of the
+// index values of the successors of each BB, and the upper 32 bits record the
+// numbers of edges, indirect call sites, and select instructions.
+template <class Edge, class BBInfo>
+void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
+ std::vector<uint8_t> Indexes;
+ JamCRC JC;
+ for (auto &BB : F) {
+ const Instruction *TI = BB.getTerminator();
+ for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
+ BasicBlock *Succ = TI->getSuccessor(I);
+ auto BI = findBBInfo(Succ);
+ if (BI == nullptr)
+ continue;
+ uint32_t Index = BI->Index;
+ for (int J = 0; J < 4; J++)
+ Indexes.push_back((uint8_t)(Index >> (J * 8)));
+ }
+ }
+ JC.update(Indexes);
+
+  // Compose the function hash. The top 4 bits are reserved for other
+  // information (e.g. the context-sensitive flag set below).
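+  // Layout of the resulting 64-bit value (before the masking below):
+  //   [63:56] #selects | [55:48] #indirect-call sites | [47:32] #edges |
+  //   [31:0] CRC32 of the successor BB indexes.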
+ FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
+ (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
+ //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
+ (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
+  // Reserve bits 60-63 for other information purposes.
+ FunctionHash &= 0x0FFFFFFFFFFFFFFF;
+ if (IsCS)
+ NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
+ LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
+ << " CRC = " << JC.getCRC()
+ << ", Selects = " << SIVisitor.getNumOfSelectInsts()
+ << ", Edges = " << MST.AllEdges.size() << ", ICSites = "
+ << ValueSites[IPVK_IndirectCallTarget].size()
+ << ", Hash = " << FunctionHash << "\n";);
+}
+
+// Check if we can safely rename this Comdat function.
+static bool canRenameComdat(
+ Function &F,
+ std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
+ if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
+ return false;
+
+  // FIXME: Currently we only handle Comdat groups that contain a single
+  // function plus function aliases.
+ // (1) For a Comdat group containing multiple functions, we need to have a
+ // unique postfix based on the hashes for each function. There is a
+ // non-trivial code refactoring to do this efficiently.
+  // (2) Variables cannot be renamed, so we cannot rename a Comdat function in
+  // a group that includes global variables.
+ Comdat *C = F.getComdat();
+ for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
+ if (dyn_cast<GlobalAlias>(CM.second))
+ continue;
+ Function *FM = dyn_cast<Function>(CM.second);
+ if (FM != &F)
+ return false;
+ }
+ return true;
+}
+
+// Append the CFGHash to the Comdat function name.
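+// For example, a function "foo" in comdat "foo" becomes "foo.<FunctionHash>"
+// in comdat "foo.<FunctionHash>", and a weak alias named "foo" is created to
+// point at the renamed function.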
+template <class Edge, class BBInfo>
+void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
+ if (!canRenameComdat(F, ComdatMembers))
+ return;
+ std::string OrigName = F.getName().str();
+ std::string NewFuncName =
+ Twine(F.getName() + "." + Twine(FunctionHash)).str();
+ F.setName(Twine(NewFuncName));
+ GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F);
+ FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
+ Comdat *NewComdat;
+ Module *M = F.getParent();
+ // For AvailableExternallyLinkage functions, change the linkage to
+ // LinkOnceODR and put them into comdat. This is because after renaming, there
+ // is no backup external copy available for the function.
+ if (!F.hasComdat()) {
+ assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
+ NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
+ F.setLinkage(GlobalValue::LinkOnceODRLinkage);
+ F.setComdat(NewComdat);
+ return;
+ }
+
+ // This function belongs to a single function Comdat group.
+ Comdat *OrigComdat = F.getComdat();
+ std::string NewComdatName =
+ Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
+ NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
+ NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
+
+ for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) {
+ // For aliases, change the name directly.
+ assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F);
+ std::string OrigGAName = GA->getName().str();
+ GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash)));
+ GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA);
+ continue;
+ }
+ // Must be a function.
+ Function *CF = dyn_cast<Function>(CM.second);
+ assert(CF);
+ CF->setComdat(NewComdat);
+ }
+}
+
+// Collect all the BBs that will be instrumented and return them in
+// InstrumentBBs, and set up InEdges/OutEdges for UseBBInfo.
+template <class Edge, class BBInfo>
+void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
+ std::vector<BasicBlock *> &InstrumentBBs) {
+ // Use a worklist as we will update the vector during the iteration.
+ std::vector<Edge *> EdgeList;
+ EdgeList.reserve(MST.AllEdges.size());
+ for (auto &E : MST.AllEdges)
+ EdgeList.push_back(E.get());
+
+ for (auto &E : EdgeList) {
+ BasicBlock *InstrBB = getInstrBB(E);
+ if (InstrBB)
+ InstrumentBBs.push_back(InstrBB);
+ }
+
+ // Set up InEdges/OutEdges for all BBs.
+ for (auto &E : MST.AllEdges) {
+ if (E->Removed)
+ continue;
+ const BasicBlock *SrcBB = E->SrcBB;
+ const BasicBlock *DestBB = E->DestBB;
+ BBInfo &SrcInfo = getBBInfo(SrcBB);
+ BBInfo &DestInfo = getBBInfo(DestBB);
+ SrcInfo.addOutEdge(E.get());
+ DestInfo.addInEdge(E.get());
+ }
+}
+
+// Given a CFG edge E to be instrumented, find the BB in which to place the
+// instrumentation code. The function will split the critical edge if
+// necessary.
+template <class Edge, class BBInfo>
+BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
+ if (E->InMST || E->Removed)
+ return nullptr;
+
+ BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
+ BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
+ // For a fake edge, instrument the real BB.
+ if (SrcBB == nullptr)
+ return DestBB;
+ if (DestBB == nullptr)
+ return SrcBB;
+
+ auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
+    // There are basic blocks (such as catchswitch) that cannot be instrumented.
+    // If the returned first insertion point is the end of the BB, skip this BB.
+ if (BB->getFirstInsertionPt() == BB->end())
+ return nullptr;
+ return BB;
+ };
+
+ // Instrument the SrcBB if it has a single successor,
+ // otherwise, the DestBB if this is not a critical edge.
+ Instruction *TI = SrcBB->getTerminator();
+ if (TI->getNumSuccessors() <= 1)
+ return canInstrument(SrcBB);
+ if (!E->IsCritical)
+ return canInstrument(DestBB);
+
+ unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
+ BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
+ if (!InstrBB) {
+    LLVM_DEBUG(
+        dbgs() << "Failed to split critical edge; not instrumenting it.\n");
+ return nullptr;
+ }
+ // For a critical edge, we have to split. Instrument the newly
+ // created BB.
+ IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
+ LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
+ << " --> " << getBBInfo(DestBB).Index << "\n");
+ // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
+ MST.addEdge(SrcBB, InstrBB, 0);
+ // Second one: Add new edge of InstrBB->DestBB.
+ Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
+ NewEdge1.InMST = true;
+ E->Removed = true;
+
+ return canInstrument(InstrBB);
+}
+
+// Visit all edges and instrument the edges not in the MST, and do value
+// profiling.
+// Critical edges will be split.
+static void instrumentOneFunc(
+ Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI,
+ std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
+ bool IsCS) {
+ // Split indirectbr critical edges here before computing the MST rather than
+ // later in getInstrBB() to avoid invalidating it.
+ SplitIndirectBrCriticalEdges(F, BPI, BFI);
+
+ FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI,
+ BFI, IsCS);
+ std::vector<BasicBlock *> InstrumentBBs;
+ FuncInfo.getInstrumentBBs(InstrumentBBs);
+ unsigned NumCounters =
+ InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
+
+ uint32_t I = 0;
+ Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
+ for (auto *InstrBB : InstrumentBBs) {
+ IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
+ assert(Builder.GetInsertPoint() != InstrBB->end() &&
+ "Cannot get the Instrumentation point");
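+    // For each instrumented BB this emits:
+    //   call void @llvm.instrprof.increment(i8* <name>, i64 <hash>,
+    //                                        i32 <num-counters>, i32 <index>)
+    // with a unique counter index per BB.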
+ Builder.CreateCall(
+ Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
+ {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
+ Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
+ Builder.getInt32(I++)});
+ }
+
+ // Now instrument select instructions:
+ FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar,
+ FuncInfo.FunctionHash);
+ assert(I == NumCounters);
+
+ if (DisableValueProfiling)
+ return;
+
+ NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
+
+ // For each VP Kind, walk the VP candidates and instrument each one.
+ for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
+ unsigned SiteIndex = 0;
+ if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
+ continue;
+
+ for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
+ LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
+ << " site: CallSite Index = " << SiteIndex << "\n");
+
+ IRBuilder<> Builder(Cand.InsertPt);
+ assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
+ "Cannot get the Instrumentation point");
+
+ Value *ToProfile = nullptr;
+ if (Cand.V->getType()->isIntegerTy())
+ ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
+ else if (Cand.V->getType()->isPointerTy())
+ ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
+ assert(ToProfile && "value profiling Value is of unexpected type");
+
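+      // For each candidate this emits:
+      //   call void @llvm.instrprof.value.profile(i8* <name>, i64 <hash>,
+      //                                           i64 <value>, i32 <kind>,
+      //                                           i32 <site-index>)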
+ Builder.CreateCall(
+ Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
+ {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
+ Builder.getInt64(FuncInfo.FunctionHash), ToProfile,
+ Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)});
+ }
+ } // IPVK_First <= Kind <= IPVK_Last
+}
+
+namespace {
+
+// This class represents a CFG edge in profile use compilation.
+struct PGOUseEdge : public PGOEdge {
+ bool CountValid = false;
+ uint64_t CountValue = 0;
+
+ PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
+ : PGOEdge(Src, Dest, W) {}
+
+ // Set edge count value
+ void setEdgeCount(uint64_t Value) {
+ CountValue = Value;
+ CountValid = true;
+ }
+
+ // Return the information string for this object.
+ const std::string infoString() const {
+ if (!CountValid)
+ return PGOEdge::infoString();
+ return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue))
+ .str();
+ }
+};
+
+using DirectEdges = SmallVector<PGOUseEdge *, 2>;
+
+// This class stores the auxiliary information for each BB.
+struct UseBBInfo : public BBInfo {
+ uint64_t CountValue = 0;
+ bool CountValid;
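+  // Number of in/out edges whose counts are still unknown; these are
+  // decremented as the incident edge counts get resolved.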
+ int32_t UnknownCountInEdge = 0;
+ int32_t UnknownCountOutEdge = 0;
+ DirectEdges InEdges;
+ DirectEdges OutEdges;
+
+ UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {}
+
+ UseBBInfo(unsigned IX, uint64_t C)
+ : BBInfo(IX), CountValue(C), CountValid(true) {}
+
+ // Set the profile count value for this BB.
+ void setBBInfoCount(uint64_t Value) {
+ CountValue = Value;
+ CountValid = true;
+ }
+
+ // Return the information string of this object.
+ const std::string infoString() const {
+ if (!CountValid)
+ return BBInfo::infoString();
+ return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
+ }
+
+ // Add an OutEdge and update the edge count.
+ void addOutEdge(PGOUseEdge *E) {
+ OutEdges.push_back(E);
+ UnknownCountOutEdge++;
+ }
+
+ // Add an InEdge and update the edge count.
+ void addInEdge(PGOUseEdge *E) {
+ InEdges.push_back(E);
+ UnknownCountInEdge++;
+ }
+};
+
+} // end anonymous namespace
+
+// Sum up the count values for all the edges.
+static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
+ uint64_t Total = 0;
+ for (auto &E : Edges) {
+ if (E->Removed)
+ continue;
+ Total += E->CountValue;
+ }
+ return Total;
+}
+
+namespace {
+
+class PGOUseFunc {
+public:
+ PGOUseFunc(Function &Func, Module *Modu,
+ std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
+ BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
+ ProfileSummaryInfo *PSI, bool IsCS)
+ : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
+ FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS),
+ FreqAttr(FFA_Normal), IsCS(IsCS) {}
+
+ // Read counts for the instrumented BB from profile.
+ bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros);
+
+ // Populate the counts for all BBs.
+ void populateCounters();
+
+ // Set the branch weights based on the count values.
+ void setBranchWeights();
+
+  // Annotate the value profile call sites for all value kinds.
+ void annotateValueSites();
+
+ // Annotate the value profile call sites for one value kind.
+ void annotateValueSites(uint32_t Kind);
+
+ // Annotate the irreducible loop header weights.
+ void annotateIrrLoopHeaderWeights();
+
+ // The hotness of the function from the profile count.
+ enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
+
+ // Return the function hotness from the profile.
+ FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
+
+ // Return the function hash.
+ uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
+
+  // Return the profile record for this function.
+ InstrProfRecord &getProfileRecord() { return ProfileRecord; }
+
+ // Return the auxiliary BB information.
+ UseBBInfo &getBBInfo(const BasicBlock *BB) const {
+ return FuncInfo.getBBInfo(BB);
+ }
+
+ // Return the auxiliary BB information if available.
+ UseBBInfo *findBBInfo(const BasicBlock *BB) const {
+ return FuncInfo.findBBInfo(BB);
+ }
+
+ Function &getFunc() const { return F; }
+
+ void dumpInfo(std::string Str = "") const {
+ FuncInfo.dumpInfo(Str);
+ }
+
+ uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
+private:
+ Function &F;
+ Module *M;
+ BlockFrequencyInfo *BFI;
+ ProfileSummaryInfo *PSI;
+
+  // This member stores the information shared with the instrumentation pass.
+ FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
+
+ // The maximum count value in the profile. This is only used in PGO use
+ // compilation.
+ uint64_t ProgramMaxCount;
+
+ // Position of counter that remains to be read.
+ uint32_t CountPosition = 0;
+
+ // Total size of the profile count for this function.
+ uint32_t ProfileCountSize = 0;
+
+ // ProfileRecord for this function.
+ InstrProfRecord ProfileRecord;
+
+ // Function hotness info derived from profile.
+ FuncFreqAttr FreqAttr;
+
+  // Whether to use the context-sensitive profile.
+ bool IsCS;
+
+ // Find the Instrumented BB and set the value. Return false on error.
+ bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
+
+ // Set the edge counter value for the unknown edge -- there should be only
+ // one unknown edge.
+ void setEdgeCount(DirectEdges &Edges, uint64_t Value);
+
+ // Return FuncName string;
+ const std::string getFuncName() const { return FuncInfo.FuncName; }
+
+ // Set the hot/cold inline hints based on the count values.
+ // FIXME: This function should be removed once the functionality in
+ // the inliner is implemented.
+ void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
+ if (PSI->isHotCount(EntryCount))
+ FreqAttr = FFA_Hot;
+ else if (PSI->isColdCount(MaxCount))
+ FreqAttr = FFA_Cold;
+ }
+};
+
+} // end anonymous namespace
+
+// Visit all the edges and assign the count value for the instrumented
+// edges and the BB. Return false on error.
+bool PGOUseFunc::setInstrumentedCounts(
+ const std::vector<uint64_t> &CountFromProfile) {
+
+ std::vector<BasicBlock *> InstrumentBBs;
+ FuncInfo.getInstrumentBBs(InstrumentBBs);
+ unsigned NumCounters =
+ InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
+  // The number of counters here should match the number of counters
+  // in the profile. Return false if they mismatch.
+ if (NumCounters != CountFromProfile.size()) {
+ return false;
+ }
+ // Set the profile count to the Instrumented BBs.
+ uint32_t I = 0;
+ for (BasicBlock *InstrBB : InstrumentBBs) {
+ uint64_t CountValue = CountFromProfile[I++];
+ UseBBInfo &Info = getBBInfo(InstrBB);
+ Info.setBBInfoCount(CountValue);
+ }
+ ProfileCountSize = CountFromProfile.size();
+ CountPosition = I;
+
+ // Set the edge count and update the count of unknown edges for BBs.
+ auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
+ E->setEdgeCount(Value);
+ this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
+ this->getBBInfo(E->DestBB).UnknownCountInEdge--;
+ };
+
+  // Set the profile count for the instrumented edges (the edges not in the
+  // MST). Some edges outside the MST could not be instrumented (see
+  // getInstrBB); their counts also need to be set so that we can populate the
+  // profile counts later.
+ for (auto &E : FuncInfo.MST.AllEdges) {
+ if (E->Removed || E->InMST)
+ continue;
+ const BasicBlock *SrcBB = E->SrcBB;
+ UseBBInfo &SrcInfo = getBBInfo(SrcBB);
+
+ // If only one out-edge, the edge profile count should be the same as BB
+ // profile count.
+ if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
+ setEdgeCount(E.get(), SrcInfo.CountValue);
+ else {
+ const BasicBlock *DestBB = E->DestBB;
+ UseBBInfo &DestInfo = getBBInfo(DestBB);
+ // If only one in-edge, the edge profile count should be the same as BB
+ // profile count.
+ if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
+ setEdgeCount(E.get(), DestInfo.CountValue);
+ }
+ if (E->CountValid)
+ continue;
+    // E's count should have been set from the profile. If not, this means E
+    // skipped the instrumentation. We set the count to 0.
+ setEdgeCount(E.get(), 0);
+ }
+ return true;
+}
+
+// Set the count value for the unknown edge. There should be one and only one
+// unknown edge in the Edges vector.
+void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
+ for (auto &E : Edges) {
+ if (E->CountValid)
+ continue;
+ E->setEdgeCount(Value);
+
+ getBBInfo(E->SrcBB).UnknownCountOutEdge--;
+ getBBInfo(E->DestBB).UnknownCountInEdge--;
+ return;
+ }
+ llvm_unreachable("Cannot find the unknown count edge");
+}
+
+// Read the profile from ProfileFileName and assign the value to the
+// instrumented BB and the edges. This function also updates ProgramMaxCount.
+// Return true if the profile is successfully read, and false on errors.
+bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) {
+ auto &Ctx = M->getContext();
+ Expected<InstrProfRecord> Result =
+ PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
+ if (Error E = Result.takeError()) {
+ handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
+ auto Err = IPE.get();
+ bool SkipWarning = false;
+ LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
+ << FuncInfo.FuncName << ": ");
+ if (Err == instrprof_error::unknown_function) {
+ IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
+ SkipWarning = !PGOWarnMissing;
+ LLVM_DEBUG(dbgs() << "unknown function");
+ } else if (Err == instrprof_error::hash_mismatch ||
+ Err == instrprof_error::malformed) {
+ IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
+ SkipWarning =
+ NoPGOWarnMismatch ||
+ (NoPGOWarnMismatchComdat &&
+ (F.hasComdat() ||
+ F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
+ LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
+ }
+
+ LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
+ if (SkipWarning)
+ return;
+
+ std::string Msg = IPE.message() + std::string(" ") + F.getName().str() +
+ std::string(" Hash = ") +
+ std::to_string(FuncInfo.FunctionHash);
+
+ Ctx.diagnose(
+ DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
+ });
+ return false;
+ }
+ ProfileRecord = std::move(Result.get());
+ std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
+
+ IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
+ LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
+ uint64_t ValueSum = 0;
+ for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
+ LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
+ ValueSum += CountFromProfile[I];
+ }
+ AllZeros = (ValueSum == 0);
+
+ LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
+
+ getBBInfo(nullptr).UnknownCountOutEdge = 2;
+ getBBInfo(nullptr).UnknownCountInEdge = 2;
+
+ if (!setInstrumentedCounts(CountFromProfile)) {
+ LLVM_DEBUG(
+ dbgs() << "Inconsistent number of counts, skipping this function");
+ Ctx.diagnose(DiagnosticInfoPGOProfile(
+ M->getName().data(),
+ Twine("Inconsistent number of counts in ") + F.getName().str()
+ + Twine(": the profile may be stale or there is a function name collision."),
+ DS_Warning));
+ return false;
+ }
+ ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
+ return true;
+}
+
+// Populate the counters from instrumented BBs to all BBs.
+// At the end of this operation, all BBs should have a valid count value.
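+// The propagation relies on flow conservation: a BB's count equals the sum of
+// its in-edge counts and also the sum of its out-edge counts, so a single
+// unknown in-edge or out-edge can be solved once the others are known.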
+void PGOUseFunc::populateCounters() {
+ bool Changes = true;
+ unsigned NumPasses = 0;
+ while (Changes) {
+ NumPasses++;
+ Changes = false;
+
+ // For efficient traversal, it's better to start from the end as most
+ // of the instrumented edges are at the end.
+ for (auto &BB : reverse(F)) {
+ UseBBInfo *Count = findBBInfo(&BB);
+ if (Count == nullptr)
+ continue;
+ if (!Count->CountValid) {
+ if (Count->UnknownCountOutEdge == 0) {
+ Count->CountValue = sumEdgeCount(Count->OutEdges);
+ Count->CountValid = true;
+ Changes = true;
+ } else if (Count->UnknownCountInEdge == 0) {
+ Count->CountValue = sumEdgeCount(Count->InEdges);
+ Count->CountValid = true;
+ Changes = true;
+ }
+ }
+ if (Count->CountValid) {
+ if (Count->UnknownCountOutEdge == 1) {
+ uint64_t Total = 0;
+ uint64_t OutSum = sumEdgeCount(Count->OutEdges);
+          // If one of the successor blocks can terminate early (no-return),
+          // we can end up with a situation where the out-edge sum count is
+          // larger than the BB's count, because the source BB's count is
+          // collected by a post-dominated block.
+ if (Count->CountValue > OutSum)
+ Total = Count->CountValue - OutSum;
+ setEdgeCount(Count->OutEdges, Total);
+ Changes = true;
+ }
+ if (Count->UnknownCountInEdge == 1) {
+ uint64_t Total = 0;
+ uint64_t InSum = sumEdgeCount(Count->InEdges);
+ if (Count->CountValue > InSum)
+ Total = Count->CountValue - InSum;
+ setEdgeCount(Count->InEdges, Total);
+ Changes = true;
+ }
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
+#ifndef NDEBUG
+ // Assert every BB has a valid counter.
+ for (auto &BB : F) {
+ auto BI = findBBInfo(&BB);
+ if (BI == nullptr)
+ continue;
+ assert(BI->CountValid && "BB count is not valid");
+ }
+#endif
+ uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
+ F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
+ uint64_t FuncMaxCount = FuncEntryCount;
+ for (auto &BB : F) {
+ auto BI = findBBInfo(&BB);
+ if (BI == nullptr)
+ continue;
+ FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
+ }
+ markFunctionAttributes(FuncEntryCount, FuncMaxCount);
+
+ // Now annotate select instructions
+ FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition);
+ assert(CountPosition == ProfileCountSize);
+
+ LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
+}
+
+// Assign the scaled count values to the BBs with multiple out edges.
+void PGOUseFunc::setBranchWeights() {
+ // Generate MD_prof metadata for every branch instruction.
+ LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
+ << " IsCS=" << IsCS << "\n");
+ for (auto &BB : F) {
+ Instruction *TI = BB.getTerminator();
+ if (TI->getNumSuccessors() < 2)
+ continue;
+ if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
+ isa<IndirectBrInst>(TI)))
+ continue;
+
+ if (getBBInfo(&BB).CountValue == 0)
+ continue;
+
+ // We have a non-zero Branch BB.
+ const UseBBInfo &BBCountInfo = getBBInfo(&BB);
+ unsigned Size = BBCountInfo.OutEdges.size();
+ SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
+ uint64_t MaxCount = 0;
+ for (unsigned s = 0; s < Size; s++) {
+ const PGOUseEdge *E = BBCountInfo.OutEdges[s];
+ const BasicBlock *SrcBB = E->SrcBB;
+ const BasicBlock *DestBB = E->DestBB;
+ if (DestBB == nullptr)
+ continue;
+ unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
+ uint64_t EdgeCount = E->CountValue;
+ if (EdgeCount > MaxCount)
+ MaxCount = EdgeCount;
+ EdgeCounts[SuccNum] = EdgeCount;
+ }
+ setProfMetadata(M, TI, EdgeCounts, MaxCount);
+ }
+}
+
+static bool isIndirectBrTarget(BasicBlock *BB) {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ if (isa<IndirectBrInst>((*PI)->getTerminator()))
+ return true;
+ }
+ return false;
+}
+
+void PGOUseFunc::annotateIrrLoopHeaderWeights() {
+ LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
+ // Find irr loop headers
+ for (auto &BB : F) {
+    // As a heuristic, also annotate indirectbr targets, as they have a high
+    // chance of becoming an irreducible loop header after the indirectbr tail
+    // duplication.
+ if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
+ Instruction *TI = BB.getTerminator();
+ const UseBBInfo &BBCountInfo = getBBInfo(&BB);
+ setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
+ }
+ }
+}
+
+void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
+ Module *M = F.getParent();
+ IRBuilder<> Builder(&SI);
+ Type *Int64Ty = Builder.getInt64Ty();
+ Type *I8PtrTy = Builder.getInt8PtrTy();
+ auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
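+  // The step is the zero-extended select condition (0 or 1), so the counter
+  // records how many times the TrueValue is selected.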
+ Builder.CreateCall(
+ Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
+ {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
+ Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
+ Builder.getInt32(*CurCtrIdx), Step});
+ ++(*CurCtrIdx);
+}
+
+void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
+ std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
+ assert(*CurCtrIdx < CountFromProfile.size() &&
+ "Out of bound access of counters");
+ uint64_t SCounts[2];
+ SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
+ ++(*CurCtrIdx);
+ uint64_t TotalCount = 0;
+ auto BI = UseFunc->findBBInfo(SI.getParent());
+ if (BI != nullptr)
+ TotalCount = BI->CountValue;
+ // False Count
+ SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
+ uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
+ if (MaxCount)
+ setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
+}
+
+void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
+ if (!PGOInstrSelect)
+ return;
+ // FIXME: do not handle this yet.
+ if (SI.getCondition()->getType()->isVectorTy())
+ return;
+
+ switch (Mode) {
+ case VM_counting:
+ NSIs++;
+ return;
+ case VM_instrument:
+ instrumentOneSelectInst(SI);
+ return;
+ case VM_annotate:
+ annotateOneSelectInst(SI);
+ return;
+ }
+
+ llvm_unreachable("Unknown visiting mode");
+}
+
+// Traverse all value sites and annotate the instructions for all value kinds.
+void PGOUseFunc::annotateValueSites() {
+ if (DisableValueProfiling)
+ return;
+
+ // Create the PGOFuncName meta data.
+ createPGOFuncNameMetadata(F, FuncInfo.FuncName);
+
+ for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+ annotateValueSites(Kind);
+}
+
+// Annotate the instructions for a specific value kind.
+void PGOUseFunc::annotateValueSites(uint32_t Kind) {
+ assert(Kind <= IPVK_Last);
+ unsigned ValueSiteIndex = 0;
+ auto &ValueSites = FuncInfo.ValueSites[Kind];
+ unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
+ if (NumValueSites != ValueSites.size()) {
+ auto &Ctx = M->getContext();
+ Ctx.diagnose(DiagnosticInfoPGOProfile(
+ M->getName().data(),
+ Twine("Inconsistent number of value sites for ") +
+ Twine(ValueProfKindDescr[Kind]) +
+ Twine(" profiling in \"") + F.getName().str() +
+ Twine("\", possibly due to the use of a stale profile."),
+ DS_Warning));
+ return;
+ }
+
+ for (VPCandidateInfo &I : ValueSites) {
+ LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
+ << "): Index = " << ValueSiteIndex << " out of "
+ << NumValueSites << "\n");
+ annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
+ static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
+ Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
+ : MaxNumAnnotations);
+ ValueSiteIndex++;
+ }
+}
+
+// Collect the set of members for each Comdat in module M and store
+// in ComdatMembers.
+static void collectComdatMembers(
+ Module &M,
+ std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
+ if (!DoComdatRenaming)
+ return;
+ for (Function &F : M)
+ if (Comdat *C = F.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &F));
+ for (GlobalVariable &GV : M.globals())
+ if (Comdat *C = GV.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &GV));
+ for (GlobalAlias &GA : M.aliases())
+ if (Comdat *C = GA.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &GA));
+}
+
+static bool InstrumentAllFunctions(
+ Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
+ function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
+  // For context-sensitive instrumentation, a separate pass (run before
+  // LTO/ThinLTO linking) creates these variables.
+ if (!IsCS)
+ createIRLevelProfileFlagVar(M, /* IsCS */ false);
+ std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
+ collectComdatMembers(M, ComdatMembers);
+
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue;
+ auto *BPI = LookupBPI(F);
+ auto *BFI = LookupBFI(F);
+ instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS);
+ }
+ return true;
+}
+
+PreservedAnalyses
+PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
+ createProfileFileNameVar(M, CSInstrName);
+ createIRLevelProfileFlagVar(M, /* IsCS */ true);
+ return PreservedAnalyses::all();
+}
+
+bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
+ auto LookupBPI = [this](Function &F) {
+ return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
+ };
+ auto LookupBFI = [this](Function &F) {
+ return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
+ };
+ return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS);
+}
+
+PreservedAnalyses PGOInstrumentationGen::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto LookupBPI = [&FAM](Function &F) {
+ return &FAM.getResult<BranchProbabilityAnalysis>(F);
+ };
+
+ auto LookupBFI = [&FAM](Function &F) {
+ return &FAM.getResult<BlockFrequencyAnalysis>(F);
+ };
+
+ if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+
+static bool annotateAllFunctions(
+ Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
+ function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
+ function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
+ ProfileSummaryInfo *PSI, bool IsCS) {
+ LLVM_DEBUG(dbgs() << "Read in profile counters: ");
+ auto &Ctx = M.getContext();
+ // Read the counter array from file.
+ auto ReaderOrErr =
+ IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName);
+ if (Error E = ReaderOrErr.takeError()) {
+ handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
+ Ctx.diagnose(
+ DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
+ });
+ return false;
+ }
+
+ std::unique_ptr<IndexedInstrProfReader> PGOReader =
+ std::move(ReaderOrErr.get());
+ if (!PGOReader) {
+ Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
+ StringRef("Cannot get PGOReader")));
+ return false;
+ }
+ if (!PGOReader->hasCSIRLevelProfile() && IsCS)
+ return false;
+
+ // TODO: might need to change the warning once the clang option is finalized.
+ if (!PGOReader->isIRLevelProfile()) {
+ Ctx.diagnose(DiagnosticInfoPGOProfile(
+ ProfileFileName.data(), "Not an IR level instrumentation profile"));
+ return false;
+ }
+
+ // Add the profile summary (read from the header of the indexed summary) here
+ // so that we can use it below when reading counters (which checks if the
+ // function should be marked with a cold or inlinehint attribute).
+ M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
+ IsCS ? ProfileSummary::PSK_CSInstr
+ : ProfileSummary::PSK_Instr);
+
+ std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
+ collectComdatMembers(M, ComdatMembers);
+ std::vector<Function *> HotFunctions;
+ std::vector<Function *> ColdFunctions;
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue;
+ auto *BPI = LookupBPI(F);
+ auto *BFI = LookupBFI(F);
+ // Split indirectbr critical edges here before computing the MST rather than
+ // later in getInstrBB() to avoid invalidating it.
+ SplitIndirectBrCriticalEdges(F, BPI, BFI);
+ PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, PSI, IsCS);
+ bool AllZeros = false;
+ if (!Func.readCounters(PGOReader.get(), AllZeros))
+ continue;
+ if (AllZeros) {
+ F.setEntryCount(ProfileCount(0, Function::PCT_Real));
+ if (Func.getProgramMaxCount() != 0)
+ ColdFunctions.push_back(&F);
+ continue;
+ }
+ Func.populateCounters();
+ Func.setBranchWeights();
+ Func.annotateValueSites();
+ Func.annotateIrrLoopHeaderWeights();
+ PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
+ if (FreqAttr == PGOUseFunc::FFA_Cold)
+ ColdFunctions.push_back(&F);
+ else if (FreqAttr == PGOUseFunc::FFA_Hot)
+ HotFunctions.push_back(&F);
+ if (PGOViewCounts != PGOVCT_None &&
+ (ViewBlockFreqFuncName.empty() ||
+ F.getName().equals(ViewBlockFreqFuncName))) {
+ LoopInfo LI{DominatorTree(F)};
+ std::unique_ptr<BranchProbabilityInfo> NewBPI =
+ std::make_unique<BranchProbabilityInfo>(F, LI);
+ std::unique_ptr<BlockFrequencyInfo> NewBFI =
+ std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
+ if (PGOViewCounts == PGOVCT_Graph)
+ NewBFI->view();
+ else if (PGOViewCounts == PGOVCT_Text) {
+ dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
+ NewBFI->print(dbgs());
+ }
+ }
+ if (PGOViewRawCounts != PGOVCT_None &&
+ (ViewBlockFreqFuncName.empty() ||
+ F.getName().equals(ViewBlockFreqFuncName))) {
+ if (PGOViewRawCounts == PGOVCT_Graph)
+ if (ViewBlockFreqFuncName.empty())
+ WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
+ else
+ ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
+ else if (PGOViewRawCounts == PGOVCT_Text) {
+ dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
+ Func.dumpInfo();
+ }
+ }
+ }
+
+ // Set function hotness attribute from the profile.
+ // We have to apply these attributes at the end because their presence
+ // can affect the BranchProbabilityInfo of any callers, resulting in an
+ // inconsistent MST between prof-gen and prof-use.
+ for (auto &F : HotFunctions) {
+ F->addFnAttr(Attribute::InlineHint);
+ LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
+ << "\n");
+ }
+ for (auto &F : ColdFunctions) {
+ F->addFnAttr(Attribute::Cold);
+ LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
+ << "\n");
+ }
+ return true;
+}
+
+PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename,
+ std::string RemappingFilename,
+ bool IsCS)
+ : ProfileFileName(std::move(Filename)),
+ ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) {
+ if (!PGOTestProfileFile.empty())
+ ProfileFileName = PGOTestProfileFile;
+ if (!PGOTestProfileRemappingFile.empty())
+ ProfileRemappingFileName = PGOTestProfileRemappingFile;
+}
+
+PreservedAnalyses PGOInstrumentationUse::run(Module &M,
+ ModuleAnalysisManager &AM) {
+
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto LookupBPI = [&FAM](Function &F) {
+ return &FAM.getResult<BranchProbabilityAnalysis>(F);
+ };
+
+ auto LookupBFI = [&FAM](Function &F) {
+ return &FAM.getResult<BlockFrequencyAnalysis>(F);
+ };
+
+ auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
+
+ if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
+ LookupBPI, LookupBFI, PSI, IsCS))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+
+bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
+ auto LookupBPI = [this](Function &F) {
+ return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
+ };
+ auto LookupBFI = [this](Function &F) {
+ return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
+ };
+
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, PSI,
+ IsCS);
+}
+
+static std::string getSimpleNodeName(const BasicBlock *Node) {
+ if (!Node->getName().empty())
+ return Node->getName();
+
+ std::string SimpleNodeName;
+ raw_string_ostream OS(SimpleNodeName);
+ Node->printAsOperand(OS, false);
+ return OS.str();
+}
+
+void llvm::setProfMetadata(Module *M, Instruction *TI,
+ ArrayRef<uint64_t> EdgeCounts,
+ uint64_t MaxCount) {
+ MDBuilder MDB(M->getContext());
+ assert(MaxCount > 0 && "Bad max count");
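+  // Branch weights in !prof metadata are 32-bit, so the 64-bit edge counts are
+  // scaled down, preserving their ratios, such that the largest fits in 32
+  // bits.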
+ uint64_t Scale = calculateCountScale(MaxCount);
+ SmallVector<unsigned, 4> Weights;
+ for (const auto &ECI : EdgeCounts)
+ Weights.push_back(scaleBranchCount(ECI, Scale));
+
+ LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
+ : Weights) {
+ dbgs() << W << " ";
+ } dbgs() << "\n";);
+
+ misexpect::verifyMisExpect(TI, Weights, TI->getContext());
+
+ TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+ if (EmitBranchProbability) {
+ std::string BrCondStr = getBranchCondString(TI);
+ if (BrCondStr.empty())
+ return;
+
+ uint64_t WSum =
+ std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
+ [](uint64_t w1, uint64_t w2) { return w1 + w2; });
+ uint64_t TotalCount =
+ std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
+ [](uint64_t c1, uint64_t c2) { return c1 + c2; });
+ Scale = calculateCountScale(WSum);
+ BranchProbability BP(scaleBranchCount(Weights[0], Scale),
+ scaleBranchCount(WSum, Scale));
+ std::string BranchProbStr;
+ raw_string_ostream OS(BranchProbStr);
+ OS << BP;
+ OS << " (total count : " << TotalCount << ")";
+ OS.flush();
+ Function *F = TI->getParent()->getParent();
+ OptimizationRemarkEmitter ORE(F);
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
+ << BrCondStr << " is true with probability : " << BranchProbStr;
+ });
+ }
+}
+
+namespace llvm {
+
+void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) {
+ MDBuilder MDB(M->getContext());
+ TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
+ MDB.createIrrLoopHeaderWeight(Count));
+}
+
+template <> struct GraphTraits<PGOUseFunc *> {
+ using NodeRef = const BasicBlock *;
+ using ChildIteratorType = succ_const_iterator;
+ using nodes_iterator = pointer_iterator<Function::const_iterator>;
+
+ static NodeRef getEntryNode(const PGOUseFunc *G) {
+ return &G->getFunc().front();
+ }
+
+ static ChildIteratorType child_begin(const NodeRef N) {
+ return succ_begin(N);
+ }
+
+ static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
+
+ static nodes_iterator nodes_begin(const PGOUseFunc *G) {
+ return nodes_iterator(G->getFunc().begin());
+ }
+
+ static nodes_iterator nodes_end(const PGOUseFunc *G) {
+ return nodes_iterator(G->getFunc().end());
+ }
+};
+
+template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
+ explicit DOTGraphTraits(bool isSimple = false)
+ : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const PGOUseFunc *G) {
+ return G->getFunc().getName();
+ }
+
+ std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+
+ OS << getSimpleNodeName(Node) << ":\\l";
+ UseBBInfo *BI = Graph->findBBInfo(Node);
+ OS << "Count : ";
+ if (BI && BI->CountValid)
+ OS << BI->CountValue << "\\l";
+ else
+ OS << "Unknown\\l";
+
+ if (!PGOInstrSelect)
+ return Result;
+
+ for (auto BI = Node->begin(); BI != Node->end(); ++BI) {
+ auto *I = &*BI;
+ if (!isa<SelectInst>(I))
+ continue;
+ // Display scaled counts for SELECT instruction:
+ OS << "SELECT : { T = ";
+ uint64_t TC, FC;
+ bool HasProf = I->extractProfMetadata(TC, FC);
+ if (!HasProf)
+ OS << "Unknown, F = Unknown }\\l";
+ else
+ OS << TC << ", F = " << FC << " }\\l";
+ }
+ return Result;
+ }
+};
+
+} // end namespace llvm
diff --git a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
new file mode 100644
index 000000000000..9f81bb16d0a7
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
@@ -0,0 +1,452 @@
+//===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the transformation that optimizes memory intrinsics
+// such as memcpy using the size value profile. When memory intrinsic size
+// value profile metadata is available, a single memory intrinsic is expanded
+// to a sequence of guarded specialized versions that are called with the
+// hottest size(s), for later expansion into more optimal inline sequences.
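+//
+// For example, with a dominant profiled size of 8, a "memcpy(dst, src, n)" is
+// rewritten into roughly:
+//   if (n == 8) memcpy(dst, src, 8);  // constant size; can be lowered inline
+//   else        memcpy(dst, src, n);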
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <cassert>
+#include <cstdint>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pgo-memop-opt"
+
+STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized.");
+STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");
+
+// The minimum call count to optimize memory intrinsic calls.
+static cl::opt<unsigned>
+ MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore,
+ cl::init(1000),
+ cl::desc("The minimum count to optimize memory "
+ "intrinsic calls"));
+
+// Command line option to disable memory intrinsic optimization. The default is
+// false. This is for debugging purposes.
+static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false),
+ cl::Hidden, cl::desc("Disable optimize"));
+
+// The percent threshold to optimize memory intrinsic calls.
+static cl::opt<unsigned>
+ MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40),
+ cl::Hidden, cl::ZeroOrMore,
+ cl::desc("The percentage threshold for the "
+ "memory intrinsic calls optimization"));
+
+// Maximum number of versions for optimizing memory intrinsic call.
+static cl::opt<unsigned>
+ MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden,
+ cl::ZeroOrMore,
+ cl::desc("The max version for the optimized memory "
+ " intrinsic calls"));
+
+// Scale the counts from the annotation using the BB count value.
+static cl::opt<bool>
+ MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden,
+ cl::desc("Scale the memop size counts using the basic "
+ " block count value"));
+
+// This option sets the range of precise profile memop sizes.
+extern cl::opt<std::string> MemOPSizeRange;
+
+// This option sets the value that groups large memop sizes.
+extern cl::opt<unsigned> MemOPSizeLarge;
+
+namespace {
+class PGOMemOPSizeOptLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) {
+ initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "PGOMemOPSize"; }
+
+private:
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+};
+} // end anonymous namespace
+
+char PGOMemOPSizeOptLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
+ "Optimize memory intrinsic using its size value profile",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
+ "Optimize memory intrinsic using its size value profile",
+ false, false)
+
+FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() {
+ return new PGOMemOPSizeOptLegacyPass();
+}
+
+namespace {
+class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
+public:
+ MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI,
+ OptimizationRemarkEmitter &ORE, DominatorTree *DT)
+ : Func(Func), BFI(BFI), ORE(ORE), DT(DT), Changed(false) {
+ ValueDataArray =
+ std::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
+ // Get the MemOPSize range information from the option MemOPSizeRange.
+ getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart,
+ PreciseRangeLast);
+ }
+ bool isChanged() const { return Changed; }
+ void perform() {
+ WorkList.clear();
+ visit(Func);
+
+ for (auto &MI : WorkList) {
+ ++NumOfPGOMemOPAnnotate;
+ if (perform(MI)) {
+ Changed = true;
+ ++NumOfPGOMemOPOpt;
+ LLVM_DEBUG(dbgs() << "MemOP call: "
+ << MI->getCalledFunction()->getName()
+ << "is Transformed.\n");
+ }
+ }
+ }
+
+ void visitMemIntrinsic(MemIntrinsic &MI) {
+ Value *Length = MI.getLength();
+ // Do not perform this on constant-length calls.
+ if (dyn_cast<ConstantInt>(Length))
+ return;
+ WorkList.push_back(&MI);
+ }
+
+private:
+ Function &Func;
+ BlockFrequencyInfo &BFI;
+ OptimizationRemarkEmitter &ORE;
+ DominatorTree *DT;
+ bool Changed;
+ std::vector<MemIntrinsic *> WorkList;
+ // Start of the precise range.
+ int64_t PreciseRangeStart;
+ // Last value of the precise range.
+ int64_t PreciseRangeLast;
+ // The space to read the profile annotation.
+ std::unique_ptr<InstrProfValueData[]> ValueDataArray;
+ bool perform(MemIntrinsic *MI);
+
+ // This kind shows which group the value falls in. For PreciseValue, we have
+ // the profile count for that value. LargeGroup groups the values that are in
+ // range [LargeValue, +inf). NonLargeGroup groups the rest of the values.
+ enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup };
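+ // For example (hypothetical settings): with a precise range of [0, 8] and
+ // MemOPSizeLarge == 4096, a profiled size value of 5 maps to PreciseValue,
+ // the sentinel value 9 (== PreciseRangeLast + 1, used by the profiling
+ // run-time to bucket sizes outside the precise range) maps to NonLargeGroup,
+ // and 4096 maps to LargeGroup.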
+
+ MemOPSizeKind getMemOPSizeKind(int64_t Value) const {
+ if (Value == MemOPSizeLarge && MemOPSizeLarge != 0)
+ return LargeGroup;
+ if (Value == PreciseRangeLast + 1)
+ return NonLargeGroup;
+ return PreciseValue;
+ }
+};
+
+static const char *getMIName(const MemIntrinsic *MI) {
+ switch (MI->getIntrinsicID()) {
+ case Intrinsic::memcpy:
+ return "memcpy";
+ case Intrinsic::memmove:
+ return "memmove";
+ case Intrinsic::memset:
+ return "memset";
+ default:
+ return "unknown";
+ }
+}
+
+static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
+ assert(Count <= TotalCount);
+ if (Count < MemOPCountThreshold)
+ return false;
+ if (Count < TotalCount * MemOPPercentThreshold / 100)
+ return false;
+ return true;
+}
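+
+// Worked example for the check above (using the default thresholds of 1000
+// and 40%): a size with count 500 out of 1000 fails the count threshold, a
+// count of 2000 out of 10000 (20%) fails the percentage threshold, and 5000
+// out of 10000 passes both.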
+
+static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num,
+ uint64_t Denom) {
+ if (!MemOPScaleCount)
+ return Count;
+ bool Overflowed;
+ uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed);
+ return ScaleCount / Denom;
+}
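+
+// Worked example for the scaling above (hypothetical numbers): a value count
+// of 600 recorded against a profiled total of 1000, rescaled to a basic block
+// count of 2000, becomes 600 * 2000 / 1000 = 1200.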
+
+bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
+ assert(MI);
+ if (MI->getIntrinsicID() == Intrinsic::memmove)
+ return false;
+
+ uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2;
+ uint64_t TotalCount;
+ if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions,
+ ValueDataArray.get(), NumVals, TotalCount))
+ return false;
+
+ uint64_t ActualCount = TotalCount;
+ uint64_t SavedTotalCount = TotalCount;
+ if (MemOPScaleCount) {
+ auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent());
+ if (!BBEdgeCount)
+ return false;
+ ActualCount = *BBEdgeCount;
+ }
+
+ ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
+ LLVM_DEBUG(dbgs() << "Read one memory intrinsic profile with count "
+ << ActualCount << "\n");
+ LLVM_DEBUG(
+ for (auto &VD
+ : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; });
+
+ if (ActualCount < MemOPCountThreshold)
+ return false;
+ // Skip if the total value profiled count is 0, in which case we can't
+ // scale up the counts properly (and there is no profitable transformation).
+ if (TotalCount == 0)
+ return false;
+
+ TotalCount = ActualCount;
+ if (MemOPScaleCount)
+ LLVM_DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount
+ << " denominator = " << SavedTotalCount << "\n");
+
+ // Keeping track of the count of the default case:
+ uint64_t RemainCount = TotalCount;
+ uint64_t SavedRemainCount = SavedTotalCount;
+ SmallVector<uint64_t, 16> SizeIds;
+ SmallVector<uint64_t, 16> CaseCounts;
+ uint64_t MaxCount = 0;
+ unsigned Version = 0;
+ // Default case is in the front -- save the slot here.
+ CaseCounts.push_back(0);
+ for (auto &VD : VDs) {
+ int64_t V = VD.Value;
+ uint64_t C = VD.Count;
+ if (MemOPScaleCount)
+ C = getScaledCount(C, ActualCount, SavedTotalCount);
+
+ // Only care about precise values here.
+ if (getMemOPSizeKind(V) != PreciseValue)
+ continue;
+
+ // ValueCounts are sorted on the count. Break at the first un-profitable
+ // value.
+ if (!isProfitable(C, RemainCount))
+ break;
+
+ SizeIds.push_back(V);
+ CaseCounts.push_back(C);
+ if (C > MaxCount)
+ MaxCount = C;
+
+ assert(RemainCount >= C);
+ RemainCount -= C;
+ assert(SavedRemainCount >= VD.Count);
+ SavedRemainCount -= VD.Count;
+
+ if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0)
+ break;
+ }
+
+ if (Version == 0)
+ return false;
+
+ CaseCounts[0] = RemainCount;
+ if (RemainCount > MaxCount)
+ MaxCount = RemainCount;
+
+ uint64_t SumForOpt = TotalCount - RemainCount;
+
+ LLVM_DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
+ << " Versions (covering " << SumForOpt << " out of "
+ << TotalCount << ")\n");
+
+ // mem_op(..., size)
+ // ==>
+ // switch (size) {
+ // case s1:
+ // mem_op(..., s1);
+ // goto merge_bb;
+ // case s2:
+ // mem_op(..., s2);
+ // goto merge_bb;
+ // ...
+ // default:
+ // mem_op(..., size);
+ // goto merge_bb;
+ // }
+ // merge_bb:
+
+ BasicBlock *BB = MI->getParent();
+ LLVM_DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
+ LLVM_DEBUG(dbgs() << *BB << "\n");
+ auto OrigBBFreq = BFI.getBlockFreq(BB);
+
+ BasicBlock *DefaultBB = SplitBlock(BB, MI, DT);
+ BasicBlock::iterator It(*MI);
+ ++It;
+ assert(It != DefaultBB->end());
+ BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT);
+ MergeBB->setName("MemOP.Merge");
+ BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
+ DefaultBB->setName("MemOP.Default");
+
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ auto &Ctx = Func.getContext();
+ IRBuilder<> IRB(BB);
+ BB->getTerminator()->eraseFromParent();
+ Value *SizeVar = MI->getLength();
+ SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size());
+
+ // Clear the value profile data.
+ MI->setMetadata(LLVMContext::MD_prof, nullptr);
+ // If all promoted, we don't need the MD.prof metadata.
+ if (SavedRemainCount > 0 || Version != NumVals)
+ // Otherwise we need to update it with the un-promoted records.
+ annotateValueSite(*Func.getParent(), *MI, VDs.slice(Version),
+ SavedRemainCount, IPVK_MemOPSize, NumVals);
+
+ LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n");
+
+ std::vector<DominatorTree::UpdateType> Updates;
+ if (DT)
+ Updates.reserve(2 * SizeIds.size());
+
+ for (uint64_t SizeId : SizeIds) {
+ BasicBlock *CaseBB = BasicBlock::Create(
+ Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
+ Instruction *NewInst = MI->clone();
+ // Fix the argument.
+ auto *MemI = cast<MemIntrinsic>(NewInst);
+ auto *SizeType = dyn_cast<IntegerType>(MemI->getLength()->getType());
+ assert(SizeType && "Expected integer type size argument.");
+ ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId);
+ MemI->setLength(CaseSizeId);
+ CaseBB->getInstList().push_back(NewInst);
+ IRBuilder<> IRBCase(CaseBB);
+ IRBCase.CreateBr(MergeBB);
+ SI->addCase(CaseSizeId, CaseBB);
+ if (DT) {
+ Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB});
+ Updates.push_back({DominatorTree::Insert, BB, CaseBB});
+ }
+ LLVM_DEBUG(dbgs() << *CaseBB << "\n");
+ }
+ DTU.applyUpdates(Updates);
+ Updates.clear();
+
+ setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
+
+ LLVM_DEBUG(dbgs() << *BB << "\n");
+ LLVM_DEBUG(dbgs() << *DefaultBB << "\n");
+ LLVM_DEBUG(dbgs() << *MergeBB << "\n");
+
+ ORE.emit([&]() {
+ using namespace ore;
+ return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MI)
+ << "optimized " << NV("Intrinsic", StringRef(getMIName(MI)))
+ << " with count " << NV("Count", SumForOpt) << " out of "
+ << NV("Total", TotalCount) << " for " << NV("Versions", Version)
+ << " versions";
+ });
+
+ return true;
+}
+} // namespace
+
+static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI,
+ OptimizationRemarkEmitter &ORE,
+ DominatorTree *DT) {
+ if (DisableMemOPOPT)
+ return false;
+
+ if (F.hasFnAttribute(Attribute::OptimizeForSize))
+ return false;
+ MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT);
+ MemOPSizeOpt.perform();
+ return MemOPSizeOpt.isChanged();
+}
+
+bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) {
+ BlockFrequencyInfo &BFI =
+ getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
+ auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ return PGOMemOPSizeOptImpl(F, BFI, ORE, DT);
+}
+
+namespace llvm {
+char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID;
+
+PreservedAnalyses PGOMemOPSizeOpt::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
+ bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ auto PA = PreservedAnalyses();
+ PA.preserve<GlobalsAA>();
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
new file mode 100644
index 000000000000..81d92e724c7d
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp
@@ -0,0 +1,357 @@
+//===- PoisonChecking.cpp - -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements a transform pass which instruments IR such that poison semantics
+// are made explicit. That is, it provides a (possibly partial) executable
+// semantics for every instruction w.r.t. poison as specified in the LLVM
+// LangRef. There are obvious parallels to the sanitizer tools, but this pass
+// is focused purely on the semantics of LLVM IR, not any particular source
+// language. If you're looking for something to see if your C/C++ contains
+// UB, this is not it.
+//
+// The rewritten semantics of each instruction will include the following
+// components:
+//
+// 1) The original instruction, unmodified.
+// 2) A propagation rule which translates dynamic information about the poison
+// state of each input to whether the dynamic output of the instruction
+// produces poison.
+// 3) A flag validation rule which validates any poison producing flags on the
+// instruction itself (e.g. checks for overflow on nsw).
+// 4) A check rule which traps (to a handler function) if this instruction must
+//    execute undefined behavior given the poison state of its inputs.
+//
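+// Illustrative sketch of the rewrite for a single instruction (conceptual IR
+// only, with hypothetical value names; the pass builds the corresponding
+// values with IRBuilder):
+//
+//   %r = add nsw i32 %a, %b
+//
+// becomes, roughly,
+//
+//   %ov  = extractvalue {i32, i1} llvm.sadd.with.overflow(%a, %b), 1
+//   %r   = add nsw i32 %a, %b                   ; original, unmodified
+//   %r.p = or i1 (or i1 %a.p, %b.p), %ov        ; propagation + nsw validation
+//
+// where %x.p is the dynamic i1 poison flag tracked for %x.
+//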
+// At the moment, the UB detection is done in a best effort manner; that is,
+// the resulting code may produce a false negative result (not report UB when
+// it actually exists according to the LangRef spec), but should never produce
+// a false positive (report UB where it doesn't exist). The intention is to
+// eventually support a "strict" mode which never dynamically reports a false
+// negative at the cost of rejecting some valid inputs to translation.
+//
+// Use cases for this pass include:
+// - Understanding (and testing!) the implications of the definition of poison
+// from the LangRef.
+// - Validating the output of an IR fuzzer to ensure that all programs produced
+// are well defined on the specific input used.
+// - Finding/confirming poison specific miscompiles by checking the poison
+// status of an input/IR pair is the same before and after an optimization
+// transform.
+// - Checking that a bugpoint reduction does not introduce UB which didn't
+// exist in the original program being reduced.
+//
+// The major sources of inaccuracy are currently:
+// - Most validation rules are not yet implemented for instructions with
+// poison-relevant flags. At the moment, only nsw/nuw on add/sub are supported.
+// - UB which is control dependent on a branch on poison is not yet
+// reported. Currently, only data flow dependence is modeled.
+// - Poison which is propagated through memory is not modeled. As such,
+// storing poison to memory and then reloading it will cause a false negative
+// as we consider the reloaded value to not be poisoned.
+// - Poison propagation across function boundaries is not modeled. At the
+// moment, all arguments and return values are assumed not to be poison.
+// - Undef is not modeled. In particular, the optimizer's freedom to pick
+// concrete values for undef bits so as to maximize potential for producing
+// poison is not modeled.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "poison-checking"
+
+static cl::opt<bool>
+LocalCheck("poison-checking-function-local",
+ cl::init(false),
+ cl::desc("Check that returns are non-poison (for testing)"));
+
+
+static bool isConstantFalse(Value* V) {
+ assert(V->getType()->isIntegerTy(1));
+ if (auto *CI = dyn_cast<ConstantInt>(V))
+ return CI->isZero();
+ return false;
+}
+
+static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
+ if (Ops.size() == 0)
+ return B.getFalse();
+ unsigned i = 0;
+ for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
+ if (i == Ops.size())
+ return B.getFalse();
+ Value *Accum = Ops[i++];
+ for (; i < Ops.size(); i++)
+ if (!isConstantFalse(Ops[i]))
+ Accum = B.CreateOr(Accum, Ops[i]);
+ return Accum;
+}
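+
+// For example, buildOrChain(B, {false, %a, false, %b}) skips the constant
+// false inputs and returns "or i1 %a, %b"; an empty or all-false list folds
+// to the constant false.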
+
+static void generatePoisonChecksForBinOp(Instruction &I,
+ SmallVector<Value*, 2> &Checks) {
+ assert(isa<BinaryOperator>(I));
+
+ IRBuilder<> B(&I);
+ Value *LHS = I.getOperand(0);
+ Value *RHS = I.getOperand(1);
+ switch (I.getOpcode()) {
+ default:
+ return;
+ case Instruction::Add: {
+ if (I.hasNoSignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ if (I.hasNoUnsignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ break;
+ }
+ case Instruction::Sub: {
+ if (I.hasNoSignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ if (I.hasNoUnsignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ break;
+ }
+ case Instruction::Mul: {
+ if (I.hasNoSignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ if (I.hasNoUnsignedWrap()) {
+ auto *OverflowOp =
+ B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
+ Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
+ }
+ break;
+ }
+ case Instruction::UDiv: {
+ if (I.isExact()) {
+ auto *Check =
+ B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
+ ConstantInt::get(LHS->getType(), 0));
+ Checks.push_back(Check);
+ }
+ break;
+ }
+ case Instruction::SDiv: {
+ if (I.isExact()) {
+ auto *Check =
+ B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
+ ConstantInt::get(LHS->getType(), 0));
+ Checks.push_back(Check);
+ }
+ break;
+ }
+ case Instruction::AShr:
+ case Instruction::LShr:
+ case Instruction::Shl: {
+ Value *ShiftCheck =
+ B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
+ ConstantInt::get(RHS->getType(),
+ LHS->getType()->getScalarSizeInBits()));
+ Checks.push_back(ShiftCheck);
+ break;
+ }
+ };
+}
+
+static Value* generatePoisonChecks(Instruction &I) {
+ IRBuilder<> B(&I);
+ SmallVector<Value*, 2> Checks;
+ if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
+ generatePoisonChecksForBinOp(I, Checks);
+
+ // Handle non-binops separately.
+ switch (I.getOpcode()) {
+ default:
+ break;
+ case Instruction::ExtractElement: {
+ Value *Vec = I.getOperand(0);
+ if (Vec->getType()->getVectorIsScalable())
+ break;
+ Value *Idx = I.getOperand(1);
+ unsigned NumElts = Vec->getType()->getVectorNumElements();
+ Value *Check =
+ B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
+ ConstantInt::get(Idx->getType(), NumElts));
+ Checks.push_back(Check);
+ break;
+ }
+ case Instruction::InsertElement: {
+ Value *Vec = I.getOperand(0);
+ if (Vec->getType()->getVectorIsScalable())
+ break;
+ Value *Idx = I.getOperand(2);
+ unsigned NumElts = Vec->getType()->getVectorNumElements();
+ Value *Check =
+ B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
+ ConstantInt::get(Idx->getType(), NumElts));
+ Checks.push_back(Check);
+ break;
+ }
+ };
+ return buildOrChain(B, Checks);
+}
+
+static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
+ auto Itr = ValToPoison.find(V);
+ if (Itr != ValToPoison.end())
+ return Itr->second;
+ if (isa<Constant>(V)) {
+ return ConstantInt::getFalse(V->getContext());
+ }
+ // Return false for unknown values - this implements a non-strict mode where
+ // unhandled IR constructs are simply considered to never produce poison. At
+ // some point in the future, we probably want a "strict mode" for testing if
+ // nothing else.
+ return ConstantInt::getFalse(V->getContext());
+}
+
+static void CreateAssert(IRBuilder<> &B, Value *Cond) {
+ assert(Cond->getType()->isIntegerTy(1));
+ if (auto *CI = dyn_cast<ConstantInt>(Cond))
+ if (CI->isAllOnesValue())
+ return;
+
+ Module *M = B.GetInsertBlock()->getModule();
+ M->getOrInsertFunction("__poison_checker_assert",
+ Type::getVoidTy(M->getContext()),
+ Type::getInt1Ty(M->getContext()));
+ Function *TrapFunc = M->getFunction("__poison_checker_assert");
+ B.CreateCall(TrapFunc, Cond);
+}
+
+static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
+ assert(Cond->getType()->isIntegerTy(1));
+ CreateAssert(B, B.CreateNot(Cond));
+}
+
+static bool rewrite(Function &F) {
+ auto * const Int1Ty = Type::getInt1Ty(F.getContext());
+
+ DenseMap<Value *, Value *> ValToPoison;
+
+ for (BasicBlock &BB : F)
+ for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
+ auto *OldPHI = cast<PHINode>(&*I);
+ auto *NewPHI = PHINode::Create(Int1Ty,
+ OldPHI->getNumIncomingValues());
+ for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
+ NewPHI->addIncoming(UndefValue::get(Int1Ty),
+ OldPHI->getIncomingBlock(i));
+ NewPHI->insertBefore(OldPHI);
+ ValToPoison[OldPHI] = NewPHI;
+ }
+
+ for (BasicBlock &BB : F)
+ for (Instruction &I : BB) {
+ if (isa<PHINode>(I)) continue;
+
+ IRBuilder<> B(cast<Instruction>(&I));
+
+ // Note: There are many more sources of documented UB, but this pass only
+ // attempts to find UB triggered by propagation of poison.
+ if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I)))
+ CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
+
+ if (LocalCheck)
+ if (auto *RI = dyn_cast<ReturnInst>(&I))
+ if (RI->getNumOperands() != 0) {
+ Value *Op = RI->getOperand(0);
+ CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
+ }
+
+ SmallVector<Value*, 4> Checks;
+ if (propagatesFullPoison(&I))
+ for (Value *V : I.operands())
+ Checks.push_back(getPoisonFor(ValToPoison, V));
+
+ if (auto *Check = generatePoisonChecks(I))
+ Checks.push_back(Check);
+ ValToPoison[&I] = buildOrChain(B, Checks);
+ }
+
+ for (BasicBlock &BB : F)
+ for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
+ auto *OldPHI = cast<PHINode>(&*I);
+ if (!ValToPoison.count(OldPHI))
+ continue; // skip the newly inserted phis
+ auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
+ for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
+ auto *OldVal = OldPHI->getIncomingValue(i);
+ NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
+ }
+ }
+ return true;
+}
+
+
+PreservedAnalyses PoisonCheckingPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ bool Changed = false;
+ for (auto &F : M)
+ Changed |= rewrite(F);
+
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+PreservedAnalyses PoisonCheckingPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+
+/* Major TODO Items:
+ - Control dependent poison UB
+ - Strict mode - (i.e. must analyze every operand)
+ - Poison through memory
+ - Function ABIs
+ - Full coverage of intrinsics, etc.. (ouch)
+
+ Instructions w/Unclear Semantics:
+ - shufflevector - It would seem reasonable for an out of bounds mask element
+ to produce poison, but the LangRef does not say so.
+ - and/or - It would seem reasonable for poison to propagate from both
+ arguments, but the LangRef doesn't say so and propagatesFullPoison doesn't
+ include these two.
+ - all binary ops w/vector operands - The likely interpretation would be that
+ any element overflowing should produce poison for the entire result, but
+ the LangRef does not say so.
+ - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems
+ strange that only certain flags should be documented as producing poison.
+
+ Cases of clear poison semantics not yet implemented:
+ - Exact flags on ashr/lshr produce poison
+ - NSW/NUW flags on shl produce poison
+ - Inbounds flag on getelementptr produces poison
+ - fptosi/fptoui (out of bounds input) produce poison
+ - Scalable vector types for insertelement/extractelement
+ - Floating point binary ops w/fmf nnan/noinfs flags produce poison
+ */
diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
new file mode 100644
index 000000000000..f8fa9cad03b8
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -0,0 +1,947 @@
+//===-- SanitizerCoverage.cpp - coverage instrumentation for sanitizers ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coverage instrumentation done on LLVM IR level, works with Sanitizers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "sancov"
+
+static const char *const SanCovTracePCIndirName =
+ "__sanitizer_cov_trace_pc_indir";
+static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc";
+static const char *const SanCovTraceCmp1 = "__sanitizer_cov_trace_cmp1";
+static const char *const SanCovTraceCmp2 = "__sanitizer_cov_trace_cmp2";
+static const char *const SanCovTraceCmp4 = "__sanitizer_cov_trace_cmp4";
+static const char *const SanCovTraceCmp8 = "__sanitizer_cov_trace_cmp8";
+static const char *const SanCovTraceConstCmp1 =
+ "__sanitizer_cov_trace_const_cmp1";
+static const char *const SanCovTraceConstCmp2 =
+ "__sanitizer_cov_trace_const_cmp2";
+static const char *const SanCovTraceConstCmp4 =
+ "__sanitizer_cov_trace_const_cmp4";
+static const char *const SanCovTraceConstCmp8 =
+ "__sanitizer_cov_trace_const_cmp8";
+static const char *const SanCovTraceDiv4 = "__sanitizer_cov_trace_div4";
+static const char *const SanCovTraceDiv8 = "__sanitizer_cov_trace_div8";
+static const char *const SanCovTraceGep = "__sanitizer_cov_trace_gep";
+static const char *const SanCovTraceSwitchName = "__sanitizer_cov_trace_switch";
+static const char *const SanCovModuleCtorTracePcGuardName =
+ "sancov.module_ctor_trace_pc_guard";
+static const char *const SanCovModuleCtor8bitCountersName =
+ "sancov.module_ctor_8bit_counters";
+static const uint64_t SanCtorAndDtorPriority = 2;
+
+static const char *const SanCovTracePCGuardName =
+ "__sanitizer_cov_trace_pc_guard";
+static const char *const SanCovTracePCGuardInitName =
+ "__sanitizer_cov_trace_pc_guard_init";
+static const char *const SanCov8bitCountersInitName =
+ "__sanitizer_cov_8bit_counters_init";
+static const char *const SanCovPCsInitName = "__sanitizer_cov_pcs_init";
+
+static const char *const SanCovGuardsSectionName = "sancov_guards";
+static const char *const SanCovCountersSectionName = "sancov_cntrs";
+static const char *const SanCovPCsSectionName = "sancov_pcs";
+
+static const char *const SanCovLowestStackName = "__sancov_lowest_stack";
+
+static cl::opt<int> ClCoverageLevel(
+ "sanitizer-coverage-level",
+ cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, "
+ "3: all blocks and critical edges"),
+ cl::Hidden, cl::init(0));
+
+static cl::opt<bool> ClTracePC("sanitizer-coverage-trace-pc",
+ cl::desc("Experimental pc tracing"), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> ClTracePCGuard("sanitizer-coverage-trace-pc-guard",
+ cl::desc("pc tracing with a guard"),
+ cl::Hidden, cl::init(false));
+
+// If true, we create a global variable that contains PCs of all instrumented
+// BBs, put this global into a named section, and pass this section's bounds
+// to __sanitizer_cov_pcs_init.
+// This way the coverage instrumentation does not need to acquire the PCs
+// at run-time. Works with trace-pc-guard and inline-8bit-counters.
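+// Each instrumented BB contributes two intptr-sized entries to the table: its
+// PC (the function address for the entry block, a blockaddress otherwise) and
+// a flag that is 1 for function-entry blocks and 0 for all others (see
+// CreatePCArray below).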
+static cl::opt<bool> ClCreatePCTable("sanitizer-coverage-pc-table",
+ cl::desc("create a static PC table"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+ ClInline8bitCounters("sanitizer-coverage-inline-8bit-counters",
+ cl::desc("increments 8-bit counter for every edge"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+ ClCMPTracing("sanitizer-coverage-trace-compares",
+ cl::desc("Tracing of CMP and similar instructions"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClDIVTracing("sanitizer-coverage-trace-divs",
+ cl::desc("Tracing of DIV instructions"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClGEPTracing("sanitizer-coverage-trace-geps",
+ cl::desc("Tracing of GEP instructions"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool>
+ ClPruneBlocks("sanitizer-coverage-prune-blocks",
+ cl::desc("Reduce the number of instrumented blocks"),
+ cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClStackDepth("sanitizer-coverage-stack-depth",
+ cl::desc("max stack depth tracing"),
+ cl::Hidden, cl::init(false));
+
+namespace {
+
+SanitizerCoverageOptions getOptions(int LegacyCoverageLevel) {
+ SanitizerCoverageOptions Res;
+ switch (LegacyCoverageLevel) {
+ case 0:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_None;
+ break;
+ case 1:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_Function;
+ break;
+ case 2:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_BB;
+ break;
+ case 3:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_Edge;
+ break;
+ case 4:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_Edge;
+ Res.IndirectCalls = true;
+ break;
+ }
+ return Res;
+}
+
+SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
+ // Sets CoverageType and IndirectCalls.
+ SanitizerCoverageOptions CLOpts = getOptions(ClCoverageLevel);
+ Options.CoverageType = std::max(Options.CoverageType, CLOpts.CoverageType);
+ Options.IndirectCalls |= CLOpts.IndirectCalls;
+ Options.TraceCmp |= ClCMPTracing;
+ Options.TraceDiv |= ClDIVTracing;
+ Options.TraceGep |= ClGEPTracing;
+ Options.TracePC |= ClTracePC;
+ Options.TracePCGuard |= ClTracePCGuard;
+ Options.Inline8bitCounters |= ClInline8bitCounters;
+ Options.PCTable |= ClCreatePCTable;
+ Options.NoPrune |= !ClPruneBlocks;
+ Options.StackDepth |= ClStackDepth;
+ if (!Options.TracePCGuard && !Options.TracePC &&
+ !Options.Inline8bitCounters && !Options.StackDepth)
+ Options.TracePCGuard = true; // TracePCGuard is default.
+ return Options;
+}
+
+using DomTreeCallback = function_ref<const DominatorTree *(Function &F)>;
+using PostDomTreeCallback =
+ function_ref<const PostDominatorTree *(Function &F)>;
+
+class ModuleSanitizerCoverage {
+public:
+ ModuleSanitizerCoverage(
+ const SanitizerCoverageOptions &Options = SanitizerCoverageOptions())
+ : Options(OverrideFromCL(Options)) {}
+ bool instrumentModule(Module &M, DomTreeCallback DTCallback,
+ PostDomTreeCallback PDTCallback);
+
+private:
+ void instrumentFunction(Function &F, DomTreeCallback DTCallback,
+ PostDomTreeCallback PDTCallback);
+ void InjectCoverageForIndirectCalls(Function &F,
+ ArrayRef<Instruction *> IndirCalls);
+ void InjectTraceForCmp(Function &F, ArrayRef<Instruction *> CmpTraceTargets);
+ void InjectTraceForDiv(Function &F,
+ ArrayRef<BinaryOperator *> DivTraceTargets);
+ void InjectTraceForGep(Function &F,
+ ArrayRef<GetElementPtrInst *> GepTraceTargets);
+ void InjectTraceForSwitch(Function &F,
+ ArrayRef<Instruction *> SwitchTraceTargets);
+ bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks,
+ bool IsLeafFunc = true);
+ GlobalVariable *CreateFunctionLocalArrayInSection(size_t NumElements,
+ Function &F, Type *Ty,
+ const char *Section);
+ GlobalVariable *CreatePCArray(Function &F, ArrayRef<BasicBlock *> AllBlocks);
+ void CreateFunctionLocalArrays(Function &F, ArrayRef<BasicBlock *> AllBlocks);
+ void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx,
+ bool IsLeafFunc = true);
+ Function *CreateInitCallsForSections(Module &M, const char *CtorName,
+ const char *InitFunctionName, Type *Ty,
+ const char *Section);
+ std::pair<Value *, Value *> CreateSecStartEnd(Module &M, const char *Section,
+ Type *Ty);
+
+ void SetNoSanitizeMetadata(Instruction *I) {
+ I->setMetadata(I->getModule()->getMDKindID("nosanitize"),
+ MDNode::get(*C, None));
+ }
+
+ std::string getSectionName(const std::string &Section) const;
+ std::string getSectionStart(const std::string &Section) const;
+ std::string getSectionEnd(const std::string &Section) const;
+ FunctionCallee SanCovTracePCIndir;
+ FunctionCallee SanCovTracePC, SanCovTracePCGuard;
+ FunctionCallee SanCovTraceCmpFunction[4];
+ FunctionCallee SanCovTraceConstCmpFunction[4];
+ FunctionCallee SanCovTraceDivFunction[2];
+ FunctionCallee SanCovTraceGepFunction;
+ FunctionCallee SanCovTraceSwitchFunction;
+ GlobalVariable *SanCovLowestStack;
+ InlineAsm *EmptyAsm;
+ Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy,
+ *Int16Ty, *Int8Ty, *Int8PtrTy;
+ Module *CurModule;
+ std::string CurModuleUniqueId;
+ Triple TargetTriple;
+ LLVMContext *C;
+ const DataLayout *DL;
+
+ GlobalVariable *FunctionGuardArray; // for trace-pc-guard.
+ GlobalVariable *Function8bitCounterArray; // for inline-8bit-counters.
+ GlobalVariable *FunctionPCsArray; // for pc-table.
+ SmallVector<GlobalValue *, 20> GlobalsToAppendToUsed;
+ SmallVector<GlobalValue *, 20> GlobalsToAppendToCompilerUsed;
+
+ SanitizerCoverageOptions Options;
+};
+
+class ModuleSanitizerCoverageLegacyPass : public ModulePass {
+public:
+ ModuleSanitizerCoverageLegacyPass(
+ const SanitizerCoverageOptions &Options = SanitizerCoverageOptions())
+ : ModulePass(ID), Options(Options) {
+ initializeModuleSanitizerCoverageLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+ bool runOnModule(Module &M) override {
+ ModuleSanitizerCoverage ModuleSancov(Options);
+ auto DTCallback = [this](Function &F) -> const DominatorTree * {
+ return &this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+ };
+ auto PDTCallback = [this](Function &F) -> const PostDominatorTree * {
+ return &this->getAnalysis<PostDominatorTreeWrapperPass>(F)
+ .getPostDomTree();
+ };
+ return ModuleSancov.instrumentModule(M, DTCallback, PDTCallback);
+ }
+
+ static char ID; // Pass identification, replacement for typeid
+ StringRef getPassName() const override { return "ModuleSanitizerCoverage"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTreeWrapperPass>();
+ }
+
+private:
+ SanitizerCoverageOptions Options;
+};
+
+} // namespace
+
+PreservedAnalyses ModuleSanitizerCoveragePass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ ModuleSanitizerCoverage ModuleSancov(Options);
+ auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto DTCallback = [&FAM](Function &F) -> const DominatorTree * {
+ return &FAM.getResult<DominatorTreeAnalysis>(F);
+ };
+ auto PDTCallback = [&FAM](Function &F) -> const PostDominatorTree * {
+ return &FAM.getResult<PostDominatorTreeAnalysis>(F);
+ };
+ if (ModuleSancov.instrumentModule(M, DTCallback, PDTCallback))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+std::pair<Value *, Value *>
+ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section,
+ Type *Ty) {
+ GlobalVariable *SecStart =
+ new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr,
+ getSectionStart(Section));
+ SecStart->setVisibility(GlobalValue::HiddenVisibility);
+ GlobalVariable *SecEnd =
+ new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
+ nullptr, getSectionEnd(Section));
+ SecEnd->setVisibility(GlobalValue::HiddenVisibility);
+ IRBuilder<> IRB(M.getContext());
+ Value *SecEndPtr = IRB.CreatePointerCast(SecEnd, Ty);
+ if (!TargetTriple.isOSBinFormatCOFF())
+ return std::make_pair(IRB.CreatePointerCast(SecStart, Ty), SecEndPtr);
+
+ // Account for the fact that on windows-msvc __start_* symbols actually
+ // point to a uint64_t before the start of the array.
+ auto SecStartI8Ptr = IRB.CreatePointerCast(SecStart, Int8PtrTy);
+ auto GEP = IRB.CreateGEP(Int8Ty, SecStartI8Ptr,
+ ConstantInt::get(IntptrTy, sizeof(uint64_t)));
+ return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEndPtr);
+}
+
+Function *ModuleSanitizerCoverage::CreateInitCallsForSections(
+ Module &M, const char *CtorName, const char *InitFunctionName, Type *Ty,
+ const char *Section) {
+ auto SecStartEnd = CreateSecStartEnd(M, Section, Ty);
+ auto SecStart = SecStartEnd.first;
+ auto SecEnd = SecStartEnd.second;
+ Function *CtorFunc;
+ std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
+ M, CtorName, InitFunctionName, {Ty, Ty}, {SecStart, SecEnd});
+ assert(CtorFunc->getName() == CtorName);
+
+ if (TargetTriple.supportsCOMDAT()) {
+ // Use comdat to dedup CtorFunc.
+ CtorFunc->setComdat(M.getOrInsertComdat(CtorName));
+ appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc);
+ } else {
+ appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
+ }
+
+ if (TargetTriple.isOSBinFormatCOFF()) {
+ // In COFF files, if the constructors are set as COMDAT (they are because
+ // COFF supports COMDAT) and the linker flag /OPT:REF (strip unreferenced
+ // functions and data) is used, the constructors get stripped. To prevent
+ // this, give the constructors weak ODR linkage and ensure the linker knows
+ // to include the sancov constructor. This way the linker can deduplicate
+ // the constructors but always leave one copy.
+ CtorFunc->setLinkage(GlobalValue::WeakODRLinkage);
+ appendToUsed(M, CtorFunc);
+ }
+ return CtorFunc;
+}
+
+bool ModuleSanitizerCoverage::instrumentModule(
+ Module &M, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) {
+ if (Options.CoverageType == SanitizerCoverageOptions::SCK_None)
+ return false;
+ C = &(M.getContext());
+ DL = &M.getDataLayout();
+ CurModule = &M;
+ CurModuleUniqueId = getUniqueModuleId(CurModule);
+ TargetTriple = Triple(M.getTargetTriple());
+ FunctionGuardArray = nullptr;
+ Function8bitCounterArray = nullptr;
+ FunctionPCsArray = nullptr;
+ IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits());
+ IntptrPtrTy = PointerType::getUnqual(IntptrTy);
+ Type *VoidTy = Type::getVoidTy(*C);
+ IRBuilder<> IRB(*C);
+ Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty());
+ Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
+ Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
+ Int64Ty = IRB.getInt64Ty();
+ Int32Ty = IRB.getInt32Ty();
+ Int16Ty = IRB.getInt16Ty();
+ Int8Ty = IRB.getInt8Ty();
+
+ SanCovTracePCIndir =
+ M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy);
+ // Make sure smaller parameters are zero-extended to i64 as required by the
+ // x86_64 ABI.
+ AttributeList SanCovTraceCmpZeroExtAL;
+ if (TargetTriple.getArch() == Triple::x86_64) {
+ SanCovTraceCmpZeroExtAL =
+ SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 0, Attribute::ZExt);
+ SanCovTraceCmpZeroExtAL =
+ SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 1, Attribute::ZExt);
+ }
+
+ SanCovTraceCmpFunction[0] =
+ M.getOrInsertFunction(SanCovTraceCmp1, SanCovTraceCmpZeroExtAL, VoidTy,
+ IRB.getInt8Ty(), IRB.getInt8Ty());
+ SanCovTraceCmpFunction[1] =
+ M.getOrInsertFunction(SanCovTraceCmp2, SanCovTraceCmpZeroExtAL, VoidTy,
+ IRB.getInt16Ty(), IRB.getInt16Ty());
+ SanCovTraceCmpFunction[2] =
+ M.getOrInsertFunction(SanCovTraceCmp4, SanCovTraceCmpZeroExtAL, VoidTy,
+ IRB.getInt32Ty(), IRB.getInt32Ty());
+ SanCovTraceCmpFunction[3] =
+ M.getOrInsertFunction(SanCovTraceCmp8, VoidTy, Int64Ty, Int64Ty);
+
+ SanCovTraceConstCmpFunction[0] = M.getOrInsertFunction(
+ SanCovTraceConstCmp1, SanCovTraceCmpZeroExtAL, VoidTy, Int8Ty, Int8Ty);
+ SanCovTraceConstCmpFunction[1] = M.getOrInsertFunction(
+ SanCovTraceConstCmp2, SanCovTraceCmpZeroExtAL, VoidTy, Int16Ty, Int16Ty);
+ SanCovTraceConstCmpFunction[2] = M.getOrInsertFunction(
+ SanCovTraceConstCmp4, SanCovTraceCmpZeroExtAL, VoidTy, Int32Ty, Int32Ty);
+ SanCovTraceConstCmpFunction[3] =
+ M.getOrInsertFunction(SanCovTraceConstCmp8, VoidTy, Int64Ty, Int64Ty);
+
+ {
+ AttributeList AL;
+ if (TargetTriple.getArch() == Triple::x86_64)
+ AL = AL.addParamAttribute(*C, 0, Attribute::ZExt);
+ SanCovTraceDivFunction[0] =
+ M.getOrInsertFunction(SanCovTraceDiv4, AL, VoidTy, IRB.getInt32Ty());
+ }
+ SanCovTraceDivFunction[1] =
+ M.getOrInsertFunction(SanCovTraceDiv8, VoidTy, Int64Ty);
+ SanCovTraceGepFunction =
+ M.getOrInsertFunction(SanCovTraceGep, VoidTy, IntptrTy);
+ SanCovTraceSwitchFunction =
+ M.getOrInsertFunction(SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy);
+
+ Constant *SanCovLowestStackConstant =
+ M.getOrInsertGlobal(SanCovLowestStackName, IntptrTy);
+ SanCovLowestStack = dyn_cast<GlobalVariable>(SanCovLowestStackConstant);
+ if (!SanCovLowestStack) {
+ C->emitError(StringRef("'") + SanCovLowestStackName +
+ "' should not be declared by the user");
+ return true;
+ }
+ SanCovLowestStack->setThreadLocalMode(
+ GlobalValue::ThreadLocalMode::InitialExecTLSModel);
+ if (Options.StackDepth && !SanCovLowestStack->isDeclaration())
+ SanCovLowestStack->setInitializer(Constant::getAllOnesValue(IntptrTy));
+
+ // We insert an empty inline asm after cov callbacks to avoid callback merge.
+ EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
+ StringRef(""), StringRef(""),
+ /*hasSideEffects=*/true);
+
+ SanCovTracePC = M.getOrInsertFunction(SanCovTracePCName, VoidTy);
+ SanCovTracePCGuard =
+ M.getOrInsertFunction(SanCovTracePCGuardName, VoidTy, Int32PtrTy);
+
+ for (auto &F : M)
+ instrumentFunction(F, DTCallback, PDTCallback);
+
+ Function *Ctor = nullptr;
+
+ if (FunctionGuardArray)
+ Ctor = CreateInitCallsForSections(M, SanCovModuleCtorTracePcGuardName,
+ SanCovTracePCGuardInitName, Int32PtrTy,
+ SanCovGuardsSectionName);
+ if (Function8bitCounterArray)
+ Ctor = CreateInitCallsForSections(M, SanCovModuleCtor8bitCountersName,
+ SanCov8bitCountersInitName, Int8PtrTy,
+ SanCovCountersSectionName);
+ if (Ctor && Options.PCTable) {
+ auto SecStartEnd = CreateSecStartEnd(M, SanCovPCsSectionName, IntptrPtrTy);
+ FunctionCallee InitFunction = declareSanitizerInitFunction(
+ M, SanCovPCsInitName, {IntptrPtrTy, IntptrPtrTy});
+ IRBuilder<> IRBCtor(Ctor->getEntryBlock().getTerminator());
+ IRBCtor.CreateCall(InitFunction, {SecStartEnd.first, SecStartEnd.second});
+ }
+ // We don't reference these arrays directly in any of our runtime functions,
+ // so we need to prevent them from being dead stripped.
+ if (TargetTriple.isOSBinFormatMachO())
+ appendToUsed(M, GlobalsToAppendToUsed);
+ appendToCompilerUsed(M, GlobalsToAppendToCompilerUsed);
+ return true;
+}
+
+// True if block has successors and it dominates all of them.
+static bool isFullDominator(const BasicBlock *BB, const DominatorTree *DT) {
+ if (succ_begin(BB) == succ_end(BB))
+ return false;
+
+ for (const BasicBlock *SUCC : make_range(succ_begin(BB), succ_end(BB))) {
+ if (!DT->dominates(BB, SUCC))
+ return false;
+ }
+
+ return true;
+}
+
+// True if block has predecessors and it postdominates all of them.
+static bool isFullPostDominator(const BasicBlock *BB,
+ const PostDominatorTree *PDT) {
+ if (pred_begin(BB) == pred_end(BB))
+ return false;
+
+ for (const BasicBlock *PRED : make_range(pred_begin(BB), pred_end(BB))) {
+ if (!PDT->dominates(BB, PRED))
+ return false;
+ }
+
+ return true;
+}
+
+static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB,
+ const DominatorTree *DT,
+ const PostDominatorTree *PDT,
+ const SanitizerCoverageOptions &Options) {
+ // Don't insert coverage for blocks containing nothing but unreachable: we
+ // will never call __sanitizer_cov() for them, so counting them in
+ // NumberOfInstrumentedBlocks() might complicate calculation of code coverage
+ // percentage. Also, unreachable instructions frequently have no debug
+ // locations.
+ if (isa<UnreachableInst>(BB->getFirstNonPHIOrDbgOrLifetime()))
+ return false;
+
+ // Don't insert coverage into blocks without a valid insertion point
+ // (catchswitch blocks).
+ if (BB->getFirstInsertionPt() == BB->end())
+ return false;
+
+ if (Options.NoPrune || &F.getEntryBlock() == BB)
+ return true;
+
+ if (Options.CoverageType == SanitizerCoverageOptions::SCK_Function &&
+ &F.getEntryBlock() != BB)
+ return false;
+
+ // Do not instrument full dominators, or full post-dominators with multiple
+ // predecessors.
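+ // For example, in an if/else diamond the branching block (a full dominator
+ // of both arms, unless it is the function entry block) and the join block
+ // (a full post-dominator with two predecessors) are pruned, while the two
+ // arms are instrumented; coverage of either arm implies the other two ran.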
+ return !isFullDominator(BB, DT)
+ && !(isFullPostDominator(BB, PDT) && !BB->getSinglePredecessor());
+}
+
+
+// Returns true iff From->To is a backedge.
+// A twist here is that we treat From->To as a backedge if
+// * To dominates From or
+// * To->UniqueSuccessor dominates From
+static bool IsBackEdge(BasicBlock *From, BasicBlock *To,
+ const DominatorTree *DT) {
+ if (DT->dominates(To, From))
+ return true;
+ if (auto Next = To->getUniqueSuccessor())
+ if (DT->dominates(Next, From))
+ return true;
+ return false;
+}
+
+// Prunes uninteresting Cmp instrumentation:
+// * CMP instructions that feed into loop backedge branch.
+//
+// Note that Cmp pruning is controlled by the same flag as the
+// BB pruning.
+static bool IsInterestingCmp(ICmpInst *CMP, const DominatorTree *DT,
+ const SanitizerCoverageOptions &Options) {
+ if (!Options.NoPrune)
+ if (CMP->hasOneUse())
+ if (auto BR = dyn_cast<BranchInst>(CMP->user_back()))
+ for (BasicBlock *B : BR->successors())
+ if (IsBackEdge(BR->getParent(), B, DT))
+ return false;
+ return true;
+}
+
+void ModuleSanitizerCoverage::instrumentFunction(
+ Function &F, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) {
+ if (F.empty())
+ return;
+ if (F.getName().find(".module_ctor") != std::string::npos)
+ return; // Should not instrument sanitizer init functions.
+ if (F.getName().startswith("__sanitizer_"))
+ return; // Don't instrument __sanitizer_* callbacks.
+ // Don't touch available_externally functions; their actual body is elsewhere.
+ if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
+ return;
+ // Don't instrument MSVC CRT configuration helpers. They may run before normal
+ // initialization.
+ if (F.getName() == "__local_stdio_printf_options" ||
+ F.getName() == "__local_stdio_scanf_options")
+ return;
+ if (isa<UnreachableInst>(F.getEntryBlock().getTerminator()))
+ return;
+ // Don't instrument functions using SEH for now. Splitting basic blocks like
+ // we do for coverage breaks WinEHPrepare.
+ // FIXME: Remove this when SEH no longer uses landingpad pattern matching.
+ if (F.hasPersonalityFn() &&
+ isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
+ return;
+ if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge)
+ SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions().setIgnoreUnreachableDests());
+ SmallVector<Instruction *, 8> IndirCalls;
+ SmallVector<BasicBlock *, 16> BlocksToInstrument;
+ SmallVector<Instruction *, 8> CmpTraceTargets;
+ SmallVector<Instruction *, 8> SwitchTraceTargets;
+ SmallVector<BinaryOperator *, 8> DivTraceTargets;
+ SmallVector<GetElementPtrInst *, 8> GepTraceTargets;
+
+ const DominatorTree *DT = DTCallback(F);
+ const PostDominatorTree *PDT = PDTCallback(F);
+ bool IsLeafFunc = true;
+
+ for (auto &BB : F) {
+ if (shouldInstrumentBlock(F, &BB, DT, PDT, Options))
+ BlocksToInstrument.push_back(&BB);
+ for (auto &Inst : BB) {
+ if (Options.IndirectCalls) {
+ CallSite CS(&Inst);
+ if (CS && !CS.getCalledFunction())
+ IndirCalls.push_back(&Inst);
+ }
+ if (Options.TraceCmp) {
+ if (ICmpInst *CMP = dyn_cast<ICmpInst>(&Inst))
+ if (IsInterestingCmp(CMP, DT, Options))
+ CmpTraceTargets.push_back(&Inst);
+ if (isa<SwitchInst>(&Inst))
+ SwitchTraceTargets.push_back(&Inst);
+ }
+ if (Options.TraceDiv)
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&Inst))
+ if (BO->getOpcode() == Instruction::SDiv ||
+ BO->getOpcode() == Instruction::UDiv)
+ DivTraceTargets.push_back(BO);
+ if (Options.TraceGep)
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Inst))
+ GepTraceTargets.push_back(GEP);
+ if (Options.StackDepth)
+ if (isa<InvokeInst>(Inst) ||
+ (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst)))
+ IsLeafFunc = false;
+ }
+ }
+
+ InjectCoverage(F, BlocksToInstrument, IsLeafFunc);
+ InjectCoverageForIndirectCalls(F, IndirCalls);
+ InjectTraceForCmp(F, CmpTraceTargets);
+ InjectTraceForSwitch(F, SwitchTraceTargets);
+ InjectTraceForDiv(F, DivTraceTargets);
+ InjectTraceForGep(F, GepTraceTargets);
+}
+
+GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection(
+ size_t NumElements, Function &F, Type *Ty, const char *Section) {
+ ArrayType *ArrayTy = ArrayType::get(Ty, NumElements);
+ auto Array = new GlobalVariable(
+ *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage,
+ Constant::getNullValue(ArrayTy), "__sancov_gen_");
+
+ if (TargetTriple.supportsCOMDAT() && !F.isInterposable())
+ if (auto Comdat =
+ GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId))
+ Array->setComdat(Comdat);
+ Array->setSection(getSectionName(Section));
+ Array->setAlignment(Align(Ty->isPointerTy()
+ ? DL->getPointerSize()
+ : Ty->getPrimitiveSizeInBits() / 8));
+ GlobalsToAppendToUsed.push_back(Array);
+ GlobalsToAppendToCompilerUsed.push_back(Array);
+ MDNode *MD = MDNode::get(F.getContext(), ValueAsMetadata::get(&F));
+ Array->addMetadata(LLVMContext::MD_associated, *MD);
+
+ return Array;
+}
+
+GlobalVariable *
+ModuleSanitizerCoverage::CreatePCArray(Function &F,
+ ArrayRef<BasicBlock *> AllBlocks) {
+ size_t N = AllBlocks.size();
+ assert(N);
+ SmallVector<Constant *, 32> PCs;
+ IRBuilder<> IRB(&*F.getEntryBlock().getFirstInsertionPt());
+ for (size_t i = 0; i < N; i++) {
+ if (&F.getEntryBlock() == AllBlocks[i]) {
+ PCs.push_back((Constant *)IRB.CreatePointerCast(&F, IntptrPtrTy));
+ PCs.push_back((Constant *)IRB.CreateIntToPtr(
+ ConstantInt::get(IntptrTy, 1), IntptrPtrTy));
+ } else {
+ PCs.push_back((Constant *)IRB.CreatePointerCast(
+ BlockAddress::get(AllBlocks[i]), IntptrPtrTy));
+ PCs.push_back((Constant *)IRB.CreateIntToPtr(
+ ConstantInt::get(IntptrTy, 0), IntptrPtrTy));
+ }
+ }
+ auto *PCArray = CreateFunctionLocalArrayInSection(N * 2, F, IntptrPtrTy,
+ SanCovPCsSectionName);
+ PCArray->setInitializer(
+ ConstantArray::get(ArrayType::get(IntptrPtrTy, N * 2), PCs));
+ PCArray->setConstant(true);
+
+ return PCArray;
+}
+
+void ModuleSanitizerCoverage::CreateFunctionLocalArrays(
+ Function &F, ArrayRef<BasicBlock *> AllBlocks) {
+ if (Options.TracePCGuard)
+ FunctionGuardArray = CreateFunctionLocalArrayInSection(
+ AllBlocks.size(), F, Int32Ty, SanCovGuardsSectionName);
+
+ if (Options.Inline8bitCounters)
+ Function8bitCounterArray = CreateFunctionLocalArrayInSection(
+ AllBlocks.size(), F, Int8Ty, SanCovCountersSectionName);
+
+ if (Options.PCTable)
+ FunctionPCsArray = CreatePCArray(F, AllBlocks);
+}
+
+bool ModuleSanitizerCoverage::InjectCoverage(Function &F,
+ ArrayRef<BasicBlock *> AllBlocks,
+ bool IsLeafFunc) {
+ if (AllBlocks.empty()) return false;
+ CreateFunctionLocalArrays(F, AllBlocks);
+ for (size_t i = 0, N = AllBlocks.size(); i < N; i++)
+ InjectCoverageAtBlock(F, *AllBlocks[i], i, IsLeafFunc);
+ return true;
+}
+
+// On every indirect call we insert a call to the run-time function
+// __sanitizer_cov_trace_pc_indir with a single parameter: the callee address.
+// The address of the caller is obtained implicitly by the run-time via the
+// caller PC.
+void ModuleSanitizerCoverage::InjectCoverageForIndirectCalls(
+ Function &F, ArrayRef<Instruction *> IndirCalls) {
+ if (IndirCalls.empty())
+ return;
+ assert(Options.TracePC || Options.TracePCGuard || Options.Inline8bitCounters);
+ for (auto I : IndirCalls) {
+ IRBuilder<> IRB(I);
+ CallSite CS(I);
+ Value *Callee = CS.getCalledValue();
+ if (isa<InlineAsm>(Callee))
+ continue;
+ IRB.CreateCall(SanCovTracePCIndir, IRB.CreatePointerCast(Callee, IntptrTy));
+ }
+}
+
+// For every switch statement we insert a call:
+// __sanitizer_cov_trace_switch(CondValue,
+// {NumCases, ValueSizeInBits, Case0Value, Case1Value, Case2Value, ... })
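+// For example (hypothetical IR): for "switch i32 %x" with case values 10 and
+// 1, the emitted call is roughly
+//   __sanitizer_cov_trace_switch(zext %x to i64, {2, 32, 1, 10})
+// i.e. {NumCases, condition bit width, case values sorted and widened to i64},
+// with the array materialized as a global constant.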
+
+void ModuleSanitizerCoverage::InjectTraceForSwitch(
+ Function &, ArrayRef<Instruction *> SwitchTraceTargets) {
+ for (auto I : SwitchTraceTargets) {
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+ IRBuilder<> IRB(I);
+ SmallVector<Constant *, 16> Initializers;
+ Value *Cond = SI->getCondition();
+ if (Cond->getType()->getScalarSizeInBits() >
+ Int64Ty->getScalarSizeInBits())
+ continue;
+ Initializers.push_back(ConstantInt::get(Int64Ty, SI->getNumCases()));
+ Initializers.push_back(
+ ConstantInt::get(Int64Ty, Cond->getType()->getScalarSizeInBits()));
+ if (Cond->getType()->getScalarSizeInBits() <
+ Int64Ty->getScalarSizeInBits())
+ Cond = IRB.CreateIntCast(Cond, Int64Ty, false);
+ for (auto It : SI->cases()) {
+ Constant *C = It.getCaseValue();
+ if (C->getType()->getScalarSizeInBits() <
+ Int64Ty->getScalarSizeInBits())
+ C = ConstantExpr::getCast(CastInst::ZExt, It.getCaseValue(), Int64Ty);
+ Initializers.push_back(C);
+ }
+ llvm::sort(Initializers.begin() + 2, Initializers.end(),
+ [](const Constant *A, const Constant *B) {
+ return cast<ConstantInt>(A)->getLimitedValue() <
+ cast<ConstantInt>(B)->getLimitedValue();
+ });
+ ArrayType *ArrayOfInt64Ty = ArrayType::get(Int64Ty, Initializers.size());
+ GlobalVariable *GV = new GlobalVariable(
+ *CurModule, ArrayOfInt64Ty, false, GlobalVariable::InternalLinkage,
+ ConstantArray::get(ArrayOfInt64Ty, Initializers),
+ "__sancov_gen_cov_switch_values");
+ IRB.CreateCall(SanCovTraceSwitchFunction,
+ {Cond, IRB.CreatePointerCast(GV, Int64PtrTy)});
+ }
+ }
+}
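+
+// For illustration (a sketch of the documented interface): the array built
+// above matches the layout expected by
+//
+//   // Cases[0] = number of case constants, Cases[1] = size of Val in bits,
+//   // Cases[2..] = the case constants, sorted in ascending order.
+//   extern "C" void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases);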
+
+void ModuleSanitizerCoverage::InjectTraceForDiv(
+ Function &, ArrayRef<BinaryOperator *> DivTraceTargets) {
+ for (auto BO : DivTraceTargets) {
+ IRBuilder<> IRB(BO);
+ Value *A1 = BO->getOperand(1);
+ if (isa<ConstantInt>(A1)) continue;
+ if (!A1->getType()->isIntegerTy())
+ continue;
+ uint64_t TypeSize = DL->getTypeStoreSizeInBits(A1->getType());
+ int CallbackIdx = TypeSize == 32 ? 0 :
+ TypeSize == 64 ? 1 : -1;
+ if (CallbackIdx < 0) continue;
+ auto Ty = Type::getIntNTy(*C, TypeSize);
+ IRB.CreateCall(SanCovTraceDivFunction[CallbackIdx],
+ {IRB.CreateIntCast(A1, Ty, true)});
+ }
+}
+
+void ModuleSanitizerCoverage::InjectTraceForGep(
+ Function &, ArrayRef<GetElementPtrInst *> GepTraceTargets) {
+ for (auto GEP : GepTraceTargets) {
+ IRBuilder<> IRB(GEP);
+ for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
+ if (!isa<ConstantInt>(*I) && (*I)->getType()->isIntegerTy())
+ IRB.CreateCall(SanCovTraceGepFunction,
+ {IRB.CreateIntCast(*I, IntptrTy, true)});
+ }
+}
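+
+// For illustration (a sketch of the documented interface): the callbacks used
+// by the two functions above are roughly
+//
+//   extern "C" void __sanitizer_cov_trace_div4(uint32_t Val); // 32-bit divisor
+//   extern "C" void __sanitizer_cov_trace_div8(uint64_t Val); // 64-bit divisor
+//   extern "C" void __sanitizer_cov_trace_gep(uintptr_t Idx); // non-const GEP index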
+
+void ModuleSanitizerCoverage::InjectTraceForCmp(
+ Function &, ArrayRef<Instruction *> CmpTraceTargets) {
+ for (auto I : CmpTraceTargets) {
+ if (ICmpInst *ICMP = dyn_cast<ICmpInst>(I)) {
+ IRBuilder<> IRB(ICMP);
+ Value *A0 = ICMP->getOperand(0);
+ Value *A1 = ICMP->getOperand(1);
+ if (!A0->getType()->isIntegerTy())
+ continue;
+ uint64_t TypeSize = DL->getTypeStoreSizeInBits(A0->getType());
+ int CallbackIdx = TypeSize == 8 ? 0 :
+ TypeSize == 16 ? 1 :
+ TypeSize == 32 ? 2 :
+ TypeSize == 64 ? 3 : -1;
+ if (CallbackIdx < 0) continue;
+      // __sanitizer_cov_trace_cmp{1,2,4,8}(A0, A1); // size-specialized callback
+ auto CallbackFunc = SanCovTraceCmpFunction[CallbackIdx];
+ bool FirstIsConst = isa<ConstantInt>(A0);
+ bool SecondIsConst = isa<ConstantInt>(A1);
+ // If both are const, then we don't need such a comparison.
+ if (FirstIsConst && SecondIsConst) continue;
+ // If only one is const, then make it the first callback argument.
+ if (FirstIsConst || SecondIsConst) {
+ CallbackFunc = SanCovTraceConstCmpFunction[CallbackIdx];
+ if (SecondIsConst)
+ std::swap(A0, A1);
+ }
+
+ auto Ty = Type::getIntNTy(*C, TypeSize);
+ IRB.CreateCall(CallbackFunc, {IRB.CreateIntCast(A0, Ty, true),
+ IRB.CreateIntCast(A1, Ty, true)});
+ }
+ }
+}
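+
+// For illustration (a sketch of the documented interface): the size-specialized
+// callbacks chosen above are roughly
+//
+//   extern "C" void __sanitizer_cov_trace_cmp1(uint8_t A0, uint8_t A1);
+//   extern "C" void __sanitizer_cov_trace_cmp2(uint16_t A0, uint16_t A1);
+//   extern "C" void __sanitizer_cov_trace_cmp4(uint32_t A0, uint32_t A1);
+//   extern "C" void __sanitizer_cov_trace_cmp8(uint64_t A0, uint64_t A1);
+//
+// plus the __sanitizer_cov_trace_const_cmp{1,2,4,8} variants, where the first
+// argument is the compile-time-constant operand (hence the swap above).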
+
+void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
+ size_t Idx,
+ bool IsLeafFunc) {
+ BasicBlock::iterator IP = BB.getFirstInsertionPt();
+ bool IsEntryBB = &BB == &F.getEntryBlock();
+ DebugLoc EntryLoc;
+ if (IsEntryBB) {
+ if (auto SP = F.getSubprogram())
+ EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP);
+ // Keep static allocas and llvm.localescape calls in the entry block. Even
+ // if we aren't splitting the block, it's nice for allocas to be before
+ // calls.
+ IP = PrepareToSplitEntryBlock(BB, IP);
+ } else {
+ EntryLoc = IP->getDebugLoc();
+ }
+
+ IRBuilder<> IRB(&*IP);
+ IRB.SetCurrentDebugLocation(EntryLoc);
+ if (Options.TracePC) {
+ IRB.CreateCall(SanCovTracePC); // gets the PC using GET_CALLER_PC.
+ IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
+ }
+ if (Options.TracePCGuard) {
+ auto GuardPtr = IRB.CreateIntToPtr(
+ IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
+ ConstantInt::get(IntptrTy, Idx * 4)),
+ Int32PtrTy);
+ IRB.CreateCall(SanCovTracePCGuard, GuardPtr);
+ IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
+ }
+ if (Options.Inline8bitCounters) {
+ auto CounterPtr = IRB.CreateGEP(
+ Function8bitCounterArray->getValueType(), Function8bitCounterArray,
+ {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)});
+ auto Load = IRB.CreateLoad(Int8Ty, CounterPtr);
+ auto Inc = IRB.CreateAdd(Load, ConstantInt::get(Int8Ty, 1));
+ auto Store = IRB.CreateStore(Inc, CounterPtr);
+ SetNoSanitizeMetadata(Load);
+ SetNoSanitizeMetadata(Store);
+ }
+ if (Options.StackDepth && IsEntryBB && !IsLeafFunc) {
+ // Check stack depth. If it's the deepest so far, record it.
+ Module *M = F.getParent();
+ Function *GetFrameAddr = Intrinsic::getDeclaration(
+ M, Intrinsic::frameaddress,
+ IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
+ auto FrameAddrPtr =
+ IRB.CreateCall(GetFrameAddr, {Constant::getNullValue(Int32Ty)});
+ auto FrameAddrInt = IRB.CreatePtrToInt(FrameAddrPtr, IntptrTy);
+ auto LowestStack = IRB.CreateLoad(IntptrTy, SanCovLowestStack);
+ auto IsStackLower = IRB.CreateICmpULT(FrameAddrInt, LowestStack);
+ auto ThenTerm = SplitBlockAndInsertIfThen(IsStackLower, &*IP, false);
+ IRBuilder<> ThenIRB(ThenTerm);
+ auto Store = ThenIRB.CreateStore(FrameAddrInt, SanCovLowestStack);
+ SetNoSanitizeMetadata(LowestStack);
+ SetNoSanitizeMetadata(Store);
+ }
+}
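+
+// For illustration (a rough per-block summary of the code above, with the
+// runtime names taken from the documented interface):
+//
+//   // TracePC:            __sanitizer_cov_trace_pc();
+//   // TracePCGuard:       __sanitizer_cov_trace_pc_guard(&FunctionGuardArray[Idx]);
+//   // Inline8bitCounters: Function8bitCounterArray[Idx]++;   (no callback)
+//
+// where the guard callback is expected to be declared as
+//   extern "C" void __sanitizer_cov_trace_pc_guard(uint32_t *Guard);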
+
+std::string
+ModuleSanitizerCoverage::getSectionName(const std::string &Section) const {
+ if (TargetTriple.isOSBinFormatCOFF()) {
+ if (Section == SanCovCountersSectionName)
+ return ".SCOV$CM";
+ if (Section == SanCovPCsSectionName)
+ return ".SCOVP$M";
+ return ".SCOV$GM"; // For SanCovGuardsSectionName.
+ }
+ if (TargetTriple.isOSBinFormatMachO())
+ return "__DATA,__" + Section;
+ return "__" + Section;
+}
+
+std::string
+ModuleSanitizerCoverage::getSectionStart(const std::string &Section) const {
+ if (TargetTriple.isOSBinFormatMachO())
+ return "\1section$start$__DATA$__" + Section;
+ return "__start___" + Section;
+}
+
+std::string
+ModuleSanitizerCoverage::getSectionEnd(const std::string &Section) const {
+ if (TargetTriple.isOSBinFormatMachO())
+ return "\1section$end$__DATA$__" + Section;
+ return "__stop___" + Section;
+}
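+
+// Illustration (a rough sketch; section names are assumed from the constants
+// defined earlier in this file and from the documented interface): on ELF the
+// linker synthesizes __start_<sect>/__stop_<sect> symbols for each section,
+// which is how the runtime locates the per-module arrays, e.g.
+//
+//   extern "C" void __sanitizer_cov_8bit_counters_init(char *Start, char *Stop);
+//   // invoked with __start___sancov_cntrs / __stop___sancov_cntrs on ELF.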
+
+char ModuleSanitizerCoverageLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ModuleSanitizerCoverageLegacyPass, "sancov",
+ "Pass for instrumenting coverage on functions", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_END(ModuleSanitizerCoverageLegacyPass, "sancov",
+ "Pass for instrumenting coverage on functions", false,
+ false)
+ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass(
+ const SanitizerCoverageOptions &Options) {
+ return new ModuleSanitizerCoverageLegacyPass(Options);
+}
diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
new file mode 100644
index 000000000000..ac274a155a80
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -0,0 +1,735 @@
+//===-- ThreadSanitizer.cpp - race detector -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer, a race detector.
+//
+// The tool is under development; for details about previous versions see
+// http://code.google.com/p/data-race-test
+//
+// The instrumentation phase is quite simple:
+// - Insert calls to run-time library before every memory access.
+// - Optimizations may apply to avoid instrumenting some of the accesses.
+// - Insert calls at function entry/exit.
+// The rest is handled by the run-time library.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/EscapeEnumerator.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "tsan"
+
+static cl::opt<bool> ClInstrumentMemoryAccesses(
+ "tsan-instrument-memory-accesses", cl::init(true),
+ cl::desc("Instrument memory accesses"), cl::Hidden);
+static cl::opt<bool> ClInstrumentFuncEntryExit(
+ "tsan-instrument-func-entry-exit", cl::init(true),
+ cl::desc("Instrument function entry and exit"), cl::Hidden);
+static cl::opt<bool> ClHandleCxxExceptions(
+ "tsan-handle-cxx-exceptions", cl::init(true),
+ cl::desc("Handle C++ exceptions (insert cleanup blocks for unwinding)"),
+ cl::Hidden);
+static cl::opt<bool> ClInstrumentAtomics(
+ "tsan-instrument-atomics", cl::init(true),
+ cl::desc("Instrument atomics"), cl::Hidden);
+static cl::opt<bool> ClInstrumentMemIntrinsics(
+ "tsan-instrument-memintrinsics", cl::init(true),
+ cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
+
+STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
+STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
+STATISTIC(NumOmittedReadsBeforeWrite,
+ "Number of reads ignored due to following writes");
+STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size");
+STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes");
+STATISTIC(NumInstrumentedVtableReads, "Number of vtable ptr reads");
+STATISTIC(NumOmittedReadsFromConstantGlobals,
+ "Number of reads from constant globals");
+STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads");
+STATISTIC(NumOmittedNonCaptured, "Number of accesses ignored due to capturing");
+
+static const char *const kTsanModuleCtorName = "tsan.module_ctor";
+static const char *const kTsanInitName = "__tsan_init";
+
+namespace {
+
+/// ThreadSanitizer: instrument the code in the module to find races.
+///
+/// The tsan runtime library API function declarations are added to the module
+/// by initialize(), which sanitizeFunction() calls before instrumenting a
+/// function. The __tsan_init module constructor is registered separately via
+/// insertModuleCtor().
+struct ThreadSanitizer {
+ bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);
+
+private:
+ void initialize(Module &M);
+ bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
+ bool instrumentAtomic(Instruction *I, const DataLayout &DL);
+ bool instrumentMemIntrinsic(Instruction *I);
+ void chooseInstructionsToInstrument(SmallVectorImpl<Instruction *> &Local,
+ SmallVectorImpl<Instruction *> &All,
+ const DataLayout &DL);
+ bool addrPointsToConstantData(Value *Addr);
+ int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
+ void InsertRuntimeIgnores(Function &F);
+
+ Type *IntptrTy;
+ FunctionCallee TsanFuncEntry;
+ FunctionCallee TsanFuncExit;
+ FunctionCallee TsanIgnoreBegin;
+ FunctionCallee TsanIgnoreEnd;
+  // Access sizes are powers of two: 1, 2, 4, 8, 16.
+ static const size_t kNumberOfAccessSizes = 5;
+ FunctionCallee TsanRead[kNumberOfAccessSizes];
+ FunctionCallee TsanWrite[kNumberOfAccessSizes];
+ FunctionCallee TsanUnalignedRead[kNumberOfAccessSizes];
+ FunctionCallee TsanUnalignedWrite[kNumberOfAccessSizes];
+ FunctionCallee TsanAtomicLoad[kNumberOfAccessSizes];
+ FunctionCallee TsanAtomicStore[kNumberOfAccessSizes];
+ FunctionCallee TsanAtomicRMW[AtomicRMWInst::LAST_BINOP + 1]
+ [kNumberOfAccessSizes];
+ FunctionCallee TsanAtomicCAS[kNumberOfAccessSizes];
+ FunctionCallee TsanAtomicThreadFence;
+ FunctionCallee TsanAtomicSignalFence;
+ FunctionCallee TsanVptrUpdate;
+ FunctionCallee TsanVptrLoad;
+ FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
+};
+
+struct ThreadSanitizerLegacyPass : FunctionPass {
+ ThreadSanitizerLegacyPass() : FunctionPass(ID) {}
+ StringRef getPassName() const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+ bool doInitialization(Module &M) override;
+ static char ID; // Pass identification, replacement for typeid.
+private:
+ Optional<ThreadSanitizer> TSan;
+};
+
+void insertModuleCtor(Module &M) {
+ getOrCreateSanitizerCtorAndInitFunctions(
+ M, kTsanModuleCtorName, kTsanInitName, /*InitArgTypes=*/{},
+ /*InitArgs=*/{},
+ // This callback is invoked when the functions are created the first
+ // time. Hook them into the global ctors list in that case:
+ [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, Ctor, 0); });
+}
+
+} // namespace
+
+PreservedAnalyses ThreadSanitizerPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ ThreadSanitizer TSan;
+ if (TSan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+PreservedAnalyses ThreadSanitizerPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ insertModuleCtor(M);
+ return PreservedAnalyses::none();
+}
+
+char ThreadSanitizerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ThreadSanitizerLegacyPass, "tsan",
+ "ThreadSanitizer: detects data races.", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ThreadSanitizerLegacyPass, "tsan",
+ "ThreadSanitizer: detects data races.", false, false)
+
+StringRef ThreadSanitizerLegacyPass::getPassName() const {
+ return "ThreadSanitizerLegacyPass";
+}
+
+void ThreadSanitizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
+
+bool ThreadSanitizerLegacyPass::doInitialization(Module &M) {
+ insertModuleCtor(M);
+ TSan.emplace();
+ return true;
+}
+
+bool ThreadSanitizerLegacyPass::runOnFunction(Function &F) {
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ TSan->sanitizeFunction(F, TLI);
+ return true;
+}
+
+FunctionPass *llvm::createThreadSanitizerLegacyPassPass() {
+ return new ThreadSanitizerLegacyPass();
+}
+
+void ThreadSanitizer::initialize(Module &M) {
+ const DataLayout &DL = M.getDataLayout();
+ IntptrTy = DL.getIntPtrType(M.getContext());
+
+ IRBuilder<> IRB(M.getContext());
+ AttributeList Attr;
+ Attr = Attr.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
+ // Initialize the callbacks.
+ TsanFuncEntry = M.getOrInsertFunction("__tsan_func_entry", Attr,
+ IRB.getVoidTy(), IRB.getInt8PtrTy());
+ TsanFuncExit =
+ M.getOrInsertFunction("__tsan_func_exit", Attr, IRB.getVoidTy());
+ TsanIgnoreBegin = M.getOrInsertFunction("__tsan_ignore_thread_begin", Attr,
+ IRB.getVoidTy());
+ TsanIgnoreEnd =
+ M.getOrInsertFunction("__tsan_ignore_thread_end", Attr, IRB.getVoidTy());
+ IntegerType *OrdTy = IRB.getInt32Ty();
+ for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
+ const unsigned ByteSize = 1U << i;
+ const unsigned BitSize = ByteSize * 8;
+ std::string ByteSizeStr = utostr(ByteSize);
+ std::string BitSizeStr = utostr(BitSize);
+ SmallString<32> ReadName("__tsan_read" + ByteSizeStr);
+ TsanRead[i] = M.getOrInsertFunction(ReadName, Attr, IRB.getVoidTy(),
+ IRB.getInt8PtrTy());
+
+ SmallString<32> WriteName("__tsan_write" + ByteSizeStr);
+ TsanWrite[i] = M.getOrInsertFunction(WriteName, Attr, IRB.getVoidTy(),
+ IRB.getInt8PtrTy());
+
+ SmallString<64> UnalignedReadName("__tsan_unaligned_read" + ByteSizeStr);
+ TsanUnalignedRead[i] = M.getOrInsertFunction(
+ UnalignedReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+
+ SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + ByteSizeStr);
+ TsanUnalignedWrite[i] = M.getOrInsertFunction(
+ UnalignedWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy());
+
+ Type *Ty = Type::getIntNTy(M.getContext(), BitSize);
+ Type *PtrTy = Ty->getPointerTo();
+ SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load");
+ TsanAtomicLoad[i] =
+ M.getOrInsertFunction(AtomicLoadName, Attr, Ty, PtrTy, OrdTy);
+
+ SmallString<32> AtomicStoreName("__tsan_atomic" + BitSizeStr + "_store");
+ TsanAtomicStore[i] = M.getOrInsertFunction(
+ AtomicStoreName, Attr, IRB.getVoidTy(), PtrTy, Ty, OrdTy);
+
+ for (int op = AtomicRMWInst::FIRST_BINOP;
+ op <= AtomicRMWInst::LAST_BINOP; ++op) {
+ TsanAtomicRMW[op][i] = nullptr;
+ const char *NamePart = nullptr;
+ if (op == AtomicRMWInst::Xchg)
+ NamePart = "_exchange";
+ else if (op == AtomicRMWInst::Add)
+ NamePart = "_fetch_add";
+ else if (op == AtomicRMWInst::Sub)
+ NamePart = "_fetch_sub";
+ else if (op == AtomicRMWInst::And)
+ NamePart = "_fetch_and";
+ else if (op == AtomicRMWInst::Or)
+ NamePart = "_fetch_or";
+ else if (op == AtomicRMWInst::Xor)
+ NamePart = "_fetch_xor";
+ else if (op == AtomicRMWInst::Nand)
+ NamePart = "_fetch_nand";
+ else
+ continue;
+ SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart);
+ TsanAtomicRMW[op][i] =
+ M.getOrInsertFunction(RMWName, Attr, Ty, PtrTy, Ty, OrdTy);
+ }
+
+ SmallString<32> AtomicCASName("__tsan_atomic" + BitSizeStr +
+ "_compare_exchange_val");
+ TsanAtomicCAS[i] = M.getOrInsertFunction(AtomicCASName, Attr, Ty, PtrTy, Ty,
+ Ty, OrdTy, OrdTy);
+ }
+ TsanVptrUpdate =
+ M.getOrInsertFunction("__tsan_vptr_update", Attr, IRB.getVoidTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy());
+ TsanVptrLoad = M.getOrInsertFunction("__tsan_vptr_read", Attr,
+ IRB.getVoidTy(), IRB.getInt8PtrTy());
+ TsanAtomicThreadFence = M.getOrInsertFunction("__tsan_atomic_thread_fence",
+ Attr, IRB.getVoidTy(), OrdTy);
+ TsanAtomicSignalFence = M.getOrInsertFunction("__tsan_atomic_signal_fence",
+ Attr, IRB.getVoidTy(), OrdTy);
+
+ MemmoveFn =
+ M.getOrInsertFunction("memmove", Attr, IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
+ MemcpyFn =
+ M.getOrInsertFunction("memcpy", Attr, IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
+ MemsetFn =
+ M.getOrInsertFunction("memset", Attr, IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy);
+}
+
+static bool isVtableAccess(Instruction *I) {
+ if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa))
+ return Tag->isTBAAVtableAccess();
+ return false;
+}
+
+// Do not instrument known races/"benign races" that come from compiler
+// instrumentation. The user has no way of suppressing them.
+static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) {
+ // Peel off GEPs and BitCasts.
+ Addr = Addr->stripInBoundsOffsets();
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
+ if (GV->hasSection()) {
+ StringRef SectionName = GV->getSection();
+ // Check if the global is in the PGO counters section.
+ auto OF = Triple(M->getTargetTriple()).getObjectFormat();
+ if (SectionName.endswith(
+ getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))
+ return false;
+ }
+
+ // Check if the global is private gcov data.
+ if (GV->getName().startswith("__llvm_gcov") ||
+ GV->getName().startswith("__llvm_gcda"))
+ return false;
+ }
+
+  // Do not instrument accesses from different address spaces; we cannot deal
+ // with them.
+ if (Addr) {
+ Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
+ if (PtrTy->getPointerAddressSpace() != 0)
+ return false;
+ }
+
+ return true;
+}
+
+bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) {
+ // If this is a GEP, just analyze its pointer operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr))
+ Addr = GEP->getPointerOperand();
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
+ if (GV->isConstant()) {
+ // Reads from constant globals can not race with any writes.
+ NumOmittedReadsFromConstantGlobals++;
+ return true;
+ }
+ } else if (LoadInst *L = dyn_cast<LoadInst>(Addr)) {
+ if (isVtableAccess(L)) {
+ // Reads from a vtable pointer can not race with any writes.
+ NumOmittedReadsFromVtable++;
+ return true;
+ }
+ }
+ return false;
+}
+
+// Instrumenting some of the accesses may be proven redundant.
+// Currently handled:
+// - read-before-write (within same BB, no calls between)
+// - not captured variables
+//
+// We do not handle some of the patterns that should not survive
+// after the classic compiler optimizations.
+// E.g. two reads from the same temp should be eliminated by CSE,
+// two writes should be eliminated by DSE, etc.
+//
+// 'Local' is a vector of insns within the same BB (no calls between).
+// 'All' is a vector of insns that will be instrumented.
+void ThreadSanitizer::chooseInstructionsToInstrument(
+ SmallVectorImpl<Instruction *> &Local, SmallVectorImpl<Instruction *> &All,
+ const DataLayout &DL) {
+ SmallPtrSet<Value*, 8> WriteTargets;
+ // Iterate from the end.
+ for (Instruction *I : reverse(Local)) {
+ if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
+ Value *Addr = Store->getPointerOperand();
+ if (!shouldInstrumentReadWriteFromAddress(I->getModule(), Addr))
+ continue;
+ WriteTargets.insert(Addr);
+ } else {
+ LoadInst *Load = cast<LoadInst>(I);
+ Value *Addr = Load->getPointerOperand();
+ if (!shouldInstrumentReadWriteFromAddress(I->getModule(), Addr))
+ continue;
+ if (WriteTargets.count(Addr)) {
+ // We will write to this temp, so no reason to analyze the read.
+ NumOmittedReadsBeforeWrite++;
+ continue;
+ }
+ if (addrPointsToConstantData(Addr)) {
+ // Addr points to some constant data -- it can not race with any writes.
+ continue;
+ }
+ }
+ Value *Addr = isa<StoreInst>(*I)
+ ? cast<StoreInst>(I)->getPointerOperand()
+ : cast<LoadInst>(I)->getPointerOperand();
+ if (isa<AllocaInst>(GetUnderlyingObject(Addr, DL)) &&
+ !PointerMayBeCaptured(Addr, true, true)) {
+ // The variable is addressable but not captured, so it cannot be
+ // referenced from a different thread and participate in a data race
+ // (see llvm/Analysis/CaptureTracking.h for details).
+ NumOmittedNonCaptured++;
+ continue;
+ }
+ All.push_back(I);
+ }
+ Local.clear();
+}
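+
+// Illustrative sketch (hypothetical IR-level pattern): within one call-free
+// region of a basic block,
+//
+//   %v = load i32, i32* %p        ; read  -- omitted (a write to %p follows)
+//   store i32 %inc, i32* %p       ; write -- instrumented
+//
+// the reverse scan above records %p as a write target first and then drops the
+// earlier read of the same address (counted in NumOmittedReadsBeforeWrite).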
+
+static bool isAtomic(Instruction *I) {
+ // TODO: Ask TTI whether synchronization scope is between threads.
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->isAtomic() && LI->getSyncScopeID() != SyncScope::SingleThread;
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->isAtomic() && SI->getSyncScopeID() != SyncScope::SingleThread;
+ if (isa<AtomicRMWInst>(I))
+ return true;
+ if (isa<AtomicCmpXchgInst>(I))
+ return true;
+ if (isa<FenceInst>(I))
+ return true;
+ return false;
+}
+
+void ThreadSanitizer::InsertRuntimeIgnores(Function &F) {
+ IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+ IRB.CreateCall(TsanIgnoreBegin);
+ EscapeEnumerator EE(F, "tsan_ignore_cleanup", ClHandleCxxExceptions);
+ while (IRBuilder<> *AtExit = EE.Next()) {
+ AtExit->CreateCall(TsanIgnoreEnd);
+ }
+}
+
+bool ThreadSanitizer::sanitizeFunction(Function &F,
+ const TargetLibraryInfo &TLI) {
+  // This is required to prevent instrumenting the call to __tsan_init from
+  // within the module constructor.
+ if (F.getName() == kTsanModuleCtorName)
+ return false;
+ initialize(*F.getParent());
+ SmallVector<Instruction*, 8> AllLoadsAndStores;
+ SmallVector<Instruction*, 8> LocalLoadsAndStores;
+ SmallVector<Instruction*, 8> AtomicAccesses;
+ SmallVector<Instruction*, 8> MemIntrinCalls;
+ bool Res = false;
+ bool HasCalls = false;
+ bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread);
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
+ // Traverse all instructions, collect loads/stores/returns, check for calls.
+ for (auto &BB : F) {
+ for (auto &Inst : BB) {
+ if (isAtomic(&Inst))
+ AtomicAccesses.push_back(&Inst);
+ else if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
+ LocalLoadsAndStores.push_back(&Inst);
+ else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
+ if (CallInst *CI = dyn_cast<CallInst>(&Inst))
+ maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
+ if (isa<MemIntrinsic>(Inst))
+ MemIntrinCalls.push_back(&Inst);
+ HasCalls = true;
+ chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores,
+ DL);
+ }
+ }
+ chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores, DL);
+ }
+
+ // We have collected all loads and stores.
+ // FIXME: many of these accesses do not need to be checked for races
+ // (e.g. variables that do not escape, etc).
+
+ // Instrument memory accesses only if we want to report bugs in the function.
+ if (ClInstrumentMemoryAccesses && SanitizeFunction)
+ for (auto Inst : AllLoadsAndStores) {
+ Res |= instrumentLoadOrStore(Inst, DL);
+ }
+
+ // Instrument atomic memory accesses in any case (they can be used to
+ // implement synchronization).
+ if (ClInstrumentAtomics)
+ for (auto Inst : AtomicAccesses) {
+ Res |= instrumentAtomic(Inst, DL);
+ }
+
+ if (ClInstrumentMemIntrinsics && SanitizeFunction)
+ for (auto Inst : MemIntrinCalls) {
+ Res |= instrumentMemIntrinsic(Inst);
+ }
+
+ if (F.hasFnAttribute("sanitize_thread_no_checking_at_run_time")) {
+ assert(!F.hasFnAttribute(Attribute::SanitizeThread));
+ if (HasCalls)
+ InsertRuntimeIgnores(F);
+ }
+
+ // Instrument function entry/exit points if there were instrumented accesses.
+ if ((Res || HasCalls) && ClInstrumentFuncEntryExit) {
+ IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+ Value *ReturnAddress = IRB.CreateCall(
+ Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress),
+ IRB.getInt32(0));
+ IRB.CreateCall(TsanFuncEntry, ReturnAddress);
+
+ EscapeEnumerator EE(F, "tsan_cleanup", ClHandleCxxExceptions);
+ while (IRBuilder<> *AtExit = EE.Next()) {
+ AtExit->CreateCall(TsanFuncExit, {});
+ }
+ Res = true;
+ }
+ return Res;
+}
+
+bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I,
+ const DataLayout &DL) {
+ IRBuilder<> IRB(I);
+ bool IsWrite = isa<StoreInst>(*I);
+ Value *Addr = IsWrite
+ ? cast<StoreInst>(I)->getPointerOperand()
+ : cast<LoadInst>(I)->getPointerOperand();
+
+ // swifterror memory addresses are mem2reg promoted by instruction selection.
+ // As such they cannot have regular uses like an instrumentation function and
+ // it makes no sense to track them as memory.
+ if (Addr->isSwiftError())
+ return false;
+
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
+ if (Idx < 0)
+ return false;
+ if (IsWrite && isVtableAccess(I)) {
+ LLVM_DEBUG(dbgs() << " VPTR : " << *I << "\n");
+ Value *StoredValue = cast<StoreInst>(I)->getValueOperand();
+ // StoredValue may be a vector type if we are storing several vptrs at once.
+ // In this case, just take the first element of the vector since this is
+ // enough to find vptr races.
+ if (isa<VectorType>(StoredValue->getType()))
+ StoredValue = IRB.CreateExtractElement(
+ StoredValue, ConstantInt::get(IRB.getInt32Ty(), 0));
+ if (StoredValue->getType()->isIntegerTy())
+ StoredValue = IRB.CreateIntToPtr(StoredValue, IRB.getInt8PtrTy());
+ // Call TsanVptrUpdate.
+ IRB.CreateCall(TsanVptrUpdate,
+ {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy())});
+ NumInstrumentedVtableWrites++;
+ return true;
+ }
+ if (!IsWrite && isVtableAccess(I)) {
+ IRB.CreateCall(TsanVptrLoad,
+ IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
+ NumInstrumentedVtableReads++;
+ return true;
+ }
+ const unsigned Alignment = IsWrite
+ ? cast<StoreInst>(I)->getAlignment()
+ : cast<LoadInst>(I)->getAlignment();
+ Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
+ const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
+ FunctionCallee OnAccessFunc = nullptr;
+ if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0)
+ OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
+ else
+ OnAccessFunc = IsWrite ? TsanUnalignedWrite[Idx] : TsanUnalignedRead[Idx];
+ IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
+ if (IsWrite) NumInstrumentedWrites++;
+ else NumInstrumentedReads++;
+ return true;
+}
+
+static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
+ uint32_t v = 0;
+ switch (ord) {
+ case AtomicOrdering::NotAtomic:
+ llvm_unreachable("unexpected atomic ordering!");
+ case AtomicOrdering::Unordered: LLVM_FALLTHROUGH;
+ case AtomicOrdering::Monotonic: v = 0; break;
+ // Not specified yet:
+ // case AtomicOrdering::Consume: v = 1; break;
+ case AtomicOrdering::Acquire: v = 2; break;
+ case AtomicOrdering::Release: v = 3; break;
+ case AtomicOrdering::AcquireRelease: v = 4; break;
+ case AtomicOrdering::SequentiallyConsistent: v = 5; break;
+ }
+ return IRB->getInt32(v);
+}
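+
+// For reference (an assumption about the runtime side): the values produced
+// above are expected to line up with the tsan runtime's memory-order enum,
+// roughly
+//
+//   // relaxed = 0, consume = 1, acquire = 2, release = 3,
+//   // acq_rel = 4, seq_cst = 5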
+
+// If a memset intrinsic gets inlined by the code gen, we will miss races on it.
+// So, we either need to ensure the intrinsic is not inlined, or instrument it.
+// We do not instrument memset/memmove/memcpy intrinsics (too complicated),
+// instead we simply replace them with regular function calls, which are then
+// intercepted by the run-time.
+// Since tsan is running after everyone else, the calls should not be
+// replaced back with intrinsics. If that becomes wrong at some point,
+// we will need to call e.g. __tsan_memset to avoid the intrinsics.
+bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) {
+ IRBuilder<> IRB(I);
+ if (MemSetInst *M = dyn_cast<MemSetInst>(I)) {
+ IRB.CreateCall(
+ MemsetFn,
+ {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
+ I->eraseFromParent();
+ } else if (MemTransferInst *M = dyn_cast<MemTransferInst>(I)) {
+ IRB.CreateCall(
+ isa<MemCpyInst>(M) ? MemcpyFn : MemmoveFn,
+ {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
+ I->eraseFromParent();
+ }
+ return false;
+}
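+
+// For illustration (a sketch of the effect, not an additional transformation):
+//
+//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i1 false)
+//
+// becomes a plain `call i8* @memcpy(i8* %dst, i8* %src, i64 %n)`, which the
+// tsan runtime then intercepts like any other libc call.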
+
+// Both llvm and ThreadSanitizer atomic operations are based on C++11/C1x
+// standards. For background see C++11 standard. A slightly older, publicly
+// available draft of the standard (not entirely up-to-date, but close enough
+// for casual browsing) is available here:
+// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3242.pdf
+// The following page contains more background information:
+// http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/
+
+bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
+ IRBuilder<> IRB(I);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ Value *Addr = LI->getPointerOperand();
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
+ if (Idx < 0)
+ return false;
+ const unsigned ByteSize = 1U << Idx;
+ const unsigned BitSize = ByteSize * 8;
+ Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
+ Type *PtrTy = Ty->getPointerTo();
+ Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+ createOrdering(&IRB, LI->getOrdering())};
+ Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
+ Value *C = IRB.CreateCall(TsanAtomicLoad[Idx], Args);
+ Value *Cast = IRB.CreateBitOrPointerCast(C, OrigTy);
+ I->replaceAllUsesWith(Cast);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ Value *Addr = SI->getPointerOperand();
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
+ if (Idx < 0)
+ return false;
+ const unsigned ByteSize = 1U << Idx;
+ const unsigned BitSize = ByteSize * 8;
+ Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
+ Type *PtrTy = Ty->getPointerTo();
+ Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+ IRB.CreateBitOrPointerCast(SI->getValueOperand(), Ty),
+ createOrdering(&IRB, SI->getOrdering())};
+ CallInst *C = CallInst::Create(TsanAtomicStore[Idx], Args);
+ ReplaceInstWithInst(I, C);
+ } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) {
+ Value *Addr = RMWI->getPointerOperand();
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
+ if (Idx < 0)
+ return false;
+ FunctionCallee F = TsanAtomicRMW[RMWI->getOperation()][Idx];
+ if (!F)
+ return false;
+ const unsigned ByteSize = 1U << Idx;
+ const unsigned BitSize = ByteSize * 8;
+ Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
+ Type *PtrTy = Ty->getPointerTo();
+ Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+ IRB.CreateIntCast(RMWI->getValOperand(), Ty, false),
+ createOrdering(&IRB, RMWI->getOrdering())};
+ CallInst *C = CallInst::Create(F, Args);
+ ReplaceInstWithInst(I, C);
+ } else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
+ Value *Addr = CASI->getPointerOperand();
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
+ if (Idx < 0)
+ return false;
+ const unsigned ByteSize = 1U << Idx;
+ const unsigned BitSize = ByteSize * 8;
+ Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
+ Type *PtrTy = Ty->getPointerTo();
+ Value *CmpOperand =
+ IRB.CreateBitOrPointerCast(CASI->getCompareOperand(), Ty);
+ Value *NewOperand =
+ IRB.CreateBitOrPointerCast(CASI->getNewValOperand(), Ty);
+ Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+ CmpOperand,
+ NewOperand,
+ createOrdering(&IRB, CASI->getSuccessOrdering()),
+ createOrdering(&IRB, CASI->getFailureOrdering())};
+ CallInst *C = IRB.CreateCall(TsanAtomicCAS[Idx], Args);
+ Value *Success = IRB.CreateICmpEQ(C, CmpOperand);
+ Value *OldVal = C;
+ Type *OrigOldValTy = CASI->getNewValOperand()->getType();
+ if (Ty != OrigOldValTy) {
+ // The value is a pointer, so we need to cast the return value.
+ OldVal = IRB.CreateIntToPtr(C, OrigOldValTy);
+ }
+
+ Value *Res =
+ IRB.CreateInsertValue(UndefValue::get(CASI->getType()), OldVal, 0);
+ Res = IRB.CreateInsertValue(Res, Success, 1);
+
+ I->replaceAllUsesWith(Res);
+ I->eraseFromParent();
+ } else if (FenceInst *FI = dyn_cast<FenceInst>(I)) {
+ Value *Args[] = {createOrdering(&IRB, FI->getOrdering())};
+ FunctionCallee F = FI->getSyncScopeID() == SyncScope::SingleThread
+ ? TsanAtomicSignalFence
+ : TsanAtomicThreadFence;
+ CallInst *C = CallInst::Create(F, Args);
+ ReplaceInstWithInst(I, C);
+ }
+ return true;
+}
+
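+// Worked example for the index computation below: an i64 access has
+// TypeSize == 64, so countTrailingZeros(64 / 8) == 3 and the access uses the
+// size-8 callbacks (e.g. __tsan_write8); an i128 access maps to index 4
+// (size-16 callbacks); any other width returns -1 and is not instrumented.
+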
+int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr,
+ const DataLayout &DL) {
+ Type *OrigPtrTy = Addr->getType();
+ Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
+ assert(OrigTy->isSized());
+ uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
+ if (TypeSize != 8 && TypeSize != 16 &&
+ TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
+ NumAccessesWithBadSize++;
+ // Ignore all unusual sizes.
+ return -1;
+ }
+ size_t Idx = countTrailingZeros(TypeSize / 8);
+ assert(Idx < kNumberOfAccessSizes);
+ return Idx;
+}
diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
new file mode 100644
index 000000000000..604726d4f40f
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
@@ -0,0 +1,78 @@
+//===- ValueProfileCollector.cpp - determine what to value profile --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The implementation of the ValueProfileCollector via ValueProfileCollectorImpl
+//
+//===----------------------------------------------------------------------===//
+
+#include "ValueProfilePlugins.inc"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/InitializePasses.h"
+
+#include <cassert>
+
+using namespace llvm;
+
+namespace {
+
+/// A plugin-based class that takes an arbitrary number of Plugin types.
+/// Each plugin type must satisfy the following API:
+/// 1) the constructor must take a `Function &f`. Typically, the plugin would
+/// scan the function looking for candidates.
+/// 2) contain a member function with the following signature and name:
+/// void run(std::vector<CandidateInfo> &Candidates);
+/// such that the plugin would append its result into the vector parameter.
+///
+/// Plugins are defined in ValueProfilePlugins.inc
+template <class... Ts> class PluginChain;
+
+/// The type PluginChainFinal is the final chain of plugins that will be used by
+/// ValueProfileCollectorImpl.
+using PluginChainFinal = PluginChain<VP_PLUGIN_LIST>;
+
+template <> class PluginChain<> {
+public:
+ PluginChain(Function &F) {}
+ void get(InstrProfValueKind K, std::vector<CandidateInfo> &Candidates) {}
+};
+
+template <class PluginT, class... Ts>
+class PluginChain<PluginT, Ts...> : public PluginChain<Ts...> {
+ PluginT Plugin;
+ using Base = PluginChain<Ts...>;
+
+public:
+ PluginChain(Function &F) : PluginChain<Ts...>(F), Plugin(F) {}
+
+ void get(InstrProfValueKind K, std::vector<CandidateInfo> &Candidates) {
+ if (K == PluginT::Kind)
+ Plugin.run(Candidates);
+ Base::get(K, Candidates);
+ }
+};
+
+} // end anonymous namespace
+
+/// ValueProfileCollectorImpl inherits the API of PluginChainFinal.
+class ValueProfileCollector::ValueProfileCollectorImpl : public PluginChainFinal {
+public:
+ using PluginChainFinal::PluginChainFinal;
+};
+
+ValueProfileCollector::ValueProfileCollector(Function &F)
+ : PImpl(new ValueProfileCollectorImpl(F)) {}
+
+ValueProfileCollector::~ValueProfileCollector() = default;
+
+std::vector<CandidateInfo>
+ValueProfileCollector::get(InstrProfValueKind Kind) const {
+ std::vector<CandidateInfo> Result;
+ PImpl->get(Kind, Result);
+ return Result;
+}
diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h
new file mode 100644
index 000000000000..ff883c8d0c77
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h
@@ -0,0 +1,79 @@
+//===- ValueProfileCollector.h - determine what to value profile ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a utility class, ValueProfileCollector, that is used to
+// determine what kinds of llvm::Value are worth value-profiling, at which
+// point in the program, and which instruction holds the Value Profile metadata.
+// Currently, the only users of this utility are the PGOInstrumentation[Gen|Use]
+// passes.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H
+#define LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProf.h"
+
+namespace llvm {
+
+/// Utility analysis that determines what values are worth profiling.
+/// The actual logic is inside the ValueProfileCollectorImpl, whose job is to
+/// populate the Candidates vector.
+///
+/// Value profiling an expression means to track the values that this expression
+/// takes at runtime and the frequency of each value.
+/// It is important to distinguish between two sets of value profiles for a
+/// particular expression:
+/// 1) The set of values at the point of evaluation.
+/// 2) The set of values at the point of use.
+/// In some cases, the two sets are identical, but it's not unusual for the two
+/// to differ.
+///
+/// To elaborate more, consider this C code, and focus on the expression `nn`:
+/// void foo(int nn, bool b) {
+/// if (b) memcpy(x, y, nn);
+/// }
+/// The point of evaluation can be as early as the start of the function, and
+/// let's say the value profile for `nn` is:
+/// total=100; (value,freq) set = {(8,10), (32,50)}
+/// The point of use is right before we call memcpy, and since we execute the
+/// memcpy conditionally, the value profile of `nn` can be:
+/// total=15; (value,freq) set = {(8,10), (4,5)}
+///
+/// For this reason, a plugin is responsible for computing the insertion point
+/// for each value to be profiled. The `CandidateInfo` structure encapsulates
+/// all the information needed for each value profile site.
+class ValueProfileCollector {
+public:
+ struct CandidateInfo {
+ Value *V; // The value to profile.
+ Instruction *InsertPt; // Insert the VP lib call before this instr.
+ Instruction *AnnotatedInst; // Where metadata is attached.
+ };
+
+ ValueProfileCollector(Function &Fn);
+ ValueProfileCollector(ValueProfileCollector &&) = delete;
+ ValueProfileCollector &operator=(ValueProfileCollector &&) = delete;
+
+ ValueProfileCollector(const ValueProfileCollector &) = delete;
+ ValueProfileCollector &operator=(const ValueProfileCollector &) = delete;
+ ~ValueProfileCollector();
+
+  /// Returns a list of value-profiling candidates of the given kind.
+ std::vector<CandidateInfo> get(InstrProfValueKind Kind) const;
+
+private:
+ class ValueProfileCollectorImpl;
+ std::unique_ptr<ValueProfileCollectorImpl> PImpl;
+};
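+
+// Hypothetical usage sketch (for illustration only; the PGO instrumentation
+// passes drive the collector in a similar way):
+//
+//   ValueProfileCollector VPC(F);
+//   for (const auto &Cand : VPC.get(IPVK_IndirectCallTarget))
+//     ... insert a value-profiling call before Cand.InsertPt ...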
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
new file mode 100644
index 000000000000..4cc4c6c848c3
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
@@ -0,0 +1,75 @@
+//=== ValueProfilePlugins.inc - set of plugins used by ValueProfileCollector =//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a set of plugin classes used in ValueProfileCollectorImpl.
+// Each plugin is responsible for collecting Value Profiling candidates for a
+// particular optimization.
+// Each plugin must satisfy the interface described in ValueProfileCollector.cpp
+//
+//===----------------------------------------------------------------------===//
+
+#include "ValueProfileCollector.h"
+#include "llvm/Analysis/IndirectCallVisitor.h"
+#include "llvm/IR/InstVisitor.h"
+
+using namespace llvm;
+using CandidateInfo = ValueProfileCollector::CandidateInfo;
+
+///--------------------------- MemIntrinsicPlugin ------------------------------
+class MemIntrinsicPlugin : public InstVisitor<MemIntrinsicPlugin> {
+ Function &F;
+ std::vector<CandidateInfo> *Candidates;
+
+public:
+ static constexpr InstrProfValueKind Kind = IPVK_MemOPSize;
+
+ MemIntrinsicPlugin(Function &Fn) : F(Fn), Candidates(nullptr) {}
+
+ void run(std::vector<CandidateInfo> &Cs) {
+ Candidates = &Cs;
+ visit(F);
+ Candidates = nullptr;
+ }
+ void visitMemIntrinsic(MemIntrinsic &MI) {
+ Value *Length = MI.getLength();
+    // Do not instrument constant-length calls.
+    if (isa<ConstantInt>(Length))
+ return;
+
+ Instruction *InsertPt = &MI;
+ Instruction *AnnotatedInst = &MI;
+ Candidates->emplace_back(CandidateInfo{Length, InsertPt, AnnotatedInst});
+ }
+};
+
+///------------------------ IndirectCallPromotionPlugin ------------------------
+class IndirectCallPromotionPlugin {
+ Function &F;
+
+public:
+ static constexpr InstrProfValueKind Kind = IPVK_IndirectCallTarget;
+
+ IndirectCallPromotionPlugin(Function &Fn) : F(Fn) {}
+
+ void run(std::vector<CandidateInfo> &Candidates) {
+ std::vector<Instruction *> Result = findIndirectCalls(F);
+ for (Instruction *I : Result) {
+ Value *Callee = CallSite(I).getCalledValue();
+ Instruction *InsertPt = I;
+ Instruction *AnnotatedInst = I;
+ Candidates.emplace_back(CandidateInfo{Callee, InsertPt, AnnotatedInst});
+ }
+ }
+};
+
+///----------------------- Registration of the plugins -------------------------
+/// For now, registering a plugin with the ValueProfileCollector is done by
+/// adding the plugin type to the VP_PLUGIN_LIST macro.
+#define VP_PLUGIN_LIST \
+ MemIntrinsicPlugin, \
+ IndirectCallPromotionPlugin
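+
+///----------------------- Example: adding a new plugin ------------------------
+/// Hypothetical sketch (for illustration only; MyNewPlugin is not a real
+/// plugin): a plugin only needs a constructor taking `Function &`, a static
+/// `Kind`, and a `run()` method, and is then appended to VP_PLUGIN_LIST above:
+///
+///   class MyNewPlugin {
+///     Function &F;
+///   public:
+///     static constexpr InstrProfValueKind Kind = IPVK_MemOPSize; // kind served
+///     MyNewPlugin(Function &Fn) : F(Fn) {}
+///     void run(std::vector<CandidateInfo> &Candidates) { /* append here */ }
+///   };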