Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/BasicAliasAnalysis.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/RDFGraph.cpp (renamed from llvm/lib/Target/Hexagon/RDFGraph.cpp) | 10
-rw-r--r--  llvm/lib/CodeGen/RDFLiveness.cpp (renamed from llvm/lib/Target/Hexagon/RDFLiveness.cpp) | 6
-rw-r--r--  llvm/lib/CodeGen/RDFRegisters.cpp (renamed from llvm/lib/Target/Hexagon/RDFRegisters.cpp) | 2
-rw-r--r--  llvm/lib/LTO/LTO.cpp | 28
-rw-r--r--  llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp | 2
-rw-r--r--  llvm/lib/Target/BPF/BTFDebug.cpp | 32
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp | 6
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp | 6
-rw-r--r--  llvm/lib/Target/Hexagon/RDFCopy.cpp | 6
-rw-r--r--  llvm/lib/Target/Hexagon/RDFCopy.h | 6
-rw-r--r--  llvm/lib/Target/Hexagon/RDFDeadCode.cpp | 4
-rw-r--r--  llvm/lib/Target/Hexagon/RDFDeadCode.h | 4
-rw-r--r--  llvm/lib/Target/Hexagon/RDFGraph.h | 968
-rw-r--r--  llvm/lib/Target/Hexagon/RDFLiveness.h | 151
-rw-r--r--  llvm/lib/Target/Hexagon/RDFRegisters.h | 240
-rw-r--r--  llvm/lib/Target/PowerPC/P9InstrResources.td | 1
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 27
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.h | 2
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrAltivec.td | 4
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 4
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrVSX.td | 17
-rw-r--r--  llvm/lib/Target/X86/ImmutableGraph.h | 446
-rw-r--r--  llvm/lib/Target/X86/X86.h | 8
-rw-r--r--  llvm/lib/Target/X86/X86.td | 16
-rw-r--r--  llvm/lib/Target/X86/X86FastISel.cpp | 4
-rw-r--r--  llvm/lib/Target/X86/X86FrameLowering.cpp | 10
-rw-r--r--  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 2
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 81
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h | 4
-rw-r--r--  llvm/lib/Target/X86/X86IndirectThunks.cpp | 364
-rw-r--r--  llvm/lib/Target/X86/X86InstrCompiler.td | 16
-rw-r--r--  llvm/lib/Target/X86/X86InstrControl.td | 22
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.td | 4
-rw-r--r--  llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp | 900
-rw-r--r--  llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp | 143
-rw-r--r--  llvm/lib/Target/X86/X86MCInstLower.cpp | 8
-rw-r--r--  llvm/lib/Target/X86/X86RetpolineThunks.cpp | 286
-rw-r--r--  llvm/lib/Target/X86/X86Subtarget.h | 27
-rw-r--r--  llvm/lib/Target/X86/X86TargetMachine.cpp | 9
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 2
41 files changed, 2129 insertions, 1754 deletions
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index e852d663c6b4..e439c94a7325 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -2059,12 +2059,13 @@ char BasicAAWrapperPass::ID = 0;
void BasicAAWrapperPass::anchor() {}
INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa",
- "Basic Alias Analysis (stateless AA impl)", false, true)
+ "Basic Alias Analysis (stateless AA impl)", true, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PhiValuesWrapperPass)
INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa",
- "Basic Alias Analysis (stateless AA impl)", false, true)
+ "Basic Alias Analysis (stateless AA impl)", true, true)
FunctionPass *llvm::createBasicAAWrapperPass() {
return new BasicAAWrapperPass();
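
A note on the two booleans changed in the INITIALIZE_PASS_BEGIN/END lines above: in LLVM's legacy pass-registration macros the argument order is (PassClass, "command-line-name", "Description", IsCFGOnly, IsAnalysis), so this hunk turns on BasicAA's "is CFG only" flag and registers an extra dependency on PhiValuesWrapperPass. A minimal sketch of the same registration pattern follows, using a made-up pass name purely to label the parameters; it is not part of the diff, and for a real in-tree pass the initialize declaration is generated in llvm/InitializePasses.h.

    #include "llvm/InitializePasses.h"
    #include "llvm/Pass.h"

    using namespace llvm;

    namespace llvm {
    // Hypothetical declaration; in-tree passes get this from InitializePasses.h.
    void initializeMyExamplePassPass(PassRegistry &);
    } // namespace llvm

    namespace {
    // Made-up pass used only to illustrate the macro parameters.
    struct MyExamplePass : public FunctionPass {
      static char ID;
      MyExamplePass() : FunctionPass(ID) {}
      bool runOnFunction(Function &) override { return false; }
    };
    } // namespace

    char MyExamplePass::ID = 0;

    // Argument order: (PassClass, "cmdline-name", "Description",
    //                  IsCFGOnly, IsAnalysis) -- the hunk above flips
    //                  IsCFGOnly from false to true for BasicAAWrapperPass.
    INITIALIZE_PASS_BEGIN(MyExamplePass, "my-example", "My example pass",
                          /*IsCFGOnly=*/true, /*IsAnalysis=*/true)
    INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
    INITIALIZE_PASS_END(MyExamplePass, "my-example", "My example pass",
                        /*IsCFGOnly=*/true, /*IsAnalysis=*/true)
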
diff --git a/llvm/lib/Target/Hexagon/RDFGraph.cpp b/llvm/lib/CodeGen/RDFGraph.cpp
index 0cb35dc98819..437a6b030096 100644
--- a/llvm/lib/Target/Hexagon/RDFGraph.cpp
+++ b/llvm/lib/CodeGen/RDFGraph.cpp
@@ -8,8 +8,6 @@
//
// Target-independent, SSA-based data flow graph for register data flow (RDF).
//
-#include "RDFGraph.h"
-#include "RDFRegisters.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -20,6 +18,8 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -753,8 +753,10 @@ RegisterSet DataFlowGraph::getLandingPadLiveIns() const {
const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
if (RegisterId R = TLI.getExceptionPointerRegister(PF))
LR.insert(RegisterRef(R));
- if (RegisterId R = TLI.getExceptionSelectorRegister(PF))
- LR.insert(RegisterRef(R));
+ if (!isFuncletEHPersonality(classifyEHPersonality(PF))) {
+ if (RegisterId R = TLI.getExceptionSelectorRegister(PF))
+ LR.insert(RegisterRef(R));
+ }
return LR;
}
diff --git a/llvm/lib/Target/Hexagon/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp
index e2c007c9d01a..0bcd27f8ea45 100644
--- a/llvm/lib/Target/Hexagon/RDFLiveness.cpp
+++ b/llvm/lib/CodeGen/RDFLiveness.cpp
@@ -22,9 +22,6 @@
// and Embedded Architectures and Compilers", 8 (4),
// <10.1145/2086696.2086706>. <hal-00647369>
//
-#include "RDFLiveness.h"
-#include "RDFGraph.h"
-#include "RDFRegisters.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -33,6 +30,9 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
diff --git a/llvm/lib/Target/Hexagon/RDFRegisters.cpp b/llvm/lib/CodeGen/RDFRegisters.cpp
index b5675784e34b..bd8661816e71 100644
--- a/llvm/lib/Target/Hexagon/RDFRegisters.cpp
+++ b/llvm/lib/CodeGen/RDFRegisters.cpp
@@ -6,11 +6,11 @@
//
//===----------------------------------------------------------------------===//
-#include "RDFRegisters.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 297b11de17a9..fa2f0777897b 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -147,8 +147,17 @@ void llvm::computeLTOCacheKey(
// Include the hash for the current module
auto ModHash = Index.getModuleHash(ModuleID);
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
+
+ std::vector<uint64_t> ExportsGUID;
+ ExportsGUID.reserve(ExportList.size());
for (const auto &VI : ExportList) {
auto GUID = VI.getGUID();
+ ExportsGUID.push_back(GUID);
+ }
+
+ // Sort the export list elements GUIDs.
+ llvm::sort(ExportsGUID);
+ for (uint64_t GUID : ExportsGUID) {
// The export list can impact the internalization, be conservative here
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));
}
@@ -156,12 +165,23 @@ void llvm::computeLTOCacheKey(
// Include the hash for every module we import functions from. The set of
// imported symbols for each module may affect code generation and is
// sensitive to link order, so include that as well.
- for (auto &Entry : ImportList) {
- auto ModHash = Index.getModuleHash(Entry.first());
+ using ImportMapIteratorTy = FunctionImporter::ImportMapTy::const_iterator;
+ std::vector<ImportMapIteratorTy> ImportModulesVector;
+ ImportModulesVector.reserve(ImportList.size());
+
+ for (ImportMapIteratorTy It = ImportList.begin(); It != ImportList.end();
+ ++It) {
+ ImportModulesVector.push_back(It);
+ }
+ llvm::sort(ImportModulesVector,
+ [](const ImportMapIteratorTy &Lhs, const ImportMapIteratorTy &Rhs)
+ -> bool { return Lhs->getKey() < Rhs->getKey(); });
+ for (const ImportMapIteratorTy &EntryIt : ImportModulesVector) {
+ auto ModHash = Index.getModuleHash(EntryIt->first());
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
- AddUint64(Entry.second.size());
- for (auto &Fn : Entry.second)
+ AddUint64(EntryIt->second.size());
+ for (auto &Fn : EntryIt->second)
AddUint64(Fn);
}
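
The LTO.cpp hunks above make the computed cache key deterministic: the exported-symbol GUIDs are copied into a vector and sorted, and the import map is walked through a vector of iterators sorted by module key, so the hash no longer depends on the unspecified iteration order of those containers. Below is a standalone sketch of the same idea using only the standard library; the container, key type, and stand-in FNV-1a hasher are illustrative, not LLVM's actual types.

    #include <algorithm>
    #include <cstdint>
    #include <string>
    #include <unordered_map>
    #include <vector>

    // Hash a module -> imported-GUIDs map in sorted key order so the result
    // does not depend on the unordered_map's iteration order.
    uint64_t hashImportList(
        const std::unordered_map<std::string, std::vector<uint64_t>> &ImportList) {
      using Iter =
          std::unordered_map<std::string, std::vector<uint64_t>>::const_iterator;

      std::vector<Iter> Entries;
      Entries.reserve(ImportList.size());
      for (Iter It = ImportList.begin(); It != ImportList.end(); ++It)
        Entries.push_back(It);

      // Sort the iterators by key, mirroring the llvm::sort call in the hunk.
      std::sort(Entries.begin(), Entries.end(),
                [](Iter L, Iter R) { return L->first < R->first; });

      // Stand-in for the SHA1 Hasher.update() calls: FNV-1a over the bytes fed in.
      uint64_t H = 1469598103934665603ull;
      auto Mix = [&H](uint64_t V) {
        for (int I = 0; I < 8; ++I)
          H = (H ^ ((V >> (8 * I)) & 0xff)) * 1099511628211ull;
      };

      for (Iter It : Entries) {
        for (unsigned char C : It->first) // module identifier
          Mix(C);
        Mix(It->second.size());           // number of imported symbols
        for (uint64_t GUID : It->second)  // the GUIDs themselves
          Mix(GUID);
      }
      return H;
    }

The export-list half of the hunk is the simpler case of the same pattern: copy the GUIDs into a vector, llvm::sort them, then feed them to the hasher.
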
diff --git a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 6f5f58554d09..d407edfbd966 100644
--- a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -304,7 +304,7 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node,
LLVM_DEBUG(dbgs() << "Replacing load of size " << size << " with constant "
<< val << '\n');
- SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64);
+ SDValue NVal = CurDAG->getConstant(val, DL, LD->getValueType(0));
// After replacement, the current node is dead, we need to
// go backward one step to make iterator still work
diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp
index a9fb04f20d1c..6daeb3b4b63b 100644
--- a/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -600,6 +600,38 @@ void BTFDebug::visitTypeEntry(const DIType *Ty, uint32_t &TypeId,
bool CheckPointer, bool SeenPointer) {
if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end()) {
TypeId = DIToIdMap[Ty];
+
+ // To handle the case like the following:
+ // struct t;
+ // typedef struct t _t;
+ // struct s1 { _t *c; };
+ // int test1(struct s1 *arg) { ... }
+ //
+ // struct t { int a; int b; };
+ // struct s2 { _t c; }
+ // int test2(struct s2 *arg) { ... }
+ //
+ // During traversing test1() argument, "_t" is recorded
+ // in DIToIdMap and a forward declaration fixup is created
+ // for "struct t" to avoid pointee type traversal.
+ //
+ // During traversing test2() argument, even if we see "_t" is
+ // already defined, we should keep moving to eventually
+ // bring in types for "struct t". Otherwise, the "struct s2"
+ // definition won't be correct.
+ if (Ty && (!CheckPointer || !SeenPointer)) {
+ if (const auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
+ unsigned Tag = DTy->getTag();
+ if (Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type ||
+ Tag == dwarf::DW_TAG_volatile_type ||
+ Tag == dwarf::DW_TAG_restrict_type) {
+ uint32_t TmpTypeId;
+ visitTypeEntry(DTy->getBaseType(), TmpTypeId, CheckPointer,
+ SeenPointer);
+ }
+ }
+ }
+
return;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 886034d9601a..f1fe51f5e54f 100644
--- a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -12,9 +12,6 @@
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
@@ -27,6 +24,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
diff --git a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
index 517ad1c6ee7b..f26e23befde2 100644
--- a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
@@ -11,9 +11,6 @@
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "RDFCopy.h"
#include "RDFDeadCode.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -24,6 +21,9 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
diff --git a/llvm/lib/Target/Hexagon/RDFCopy.cpp b/llvm/lib/Target/Hexagon/RDFCopy.cpp
index a9d39fd4b2dc..34d58f0a7a23 100644
--- a/llvm/lib/Target/Hexagon/RDFCopy.cpp
+++ b/llvm/lib/Target/Hexagon/RDFCopy.cpp
@@ -11,13 +11,13 @@
//===----------------------------------------------------------------------===//
#include "RDFCopy.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
diff --git a/llvm/lib/Target/Hexagon/RDFCopy.h b/llvm/lib/Target/Hexagon/RDFCopy.h
index 1450ab884849..99b18a75d8c2 100644
--- a/llvm/lib/Target/Hexagon/RDFCopy.h
+++ b/llvm/lib/Target/Hexagon/RDFCopy.h
@@ -9,9 +9,9 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
#define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/MachineFunction.h"
#include <map>
#include <vector>
diff --git a/llvm/lib/Target/Hexagon/RDFDeadCode.cpp b/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
index af86c7b1956b..5a98debd3c00 100644
--- a/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
+++ b/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -9,13 +9,13 @@
// RDF-based generic dead code elimination.
#include "RDFDeadCode.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/Support/Debug.h"
#include <queue>
diff --git a/llvm/lib/Target/Hexagon/RDFDeadCode.h b/llvm/lib/Target/Hexagon/RDFDeadCode.h
index 7f91977e1d6c..859c8161d355 100644
--- a/llvm/lib/Target/Hexagon/RDFDeadCode.h
+++ b/llvm/lib/Target/Hexagon/RDFDeadCode.h
@@ -23,8 +23,8 @@
#ifndef RDF_DEADCODE_H
#define RDF_DEADCODE_H
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/ADT/SetVector.h"
namespace llvm {
diff --git a/llvm/lib/Target/Hexagon/RDFGraph.h b/llvm/lib/Target/Hexagon/RDFGraph.h
deleted file mode 100644
index 585f43e116f9..000000000000
--- a/llvm/lib/Target/Hexagon/RDFGraph.h
+++ /dev/null
@@ -1,968 +0,0 @@
-//===- RDFGraph.h -----------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Target-independent, SSA-based data flow graph for register data flow (RDF)
-// for a non-SSA program representation (e.g. post-RA machine code).
-//
-//
-// *** Introduction
-//
-// The RDF graph is a collection of nodes, each of which denotes some element
-// of the program. There are two main types of such elements: code and refe-
-// rences. Conceptually, "code" is something that represents the structure
-// of the program, e.g. basic block or a statement, while "reference" is an
-// instance of accessing a register, e.g. a definition or a use. Nodes are
-// connected with each other based on the structure of the program (such as
-// blocks, instructions, etc.), and based on the data flow (e.g. reaching
-// definitions, reached uses, etc.). The single-reaching-definition principle
-// of SSA is generally observed, although, due to the non-SSA representation
-// of the program, there are some differences between the graph and a "pure"
-// SSA representation.
-//
-//
-// *** Implementation remarks
-//
-// Since the graph can contain a large number of nodes, memory consumption
-// was one of the major design considerations. As a result, there is a single
-// base class NodeBase which defines all members used by all possible derived
-// classes. The members are arranged in a union, and a derived class cannot
-// add any data members of its own. Each derived class only defines the
-// functional interface, i.e. member functions. NodeBase must be a POD,
-// which implies that all of its members must also be PODs.
-// Since nodes need to be connected with other nodes, pointers have been
-// replaced with 32-bit identifiers: each node has an id of type NodeId.
-// There are mapping functions in the graph that translate between actual
-// memory addresses and the corresponding identifiers.
-// A node id of 0 is equivalent to nullptr.
-//
-//
-// *** Structure of the graph
-//
-// A code node is always a collection of other nodes. For example, a code
-// node corresponding to a basic block will contain code nodes corresponding
-// to instructions. In turn, a code node corresponding to an instruction will
-// contain a list of reference nodes that correspond to the definitions and
-// uses of registers in that instruction. The members are arranged into a
-// circular list, which is yet another consequence of the effort to save
-// memory: for each member node it should be possible to obtain its owner,
-// and it should be possible to access all other members. There are other
-// ways to accomplish that, but the circular list seemed the most natural.
-//
-// +- CodeNode -+
-// | | <---------------------------------------------------+
-// +-+--------+-+ |
-// |FirstM |LastM |
-// | +-------------------------------------+ |
-// | | |
-// V V |
-// +----------+ Next +----------+ Next Next +----------+ Next |
-// | |----->| |-----> ... ----->| |----->-+
-// +- Member -+ +- Member -+ +- Member -+
-//
-// The order of members is such that related reference nodes (see below)
-// should be contiguous on the member list.
-//
-// A reference node is a node that encapsulates an access to a register,
-// in other words, data flowing into or out of a register. There are two
-// major kinds of reference nodes: defs and uses. A def node will contain
-// the id of the first reached use, and the id of the first reached def.
-// Each def and use will contain the id of the reaching def, and also the
-// id of the next reached def (for def nodes) or use (for use nodes).
-// The "next node sharing the same reaching def" is denoted as "sibling".
-// In summary:
-// - Def node contains: reaching def, sibling, first reached def, and first
-// reached use.
-// - Use node contains: reaching def and sibling.
-//
-// +-- DefNode --+
-// | R2 = ... | <---+--------------------+
-// ++---------+--+ | |
-// |Reached |Reached | |
-// |Def |Use | |
-// | | |Reaching |Reaching
-// | V |Def |Def
-// | +-- UseNode --+ Sib +-- UseNode --+ Sib Sib
-// | | ... = R2 |----->| ... = R2 |----> ... ----> 0
-// | +-------------+ +-------------+
-// V
-// +-- DefNode --+ Sib
-// | R2 = ... |----> ...
-// ++---------+--+
-// | |
-// | |
-// ... ...
-//
-// To get a full picture, the circular lists connecting blocks within a
-// function, instructions within a block, etc. should be superimposed with
-// the def-def, def-use links shown above.
-// To illustrate this, consider a small example in a pseudo-assembly:
-// foo:
-// add r2, r0, r1 ; r2 = r0+r1
-// addi r0, r2, 1 ; r0 = r2+1
-// ret r0 ; return value in r0
-//
-// The graph (in a format used by the debugging functions) would look like:
-//
-// DFG dump:[
-// f1: Function foo
-// b2: === %bb.0 === preds(0), succs(0):
-// p3: phi [d4<r0>(,d12,u9):]
-// p5: phi [d6<r1>(,,u10):]
-// s7: add [d8<r2>(,,u13):, u9<r0>(d4):, u10<r1>(d6):]
-// s11: addi [d12<r0>(d4,,u15):, u13<r2>(d8):]
-// s14: ret [u15<r0>(d12):]
-// ]
-//
-// The f1, b2, p3, etc. are node ids. The letter is prepended to indicate the
-// kind of the node (i.e. f - function, b - basic block, p - phi, s - state-
-// ment, d - def, u - use).
-// The format of a def node is:
-// dN<R>(rd,d,u):sib,
-// where
-// N - numeric node id,
-// R - register being defined
-// rd - reaching def,
-// d - reached def,
-// u - reached use,
-// sib - sibling.
-// The format of a use node is:
-// uN<R>[!](rd):sib,
-// where
-// N - numeric node id,
-// R - register being used,
-// rd - reaching def,
-// sib - sibling.
-// Possible annotations (usually preceding the node id):
-// + - preserving def,
-// ~ - clobbering def,
-// " - shadow ref (follows the node id),
-// ! - fixed register (appears after register name).
-//
-// The circular lists are not explicit in the dump.
-//
-//
-// *** Node attributes
-//
-// NodeBase has a member "Attrs", which is the primary way of determining
-// the node's characteristics. The fields in this member decide whether
-// the node is a code node or a reference node (i.e. node's "type"), then
-// within each type, the "kind" determines what specifically this node
-// represents. The remaining bits, "flags", contain additional information
-// that is even more detailed than the "kind".
-// CodeNode's kinds are:
-// - Phi: Phi node, members are reference nodes.
-// - Stmt: Statement, members are reference nodes.
-// - Block: Basic block, members are instruction nodes (i.e. Phi or Stmt).
-// - Func: The whole function. The members are basic block nodes.
-// RefNode's kinds are:
-// - Use.
-// - Def.
-//
-// Meaning of flags:
-// - Preserving: applies only to defs. A preserving def is one that can
-// preserve some of the original bits among those that are included in
-// the register associated with that def. For example, if R0 is a 32-bit
-// register, but a def can only change the lower 16 bits, then it will
-// be marked as preserving.
-// - Shadow: a reference that has duplicates holding additional reaching
-// defs (see more below).
-// - Clobbering: applied only to defs, indicates that the value generated
-// by this def is unspecified. A typical example would be volatile registers
-// after function calls.
-// - Fixed: the register in this def/use cannot be replaced with any other
-// register. A typical case would be a parameter register to a call, or
-// the register with the return value from a function.
-// - Undef: the register in this reference the register is assumed to have
-// no pre-existing value, even if it appears to be reached by some def.
-// This is typically used to prevent keeping registers artificially live
-// in cases when they are defined via predicated instructions. For example:
-// r0 = add-if-true cond, r10, r11 (1)
-// r0 = add-if-false cond, r12, r13, implicit r0 (2)
-// ... = r0 (3)
-// Before (1), r0 is not intended to be live, and the use of r0 in (3) is
-// not meant to be reached by any def preceding (1). However, since the
-// defs in (1) and (2) are both preserving, these properties alone would
-// imply that the use in (3) may indeed be reached by some prior def.
-// Adding Undef flag to the def in (1) prevents that. The Undef flag
-// may be applied to both defs and uses.
-// - Dead: applies only to defs. The value coming out of a "dead" def is
-// assumed to be unused, even if the def appears to be reaching other defs
-// or uses. The motivation for this flag comes from dead defs on function
-// calls: there is no way to determine if such a def is dead without
-// analyzing the target's ABI. Hence the graph should contain this info,
-// as it is unavailable otherwise. On the other hand, a def without any
-// uses on a typical instruction is not the intended target for this flag.
-//
-// *** Shadow references
-//
-// It may happen that a super-register can have two (or more) non-overlapping
-// sub-registers. When both of these sub-registers are defined and followed
-// by a use of the super-register, the use of the super-register will not
-// have a unique reaching def: both defs of the sub-registers need to be
-// accounted for. In such cases, a duplicate use of the super-register is
-// added and it points to the extra reaching def. Both uses are marked with
-// a flag "shadow". Example:
-// Assume t0 is a super-register of r0 and r1, r0 and r1 do not overlap:
-// set r0, 1 ; r0 = 1
-// set r1, 1 ; r1 = 1
-// addi t1, t0, 1 ; t1 = t0+1
-//
-// The DFG:
-// s1: set [d2<r0>(,,u9):]
-// s3: set [d4<r1>(,,u10):]
-// s5: addi [d6<t1>(,,):, u7"<t0>(d2):, u8"<t0>(d4):]
-//
-// The statement s5 has two use nodes for t0: u7" and u9". The quotation
-// mark " indicates that the node is a shadow.
-//
-
-#ifndef LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
-#define LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
-
-#include "RDFRegisters.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/MC/LaneBitmask.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/MathExtras.h"
-#include <cassert>
-#include <cstdint>
-#include <cstring>
-#include <map>
-#include <set>
-#include <unordered_map>
-#include <utility>
-#include <vector>
-
-// RDF uses uint32_t to refer to registers. This is to ensure that the type
-// size remains specific. In other places, registers are often stored using
-// unsigned.
-static_assert(sizeof(uint32_t) == sizeof(unsigned), "Those should be equal");
-
-namespace llvm {
-
-class MachineBasicBlock;
-class MachineDominanceFrontier;
-class MachineDominatorTree;
-class MachineFunction;
-class MachineInstr;
-class MachineOperand;
-class raw_ostream;
-class TargetInstrInfo;
-class TargetRegisterInfo;
-
-namespace rdf {
-
- using NodeId = uint32_t;
-
- struct DataFlowGraph;
-
- struct NodeAttrs {
- enum : uint16_t {
- None = 0x0000, // Nothing
-
- // Types: 2 bits
- TypeMask = 0x0003,
- Code = 0x0001, // 01, Container
- Ref = 0x0002, // 10, Reference
-
- // Kind: 3 bits
- KindMask = 0x0007 << 2,
- Def = 0x0001 << 2, // 001
- Use = 0x0002 << 2, // 010
- Phi = 0x0003 << 2, // 011
- Stmt = 0x0004 << 2, // 100
- Block = 0x0005 << 2, // 101
- Func = 0x0006 << 2, // 110
-
- // Flags: 7 bits for now
- FlagMask = 0x007F << 5,
- Shadow = 0x0001 << 5, // 0000001, Has extra reaching defs.
- Clobbering = 0x0002 << 5, // 0000010, Produces unspecified values.
- PhiRef = 0x0004 << 5, // 0000100, Member of PhiNode.
- Preserving = 0x0008 << 5, // 0001000, Def can keep original bits.
- Fixed = 0x0010 << 5, // 0010000, Fixed register.
- Undef = 0x0020 << 5, // 0100000, Has no pre-existing value.
- Dead = 0x0040 << 5, // 1000000, Does not define a value.
- };
-
- static uint16_t type(uint16_t T) { return T & TypeMask; }
- static uint16_t kind(uint16_t T) { return T & KindMask; }
- static uint16_t flags(uint16_t T) { return T & FlagMask; }
-
- static uint16_t set_type(uint16_t A, uint16_t T) {
- return (A & ~TypeMask) | T;
- }
-
- static uint16_t set_kind(uint16_t A, uint16_t K) {
- return (A & ~KindMask) | K;
- }
-
- static uint16_t set_flags(uint16_t A, uint16_t F) {
- return (A & ~FlagMask) | F;
- }
-
- // Test if A contains B.
- static bool contains(uint16_t A, uint16_t B) {
- if (type(A) != Code)
- return false;
- uint16_t KB = kind(B);
- switch (kind(A)) {
- case Func:
- return KB == Block;
- case Block:
- return KB == Phi || KB == Stmt;
- case Phi:
- case Stmt:
- return type(B) == Ref;
- }
- return false;
- }
- };
-
- struct BuildOptions {
- enum : unsigned {
- None = 0x00,
- KeepDeadPhis = 0x01, // Do not remove dead phis during build.
- };
- };
-
- template <typename T> struct NodeAddr {
- NodeAddr() = default;
- NodeAddr(T A, NodeId I) : Addr(A), Id(I) {}
-
- // Type cast (casting constructor). The reason for having this class
- // instead of std::pair.
- template <typename S> NodeAddr(const NodeAddr<S> &NA)
- : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
-
- bool operator== (const NodeAddr<T> &NA) const {
- assert((Addr == NA.Addr) == (Id == NA.Id));
- return Addr == NA.Addr;
- }
- bool operator!= (const NodeAddr<T> &NA) const {
- return !operator==(NA);
- }
-
- T Addr = nullptr;
- NodeId Id = 0;
- };
-
- struct NodeBase;
-
- // Fast memory allocation and translation between node id and node address.
- // This is really the same idea as the one underlying the "bump pointer
- // allocator", the difference being in the translation. A node id is
- // composed of two components: the index of the block in which it was
- // allocated, and the index within the block. With the default settings,
- // where the number of nodes per block is 4096, the node id (minus 1) is:
- //
- // bit position: 11 0
- // +----------------------------+--------------+
- // | Index of the block |Index in block|
- // +----------------------------+--------------+
- //
- // The actual node id is the above plus 1, to avoid creating a node id of 0.
- //
- // This method significantly improved the build time, compared to using maps
- // (std::unordered_map or DenseMap) to translate between pointers and ids.
- struct NodeAllocator {
- // Amount of storage for a single node.
- enum { NodeMemSize = 32 };
-
- NodeAllocator(uint32_t NPB = 4096)
- : NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)),
- IndexMask((1 << BitsPerIndex)-1) {
- assert(isPowerOf2_32(NPB));
- }
-
- NodeBase *ptr(NodeId N) const {
- uint32_t N1 = N-1;
- uint32_t BlockN = N1 >> BitsPerIndex;
- uint32_t Offset = (N1 & IndexMask) * NodeMemSize;
- return reinterpret_cast<NodeBase*>(Blocks[BlockN]+Offset);
- }
-
- NodeId id(const NodeBase *P) const;
- NodeAddr<NodeBase*> New();
- void clear();
-
- private:
- void startNewBlock();
- bool needNewBlock();
-
- uint32_t makeId(uint32_t Block, uint32_t Index) const {
- // Add 1 to the id, to avoid the id of 0, which is treated as "null".
- return ((Block << BitsPerIndex) | Index) + 1;
- }
-
- const uint32_t NodesPerBlock;
- const uint32_t BitsPerIndex;
- const uint32_t IndexMask;
- char *ActiveEnd = nullptr;
- std::vector<char*> Blocks;
- using AllocatorTy = BumpPtrAllocatorImpl<MallocAllocator, 65536>;
- AllocatorTy MemPool;
- };
-
- using RegisterSet = std::set<RegisterRef>;
-
- struct TargetOperandInfo {
- TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {}
- virtual ~TargetOperandInfo() = default;
-
- virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const;
- virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const;
- virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const;
-
- const TargetInstrInfo &TII;
- };
-
- // Packed register reference. Only used for storage.
- struct PackedRegisterRef {
- RegisterId Reg;
- uint32_t MaskId;
- };
-
- struct LaneMaskIndex : private IndexedSet<LaneBitmask> {
- LaneMaskIndex() = default;
-
- LaneBitmask getLaneMaskForIndex(uint32_t K) const {
- return K == 0 ? LaneBitmask::getAll() : get(K);
- }
-
- uint32_t getIndexForLaneMask(LaneBitmask LM) {
- assert(LM.any());
- return LM.all() ? 0 : insert(LM);
- }
-
- uint32_t getIndexForLaneMask(LaneBitmask LM) const {
- assert(LM.any());
- return LM.all() ? 0 : find(LM);
- }
- };
-
- struct NodeBase {
- public:
- // Make sure this is a POD.
- NodeBase() = default;
-
- uint16_t getType() const { return NodeAttrs::type(Attrs); }
- uint16_t getKind() const { return NodeAttrs::kind(Attrs); }
- uint16_t getFlags() const { return NodeAttrs::flags(Attrs); }
- NodeId getNext() const { return Next; }
-
- uint16_t getAttrs() const { return Attrs; }
- void setAttrs(uint16_t A) { Attrs = A; }
- void setFlags(uint16_t F) { setAttrs(NodeAttrs::set_flags(getAttrs(), F)); }
-
- // Insert node NA after "this" in the circular chain.
- void append(NodeAddr<NodeBase*> NA);
-
- // Initialize all members to 0.
- void init() { memset(this, 0, sizeof *this); }
-
- void setNext(NodeId N) { Next = N; }
-
- protected:
- uint16_t Attrs;
- uint16_t Reserved;
- NodeId Next; // Id of the next node in the circular chain.
- // Definitions of nested types. Using anonymous nested structs would make
- // this class definition clearer, but unnamed structs are not a part of
- // the standard.
- struct Def_struct {
- NodeId DD, DU; // Ids of the first reached def and use.
- };
- struct PhiU_struct {
- NodeId PredB; // Id of the predecessor block for a phi use.
- };
- struct Code_struct {
- void *CP; // Pointer to the actual code.
- NodeId FirstM, LastM; // Id of the first member and last.
- };
- struct Ref_struct {
- NodeId RD, Sib; // Ids of the reaching def and the sibling.
- union {
- Def_struct Def;
- PhiU_struct PhiU;
- };
- union {
- MachineOperand *Op; // Non-phi refs point to a machine operand.
- PackedRegisterRef PR; // Phi refs store register info directly.
- };
- };
-
- // The actual payload.
- union {
- Ref_struct Ref;
- Code_struct Code;
- };
- };
- // The allocator allocates chunks of 32 bytes for each node. The fact that
- // each node takes 32 bytes in memory is used for fast translation between
- // the node id and the node address.
- static_assert(sizeof(NodeBase) <= NodeAllocator::NodeMemSize,
- "NodeBase must be at most NodeAllocator::NodeMemSize bytes");
-
- using NodeList = SmallVector<NodeAddr<NodeBase *>, 4>;
- using NodeSet = std::set<NodeId>;
-
- struct RefNode : public NodeBase {
- RefNode() = default;
-
- RegisterRef getRegRef(const DataFlowGraph &G) const;
-
- MachineOperand &getOp() {
- assert(!(getFlags() & NodeAttrs::PhiRef));
- return *Ref.Op;
- }
-
- void setRegRef(RegisterRef RR, DataFlowGraph &G);
- void setRegRef(MachineOperand *Op, DataFlowGraph &G);
-
- NodeId getReachingDef() const {
- return Ref.RD;
- }
- void setReachingDef(NodeId RD) {
- Ref.RD = RD;
- }
-
- NodeId getSibling() const {
- return Ref.Sib;
- }
- void setSibling(NodeId Sib) {
- Ref.Sib = Sib;
- }
-
- bool isUse() const {
- assert(getType() == NodeAttrs::Ref);
- return getKind() == NodeAttrs::Use;
- }
-
- bool isDef() const {
- assert(getType() == NodeAttrs::Ref);
- return getKind() == NodeAttrs::Def;
- }
-
- template <typename Predicate>
- NodeAddr<RefNode*> getNextRef(RegisterRef RR, Predicate P, bool NextOnly,
- const DataFlowGraph &G);
- NodeAddr<NodeBase*> getOwner(const DataFlowGraph &G);
- };
-
- struct DefNode : public RefNode {
- NodeId getReachedDef() const {
- return Ref.Def.DD;
- }
- void setReachedDef(NodeId D) {
- Ref.Def.DD = D;
- }
- NodeId getReachedUse() const {
- return Ref.Def.DU;
- }
- void setReachedUse(NodeId U) {
- Ref.Def.DU = U;
- }
-
- void linkToDef(NodeId Self, NodeAddr<DefNode*> DA);
- };
-
- struct UseNode : public RefNode {
- void linkToDef(NodeId Self, NodeAddr<DefNode*> DA);
- };
-
- struct PhiUseNode : public UseNode {
- NodeId getPredecessor() const {
- assert(getFlags() & NodeAttrs::PhiRef);
- return Ref.PhiU.PredB;
- }
- void setPredecessor(NodeId B) {
- assert(getFlags() & NodeAttrs::PhiRef);
- Ref.PhiU.PredB = B;
- }
- };
-
- struct CodeNode : public NodeBase {
- template <typename T> T getCode() const {
- return static_cast<T>(Code.CP);
- }
- void setCode(void *C) {
- Code.CP = C;
- }
-
- NodeAddr<NodeBase*> getFirstMember(const DataFlowGraph &G) const;
- NodeAddr<NodeBase*> getLastMember(const DataFlowGraph &G) const;
- void addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G);
- void addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA,
- const DataFlowGraph &G);
- void removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G);
-
- NodeList members(const DataFlowGraph &G) const;
- template <typename Predicate>
- NodeList members_if(Predicate P, const DataFlowGraph &G) const;
- };
-
- struct InstrNode : public CodeNode {
- NodeAddr<NodeBase*> getOwner(const DataFlowGraph &G);
- };
-
- struct PhiNode : public InstrNode {
- MachineInstr *getCode() const {
- return nullptr;
- }
- };
-
- struct StmtNode : public InstrNode {
- MachineInstr *getCode() const {
- return CodeNode::getCode<MachineInstr*>();
- }
- };
-
- struct BlockNode : public CodeNode {
- MachineBasicBlock *getCode() const {
- return CodeNode::getCode<MachineBasicBlock*>();
- }
-
- void addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G);
- };
-
- struct FuncNode : public CodeNode {
- MachineFunction *getCode() const {
- return CodeNode::getCode<MachineFunction*>();
- }
-
- NodeAddr<BlockNode*> findBlock(const MachineBasicBlock *BB,
- const DataFlowGraph &G) const;
- NodeAddr<BlockNode*> getEntryBlock(const DataFlowGraph &G);
- };
-
- struct DataFlowGraph {
- DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
- const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
- const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi);
-
- NodeBase *ptr(NodeId N) const;
- template <typename T> T ptr(NodeId N) const {
- return static_cast<T>(ptr(N));
- }
-
- NodeId id(const NodeBase *P) const;
-
- template <typename T> NodeAddr<T> addr(NodeId N) const {
- return { ptr<T>(N), N };
- }
-
- NodeAddr<FuncNode*> getFunc() const { return Func; }
- MachineFunction &getMF() const { return MF; }
- const TargetInstrInfo &getTII() const { return TII; }
- const TargetRegisterInfo &getTRI() const { return TRI; }
- const PhysicalRegisterInfo &getPRI() const { return PRI; }
- const MachineDominatorTree &getDT() const { return MDT; }
- const MachineDominanceFrontier &getDF() const { return MDF; }
- const RegisterAggr &getLiveIns() const { return LiveIns; }
-
- struct DefStack {
- DefStack() = default;
-
- bool empty() const { return Stack.empty() || top() == bottom(); }
-
- private:
- using value_type = NodeAddr<DefNode *>;
- struct Iterator {
- using value_type = DefStack::value_type;
-
- Iterator &up() { Pos = DS.nextUp(Pos); return *this; }
- Iterator &down() { Pos = DS.nextDown(Pos); return *this; }
-
- value_type operator*() const {
- assert(Pos >= 1);
- return DS.Stack[Pos-1];
- }
- const value_type *operator->() const {
- assert(Pos >= 1);
- return &DS.Stack[Pos-1];
- }
- bool operator==(const Iterator &It) const { return Pos == It.Pos; }
- bool operator!=(const Iterator &It) const { return Pos != It.Pos; }
-
- private:
- friend struct DefStack;
-
- Iterator(const DefStack &S, bool Top);
-
- // Pos-1 is the index in the StorageType object that corresponds to
- // the top of the DefStack.
- const DefStack &DS;
- unsigned Pos;
- };
-
- public:
- using iterator = Iterator;
-
- iterator top() const { return Iterator(*this, true); }
- iterator bottom() const { return Iterator(*this, false); }
- unsigned size() const;
-
- void push(NodeAddr<DefNode*> DA) { Stack.push_back(DA); }
- void pop();
- void start_block(NodeId N);
- void clear_block(NodeId N);
-
- private:
- friend struct Iterator;
-
- using StorageType = std::vector<value_type>;
-
- bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const {
- return (P.Addr == nullptr) && (N == 0 || P.Id == N);
- }
-
- unsigned nextUp(unsigned P) const;
- unsigned nextDown(unsigned P) const;
-
- StorageType Stack;
- };
-
- // Make this std::unordered_map for speed of accessing elements.
- // Map: Register (physical or virtual) -> DefStack
- using DefStackMap = std::unordered_map<RegisterId, DefStack>;
-
- void build(unsigned Options = BuildOptions::None);
- void pushAllDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM);
- void markBlock(NodeId B, DefStackMap &DefM);
- void releaseBlock(NodeId B, DefStackMap &DefM);
-
- PackedRegisterRef pack(RegisterRef RR) {
- return { RR.Reg, LMI.getIndexForLaneMask(RR.Mask) };
- }
- PackedRegisterRef pack(RegisterRef RR) const {
- return { RR.Reg, LMI.getIndexForLaneMask(RR.Mask) };
- }
- RegisterRef unpack(PackedRegisterRef PR) const {
- return RegisterRef(PR.Reg, LMI.getLaneMaskForIndex(PR.MaskId));
- }
-
- RegisterRef makeRegRef(unsigned Reg, unsigned Sub) const;
- RegisterRef makeRegRef(const MachineOperand &Op) const;
- RegisterRef restrictRef(RegisterRef AR, RegisterRef BR) const;
-
- NodeAddr<RefNode*> getNextRelated(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const;
- NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA, bool Create);
- NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const;
- NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA, bool Create);
- NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const;
-
- NodeList getRelatedRefs(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const;
-
- NodeAddr<BlockNode*> findBlock(MachineBasicBlock *BB) const {
- return BlockNodes.at(BB);
- }
-
- void unlinkUse(NodeAddr<UseNode*> UA, bool RemoveFromOwner) {
- unlinkUseDF(UA);
- if (RemoveFromOwner)
- removeFromOwner(UA);
- }
-
- void unlinkDef(NodeAddr<DefNode*> DA, bool RemoveFromOwner) {
- unlinkDefDF(DA);
- if (RemoveFromOwner)
- removeFromOwner(DA);
- }
-
- // Some useful filters.
- template <uint16_t Kind>
- static bool IsRef(const NodeAddr<NodeBase*> BA) {
- return BA.Addr->getType() == NodeAttrs::Ref &&
- BA.Addr->getKind() == Kind;
- }
-
- template <uint16_t Kind>
- static bool IsCode(const NodeAddr<NodeBase*> BA) {
- return BA.Addr->getType() == NodeAttrs::Code &&
- BA.Addr->getKind() == Kind;
- }
-
- static bool IsDef(const NodeAddr<NodeBase*> BA) {
- return BA.Addr->getType() == NodeAttrs::Ref &&
- BA.Addr->getKind() == NodeAttrs::Def;
- }
-
- static bool IsUse(const NodeAddr<NodeBase*> BA) {
- return BA.Addr->getType() == NodeAttrs::Ref &&
- BA.Addr->getKind() == NodeAttrs::Use;
- }
-
- static bool IsPhi(const NodeAddr<NodeBase*> BA) {
- return BA.Addr->getType() == NodeAttrs::Code &&
- BA.Addr->getKind() == NodeAttrs::Phi;
- }
-
- static bool IsPreservingDef(const NodeAddr<DefNode*> DA) {
- uint16_t Flags = DA.Addr->getFlags();
- return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef);
- }
-
- private:
- void reset();
-
- RegisterSet getLandingPadLiveIns() const;
-
- NodeAddr<NodeBase*> newNode(uint16_t Attrs);
- NodeAddr<NodeBase*> cloneNode(const NodeAddr<NodeBase*> B);
- NodeAddr<UseNode*> newUse(NodeAddr<InstrNode*> Owner,
- MachineOperand &Op, uint16_t Flags = NodeAttrs::None);
- NodeAddr<PhiUseNode*> newPhiUse(NodeAddr<PhiNode*> Owner,
- RegisterRef RR, NodeAddr<BlockNode*> PredB,
- uint16_t Flags = NodeAttrs::PhiRef);
- NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner,
- MachineOperand &Op, uint16_t Flags = NodeAttrs::None);
- NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner,
- RegisterRef RR, uint16_t Flags = NodeAttrs::PhiRef);
- NodeAddr<PhiNode*> newPhi(NodeAddr<BlockNode*> Owner);
- NodeAddr<StmtNode*> newStmt(NodeAddr<BlockNode*> Owner,
- MachineInstr *MI);
- NodeAddr<BlockNode*> newBlock(NodeAddr<FuncNode*> Owner,
- MachineBasicBlock *BB);
- NodeAddr<FuncNode*> newFunc(MachineFunction *MF);
-
- template <typename Predicate>
- std::pair<NodeAddr<RefNode*>,NodeAddr<RefNode*>>
- locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
- Predicate P) const;
-
- using BlockRefsMap = std::map<NodeId, RegisterSet>;
-
- void buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In);
- void recordDefsForDF(BlockRefsMap &PhiM, NodeAddr<BlockNode*> BA);
- void buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs,
- NodeAddr<BlockNode*> BA);
- void removeUnusedPhis();
-
- void pushClobbers(NodeAddr<InstrNode*> IA, DefStackMap &DM);
- void pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM);
- template <typename T> void linkRefUp(NodeAddr<InstrNode*> IA,
- NodeAddr<T> TA, DefStack &DS);
- template <typename Predicate> void linkStmtRefs(DefStackMap &DefM,
- NodeAddr<StmtNode*> SA, Predicate P);
- void linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA);
-
- void unlinkUseDF(NodeAddr<UseNode*> UA);
- void unlinkDefDF(NodeAddr<DefNode*> DA);
-
- void removeFromOwner(NodeAddr<RefNode*> RA) {
- NodeAddr<InstrNode*> IA = RA.Addr->getOwner(*this);
- IA.Addr->removeMember(RA, *this);
- }
-
- MachineFunction &MF;
- const TargetInstrInfo &TII;
- const TargetRegisterInfo &TRI;
- const PhysicalRegisterInfo PRI;
- const MachineDominatorTree &MDT;
- const MachineDominanceFrontier &MDF;
- const TargetOperandInfo &TOI;
-
- RegisterAggr LiveIns;
- NodeAddr<FuncNode*> Func;
- NodeAllocator Memory;
- // Local map: MachineBasicBlock -> NodeAddr<BlockNode*>
- std::map<MachineBasicBlock*,NodeAddr<BlockNode*>> BlockNodes;
- // Lane mask map.
- LaneMaskIndex LMI;
- }; // struct DataFlowGraph
-
- template <typename Predicate>
- NodeAddr<RefNode*> RefNode::getNextRef(RegisterRef RR, Predicate P,
- bool NextOnly, const DataFlowGraph &G) {
- // Get the "Next" reference in the circular list that references RR and
- // satisfies predicate "Pred".
- auto NA = G.addr<NodeBase*>(getNext());
-
- while (NA.Addr != this) {
- if (NA.Addr->getType() == NodeAttrs::Ref) {
- NodeAddr<RefNode*> RA = NA;
- if (RA.Addr->getRegRef(G) == RR && P(NA))
- return NA;
- if (NextOnly)
- break;
- NA = G.addr<NodeBase*>(NA.Addr->getNext());
- } else {
- // We've hit the beginning of the chain.
- assert(NA.Addr->getType() == NodeAttrs::Code);
- NodeAddr<CodeNode*> CA = NA;
- NA = CA.Addr->getFirstMember(G);
- }
- }
- // Return the equivalent of "nullptr" if such a node was not found.
- return NodeAddr<RefNode*>();
- }
-
- template <typename Predicate>
- NodeList CodeNode::members_if(Predicate P, const DataFlowGraph &G) const {
- NodeList MM;
- auto M = getFirstMember(G);
- if (M.Id == 0)
- return MM;
-
- while (M.Addr != this) {
- if (P(M))
- MM.push_back(M);
- M = G.addr<NodeBase*>(M.Addr->getNext());
- }
- return MM;
- }
-
- template <typename T>
- struct Print {
- Print(const T &x, const DataFlowGraph &g) : Obj(x), G(g) {}
-
- const T &Obj;
- const DataFlowGraph &G;
- };
-
- template <typename T>
- struct PrintNode : Print<NodeAddr<T>> {
- PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g)
- : Print<NodeAddr<T>>(x, g) {}
- };
-
- raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterRef> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeId> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<DefNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<UseNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS,
- const Print<NodeAddr<PhiUseNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<RefNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeList> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeSet> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<PhiNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS,
- const Print<NodeAddr<StmtNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS,
- const Print<NodeAddr<InstrNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS,
- const Print<NodeAddr<BlockNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS,
- const Print<NodeAddr<FuncNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterSet> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterAggr> &P);
- raw_ostream &operator<<(raw_ostream &OS,
- const Print<DataFlowGraph::DefStack> &P);
-
-} // end namespace rdf
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
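
The RDFGraph.h removed above is part of moving the RDF infrastructure out of the Hexagon target; the .cpp hunks earlier in this diff now include llvm/CodeGen/RDFGraph.h instead. Its NodeAllocator comment documents how a 32-bit NodeId packs a block index and an index within a 4096-node block, biased by one so that id 0 stays reserved as "null". A tiny standalone sketch of that packing, independent of the real allocator:

    #include <cstdint>
    #include <utility>

    // Id layout documented in RDFGraph.h: (id - 1) holds the in-block index in
    // the low bits and the block index above them; the +1 keeps 0 free as "null".
    constexpr uint32_t BitsPerIndex = 12;                // log2(4096 nodes/block)
    constexpr uint32_t IndexMask = (1u << BitsPerIndex) - 1;

    constexpr uint32_t makeId(uint32_t Block, uint32_t Index) {
      return ((Block << BitsPerIndex) | Index) + 1;
    }

    constexpr std::pair<uint32_t, uint32_t> splitId(uint32_t Id) {
      uint32_t N1 = Id - 1;
      return {N1 >> BitsPerIndex, N1 & IndexMask};       // {block, index in block}
    }

    static_assert(makeId(0, 0) == 1, "the first node gets id 1, never 0");
    static_assert(splitId(makeId(3, 70)).first == 3, "block index round-trips");
    static_assert(splitId(makeId(3, 70)).second == 70, "in-block index round-trips");
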
diff --git a/llvm/lib/Target/Hexagon/RDFLiveness.h b/llvm/lib/Target/Hexagon/RDFLiveness.h
deleted file mode 100644
index ea4890271726..000000000000
--- a/llvm/lib/Target/Hexagon/RDFLiveness.h
+++ /dev/null
@@ -1,151 +0,0 @@
-//===- RDFLiveness.h --------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Recalculate the liveness information given a data flow graph.
-// This includes block live-ins and kill flags.
-
-#ifndef LLVM_LIB_TARGET_HEXAGON_RDFLIVENESS_H
-#define LLVM_LIB_TARGET_HEXAGON_RDFLIVENESS_H
-
-#include "RDFGraph.h"
-#include "RDFRegisters.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/MC/LaneBitmask.h"
-#include <map>
-#include <set>
-#include <utility>
-
-namespace llvm {
-
-class MachineBasicBlock;
-class MachineDominanceFrontier;
-class MachineDominatorTree;
-class MachineRegisterInfo;
-class TargetRegisterInfo;
-
-namespace rdf {
-
- struct Liveness {
- public:
- // This is really a std::map, except that it provides a non-trivial
- // default constructor to the element accessed via [].
- struct LiveMapType {
- LiveMapType(const PhysicalRegisterInfo &pri) : Empty(pri) {}
-
- RegisterAggr &operator[] (MachineBasicBlock *B) {
- return Map.emplace(B, Empty).first->second;
- }
-
- private:
- RegisterAggr Empty;
- std::map<MachineBasicBlock*,RegisterAggr> Map;
- };
-
- using NodeRef = std::pair<NodeId, LaneBitmask>;
- using NodeRefSet = std::set<NodeRef>;
- // RegisterId in RefMap must be normalized.
- using RefMap = std::map<RegisterId, NodeRefSet>;
-
- Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g)
- : DFG(g), TRI(g.getTRI()), PRI(g.getPRI()), MDT(g.getDT()),
- MDF(g.getDF()), LiveMap(g.getPRI()), Empty(), NoRegs(g.getPRI()) {}
-
- NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
- bool TopShadows, bool FullChain, const RegisterAggr &DefRRs);
-
- NodeList getAllReachingDefs(NodeAddr<RefNode*> RefA) {
- return getAllReachingDefs(RefA.Addr->getRegRef(DFG), RefA, false,
- false, NoRegs);
- }
-
- NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA) {
- return getAllReachingDefs(RefRR, RefA, false, false, NoRegs);
- }
-
- NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA,
- const RegisterAggr &DefRRs);
-
- NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA) {
- return getAllReachedUses(RefRR, DefA, NoRegs);
- }
-
- std::pair<NodeSet,bool> getAllReachingDefsRec(RegisterRef RefRR,
- NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs);
-
- NodeAddr<RefNode*> getNearestAliasedRef(RegisterRef RefRR,
- NodeAddr<InstrNode*> IA);
-
- LiveMapType &getLiveMap() { return LiveMap; }
- const LiveMapType &getLiveMap() const { return LiveMap; }
-
- const RefMap &getRealUses(NodeId P) const {
- auto F = RealUseMap.find(P);
- return F == RealUseMap.end() ? Empty : F->second;
- }
-
- void computePhiInfo();
- void computeLiveIns();
- void resetLiveIns();
- void resetKills();
- void resetKills(MachineBasicBlock *B);
-
- void trace(bool T) { Trace = T; }
-
- private:
- const DataFlowGraph &DFG;
- const TargetRegisterInfo &TRI;
- const PhysicalRegisterInfo &PRI;
- const MachineDominatorTree &MDT;
- const MachineDominanceFrontier &MDF;
- LiveMapType LiveMap;
- const RefMap Empty;
- const RegisterAggr NoRegs;
- bool Trace = false;
-
- // Cache of mapping from node ids (for RefNodes) to the containing
- // basic blocks. Not computing it each time for each node reduces
- // the liveness calculation time by a large fraction.
- using NodeBlockMap = DenseMap<NodeId, MachineBasicBlock *>;
- NodeBlockMap NBMap;
-
- // Phi information:
- //
- // RealUseMap
- // map: NodeId -> (map: RegisterId -> NodeRefSet)
- // phi id -> (map: register -> set of reached non-phi uses)
- std::map<NodeId, RefMap> RealUseMap;
-
- // Inverse iterated dominance frontier.
- std::map<MachineBasicBlock*,std::set<MachineBasicBlock*>> IIDF;
-
- // Live on entry.
- std::map<MachineBasicBlock*,RefMap> PhiLON;
-
- // Phi uses are considered to be located at the end of the block that
- // they are associated with. The reaching def of a phi use dominates the
- // block that the use corresponds to, but not the block that contains
- // the phi itself. To include these uses in the liveness propagation (up
- // the dominator tree), create a map: block -> set of uses live on exit.
- std::map<MachineBasicBlock*,RefMap> PhiLOX;
-
- MachineBasicBlock *getBlockWithRef(NodeId RN) const;
- void traverse(MachineBasicBlock *B, RefMap &LiveIn);
- void emptify(RefMap &M);
-
- std::pair<NodeSet,bool> getAllReachingDefsRecImpl(RegisterRef RefRR,
- NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs,
- unsigned Nest, unsigned MaxNest);
- };
-
- raw_ostream &operator<<(raw_ostream &OS, const Print<Liveness::RefMap> &P);
-
-} // end namespace rdf
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_HEXAGON_RDFLIVENESS_H
diff --git a/llvm/lib/Target/Hexagon/RDFRegisters.h b/llvm/lib/Target/Hexagon/RDFRegisters.h
deleted file mode 100644
index 4afaf80e4659..000000000000
--- a/llvm/lib/Target/Hexagon/RDFRegisters.h
+++ /dev/null
@@ -1,240 +0,0 @@
-//===- RDFRegisters.h -------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H
-#define LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H
-
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/MC/LaneBitmask.h"
-#include <cassert>
-#include <cstdint>
-#include <map>
-#include <set>
-#include <vector>
-
-namespace llvm {
-
-class MachineFunction;
-class raw_ostream;
-
-namespace rdf {
-
- using RegisterId = uint32_t;
-
- // Template class for a map translating uint32_t into arbitrary types.
- // The map will act like an indexed set: upon insertion of a new object,
- // it will automatically assign a new index to it. Index of 0 is treated
- // as invalid and is never allocated.
- template <typename T, unsigned N = 32>
- struct IndexedSet {
- IndexedSet() { Map.reserve(N); }
-
- T get(uint32_t Idx) const {
- // Index Idx corresponds to Map[Idx-1].
- assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size());
- return Map[Idx-1];
- }
-
- uint32_t insert(T Val) {
- // Linear search.
- auto F = llvm::find(Map, Val);
- if (F != Map.end())
- return F - Map.begin() + 1;
- Map.push_back(Val);
- return Map.size(); // Return actual_index + 1.
- }
-
- uint32_t find(T Val) const {
- auto F = llvm::find(Map, Val);
- assert(F != Map.end());
- return F - Map.begin() + 1;
- }
-
- uint32_t size() const { return Map.size(); }
-
- using const_iterator = typename std::vector<T>::const_iterator;
-
- const_iterator begin() const { return Map.begin(); }
- const_iterator end() const { return Map.end(); }
-
- private:
- std::vector<T> Map;
- };
-
- struct RegisterRef {
- RegisterId Reg = 0;
- LaneBitmask Mask = LaneBitmask::getNone();
-
- RegisterRef() = default;
- explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll())
- : Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {}
-
- operator bool() const {
- return Reg != 0 && Mask.any();
- }
-
- bool operator== (const RegisterRef &RR) const {
- return Reg == RR.Reg && Mask == RR.Mask;
- }
-
- bool operator!= (const RegisterRef &RR) const {
- return !operator==(RR);
- }
-
- bool operator< (const RegisterRef &RR) const {
- return Reg < RR.Reg || (Reg == RR.Reg && Mask < RR.Mask);
- }
- };
-
-
- struct PhysicalRegisterInfo {
- PhysicalRegisterInfo(const TargetRegisterInfo &tri,
- const MachineFunction &mf);
-
- static bool isRegMaskId(RegisterId R) {
- return Register::isStackSlot(R);
- }
-
- RegisterId getRegMaskId(const uint32_t *RM) const {
- return Register::index2StackSlot(RegMasks.find(RM));
- }
-
- const uint32_t *getRegMaskBits(RegisterId R) const {
- return RegMasks.get(Register::stackSlot2Index(R));
- }
-
- RegisterRef normalize(RegisterRef RR) const;
-
- bool alias(RegisterRef RA, RegisterRef RB) const {
- if (!isRegMaskId(RA.Reg))
- return !isRegMaskId(RB.Reg) ? aliasRR(RA, RB) : aliasRM(RA, RB);
- return !isRegMaskId(RB.Reg) ? aliasRM(RB, RA) : aliasMM(RA, RB);
- }
-
- std::set<RegisterId> getAliasSet(RegisterId Reg) const;
-
- RegisterRef getRefForUnit(uint32_t U) const {
- return RegisterRef(UnitInfos[U].Reg, UnitInfos[U].Mask);
- }
-
- const BitVector &getMaskUnits(RegisterId MaskId) const {
- return MaskInfos[Register::stackSlot2Index(MaskId)].Units;
- }
-
- RegisterRef mapTo(RegisterRef RR, unsigned R) const;
- const TargetRegisterInfo &getTRI() const { return TRI; }
-
- private:
- struct RegInfo {
- const TargetRegisterClass *RegClass = nullptr;
- };
- struct UnitInfo {
- RegisterId Reg = 0;
- LaneBitmask Mask;
- };
- struct MaskInfo {
- BitVector Units;
- };
-
- const TargetRegisterInfo &TRI;
- IndexedSet<const uint32_t*> RegMasks;
- std::vector<RegInfo> RegInfos;
- std::vector<UnitInfo> UnitInfos;
- std::vector<MaskInfo> MaskInfos;
-
- bool aliasRR(RegisterRef RA, RegisterRef RB) const;
- bool aliasRM(RegisterRef RR, RegisterRef RM) const;
- bool aliasMM(RegisterRef RM, RegisterRef RN) const;
- };
-
- struct RegisterAggr {
- RegisterAggr(const PhysicalRegisterInfo &pri)
- : Units(pri.getTRI().getNumRegUnits()), PRI(pri) {}
- RegisterAggr(const RegisterAggr &RG) = default;
-
- bool empty() const { return Units.none(); }
- bool hasAliasOf(RegisterRef RR) const;
- bool hasCoverOf(RegisterRef RR) const;
-
- static bool isCoverOf(RegisterRef RA, RegisterRef RB,
- const PhysicalRegisterInfo &PRI) {
- return RegisterAggr(PRI).insert(RA).hasCoverOf(RB);
- }
-
- RegisterAggr &insert(RegisterRef RR);
- RegisterAggr &insert(const RegisterAggr &RG);
- RegisterAggr &intersect(RegisterRef RR);
- RegisterAggr &intersect(const RegisterAggr &RG);
- RegisterAggr &clear(RegisterRef RR);
- RegisterAggr &clear(const RegisterAggr &RG);
-
- RegisterRef intersectWith(RegisterRef RR) const;
- RegisterRef clearIn(RegisterRef RR) const;
- RegisterRef makeRegRef() const;
-
- void print(raw_ostream &OS) const;
-
- struct rr_iterator {
- using MapType = std::map<RegisterId, LaneBitmask>;
-
- private:
- MapType Masks;
- MapType::iterator Pos;
- unsigned Index;
- const RegisterAggr *Owner;
-
- public:
- rr_iterator(const RegisterAggr &RG, bool End);
-
- RegisterRef operator*() const {
- return RegisterRef(Pos->first, Pos->second);
- }
-
- rr_iterator &operator++() {
- ++Pos;
- ++Index;
- return *this;
- }
-
- bool operator==(const rr_iterator &I) const {
- assert(Owner == I.Owner);
- (void)Owner;
- return Index == I.Index;
- }
-
- bool operator!=(const rr_iterator &I) const {
- return !(*this == I);
- }
- };
-
- rr_iterator rr_begin() const {
- return rr_iterator(*this, false);
- }
- rr_iterator rr_end() const {
- return rr_iterator(*this, true);
- }
-
- private:
- BitVector Units;
- const PhysicalRegisterInfo &PRI;
- };
-
- // Optionally print the lane mask, if it is not ~0.
- struct PrintLaneMaskOpt {
- PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {}
- LaneBitmask Mask;
- };
- raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P);
-
-} // end namespace rdf
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H
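The IndexedSet, RegisterRef, PhysicalRegisterInfo and RegisterAggr declarations deleted above leave the Hexagon target; the header content now lives under llvm/CodeGen (see the new llvm/CodeGen/RDFGraph.h and RDFLiveness.h includes in X86LoadValueInjectionLoadHardening.cpp further down). A rough sketch, not part of the patch, of the 1-based indexing contract that IndexedSet's comments describe, assuming the class is reachable through the relocated llvm/CodeGen/RDFRegisters.h header:

// Sketch only: index 0 is reserved as the invalid value, so the first
// inserted element is stored at Map[0] but handed out as index 1.
#include "llvm/CodeGen/RDFRegisters.h"
#include <cassert>
#include <cstdint>

static void indexedSetDemo() {
  llvm::rdf::IndexedSet<unsigned> Set;
  uint32_t A = Set.insert(100); // first value -> index 1
  uint32_t B = Set.insert(200); // second value -> index 2
  assert(Set.insert(100) == A); // inserting an existing value returns its index
  assert(Set.get(B) == 200);    // get() maps index 2 back to Map[1]
  assert(Set.size() == 2);
  (void)A;
  (void)B;
}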
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 9b3d13989ee2..d7e3519d5539 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -373,6 +373,7 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
VMSUMSHS,
VMSUMUBM,
VMSUMUHM,
+ VMSUMUDM,
VMSUMUHS,
VMULESB,
VMULESH,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 00f59bba52e8..ca1649fae258 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -167,6 +167,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
}
+ if (Subtarget.isISA3_0()) {
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
+ setTruncStoreAction(MVT::f64, MVT::f16, Legal);
+ setTruncStoreAction(MVT::f32, MVT::f16, Legal);
+ } else {
+ // No extending loads from f16 or HW conversions back and forth.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ }
+
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// PowerPC has pre-inc load and store's.
@@ -677,6 +694,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
+ setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
if (!Subtarget.hasP8Vector()) {
setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
@@ -10361,6 +10379,7 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::FP_EXTEND &&
"Should only be called for ISD::FP_EXTEND");
+ // FIXME: handle extends from half precision float vectors on P9.
// We only want to custom lower an extend from v2f32 to v2f64.
if (Op.getValueType() != MVT::v2f64 ||
Op.getOperand(0).getValueType() != MVT::v2f32)
@@ -10574,6 +10593,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::BITCAST:
// Don't handle bitcast here.
return;
+ case ISD::FP_EXTEND:
+ SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
+ if (Lowered)
+ Results.push_back(Lowered);
+ return;
}
}
@@ -15255,7 +15279,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
if (!VT.isSimple())
return false;
- if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess())
+ if (VT.isFloatingPoint() && !VT.isVector() &&
+ !Subtarget.allowsUnalignedFPAccess())
return false;
if (VT.getSimpleVT().isVector()) {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index e0c381827b87..2e1485373d19 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -637,7 +637,7 @@ namespace llvm {
/// then the VPERM for the shuffle. All in all a very slow sequence.
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
const override {
- if (VT.getScalarSizeInBits() % 8 == 0)
+ if (VT.getVectorNumElements() != 1 && VT.getScalarSizeInBits() % 8 == 0)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index f94816a35f79..6e8635f2413c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1342,6 +1342,10 @@ def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">;
let Predicates = [HasP9Altivec] in {
+// Vector Multiply-Sum
+def VMSUMUDM : VA1a_Int_Ty3<35, "vmsumudm", int_ppc_altivec_vmsumudm,
+ v1i128, v2i64, v1i128>;
+
// i8 element comparisons.
def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>;
def VCMPNEB_rec : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 30906a32b00c..d7925befcd37 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2631,6 +2631,10 @@ bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI,
if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
return false;
+ // The operand may not necessarily be an immediate - it could be a relocation.
+ if (!ADDIMI.getOperand(2).isImm())
+ return false;
+
Imm = ADDIMI.getOperand(2).getImm();
return true;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index be6b30ffa08b..95e5ff6b130d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3343,6 +3343,23 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
(v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;
+ // Load/convert and convert/store patterns for f16.
+ def : Pat<(f64 (extloadf16 xoaddr:$src)),
+ (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>;
+ def : Pat<(truncstoref16 f64:$src, xoaddr:$dst),
+ (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>;
+ def : Pat<(f32 (extloadf16 xoaddr:$src)),
+ (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>;
+ def : Pat<(truncstoref16 f32:$src, xoaddr:$dst),
+ (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>;
+ def : Pat<(f64 (f16_to_fp i32:$A)),
+ (f64 (XSCVHPDP (MTVSRWZ $A)))>;
+ def : Pat<(f32 (f16_to_fp i32:$A)),
+ (f32 (COPY_TO_REGCLASS (XSCVHPDP (MTVSRWZ $A)), VSSRC))>;
+ def : Pat<(i32 (fp_to_f16 f32:$A)),
+ (i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>;
+ def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>;
+
let Predicates = [IsBigEndian, HasP9Vector] in {
// Scalar stores of i8
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
diff --git a/llvm/lib/Target/X86/ImmutableGraph.h b/llvm/lib/Target/X86/ImmutableGraph.h
new file mode 100644
index 000000000000..5833017037a5
--- /dev/null
+++ b/llvm/lib/Target/X86/ImmutableGraph.h
@@ -0,0 +1,446 @@
+//==========-- ImmutableGraph.h - A fast DAG implementation ---------=========//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Description: ImmutableGraph is a fast DAG implementation that cannot be
+/// modified, except by creating a new ImmutableGraph. ImmutableGraph is
+/// implemented as two arrays: one containing nodes, and one containing edges.
+/// The advantages to this implementation are two-fold:
+/// 1. Iteration and traversal operations benefit from cache locality.
+/// 2. Operations on sets of nodes/edges are efficient, and representations of
+/// those sets in memory are compact. For instance, a set of edges is
+/// implemented as a bit vector, wherein each bit corresponds to one edge in
+/// the edge array. This implies a lower bound of 64x spatial improvement
+/// over, e.g., an llvm::DenseSet or llvm::SmallSet. It also means that
+/// insert/erase/contains operations complete in negligible constant time:
+/// insert and erase require one load and one store, and contains requires
+/// just one load.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H
+#define LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <iterator>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+template <typename NodeValueT, typename EdgeValueT> class ImmutableGraph {
+ using Traits = GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *>;
+ template <typename> friend class ImmutableGraphBuilder;
+
+public:
+ using node_value_type = NodeValueT;
+ using edge_value_type = EdgeValueT;
+ using size_type = int;
+ class Node;
+ class Edge {
+ friend class ImmutableGraph;
+ template <typename> friend class ImmutableGraphBuilder;
+
+ const Node *Dest;
+ edge_value_type Value;
+
+ public:
+ const Node *getDest() const { return Dest; };
+ const edge_value_type &getValue() const { return Value; }
+ };
+ class Node {
+ friend class ImmutableGraph;
+ template <typename> friend class ImmutableGraphBuilder;
+
+ const Edge *Edges;
+ node_value_type Value;
+
+ public:
+ const node_value_type &getValue() const { return Value; }
+
+ const Edge *edges_begin() const { return Edges; }
+ // Nodes are allocated sequentially. Edges for a node are stored together.
+ // The end of this Node's edges is the beginning of the next node's edges.
+ // An extra node was allocated to hold the end pointer for the last real
+ // node.
+ const Edge *edges_end() const { return (this + 1)->Edges; }
+ ArrayRef<Edge> edges() const {
+ return makeArrayRef(edges_begin(), edges_end());
+ }
+ };
+
+protected:
+ ImmutableGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges,
+ size_type NodesSize, size_type EdgesSize)
+ : Nodes(std::move(Nodes)), Edges(std::move(Edges)), NodesSize(NodesSize),
+ EdgesSize(EdgesSize) {}
+ ImmutableGraph(const ImmutableGraph &) = delete;
+ ImmutableGraph(ImmutableGraph &&) = delete;
+ ImmutableGraph &operator=(const ImmutableGraph &) = delete;
+ ImmutableGraph &operator=(ImmutableGraph &&) = delete;
+
+public:
+ ArrayRef<Node> nodes() const { return makeArrayRef(Nodes.get(), NodesSize); }
+ const Node *nodes_begin() const { return nodes().begin(); }
+ const Node *nodes_end() const { return nodes().end(); }
+
+ ArrayRef<Edge> edges() const { return makeArrayRef(Edges.get(), EdgesSize); }
+ const Edge *edges_begin() const { return edges().begin(); }
+ const Edge *edges_end() const { return edges().end(); }
+
+ size_type nodes_size() const { return NodesSize; }
+ size_type edges_size() const { return EdgesSize; }
+
+ // Node N must belong to this ImmutableGraph.
+ size_type getNodeIndex(const Node &N) const {
+ return std::distance(nodes_begin(), &N);
+ }
+ // Edge E must belong to this ImmutableGraph.
+ size_type getEdgeIndex(const Edge &E) const {
+ return std::distance(edges_begin(), &E);
+ }
+
+ // FIXME: Could NodeSet and EdgeSet be templated to share code?
+ class NodeSet {
+ const ImmutableGraph &G;
+ BitVector V;
+
+ public:
+ NodeSet(const ImmutableGraph &G, bool ContainsAll = false)
+ : G{G}, V{static_cast<unsigned>(G.nodes_size()), ContainsAll} {}
+ bool insert(const Node &N) {
+ size_type Idx = G.getNodeIndex(N);
+ bool AlreadyExists = V.test(Idx);
+ V.set(Idx);
+ return !AlreadyExists;
+ }
+ void erase(const Node &N) {
+ size_type Idx = G.getNodeIndex(N);
+ V.reset(Idx);
+ }
+ bool contains(const Node &N) const {
+ size_type Idx = G.getNodeIndex(N);
+ return V.test(Idx);
+ }
+ void clear() { V.reset(); }
+ size_type empty() const { return V.none(); }
+ /// Return the number of elements in the set
+ size_type count() const { return V.count(); }
+ /// Return the size of the set's domain
+ size_type size() const { return V.size(); }
+ /// Set union
+ NodeSet &operator|=(const NodeSet &RHS) {
+ assert(&this->G == &RHS.G);
+ V |= RHS.V;
+ return *this;
+ }
+ /// Set intersection
+ NodeSet &operator&=(const NodeSet &RHS) {
+ assert(&this->G == &RHS.G);
+ V &= RHS.V;
+ return *this;
+ }
+ /// Set disjoint union
+ NodeSet &operator^=(const NodeSet &RHS) {
+ assert(&this->G == &RHS.G);
+ V ^= RHS.V;
+ return *this;
+ }
+
+ using index_iterator = typename BitVector::const_set_bits_iterator;
+ index_iterator index_begin() const { return V.set_bits_begin(); }
+ index_iterator index_end() const { return V.set_bits_end(); }
+ void set(size_type Idx) { V.set(Idx); }
+ void reset(size_type Idx) { V.reset(Idx); }
+
+ class iterator {
+ const NodeSet &Set;
+ size_type Current;
+
+ void advance() {
+ assert(Current != -1);
+ Current = Set.V.find_next(Current);
+ }
+
+ public:
+ iterator(const NodeSet &Set, size_type Begin)
+ : Set{Set}, Current{Begin} {}
+ iterator operator++(int) {
+ iterator Tmp = *this;
+ advance();
+ return Tmp;
+ }
+ iterator &operator++() {
+ advance();
+ return *this;
+ }
+ Node *operator*() const {
+ assert(Current != -1);
+ return Set.G.nodes_begin() + Current;
+ }
+ bool operator==(const iterator &other) const {
+ assert(&this->Set == &other.Set);
+ return this->Current == other.Current;
+ }
+ bool operator!=(const iterator &other) const { return !(*this == other); }
+ };
+
+ iterator begin() const { return iterator{*this, V.find_first()}; }
+ iterator end() const { return iterator{*this, -1}; }
+ };
+
+ class EdgeSet {
+ const ImmutableGraph &G;
+ BitVector V;
+
+ public:
+ EdgeSet(const ImmutableGraph &G, bool ContainsAll = false)
+ : G{G}, V{static_cast<unsigned>(G.edges_size()), ContainsAll} {}
+ bool insert(const Edge &E) {
+ size_type Idx = G.getEdgeIndex(E);
+ bool AlreadyExists = V.test(Idx);
+ V.set(Idx);
+ return !AlreadyExists;
+ }
+ void erase(const Edge &E) {
+ size_type Idx = G.getEdgeIndex(E);
+ V.reset(Idx);
+ }
+ bool contains(const Edge &E) const {
+ size_type Idx = G.getEdgeIndex(E);
+ return V.test(Idx);
+ }
+ void clear() { V.reset(); }
+ bool empty() const { return V.none(); }
+ /// Return the number of elements in the set
+ size_type count() const { return V.count(); }
+ /// Return the size of the set's domain
+ size_type size() const { return V.size(); }
+ /// Set union
+ EdgeSet &operator|=(const EdgeSet &RHS) {
+ assert(&this->G == &RHS.G);
+ V |= RHS.V;
+ return *this;
+ }
+ /// Set intersection
+ EdgeSet &operator&=(const EdgeSet &RHS) {
+ assert(&this->G == &RHS.G);
+ V &= RHS.V;
+ return *this;
+ }
+ /// Set disjoint union
+ EdgeSet &operator^=(const EdgeSet &RHS) {
+ assert(&this->G == &RHS.G);
+ V ^= RHS.V;
+ return *this;
+ }
+
+ using index_iterator = typename BitVector::const_set_bits_iterator;
+ index_iterator index_begin() const { return V.set_bits_begin(); }
+ index_iterator index_end() const { return V.set_bits_end(); }
+ void set(size_type Idx) { V.set(Idx); }
+ void reset(size_type Idx) { V.reset(Idx); }
+
+ class iterator {
+ const EdgeSet &Set;
+ size_type Current;
+
+ void advance() {
+ assert(Current != -1);
+ Current = Set.V.find_next(Current);
+ }
+
+ public:
+ iterator(const EdgeSet &Set, size_type Begin)
+ : Set{Set}, Current{Begin} {}
+ iterator operator++(int) {
+ iterator Tmp = *this;
+ advance();
+ return Tmp;
+ }
+ iterator &operator++() {
+ advance();
+ return *this;
+ }
+ Edge *operator*() const {
+ assert(Current != -1);
+ return Set.G.edges_begin() + Current;
+ }
+ bool operator==(const iterator &other) const {
+ assert(&this->Set == &other.Set);
+ return this->Current == other.Current;
+ }
+ bool operator!=(const iterator &other) const { return !(*this == other); }
+ };
+
+ iterator begin() const { return iterator{*this, V.find_first()}; }
+ iterator end() const { return iterator{*this, -1}; }
+ };
+
+private:
+ std::unique_ptr<Node[]> Nodes;
+ std::unique_ptr<Edge[]> Edges;
+ size_type NodesSize;
+ size_type EdgesSize;
+};
+
+template <typename GraphT> class ImmutableGraphBuilder {
+ using node_value_type = typename GraphT::node_value_type;
+ using edge_value_type = typename GraphT::edge_value_type;
+ static_assert(
+ std::is_base_of<ImmutableGraph<node_value_type, edge_value_type>,
+ GraphT>::value,
+ "Template argument to ImmutableGraphBuilder must derive from "
+ "ImmutableGraph<>");
+ using size_type = typename GraphT::size_type;
+ using NodeSet = typename GraphT::NodeSet;
+ using Node = typename GraphT::Node;
+ using EdgeSet = typename GraphT::EdgeSet;
+ using Edge = typename GraphT::Edge;
+ using BuilderEdge = std::pair<edge_value_type, size_type>;
+ using EdgeList = std::vector<BuilderEdge>;
+ using BuilderVertex = std::pair<node_value_type, EdgeList>;
+ using VertexVec = std::vector<BuilderVertex>;
+
+public:
+ using BuilderNodeRef = size_type;
+
+ BuilderNodeRef addVertex(const node_value_type &V) {
+ auto I = AdjList.emplace(AdjList.end(), V, EdgeList{});
+ return std::distance(AdjList.begin(), I);
+ }
+
+ void addEdge(const edge_value_type &E, BuilderNodeRef From,
+ BuilderNodeRef To) {
+ AdjList[From].second.emplace_back(E, To);
+ }
+
+ bool empty() const { return AdjList.empty(); }
+
+ template <typename... ArgT> std::unique_ptr<GraphT> get(ArgT &&... Args) {
+ size_type VertexSize = AdjList.size(), EdgeSize = 0;
+ for (const auto &V : AdjList) {
+ EdgeSize += V.second.size();
+ }
+ auto VertexArray =
+ std::make_unique<Node[]>(VertexSize + 1 /* terminator node */);
+ auto EdgeArray = std::make_unique<Edge[]>(EdgeSize);
+ size_type VI = 0, EI = 0;
+ for (; VI < VertexSize; ++VI) {
+ VertexArray[VI].Value = std::move(AdjList[VI].first);
+ VertexArray[VI].Edges = &EdgeArray[EI];
+ auto NumEdges = static_cast<size_type>(AdjList[VI].second.size());
+ for (size_type VEI = 0; VEI < NumEdges; ++VEI, ++EI) {
+ auto &E = AdjList[VI].second[VEI];
+ EdgeArray[EI].Value = std::move(E.first);
+ EdgeArray[EI].Dest = &VertexArray[E.second];
+ }
+ }
+ assert(VI == VertexSize && EI == EdgeSize && "ImmutableGraph malformed");
+ VertexArray[VI].Edges = &EdgeArray[EdgeSize]; // terminator node
+ return std::make_unique<GraphT>(std::move(VertexArray),
+ std::move(EdgeArray), VertexSize, EdgeSize,
+ std::forward<ArgT>(Args)...);
+ }
+
+ template <typename... ArgT>
+ static std::unique_ptr<GraphT> trim(const GraphT &G, const NodeSet &TrimNodes,
+ const EdgeSet &TrimEdges,
+ ArgT &&... Args) {
+ size_type NewVertexSize = G.nodes_size() - TrimNodes.count();
+ size_type NewEdgeSize = G.edges_size() - TrimEdges.count();
+ auto NewVertexArray =
+ std::make_unique<Node[]>(NewVertexSize + 1 /* terminator node */);
+ auto NewEdgeArray = std::make_unique<Edge[]>(NewEdgeSize);
+
+ // Walk the nodes and determine the new index for each node.
+ size_type NewNodeIndex = 0;
+ std::vector<size_type> RemappedNodeIndex(G.nodes_size());
+ for (const Node &N : G.nodes()) {
+ if (TrimNodes.contains(N))
+ continue;
+ RemappedNodeIndex[G.getNodeIndex(N)] = NewNodeIndex++;
+ }
+ assert(NewNodeIndex == NewVertexSize &&
+ "Should have assigned NewVertexSize indices");
+
+ size_type VertexI = 0, EdgeI = 0;
+ for (const Node &N : G.nodes()) {
+ if (TrimNodes.contains(N))
+ continue;
+ NewVertexArray[VertexI].Value = N.getValue();
+ NewVertexArray[VertexI].Edges = &NewEdgeArray[EdgeI];
+ for (const Edge &E : N.edges()) {
+ if (TrimEdges.contains(E))
+ continue;
+ NewEdgeArray[EdgeI].Value = E.getValue();
+ size_type DestIdx = G.getNodeIndex(*E.getDest());
+ size_type NewIdx = RemappedNodeIndex[DestIdx];
+ assert(NewIdx < NewVertexSize);
+ NewEdgeArray[EdgeI].Dest = &NewVertexArray[NewIdx];
+ ++EdgeI;
+ }
+ ++VertexI;
+ }
+ assert(VertexI == NewVertexSize && EdgeI == NewEdgeSize &&
+ "Gadget graph malformed");
+ NewVertexArray[VertexI].Edges = &NewEdgeArray[NewEdgeSize]; // terminator
+ return std::make_unique<GraphT>(std::move(NewVertexArray),
+ std::move(NewEdgeArray), NewVertexSize,
+ NewEdgeSize, std::forward<ArgT>(Args)...);
+ }
+
+private:
+ VertexVec AdjList;
+};
+
+template <typename NodeValueT, typename EdgeValueT>
+struct GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *> {
+ using GraphT = ImmutableGraph<NodeValueT, EdgeValueT>;
+ using NodeRef = typename GraphT::Node const *;
+ using EdgeRef = typename GraphT::Edge const &;
+
+ static NodeRef edge_dest(EdgeRef E) { return E.getDest(); }
+ using ChildIteratorType =
+ mapped_iterator<typename GraphT::Edge const *, decltype(&edge_dest)>;
+
+ static NodeRef getEntryNode(GraphT *G) { return G->nodes_begin(); }
+ static ChildIteratorType child_begin(NodeRef N) {
+ return {N->edges_begin(), &edge_dest};
+ }
+ static ChildIteratorType child_end(NodeRef N) {
+ return {N->edges_end(), &edge_dest};
+ }
+
+ static NodeRef getNode(typename GraphT::Node const &N) { return NodeRef{&N}; }
+ using nodes_iterator =
+ mapped_iterator<typename GraphT::Node const *, decltype(&getNode)>;
+ static nodes_iterator nodes_begin(GraphT *G) {
+ return {G->nodes_begin(), &getNode};
+ }
+ static nodes_iterator nodes_end(GraphT *G) {
+ return {G->nodes_end(), &getNode};
+ }
+
+ using ChildEdgeIteratorType = typename GraphT::Edge const *;
+
+ static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+ return N->edges_begin();
+ }
+ static ChildEdgeIteratorType child_edge_end(NodeRef N) {
+ return N->edges_end();
+ }
+ static typename GraphT::size_type size(GraphT *G) { return G->nodes_size(); }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H
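Because ImmutableGraph's constructor is protected and ImmutableGraphBuilder::get() builds the final graph with std::make_unique&lt;GraphT&gt;, clients derive their own graph type and re-export a constructor, exactly as MachineGadgetGraph does later in this patch. A minimal sketch, not part of the patch, of that pattern and of the BitVector-backed NodeSet described in the header comment above (the toy node/edge value types are illustration choices):

// Sketch only: a toy client of ImmutableGraph / ImmutableGraphBuilder.
#include "ImmutableGraph.h"
#include <memory>
#include <utility>

namespace {
struct ToyGraph : llvm::ImmutableGraph<int, int> {
  using GraphT = llvm::ImmutableGraph<int, int>;
  ToyGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges,
           size_type NodesSize, size_type EdgesSize)
      : GraphT(std::move(Nodes), std::move(Edges), NodesSize, EdgesSize) {}
};
} // namespace

static std::unique_ptr<ToyGraph> buildToyGraph() {
  llvm::ImmutableGraphBuilder<ToyGraph> Builder;
  auto A = Builder.addVertex(1); // node values are plain ints here
  auto B = Builder.addVertex(2);
  Builder.addEdge(/*EdgeValue=*/10, A, B);
  return Builder.get(); // freezes the adjacency list into the two flat arrays
}

static int countMarked(const ToyGraph &G) {
  ToyGraph::NodeSet Marked(G);     // one bit per node, as the header explains
  Marked.insert(*G.nodes_begin()); // set/test keyed by the node's array index
  return Marked.count();
}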
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 0481a40d462a..a0ab5c3a5b3c 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -120,7 +120,7 @@ FunctionPass *createX86DomainReassignmentPass();
FunctionPass *createX86EvexToVexInsts();
/// This pass creates the thunks for the retpoline feature.
-FunctionPass *createX86RetpolineThunksPass();
+FunctionPass *createX86IndirectThunksPass();
/// This pass ensures instructions featuring a memory operand
have distinctive <LineNumber, Discriminator> (with respect to each other)
have distinctive <LineNumber, Discriminator> (with respect to each other)
@@ -133,6 +133,9 @@ InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
X86Subtarget &,
X86RegisterBankInfo &);
+FunctionPass *createX86LoadValueInjectionLoadHardeningPass();
+FunctionPass *createX86LoadValueInjectionLoadHardeningUnoptimizedPass();
+FunctionPass *createX86LoadValueInjectionRetHardeningPass();
FunctionPass *createX86SpeculativeLoadHardeningPass();
void initializeEvexToVexInstPassPass(PassRegistry &);
@@ -148,6 +151,9 @@ void initializeX86DomainReassignmentPass(PassRegistry &);
void initializeX86ExecutionDomainFixPass(PassRegistry &);
void initializeX86ExpandPseudoPass(PassRegistry &);
void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
+void initializeX86LoadValueInjectionLoadHardeningUnoptimizedPassPass(PassRegistry &);
+void initializeX86LoadValueInjectionLoadHardeningPassPass(PassRegistry &);
+void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &);
void initializeX86OptimizeLEAPassPass(PassRegistry &);
void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index a2b11d55f650..bb8952f54e3a 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -426,6 +426,22 @@ def FeatureRetpolineExternalThunk
"ourselves. Only has effect when combined with some other retpoline "
"feature", [FeatureRetpolineIndirectCalls]>;
+// Mitigate LVI attacks against indirect calls/branches and call returns
+def FeatureLVIControlFlowIntegrity
+ : SubtargetFeature<
+ "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
+ "Prevent indirect calls/branches from using a memory operand, and "
+ "precede all indirect calls/branches from a register with an "
+ "LFENCE instruction to serialize control flow. Also decompose RET "
+ "instructions into a POP+LFENCE+JMP sequence.">;
+
+// Mitigate LVI attacks against data loads
+def FeatureLVILoadHardening
+ : SubtargetFeature<
+ "lvi-load-hardening", "UseLVILoadHardening", "true",
+ "Insert LFENCE instructions to prevent data speculatively injected "
+ "into loads from being used maliciously.">;
+
// Direct Move instructions.
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
"Support movdiri instruction">;
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 1dbf40683564..a1d256ea872d 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3202,8 +3202,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
(CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
return false;
- // Functions using retpoline for indirect calls need to use SDISel.
- if (Subtarget->useRetpolineIndirectCalls())
+ // Functions using thunks for indirect calls need to use SDISel.
+ if (Subtarget->useIndirectThunkCalls())
return false;
// Handle only C, fastcc, and webkit_js calling conventions for now.
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 799c1f5d1285..1da20371caf5 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -765,10 +765,10 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
bool InProlog) const {
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
- // FIXME: Add retpoline support and remove this.
- if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls())
+ // FIXME: Add indirect thunk support and remove this.
+ if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
report_fatal_error("Emitting stack probe calls on 64-bit with the large "
- "code model and retpoline not yet implemented.");
+ "code model and indirect thunks not yet implemented.");
unsigned CallOp;
if (Is64Bit)
@@ -2493,9 +2493,9 @@ void X86FrameLowering::adjustForSegmentedStacks(
// is laid out within 2^31 bytes of each function body, but this seems
// to be sufficient for JIT.
// FIXME: Add retpoline support and remove the error here..
- if (STI.useRetpolineIndirectCalls())
+ if (STI.useIndirectThunkCalls())
report_fatal_error("Emitting morestack calls on 64-bit with the large "
- "code model and retpoline not yet implemented.");
+ "code model and thunks not yet implemented.");
BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
.addReg(X86::RIP)
.addImm(0)
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index bf33f399db28..88af0ebcfd0e 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -987,7 +987,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
if (OptLevel != CodeGenOpt::None &&
// Only do this when the target can fold the load into the call or
// jmp.
- !Subtarget->useRetpolineIndirectCalls() &&
+ !Subtarget->useIndirectThunkCalls() &&
((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
(N->getOpcode() == X86ISD::TC_RETURN &&
(Subtarget->is64Bit() ||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1523d56cc4e7..c8720d9ae3a6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30221,8 +30221,8 @@ bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask,
}
bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
- // If the subtarget is using retpolines, we need to not generate jump tables.
- if (Subtarget.useRetpolineIndirectBranches())
+ // If the subtarget is using thunks, we need to not generate jump tables.
+ if (Subtarget.useIndirectThunkBranches())
return false;
// Otherwise, fallback on the generic logic.
@@ -31345,22 +31345,22 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
return BB;
}
-static unsigned getOpcodeForRetpoline(unsigned RPOpc) {
+static unsigned getOpcodeForIndirectThunk(unsigned RPOpc) {
switch (RPOpc) {
- case X86::RETPOLINE_CALL32:
+ case X86::INDIRECT_THUNK_CALL32:
return X86::CALLpcrel32;
- case X86::RETPOLINE_CALL64:
+ case X86::INDIRECT_THUNK_CALL64:
return X86::CALL64pcrel32;
- case X86::RETPOLINE_TCRETURN32:
+ case X86::INDIRECT_THUNK_TCRETURN32:
return X86::TCRETURNdi;
- case X86::RETPOLINE_TCRETURN64:
+ case X86::INDIRECT_THUNK_TCRETURN64:
return X86::TCRETURNdi64;
}
- llvm_unreachable("not retpoline opcode");
+ llvm_unreachable("not indirect thunk opcode");
}
-static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
- unsigned Reg) {
+static const char *getIndirectThunkSymbol(const X86Subtarget &Subtarget,
+ unsigned Reg) {
if (Subtarget.useRetpolineExternalThunk()) {
// When using an external thunk for retpolines, we pick names that match the
// names GCC happens to use as well. This helps simplify the implementation
@@ -31392,39 +31392,48 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
return "__x86_indirect_thunk_r11";
}
+ llvm_unreachable("unexpected reg for external indirect thunk");
+ }
+
+ if (Subtarget.useRetpolineIndirectCalls() ||
+ Subtarget.useRetpolineIndirectBranches()) {
+ // When targeting an internal COMDAT thunk use an LLVM-specific name.
+ switch (Reg) {
+ case X86::EAX:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_eax";
+ case X86::ECX:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_ecx";
+ case X86::EDX:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_edx";
+ case X86::EDI:
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
+ return "__llvm_retpoline_edi";
+ case X86::R11:
+ assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
+ return "__llvm_retpoline_r11";
+ }
llvm_unreachable("unexpected reg for retpoline");
}
- // When targeting an internal COMDAT thunk use an LLVM-specific name.
- switch (Reg) {
- case X86::EAX:
- assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
- return "__llvm_retpoline_eax";
- case X86::ECX:
- assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
- return "__llvm_retpoline_ecx";
- case X86::EDX:
- assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
- return "__llvm_retpoline_edx";
- case X86::EDI:
- assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
- return "__llvm_retpoline_edi";
- case X86::R11:
+ if (Subtarget.useLVIControlFlowIntegrity()) {
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
- return "__llvm_retpoline_r11";
+ return "__llvm_lvi_thunk_r11";
}
- llvm_unreachable("unexpected reg for retpoline");
+ llvm_unreachable("getIndirectThunkSymbol() invoked without thunk feature");
}
MachineBasicBlock *
-X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
- MachineBasicBlock *BB) const {
+X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
// Copy the virtual register into the R11 physical register and
// call the retpoline thunk.
DebugLoc DL = MI.getDebugLoc();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
Register CalleeVReg = MI.getOperand(0).getReg();
- unsigned Opc = getOpcodeForRetpoline(MI.getOpcode());
+ unsigned Opc = getOpcodeForIndirectThunk(MI.getOpcode());
// Find an available scratch register to hold the callee. On 64-bit, we can
// just use R11, but we scan for uses anyway to ensure we don't generate
@@ -31458,7 +31467,7 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
report_fatal_error("calling convention incompatible with retpoline, no "
"available registers");
- const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
+ const char *Symbol = getIndirectThunkSymbol(Subtarget, AvailableReg);
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
.addReg(CalleeVReg);
@@ -32234,11 +32243,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::TLS_base_addr32:
case X86::TLS_base_addr64:
return EmitLoweredTLSAddr(MI, BB);
- case X86::RETPOLINE_CALL32:
- case X86::RETPOLINE_CALL64:
- case X86::RETPOLINE_TCRETURN32:
- case X86::RETPOLINE_TCRETURN64:
- return EmitLoweredRetpoline(MI, BB);
+ case X86::INDIRECT_THUNK_CALL32:
+ case X86::INDIRECT_THUNK_CALL64:
+ case X86::INDIRECT_THUNK_TCRETURN32:
+ case X86::INDIRECT_THUNK_TCRETURN64:
+ return EmitLoweredIndirectThunk(MI, BB);
case X86::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
case X86::CATCHPAD:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 3a17099da38f..830cdfc79c0a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1482,8 +1482,8 @@ namespace llvm {
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
- MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const;
diff --git a/llvm/lib/Target/X86/X86IndirectThunks.cpp b/llvm/lib/Target/X86/X86IndirectThunks.cpp
new file mode 100644
index 000000000000..36b9c3ccc959
--- /dev/null
+++ b/llvm/lib/Target/X86/X86IndirectThunks.cpp
@@ -0,0 +1,364 @@
+//==- X86IndirectThunks.cpp - Construct indirect call/jump thunks for x86 --=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Pass that injects an MI thunk that is used to lower indirect calls in a way
+/// that prevents speculation on some x86 processors and can be used to mitigate
+/// security vulnerabilities due to targeted speculative execution and side
+/// channels such as CVE-2017-5715.
+///
+/// Currently supported thunks include:
+/// - Retpoline -- A RET-implemented trampoline that lowers indirect calls
+/// - LVI Thunk -- A CALL/JMP-implemented thunk that forces load serialization
+/// before making an indirect call/jump
+///
+/// Note that the reason that this is implemented as a MachineFunctionPass and
+/// not a ModulePass is that ModulePasses at this point in the LLVM X86 pipeline
+/// serialize all transformations, which can consume lots of memory.
+///
+/// TODO(chandlerc): All of this code could use better comments and
+/// documentation.
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-retpoline-thunks"
+
+static const char RetpolineNamePrefix[] = "__llvm_retpoline_";
+static const char R11RetpolineName[] = "__llvm_retpoline_r11";
+static const char EAXRetpolineName[] = "__llvm_retpoline_eax";
+static const char ECXRetpolineName[] = "__llvm_retpoline_ecx";
+static const char EDXRetpolineName[] = "__llvm_retpoline_edx";
+static const char EDIRetpolineName[] = "__llvm_retpoline_edi";
+
+static const char LVIThunkNamePrefix[] = "__llvm_lvi_thunk_";
+static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11";
+
+namespace {
+template <typename Derived> class ThunkInserter {
+ Derived &getDerived() { return *static_cast<Derived *>(this); }
+
+protected:
+ bool InsertedThunks;
+ void doInitialization(Module &M) {}
+ void createThunkFunction(MachineModuleInfo &MMI, StringRef Name);
+
+public:
+ void init(Module &M) {
+ InsertedThunks = false;
+ getDerived().doInitialization(M);
+ }
+ // return `true` if `MMI` or `MF` was modified
+ bool run(MachineModuleInfo &MMI, MachineFunction &MF);
+};
+
+struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> {
+ const char *getThunkPrefix() { return RetpolineNamePrefix; }
+ bool mayUseThunk(const MachineFunction &MF) {
+ const auto &STI = MF.getSubtarget<X86Subtarget>();
+ return (STI.useRetpolineIndirectCalls() ||
+ STI.useRetpolineIndirectBranches()) &&
+ !STI.useRetpolineExternalThunk();
+ }
+ void insertThunks(MachineModuleInfo &MMI);
+ void populateThunk(MachineFunction &MF);
+};
+
+struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> {
+ const char *getThunkPrefix() { return LVIThunkNamePrefix; }
+ bool mayUseThunk(const MachineFunction &MF) {
+ return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity();
+ }
+ void insertThunks(MachineModuleInfo &MMI) {
+ createThunkFunction(MMI, R11LVIThunkName);
+ }
+ void populateThunk(MachineFunction &MF) {
+ // Grab the entry MBB and erase any other blocks. O0 codegen appears to
+ // generate two bbs for the entry block.
+ MachineBasicBlock *Entry = &MF.front();
+ Entry->clear();
+ while (MF.size() > 1)
+ MF.erase(std::next(MF.begin()));
+
+ // This code mitigates LVI by replacing each indirect call/jump with a
+ // direct call/jump to a thunk that looks like:
+ // ```
+ // lfence
+ // jmpq *%r11
+ // ```
+ // This ensures that if the value in register %r11 was loaded from memory,
+ // then the value in %r11 is (architecturally) correct prior to the jump.
+ const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
+ BuildMI(&MF.front(), DebugLoc(), TII->get(X86::LFENCE));
+ BuildMI(&MF.front(), DebugLoc(), TII->get(X86::JMP64r)).addReg(X86::R11);
+ MF.front().addLiveIn(X86::R11);
+ return;
+ }
+};
+
+class X86IndirectThunks : public MachineFunctionPass {
+public:
+ static char ID;
+
+ X86IndirectThunks() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override { return "X86 Indirect Thunks"; }
+
+ bool doInitialization(Module &M) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
+ }
+
+private:
+ std::tuple<RetpolineThunkInserter, LVIThunkInserter> TIs;
+
+ // FIXME: When LLVM moves to C++17, these can become folds
+ template <typename... ThunkInserterT>
+ static void initTIs(Module &M,
+ std::tuple<ThunkInserterT...> &ThunkInserters) {
+ (void)std::initializer_list<int>{
+ (std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...};
+ }
+ template <typename... ThunkInserterT>
+ static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
+ std::tuple<ThunkInserterT...> &ThunkInserters) {
+ bool Modified = false;
+ (void)std::initializer_list<int>{
+ Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...};
+ return Modified;
+ }
+};
+
+} // end anonymous namespace
+
+void RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI) {
+ if (MMI.getTarget().getTargetTriple().getArch() == Triple::x86_64)
+ createThunkFunction(MMI, R11RetpolineName);
+ else
+ for (StringRef Name : {EAXRetpolineName, ECXRetpolineName, EDXRetpolineName,
+ EDIRetpolineName})
+ createThunkFunction(MMI, Name);
+}
+
+void RetpolineThunkInserter::populateThunk(MachineFunction &MF) {
+ bool Is64Bit = MF.getTarget().getTargetTriple().getArch() == Triple::x86_64;
+ Register ThunkReg;
+ if (Is64Bit) {
+ assert(MF.getName() == "__llvm_retpoline_r11" &&
+ "Should only have an r11 thunk on 64-bit targets");
+
+ // __llvm_retpoline_r11:
+ // callq .Lr11_call_target
+ // .Lr11_capture_spec:
+ // pause
+ // lfence
+ // jmp .Lr11_capture_spec
+ // .align 16
+ // .Lr11_call_target:
+ // movq %r11, (%rsp)
+ // retq
+ ThunkReg = X86::R11;
+ } else {
+ // For 32-bit targets we need to emit a collection of thunks for various
+ // possible scratch registers as well as a fallback that uses EDI, which is
+ // normally callee saved.
+ // __llvm_retpoline_eax:
+ // calll .Leax_call_target
+ // .Leax_capture_spec:
+ // pause
+ // jmp .Leax_capture_spec
+ // .align 16
+ // .Leax_call_target:
+ // movl %eax, (%esp) # Clobber return addr
+ // retl
+ //
+ // __llvm_retpoline_ecx:
+ // ... # Same setup
+ // movl %ecx, (%esp)
+ // retl
+ //
+ // __llvm_retpoline_edx:
+ // ... # Same setup
+ // movl %edx, (%esp)
+ // retl
+ //
+ // __llvm_retpoline_edi:
+ // ... # Same setup
+ // movl %edi, (%esp)
+ // retl
+ if (MF.getName() == EAXRetpolineName)
+ ThunkReg = X86::EAX;
+ else if (MF.getName() == ECXRetpolineName)
+ ThunkReg = X86::ECX;
+ else if (MF.getName() == EDXRetpolineName)
+ ThunkReg = X86::EDX;
+ else if (MF.getName() == EDIRetpolineName)
+ ThunkReg = X86::EDI;
+ else
+ llvm_unreachable("Invalid thunk name on x86-32!");
+ }
+
+ const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
+ // Grab the entry MBB and erase any other blocks. O0 codegen appears to
+ // generate two bbs for the entry block.
+ MachineBasicBlock *Entry = &MF.front();
+ Entry->clear();
+ while (MF.size() > 1)
+ MF.erase(std::next(MF.begin()));
+
+ MachineBasicBlock *CaptureSpec =
+ MF.CreateMachineBasicBlock(Entry->getBasicBlock());
+ MachineBasicBlock *CallTarget =
+ MF.CreateMachineBasicBlock(Entry->getBasicBlock());
+ MCSymbol *TargetSym = MF.getContext().createTempSymbol();
+ MF.push_back(CaptureSpec);
+ MF.push_back(CallTarget);
+
+ const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
+ const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
+
+ Entry->addLiveIn(ThunkReg);
+ BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
+
+ // The MIR verifier thinks that the CALL in the entry block will fall through
+  // to CaptureSpec, so mark it as the successor. Technically, CallTarget is
+ // the successor, but the MIR verifier doesn't know how to cope with that.
+ Entry->addSuccessor(CaptureSpec);
+
+ // In the capture loop for speculation, we want to stop the processor from
+ // speculating as fast as possible. On Intel processors, the PAUSE instruction
+ // will block speculation without consuming any execution resources. On AMD
+ // processors, the PAUSE instruction is (essentially) a nop, so we also use an
+ // LFENCE instruction which they have advised will stop speculation as well
+ // with minimal resource utilization. We still end the capture with a jump to
+ // form an infinite loop to fully guarantee that no matter what implementation
+ // of the x86 ISA, speculating this code path never escapes.
+ BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
+ BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
+ BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
+ CaptureSpec->setHasAddressTaken();
+ CaptureSpec->addSuccessor(CaptureSpec);
+
+ CallTarget->addLiveIn(ThunkReg);
+ CallTarget->setHasAddressTaken();
+ CallTarget->setAlignment(Align(16));
+
+ // Insert return address clobber
+ const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
+ const Register SPReg = Is64Bit ? X86::RSP : X86::ESP;
+ addRegOffset(BuildMI(CallTarget, DebugLoc(), TII->get(MovOpc)), SPReg, false,
+ 0)
+ .addReg(ThunkReg);
+
+ CallTarget->back().setPreInstrSymbol(MF, TargetSym);
+ BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
+}
+
+template <typename Derived>
+void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI,
+ StringRef Name) {
+ assert(Name.startswith(getDerived().getThunkPrefix()) &&
+ "Created a thunk with an unexpected prefix!");
+
+ Module &M = const_cast<Module &>(*MMI.getModule());
+ LLVMContext &Ctx = M.getContext();
+ auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
+ Function *F =
+ Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
+ F->setVisibility(GlobalValue::HiddenVisibility);
+ F->setComdat(M.getOrInsertComdat(Name));
+
+ // Add Attributes so that we don't create a frame, unwind information, or
+ // inline.
+ AttrBuilder B;
+ B.addAttribute(llvm::Attribute::NoUnwind);
+ B.addAttribute(llvm::Attribute::Naked);
+ F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+
+ // Populate our function a bit so that we can verify.
+ BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
+ IRBuilder<> Builder(Entry);
+
+ Builder.CreateRetVoid();
+
+ // MachineFunctions/MachineBasicBlocks aren't created automatically for the
+ // IR-level constructs we already made. Create them and insert them into the
+ // module.
+ MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
+ MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry);
+
+ // Insert EntryMBB into MF. It's not in the module until we do this.
+ MF.insert(MF.end(), EntryMBB);
+ // Set MF properties. We never use vregs...
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+}
+
+template <typename Derived>
+bool ThunkInserter<Derived>::run(MachineModuleInfo &MMI, MachineFunction &MF) {
+ // If MF is not a thunk, check to see if we need to insert a thunk.
+ if (!MF.getName().startswith(getDerived().getThunkPrefix())) {
+ // If we've already inserted a thunk, nothing else to do.
+ if (InsertedThunks)
+ return false;
+
+ // Only add a thunk if one of the functions has the corresponding feature
+ // enabled in its subtarget, and doesn't enable external thunks.
+ // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
+ // nothing will end up calling it.
+ // FIXME: It's a little silly to look at every function just to enumerate
+ // the subtargets, but eventually we'll want to look at them for indirect
+ // calls, so maybe this is OK.
+ if (!getDerived().mayUseThunk(MF))
+ return false;
+
+ getDerived().insertThunks(MMI);
+ InsertedThunks = true;
+ return true;
+ }
+
+ // If this *is* a thunk function, we need to populate it with the correct MI.
+ getDerived().populateThunk(MF);
+ return true;
+}
+
+FunctionPass *llvm::createX86IndirectThunksPass() {
+ return new X86IndirectThunks();
+}
+
+char X86IndirectThunks::ID = 0;
+
+bool X86IndirectThunks::doInitialization(Module &M) {
+ initTIs(M, TIs);
+ return false;
+}
+
+bool X86IndirectThunks::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << getPassName() << '\n');
+ auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+ return runTIs(MMI, MF, TIs);
+}
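The ThunkInserter CRTP base above lets the retpoline and LVI inserters share createThunkFunction()/run() while each supplies only getThunkPrefix(), mayUseThunk(), insertThunks(), and populateThunk(); a future mitigation plugs in by adding one more element to the TIs tuple. The FIXME about C++17 folds refers to rewriting initTIs/runTIs roughly as follows (a sketch, not part of the patch; assumes the same includes and ThunkInserter types defined in this file):

// Sketch only: the fold-expression form the FIXME above alludes to.
template <typename... ThunkInserterT>
static void initTIs(llvm::Module &M,
                    std::tuple<ThunkInserterT...> &ThunkInserters) {
  (std::get<ThunkInserterT>(ThunkInserters).init(M), ...);
}

template <typename... ThunkInserterT>
static bool runTIs(llvm::MachineModuleInfo &MMI, llvm::MachineFunction &MF,
                   std::tuple<ThunkInserterT...> &ThunkInserters) {
  bool Modified = false;
  // The comma fold keeps the left-to-right evaluation that the
  // initializer_list trick provides today; every inserter still runs.
  ((Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)), ...);
  return Modified;
}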
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 78d8dd3c0d03..1fdac104cb73 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1213,14 +1213,14 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[Not64BitMode, NotUseRetpolineIndirectCalls]>;
+ Requires<[Not64BitMode, NotUseIndirectThunkCalls]>;
// FIXME: This is disabled for 32-bit PIC mode because the global base
// register which is part of the address mode may be assigned a
// callee-saved register.
def : Pat<(X86tcret (load addr:$dst), imm:$off),
(TCRETURNmi addr:$dst, imm:$off)>,
- Requires<[Not64BitMode, IsNotPIC, NotUseRetpolineIndirectCalls]>;
+ Requires<[Not64BitMode, IsNotPIC, NotUseIndirectThunkCalls]>;
def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
(TCRETURNdi tglobaladdr:$dst, imm:$off)>,
@@ -1232,21 +1232,21 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
+ Requires<[In64BitMode, NotUseIndirectThunkCalls]>;
// Don't fold loads into X86tcret requiring more than 6 regs.
// There wouldn't be enough scratch registers for base+index.
def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
(TCRETURNmi64 addr:$dst, imm:$off)>,
- Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
+ Requires<[In64BitMode, NotUseIndirectThunkCalls]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
- (RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[In64BitMode, UseRetpolineIndirectCalls]>;
+ (INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
+ Requires<[In64BitMode, UseIndirectThunkCalls]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
- (RETPOLINE_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>,
- Requires<[Not64BitMode, UseRetpolineIndirectCalls]>;
+ (INDIRECT_THUNK_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>,
+ Requires<[Not64BitMode, UseIndirectThunkCalls]>;
def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td
index 32faeb1a86f2..1842dc19ec2e 100644
--- a/llvm/lib/Target/X86/X86InstrControl.td
+++ b/llvm/lib/Target/X86/X86InstrControl.td
@@ -237,13 +237,13 @@ let isCall = 1 in
Sched<[WriteJumpLd]>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
"call{l}\t{*}$dst", [(X86call GR32:$dst)]>, OpSize32,
- Requires<[Not64BitMode,NotUseRetpolineIndirectCalls]>,
+ Requires<[Not64BitMode,NotUseIndirectThunkCalls]>,
Sched<[WriteJump]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
"call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
OpSize32,
Requires<[Not64BitMode,FavorMemIndirectCall,
- NotUseRetpolineIndirectCalls]>,
+ NotUseIndirectThunkCalls]>,
Sched<[WriteJumpLd]>;
// Non-tracking calls for IBT, use with caution.
@@ -334,11 +334,11 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
Requires<[In64BitMode]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
"call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
- Requires<[In64BitMode,NotUseRetpolineIndirectCalls]>;
+ Requires<[In64BitMode,NotUseIndirectThunkCalls]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
Requires<[In64BitMode,FavorMemIndirectCall,
- NotUseRetpolineIndirectCalls]>;
+ NotUseIndirectThunkCalls]>;
// Non-tracking calls for IBT, use with caution.
let isCodeGenOnly = 1 in {
@@ -393,19 +393,19 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
Uses = [RSP, SSP],
usesCustomInserter = 1,
SchedRW = [WriteJump] in {
- def RETPOLINE_CALL32 :
+ def INDIRECT_THUNK_CALL32 :
PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>,
- Requires<[Not64BitMode,UseRetpolineIndirectCalls]>;
+ Requires<[Not64BitMode,UseIndirectThunkCalls]>;
- def RETPOLINE_CALL64 :
+ def INDIRECT_THUNK_CALL64 :
PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
- Requires<[In64BitMode,UseRetpolineIndirectCalls]>;
+ Requires<[In64BitMode,UseIndirectThunkCalls]>;
- // Retpoline variant of indirect tail calls.
+ // Indirect thunk variant of indirect tail calls.
let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
- def RETPOLINE_TCRETURN64 :
+ def INDIRECT_THUNK_TCRETURN64 :
PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>;
- def RETPOLINE_TCRETURN32 :
+ def INDIRECT_THUNK_TCRETURN32 :
PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>;
}
}
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index ca5425e8b89f..93f40c8ec996 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -996,8 +996,8 @@ def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
def HasMFence : Predicate<"Subtarget->hasMFence()">;
-def UseRetpolineIndirectCalls : Predicate<"Subtarget->useRetpolineIndirectCalls()">;
-def NotUseRetpolineIndirectCalls : Predicate<"!Subtarget->useRetpolineIndirectCalls()">;
+def UseIndirectThunkCalls : Predicate<"Subtarget->useIndirectThunkCalls()">;
+def NotUseIndirectThunkCalls : Predicate<"!Subtarget->useIndirectThunkCalls()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
new file mode 100644
index 000000000000..35fc439998f9
--- /dev/null
+++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
@@ -0,0 +1,900 @@
+//==-- X86LoadValueInjectionLoadHardening.cpp - LVI load hardening for x86 --=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Description: This pass finds Load Value Injection (LVI) gadgets consisting
+/// of a load from memory (i.e., SOURCE), and any operation that may transmit
+/// the value loaded from memory over a covert channel, or use the value loaded
+/// from memory to determine a branch/call target (i.e., SINK). After finding
+/// all such gadgets in a given function, the pass minimally inserts LFENCE
+/// instructions in such a manner that the following property is satisfied: for
+/// all SOURCE+SINK pairs, all paths in the CFG from SOURCE to SINK contain at
+/// least one LFENCE instruction. The algorithm that implements this minimal
+/// insertion is influenced by an academic paper that minimally inserts memory
+/// fences for high-performance concurrent programs:
+/// http://www.cs.ucr.edu/~lesani/companion/oopsla15/OOPSLA15.pdf
+/// The algorithm implemented in this pass is as follows:
+/// 1. Build a condensed CFG (i.e., a GadgetGraph) consisting only of the
+/// following components:
+/// - SOURCE instructions (also includes function arguments)
+/// - SINK instructions
+/// - Basic block entry points
+/// - Basic block terminators
+/// - LFENCE instructions
+/// 2. Analyze the GadgetGraph to determine which SOURCE+SINK pairs (i.e.,
+/// gadgets) are already mitigated by existing LFENCEs. If all gadgets have been
+/// mitigated, go to step 6.
+/// 3. Use a heuristic or plugin to approximate minimal LFENCE insertion.
+/// 4. Insert one LFENCE along each CFG edge that was cut in step 3.
+/// 5. Go to step 2.
+/// 6. If any LFENCEs were inserted, return `true` from runOnMachineFunction()
+/// to tell LLVM that the function was modified.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ImmutableGraph.h"
+#include "X86.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define PASS_KEY "x86-lvi-load"
+#define DEBUG_TYPE PASS_KEY
+
+STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation");
+STATISTIC(NumFunctionsConsidered, "Number of functions analyzed");
+STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations "
+ "were deployed");
+STATISTIC(NumGadgets, "Number of LVI gadgets detected during analysis");
+
+static cl::opt<std::string> OptimizePluginPath(
+ PASS_KEY "-opt-plugin",
+ cl::desc("Specify a plugin to optimize LFENCE insertion"), cl::Hidden);
+
+static cl::opt<bool> NoConditionalBranches(
+ PASS_KEY "-no-cbranch",
+ cl::desc("Don't treat conditional branches as disclosure gadgets. This "
+ "may improve performance, at the cost of security."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> EmitDot(
+ PASS_KEY "-dot",
+ cl::desc(
+ "For each function, emit a dot graph depicting potential LVI gadgets"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> EmitDotOnly(
+ PASS_KEY "-dot-only",
+ cl::desc("For each function, emit a dot graph depicting potential LVI "
+ "gadgets, and do not insert any fences"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool> EmitDotVerify(
+ PASS_KEY "-dot-verify",
+ cl::desc("For each function, emit a dot graph to stdout depicting "
+ "potential LVI gadgets, used for testing purposes only"),
+ cl::init(false), cl::Hidden);
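[Editor's note] For debugging, the three options above surface as -x86-lvi-load-dot, -x86-lvi-load-dot-only, and -x86-lvi-load-dot-verify (PASS_KEY plus the respective suffix, passed to llc directly or through -mllvm). The first two write a per-function file named lvi.<function>.dot; the verify variant prints the graph to stdout and is intended for testing.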
+
+static llvm::sys::DynamicLibrary OptimizeDL;
+typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size,
+ unsigned int *edges, int *edge_values,
+ int *cut_edges /* out */, unsigned int edges_size);
+static OptimizeCutT OptimizeCut = nullptr;
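[Editor's note] The plugin hook above is a plain C symbol resolved at runtime: the shared library named by -x86-lvi-load-opt-plugin must export optimize_cut with the OptimizeCutT signature. The graph is handed over in a CSR-like layout (nodes[i] is the index of node i's first edge, edges[j] is the destination node of edge j, edge_values[j] is the edge weight), and the plugin marks the edges to cut in cut_edges. A deliberately naive sketch of such a plugin (hypothetical, not part of the patch):

    // naive_cut_plugin.cpp -- build as a shared library and load it with
    // -x86-lvi-load-opt-plugin=<path>. Cuts every CFG edge; gadget edges carry
    // the -1 sentinel and must never be cut.
    extern "C" int optimize_cut(unsigned int *nodes, unsigned int nodes_size,
                                unsigned int *edges, int *edge_values,
                                int *cut_edges /* out */,
                                unsigned int edges_size) {
      (void)nodes;
      (void)nodes_size;
      (void)edges;
      for (unsigned int J = 0; J < edges_size; ++J)
        cut_edges[J] = edge_values[J] >= 0;
      return 0;
    }

This mitigates every gadget at maximal runtime cost; a useful plugin would instead compute a minimum cut over the weighted CFG edges. As used in this pass, the return value is not inspected.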
+
+namespace {
+
+struct MachineGadgetGraph : ImmutableGraph<MachineInstr *, int> {
+ static constexpr int GadgetEdgeSentinel = -1;
+ static constexpr MachineInstr *const ArgNodeSentinel = nullptr;
+
+ using GraphT = ImmutableGraph<MachineInstr *, int>;
+ using Node = typename GraphT::Node;
+ using Edge = typename GraphT::Edge;
+ using size_type = typename GraphT::size_type;
+ MachineGadgetGraph(std::unique_ptr<Node[]> Nodes,
+ std::unique_ptr<Edge[]> Edges, size_type NodesSize,
+ size_type EdgesSize, int NumFences = 0, int NumGadgets = 0)
+ : GraphT(std::move(Nodes), std::move(Edges), NodesSize, EdgesSize),
+ NumFences(NumFences), NumGadgets(NumGadgets) {}
+ static inline bool isCFGEdge(const Edge &E) {
+ return E.getValue() != GadgetEdgeSentinel;
+ }
+ static inline bool isGadgetEdge(const Edge &E) {
+ return E.getValue() == GadgetEdgeSentinel;
+ }
+ int NumFences;
+ int NumGadgets;
+};
+
+class X86LoadValueInjectionLoadHardeningPass : public MachineFunctionPass {
+public:
+ X86LoadValueInjectionLoadHardeningPass() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "X86 Load Value Injection (LVI) Load Hardening";
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+
+private:
+ using GraphBuilder = ImmutableGraphBuilder<MachineGadgetGraph>;
+ using EdgeSet = MachineGadgetGraph::EdgeSet;
+ using NodeSet = MachineGadgetGraph::NodeSet;
+ using Gadget = std::pair<MachineInstr *, MachineInstr *>;
+
+ const X86Subtarget *STI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+
+ std::unique_ptr<MachineGadgetGraph>
+ getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI,
+ const MachineDominatorTree &MDT,
+ const MachineDominanceFrontier &MDF) const;
+ int hardenLoadsWithPlugin(MachineFunction &MF,
+ std::unique_ptr<MachineGadgetGraph> Graph) const;
+ int hardenLoadsWithGreedyHeuristic(
+ MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const;
+ int elimMitigatedEdgesAndNodes(MachineGadgetGraph &G,
+ EdgeSet &ElimEdges /* in, out */,
+ NodeSet &ElimNodes /* in, out */) const;
+ std::unique_ptr<MachineGadgetGraph>
+ trimMitigatedEdges(std::unique_ptr<MachineGadgetGraph> Graph) const;
+ void findAndCutEdges(MachineGadgetGraph &G,
+ EdgeSet &CutEdges /* out */) const;
+ int insertFences(MachineFunction &MF, MachineGadgetGraph &G,
+ EdgeSet &CutEdges /* in, out */) const;
+ bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const;
+ bool instrUsesRegToBranch(const MachineInstr &I, unsigned Reg) const;
+ inline bool isFence(const MachineInstr *MI) const {
+ return MI && (MI->getOpcode() == X86::LFENCE ||
+ (STI->useLVIControlFlowIntegrity() && MI->isCall()));
+ }
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
+template <>
+struct GraphTraits<MachineGadgetGraph *>
+ : GraphTraits<ImmutableGraph<MachineInstr *, int> *> {};
+
+template <>
+struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits {
+ using GraphType = MachineGadgetGraph;
+ using Traits = llvm::GraphTraits<GraphType *>;
+ using NodeRef = typename Traits::NodeRef;
+ using EdgeRef = typename Traits::EdgeRef;
+ using ChildIteratorType = typename Traits::ChildIteratorType;
+ using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType;
+
+ DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(NodeRef Node, GraphType *) {
+ if (Node->getValue() == MachineGadgetGraph::ArgNodeSentinel)
+ return "ARGS";
+
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << *Node->getValue();
+ return OS.str();
+ }
+
+ static std::string getNodeAttributes(NodeRef Node, GraphType *) {
+ MachineInstr *MI = Node->getValue();
+ if (MI == MachineGadgetGraph::ArgNodeSentinel)
+ return "color = blue";
+ if (MI->getOpcode() == X86::LFENCE)
+ return "color = green";
+ return "";
+ }
+
+ static std::string getEdgeAttributes(NodeRef, ChildIteratorType E,
+ GraphType *) {
+ int EdgeVal = (*E.getCurrent()).getValue();
+ return EdgeVal >= 0 ? "label = " + std::to_string(EdgeVal)
+ : "color = red, style = \"dashed\"";
+ }
+};
+
+} // end namespace llvm
+
+constexpr MachineInstr *MachineGadgetGraph::ArgNodeSentinel;
+constexpr int MachineGadgetGraph::GadgetEdgeSentinel;
+
+char X86LoadValueInjectionLoadHardeningPass::ID = 0;
+
+void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineDominanceFrontier>();
+ AU.setPreservesCFG();
+}
+
+static void WriteGadgetGraph(raw_ostream &OS, MachineFunction &MF,
+ MachineGadgetGraph *G) {
+ WriteGraph(OS, G, /*ShortNames*/ false,
+ "Speculative gadgets for \"" + MF.getName() + "\" function");
+}
+
+bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction(
+ MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
+ << " *****\n");
+ STI = &MF.getSubtarget<X86Subtarget>();
+ if (!STI->useLVILoadHardening())
+ return false;
+
+ // FIXME: support 32-bit
+ if (!STI->is64Bit())
+ report_fatal_error("LVI load hardening is only supported on 64-bit", false);
+
+  // Don't skip "optnone" functions, but do participate in opt-bisect.
+ const Function &F = MF.getFunction();
+ if (!F.hasOptNone() && skipFunction(F))
+ return false;
+
+ ++NumFunctionsConsidered;
+ TII = STI->getInstrInfo();
+ TRI = STI->getRegisterInfo();
+ LLVM_DEBUG(dbgs() << "Building gadget graph...\n");
+ const auto &MLI = getAnalysis<MachineLoopInfo>();
+ const auto &MDT = getAnalysis<MachineDominatorTree>();
+ const auto &MDF = getAnalysis<MachineDominanceFrontier>();
+ std::unique_ptr<MachineGadgetGraph> Graph = getGadgetGraph(MF, MLI, MDT, MDF);
+ LLVM_DEBUG(dbgs() << "Building gadget graph... Done\n");
+ if (Graph == nullptr)
+ return false; // didn't find any gadgets
+
+ if (EmitDotVerify) {
+ WriteGadgetGraph(outs(), MF, Graph.get());
+ return false;
+ }
+
+ if (EmitDot || EmitDotOnly) {
+ LLVM_DEBUG(dbgs() << "Emitting gadget graph...\n");
+ std::error_code FileError;
+ std::string FileName = "lvi.";
+ FileName += MF.getName();
+ FileName += ".dot";
+ raw_fd_ostream FileOut(FileName, FileError);
+ if (FileError)
+ errs() << FileError.message();
+ WriteGadgetGraph(FileOut, MF, Graph.get());
+ FileOut.close();
+ LLVM_DEBUG(dbgs() << "Emitting gadget graph... Done\n");
+ if (EmitDotOnly)
+ return false;
+ }
+
+ int FencesInserted;
+ if (!OptimizePluginPath.empty()) {
+ if (!OptimizeDL.isValid()) {
+ std::string ErrorMsg;
+ OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary(
+ OptimizePluginPath.c_str(), &ErrorMsg);
+ if (!ErrorMsg.empty())
+ report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"');
+ OptimizeCut = (OptimizeCutT)OptimizeDL.getAddressOfSymbol("optimize_cut");
+ if (!OptimizeCut)
+ report_fatal_error("Invalid optimization plugin");
+ }
+ FencesInserted = hardenLoadsWithPlugin(MF, std::move(Graph));
+ } else { // Use the default greedy heuristic
+ FencesInserted = hardenLoadsWithGreedyHeuristic(MF, std::move(Graph));
+ }
+
+ if (FencesInserted > 0)
+ ++NumFunctionsMitigated;
+ NumFences += FencesInserted;
+ return (FencesInserted > 0);
+}
+
+std::unique_ptr<MachineGadgetGraph>
+X86LoadValueInjectionLoadHardeningPass::getGadgetGraph(
+ MachineFunction &MF, const MachineLoopInfo &MLI,
+ const MachineDominatorTree &MDT,
+ const MachineDominanceFrontier &MDF) const {
+ using namespace rdf;
+
+ // Build the Register Dataflow Graph using the RDF framework
+ TargetOperandInfo TOI{*TII};
+ DataFlowGraph DFG{MF, *TII, *TRI, MDT, MDF, TOI};
+ DFG.build();
+ Liveness L{MF.getRegInfo(), DFG};
+ L.computePhiInfo();
+
+ GraphBuilder Builder;
+ using GraphIter = typename GraphBuilder::BuilderNodeRef;
+ DenseMap<MachineInstr *, GraphIter> NodeMap;
+ int FenceCount = 0, GadgetCount = 0;
+ auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) {
+ auto Ref = NodeMap.find(MI);
+ if (Ref == NodeMap.end()) {
+ auto I = Builder.addVertex(MI);
+ NodeMap[MI] = I;
+ return std::pair<GraphIter, bool>{I, true};
+ }
+ return std::pair<GraphIter, bool>{Ref->getSecond(), false};
+ };
+
+ // The `Transmitters` map memoizes transmitters found for each def. If a def
+ // has not yet been analyzed, then it will not appear in the map. If a def
+ // has been analyzed and was determined not to have any transmitters, then
+ // its list of transmitters will be empty.
+ DenseMap<NodeId, std::vector<NodeId>> Transmitters;
+
+  // Analyze all machine instructions to find gadgets and LFENCEs, adding
+  // each interesting node to the gadget graph builder.
+ auto AnalyzeDef = [&](NodeAddr<DefNode *> SourceDef) {
+ SmallSet<NodeId, 8> UsesVisited, DefsVisited;
+ std::function<void(NodeAddr<DefNode *>)> AnalyzeDefUseChain =
+ [&](NodeAddr<DefNode *> Def) {
+ if (Transmitters.find(Def.Id) != Transmitters.end())
+ return; // Already analyzed `Def`
+
+ // Use RDF to find all the uses of `Def`
+ rdf::NodeSet Uses;
+ RegisterRef DefReg = DFG.getPRI().normalize(Def.Addr->getRegRef(DFG));
+ for (auto UseID : L.getAllReachedUses(DefReg, Def)) {
+ auto Use = DFG.addr<UseNode *>(UseID);
+ if (Use.Addr->getFlags() & NodeAttrs::PhiRef) { // phi node
+ NodeAddr<PhiNode *> Phi = Use.Addr->getOwner(DFG);
+ for (auto I : L.getRealUses(Phi.Id)) {
+ if (DFG.getPRI().alias(RegisterRef(I.first), DefReg)) {
+ for (auto UA : I.second)
+ Uses.emplace(UA.first);
+ }
+ }
+ } else { // not a phi node
+ Uses.emplace(UseID);
+ }
+ }
+
+ // For each use of `Def`, we want to know whether:
+ // (1) The use can leak the Def'ed value,
+ // (2) The use can further propagate the Def'ed value to more defs
+ for (auto UseID : Uses) {
+ if (!UsesVisited.insert(UseID).second)
+ continue; // Already visited this use of `Def`
+
+ auto Use = DFG.addr<UseNode *>(UseID);
+ assert(!(Use.Addr->getFlags() & NodeAttrs::PhiRef));
+ MachineOperand &UseMO = Use.Addr->getOp();
+ MachineInstr &UseMI = *UseMO.getParent();
+ assert(UseMO.isReg());
+
+ // We naively assume that an instruction propagates any loaded
+ // uses to all defs unless the instruction is a call, in which
+ // case all arguments will be treated as gadget sources during
+ // analysis of the callee function.
+ if (UseMI.isCall())
+ continue;
+
+ // Check whether this use can transmit (leak) its value.
+ if (instrUsesRegToAccessMemory(UseMI, UseMO.getReg()) ||
+ (!NoConditionalBranches &&
+ instrUsesRegToBranch(UseMI, UseMO.getReg()))) {
+ Transmitters[Def.Id].push_back(Use.Addr->getOwner(DFG).Id);
+ if (UseMI.mayLoad())
+ continue; // Found a transmitting load -- no need to continue
+ // traversing its defs (i.e., this load will become
+ // a new gadget source anyways).
+ }
+
+ // Check whether the use propagates to more defs.
+ NodeAddr<InstrNode *> Owner{Use.Addr->getOwner(DFG)};
+ rdf::NodeList AnalyzedChildDefs;
+ for (auto &ChildDef :
+ Owner.Addr->members_if(DataFlowGraph::IsDef, DFG)) {
+ if (!DefsVisited.insert(ChildDef.Id).second)
+ continue; // Already visited this def
+ if (Def.Addr->getAttrs() & NodeAttrs::Dead)
+ continue;
+ if (Def.Id == ChildDef.Id)
+ continue; // `Def` uses itself (e.g., increment loop counter)
+
+ AnalyzeDefUseChain(ChildDef);
+
+ // `Def` inherits all of its child defs' transmitters.
+ for (auto TransmitterId : Transmitters[ChildDef.Id])
+ Transmitters[Def.Id].push_back(TransmitterId);
+ }
+ }
+
+ // Note that this statement adds `Def.Id` to the map if no
+ // transmitters were found for `Def`.
+ auto &DefTransmitters = Transmitters[Def.Id];
+
+ // Remove duplicate transmitters
+ llvm::sort(DefTransmitters);
+ DefTransmitters.erase(
+ std::unique(DefTransmitters.begin(), DefTransmitters.end()),
+ DefTransmitters.end());
+ };
+
+ // Find all of the transmitters
+ AnalyzeDefUseChain(SourceDef);
+ auto &SourceDefTransmitters = Transmitters[SourceDef.Id];
+ if (SourceDefTransmitters.empty())
+ return; // No transmitters for `SourceDef`
+
+ MachineInstr *Source = SourceDef.Addr->getFlags() & NodeAttrs::PhiRef
+ ? MachineGadgetGraph::ArgNodeSentinel
+ : SourceDef.Addr->getOp().getParent();
+ auto GadgetSource = MaybeAddNode(Source);
+ // Each transmitter is a sink for `SourceDef`.
+ for (auto TransmitterId : SourceDefTransmitters) {
+ MachineInstr *Sink = DFG.addr<StmtNode *>(TransmitterId).Addr->getCode();
+ auto GadgetSink = MaybeAddNode(Sink);
+ // Add the gadget edge to the graph.
+ Builder.addEdge(MachineGadgetGraph::GadgetEdgeSentinel,
+ GadgetSource.first, GadgetSink.first);
+ ++GadgetCount;
+ }
+ };
+
+ LLVM_DEBUG(dbgs() << "Analyzing def-use chains to find gadgets\n");
+ // Analyze function arguments
+ NodeAddr<BlockNode *> EntryBlock = DFG.getFunc().Addr->getEntryBlock(DFG);
+ for (NodeAddr<PhiNode *> ArgPhi :
+ EntryBlock.Addr->members_if(DataFlowGraph::IsPhi, DFG)) {
+ NodeList Defs = ArgPhi.Addr->members_if(DataFlowGraph::IsDef, DFG);
+ llvm::for_each(Defs, AnalyzeDef);
+ }
+ // Analyze every instruction in MF
+ for (NodeAddr<BlockNode *> BA : DFG.getFunc().Addr->members(DFG)) {
+ for (NodeAddr<StmtNode *> SA :
+ BA.Addr->members_if(DataFlowGraph::IsCode<NodeAttrs::Stmt>, DFG)) {
+ MachineInstr *MI = SA.Addr->getCode();
+ if (isFence(MI)) {
+ MaybeAddNode(MI);
+ ++FenceCount;
+ } else if (MI->mayLoad()) {
+ NodeList Defs = SA.Addr->members_if(DataFlowGraph::IsDef, DFG);
+ llvm::for_each(Defs, AnalyzeDef);
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Found " << FenceCount << " fences\n");
+ LLVM_DEBUG(dbgs() << "Found " << GadgetCount << " gadgets\n");
+ if (GadgetCount == 0)
+ return nullptr;
+ NumGadgets += GadgetCount;
+
+ // Traverse CFG to build the rest of the graph
+ SmallSet<MachineBasicBlock *, 8> BlocksVisited;
+ std::function<void(MachineBasicBlock *, GraphIter, unsigned)> TraverseCFG =
+ [&](MachineBasicBlock *MBB, GraphIter GI, unsigned ParentDepth) {
+ unsigned LoopDepth = MLI.getLoopDepth(MBB);
+ if (!MBB->empty()) {
+ // Always add the first instruction in each block
+ auto NI = MBB->begin();
+ auto BeginBB = MaybeAddNode(&*NI);
+ Builder.addEdge(ParentDepth, GI, BeginBB.first);
+ if (!BlocksVisited.insert(MBB).second)
+ return;
+
+ // Add any instructions within the block that are gadget components
+ GI = BeginBB.first;
+ while (++NI != MBB->end()) {
+ auto Ref = NodeMap.find(&*NI);
+ if (Ref != NodeMap.end()) {
+ Builder.addEdge(LoopDepth, GI, Ref->getSecond());
+ GI = Ref->getSecond();
+ }
+ }
+
+ // Always add the terminator instruction, if one exists
+ auto T = MBB->getFirstTerminator();
+ if (T != MBB->end()) {
+ auto EndBB = MaybeAddNode(&*T);
+ if (EndBB.second)
+ Builder.addEdge(LoopDepth, GI, EndBB.first);
+ GI = EndBB.first;
+ }
+ }
+ for (MachineBasicBlock *Succ : MBB->successors())
+ TraverseCFG(Succ, GI, LoopDepth);
+ };
+ // ArgNodeSentinel is a pseudo-instruction that represents MF args in the
+ // GadgetGraph
+ GraphIter ArgNode = MaybeAddNode(MachineGadgetGraph::ArgNodeSentinel).first;
+ TraverseCFG(&MF.front(), ArgNode, 0);
+ std::unique_ptr<MachineGadgetGraph> G{Builder.get(FenceCount, GadgetCount)};
+ LLVM_DEBUG(dbgs() << "Found " << G->nodes_size() << " nodes\n");
+ return G;
+}
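[Editor's note] The CFG edges built by TraverseCFG above are weighted by loop depth (ParentDepth on the edge into a block, LoopDepth on edges within and out of it), while gadget edges carry the -1 sentinel. Edges outside of loops are therefore cheaper, which biases both the greedy heuristic and an external plugin toward cuts whose LFENCEs execute less often.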
+
+// Returns the number of remaining gadget edges that could not be eliminated
+int X86LoadValueInjectionLoadHardeningPass::elimMitigatedEdgesAndNodes(
+ MachineGadgetGraph &G, MachineGadgetGraph::EdgeSet &ElimEdges /* in, out */,
+ MachineGadgetGraph::NodeSet &ElimNodes /* in, out */) const {
+ if (G.NumFences > 0) {
+ // Eliminate fences and CFG edges that ingress and egress the fence, as
+ // they are trivially mitigated.
+ for (const auto &E : G.edges()) {
+ const MachineGadgetGraph::Node *Dest = E.getDest();
+ if (isFence(Dest->getValue())) {
+ ElimNodes.insert(*Dest);
+ ElimEdges.insert(E);
+ for (const auto &DE : Dest->edges())
+ ElimEdges.insert(DE);
+ }
+ }
+ }
+
+ // Find and eliminate gadget edges that have been mitigated.
+ int MitigatedGadgets = 0, RemainingGadgets = 0;
+ MachineGadgetGraph::NodeSet ReachableNodes{G};
+ for (const auto &RootN : G.nodes()) {
+ if (llvm::none_of(RootN.edges(), MachineGadgetGraph::isGadgetEdge))
+ continue; // skip this node if it isn't a gadget source
+
+ // Find all of the nodes that are CFG-reachable from RootN using DFS
+ ReachableNodes.clear();
+ std::function<void(const MachineGadgetGraph::Node *, bool)>
+ FindReachableNodes =
+ [&](const MachineGadgetGraph::Node *N, bool FirstNode) {
+ if (!FirstNode)
+ ReachableNodes.insert(*N);
+ for (const auto &E : N->edges()) {
+ const MachineGadgetGraph::Node *Dest = E.getDest();
+ if (MachineGadgetGraph::isCFGEdge(E) &&
+ !ElimEdges.contains(E) && !ReachableNodes.contains(*Dest))
+ FindReachableNodes(Dest, false);
+ }
+ };
+ FindReachableNodes(&RootN, true);
+
+ // Any gadget whose sink is unreachable has been mitigated
+ for (const auto &E : RootN.edges()) {
+ if (MachineGadgetGraph::isGadgetEdge(E)) {
+ if (ReachableNodes.contains(*E.getDest())) {
+ // This gadget's sink is reachable
+ ++RemainingGadgets;
+ } else { // This gadget's sink is unreachable, and therefore mitigated
+ ++MitigatedGadgets;
+ ElimEdges.insert(E);
+ }
+ }
+ }
+ }
+ return RemainingGadgets;
+}
+
+std::unique_ptr<MachineGadgetGraph>
+X86LoadValueInjectionLoadHardeningPass::trimMitigatedEdges(
+ std::unique_ptr<MachineGadgetGraph> Graph) const {
+ MachineGadgetGraph::NodeSet ElimNodes{*Graph};
+ MachineGadgetGraph::EdgeSet ElimEdges{*Graph};
+ int RemainingGadgets =
+ elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes);
+ if (ElimEdges.empty() && ElimNodes.empty()) {
+ Graph->NumFences = 0;
+ Graph->NumGadgets = RemainingGadgets;
+ } else {
+ Graph = GraphBuilder::trim(*Graph, ElimNodes, ElimEdges, 0 /* NumFences */,
+ RemainingGadgets);
+ }
+ return Graph;
+}
+
+int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithPlugin(
+ MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const {
+ int FencesInserted = 0;
+
+ do {
+ LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n");
+ Graph = trimMitigatedEdges(std::move(Graph));
+ LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n");
+ if (Graph->NumGadgets == 0)
+ break;
+
+ LLVM_DEBUG(dbgs() << "Cutting edges...\n");
+ EdgeSet CutEdges{*Graph};
+ auto Nodes = std::make_unique<unsigned int[]>(Graph->nodes_size() +
+ 1 /* terminator node */);
+ auto Edges = std::make_unique<unsigned int[]>(Graph->edges_size());
+ auto EdgeCuts = std::make_unique<int[]>(Graph->edges_size());
+ auto EdgeValues = std::make_unique<int[]>(Graph->edges_size());
+ for (const auto &N : Graph->nodes()) {
+ Nodes[Graph->getNodeIndex(N)] = Graph->getEdgeIndex(*N.edges_begin());
+ }
+ Nodes[Graph->nodes_size()] = Graph->edges_size(); // terminator node
+ for (const auto &E : Graph->edges()) {
+ Edges[Graph->getEdgeIndex(E)] = Graph->getNodeIndex(*E.getDest());
+ EdgeValues[Graph->getEdgeIndex(E)] = E.getValue();
+ }
+ OptimizeCut(Nodes.get(), Graph->nodes_size(), Edges.get(), EdgeValues.get(),
+ EdgeCuts.get(), Graph->edges_size());
+ for (int I = 0; I < Graph->edges_size(); ++I)
+ if (EdgeCuts[I])
+ CutEdges.set(I);
+ LLVM_DEBUG(dbgs() << "Cutting edges... Done\n");
+ LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n");
+
+ LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n");
+ FencesInserted += insertFences(MF, *Graph, CutEdges);
+ LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n");
+ LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n");
+
+ Graph = GraphBuilder::trim(*Graph, MachineGadgetGraph::NodeSet{*Graph},
+ CutEdges);
+ } while (true);
+
+ return FencesInserted;
+}
+
+int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithGreedyHeuristic(
+ MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const {
+ LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n");
+ Graph = trimMitigatedEdges(std::move(Graph));
+ LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n");
+ if (Graph->NumGadgets == 0)
+ return 0;
+
+ LLVM_DEBUG(dbgs() << "Cutting edges...\n");
+ MachineGadgetGraph::NodeSet ElimNodes{*Graph}, GadgetSinks{*Graph};
+ MachineGadgetGraph::EdgeSet ElimEdges{*Graph}, CutEdges{*Graph};
+ auto IsCFGEdge = [&ElimEdges, &CutEdges](const MachineGadgetGraph::Edge &E) {
+ return !ElimEdges.contains(E) && !CutEdges.contains(E) &&
+ MachineGadgetGraph::isCFGEdge(E);
+ };
+ auto IsGadgetEdge = [&ElimEdges,
+ &CutEdges](const MachineGadgetGraph::Edge &E) {
+ return !ElimEdges.contains(E) && !CutEdges.contains(E) &&
+ MachineGadgetGraph::isGadgetEdge(E);
+ };
+
+ // FIXME: this is O(E^2), we could probably do better.
+ do {
+ // Find the cheapest CFG edge that will eliminate a gadget (by being
+ // egress from a SOURCE node or ingress to a SINK node), and cut it.
+ const MachineGadgetGraph::Edge *CheapestSoFar = nullptr;
+
+ // First, collect all gadget source and sink nodes.
+ MachineGadgetGraph::NodeSet GadgetSources{*Graph}, GadgetSinks{*Graph};
+ for (const auto &N : Graph->nodes()) {
+ if (ElimNodes.contains(N))
+ continue;
+ for (const auto &E : N.edges()) {
+ if (IsGadgetEdge(E)) {
+ GadgetSources.insert(N);
+ GadgetSinks.insert(*E.getDest());
+ }
+ }
+ }
+
+ // Next, look for the cheapest CFG edge which, when cut, is guaranteed to
+ // mitigate at least one gadget by either:
+ // (a) being egress from a gadget source, or
+ // (b) being ingress to a gadget sink.
+ for (const auto &N : Graph->nodes()) {
+ if (ElimNodes.contains(N))
+ continue;
+ for (const auto &E : N.edges()) {
+ if (IsCFGEdge(E)) {
+ if (GadgetSources.contains(N) || GadgetSinks.contains(*E.getDest())) {
+ if (!CheapestSoFar || E.getValue() < CheapestSoFar->getValue())
+ CheapestSoFar = &E;
+ }
+ }
+ }
+ }
+
+ assert(CheapestSoFar && "Failed to cut an edge");
+ CutEdges.insert(*CheapestSoFar);
+ ElimEdges.insert(*CheapestSoFar);
+ } while (elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes));
+ LLVM_DEBUG(dbgs() << "Cutting edges... Done\n");
+ LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n");
+
+ LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n");
+ int FencesInserted = insertFences(MF, *Graph, CutEdges);
+ LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n");
+ LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n");
+
+ return FencesInserted;
+}
+
+int X86LoadValueInjectionLoadHardeningPass::insertFences(
+ MachineFunction &MF, MachineGadgetGraph &G,
+ EdgeSet &CutEdges /* in, out */) const {
+ int FencesInserted = 0;
+ for (const auto &N : G.nodes()) {
+ for (const auto &E : N.edges()) {
+ if (CutEdges.contains(E)) {
+ MachineInstr *MI = N.getValue(), *Prev;
+ MachineBasicBlock *MBB; // Insert an LFENCE in this MBB
+ MachineBasicBlock::iterator InsertionPt; // ...at this point
+ if (MI == MachineGadgetGraph::ArgNodeSentinel) {
+ // insert LFENCE at beginning of entry block
+ MBB = &MF.front();
+ InsertionPt = MBB->begin();
+ Prev = nullptr;
+ } else if (MI->isBranch()) { // insert the LFENCE before the branch
+ MBB = MI->getParent();
+ InsertionPt = MI;
+ Prev = MI->getPrevNode();
+ // Remove all egress CFG edges from this branch because the inserted
+ // LFENCE prevents gadgets from crossing the branch.
+ for (const auto &E : N.edges()) {
+ if (MachineGadgetGraph::isCFGEdge(E))
+ CutEdges.insert(E);
+ }
+ } else { // insert the LFENCE after the instruction
+ MBB = MI->getParent();
+ InsertionPt = MI->getNextNode() ? MI->getNextNode() : MBB->end();
+ Prev = InsertionPt == MBB->end()
+ ? (MBB->empty() ? nullptr : &MBB->back())
+ : InsertionPt->getPrevNode();
+ }
+ // Ensure this insertion is not redundant (two LFENCEs in sequence).
+ if ((InsertionPt == MBB->end() || !isFence(&*InsertionPt)) &&
+ (!Prev || !isFence(Prev))) {
+ BuildMI(*MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE));
+ ++FencesInserted;
+ }
+ }
+ }
+ }
+ return FencesInserted;
+}
+
+bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToAccessMemory(
+ const MachineInstr &MI, unsigned Reg) const {
+ if (!MI.mayLoadOrStore() || MI.getOpcode() == X86::MFENCE ||
+ MI.getOpcode() == X86::SFENCE || MI.getOpcode() == X86::LFENCE)
+ return false;
+
+  // FIXME: This does not handle pseudo loading instructions like TCRETURN*
+ const MCInstrDesc &Desc = MI.getDesc();
+ int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
+ if (MemRefBeginIdx < 0) {
+ LLVM_DEBUG(dbgs() << "Warning: unable to obtain memory operand for loading "
+ "instruction:\n";
+ MI.print(dbgs()); dbgs() << '\n';);
+ return false;
+ }
+ MemRefBeginIdx += X86II::getOperandBias(Desc);
+
+ const MachineOperand &BaseMO =
+ MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
+ const MachineOperand &IndexMO =
+ MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
+ return (BaseMO.isReg() && BaseMO.getReg() != X86::NoRegister &&
+ TRI->regsOverlap(BaseMO.getReg(), Reg)) ||
+ (IndexMO.isReg() && IndexMO.getReg() != X86::NoRegister &&
+ TRI->regsOverlap(IndexMO.getReg(), Reg));
+}
+
+bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToBranch(
+ const MachineInstr &MI, unsigned Reg) const {
+ if (!MI.isConditionalBranch())
+ return false;
+ for (const MachineOperand &Use : MI.uses())
+ if (Use.isReg() && Use.getReg() == Reg)
+ return true;
+ return false;
+}
+
+INITIALIZE_PASS_BEGIN(X86LoadValueInjectionLoadHardeningPass, PASS_KEY,
+ "X86 LVI load hardening", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
+INITIALIZE_PASS_END(X86LoadValueInjectionLoadHardeningPass, PASS_KEY,
+ "X86 LVI load hardening", false, false)
+
+FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningPass() {
+ return new X86LoadValueInjectionLoadHardeningPass();
+}
+
+namespace {
+
+/// The `X86LoadValueInjectionLoadHardeningPass` above depends on expensive
+/// analysis passes that add complexity to the pipeline. This complexity
+/// can cause noticeable overhead when no optimizations are enabled, i.e., -O0.
+/// The purpose of `X86LoadValueInjectionLoadHardeningUnoptimizedPass` is to
+/// provide the same security as the optimized pass, but without adding
+/// unnecessary complexity to the LLVM pipeline.
+///
+/// The behavior of this pass is simply to insert an LFENCE after every load
+/// instruction.
+class X86LoadValueInjectionLoadHardeningUnoptimizedPass
+ : public MachineFunctionPass {
+public:
+ X86LoadValueInjectionLoadHardeningUnoptimizedPass()
+ : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "X86 Load Value Injection (LVI) Load Hardening (Unoptimized)";
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ static char ID;
+};
+
+} // end anonymous namespace
+
+char X86LoadValueInjectionLoadHardeningUnoptimizedPass::ID = 0;
+
+bool X86LoadValueInjectionLoadHardeningUnoptimizedPass::runOnMachineFunction(
+ MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
+ << " *****\n");
+ const X86Subtarget *STI = &MF.getSubtarget<X86Subtarget>();
+ if (!STI->useLVILoadHardening())
+ return false;
+
+ // FIXME: support 32-bit
+ if (!STI->is64Bit())
+ report_fatal_error("LVI load hardening is only supported on 64-bit", false);
+
+  // Don't skip "optnone" functions, but do participate in opt-bisect.
+ const Function &F = MF.getFunction();
+ if (!F.hasOptNone() && skipFunction(F))
+ return false;
+
+ bool Modified = false;
+ ++NumFunctionsConsidered;
+
+ const TargetInstrInfo *TII = STI->getInstrInfo();
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ if (!MI.mayLoad() || MI.getOpcode() == X86::LFENCE ||
+ MI.getOpcode() == X86::MFENCE)
+ continue;
+
+ MachineBasicBlock::iterator InsertionPt =
+ MI.getNextNode() ? MI.getNextNode() : MBB.end();
+ BuildMI(MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE));
+ ++NumFences;
+ Modified = true;
+ }
+ }
+
+ if (Modified)
+ ++NumFunctionsMitigated;
+
+ return Modified;
+}
+
+INITIALIZE_PASS(X86LoadValueInjectionLoadHardeningUnoptimizedPass, PASS_KEY,
+ "X86 LVI load hardening", false, false)
+
+FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningUnoptimizedPass() {
+ return new X86LoadValueInjectionLoadHardeningUnoptimizedPass();
+}
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
new file mode 100644
index 000000000000..6e1134a25950
--- /dev/null
+++ b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
@@ -0,0 +1,143 @@
+//===-- X86LoadValueInjectionRetHardening.cpp - LVI RET hardening for x86 --==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Description: Replaces every `ret` instruction with the sequence:
+/// ```
+/// pop <scratch-reg>
+/// lfence
+/// jmp *<scratch-reg>
+/// ```
+/// where `<scratch-reg>` is some available scratch register, according to the
+/// calling convention of the function being mitigated.
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include <bitset>
+
+using namespace llvm;
+
+#define PASS_KEY "x86-lvi-ret"
+#define DEBUG_TYPE PASS_KEY
+
+STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation");
+STATISTIC(NumFunctionsConsidered, "Number of functions analyzed");
+STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations "
+ "were deployed");
+
+namespace {
+
+class X86LoadValueInjectionRetHardeningPass : public MachineFunctionPass {
+public:
+ X86LoadValueInjectionRetHardeningPass() : MachineFunctionPass(ID) {}
+ StringRef getPassName() const override {
+ return "X86 Load Value Injection (LVI) Ret-Hardening";
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+};
+
+} // end anonymous namespace
+
+char X86LoadValueInjectionRetHardeningPass::ID = 0;
+
+bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction(
+ MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
+ << " *****\n");
+ const X86Subtarget *Subtarget = &MF.getSubtarget<X86Subtarget>();
+ if (!Subtarget->useLVIControlFlowIntegrity() || !Subtarget->is64Bit())
+ return false; // FIXME: support 32-bit
+
+  // Don't skip "optnone" functions, but do participate in opt-bisect.
+ const Function &F = MF.getFunction();
+ if (!F.hasOptNone() && skipFunction(F))
+ return false;
+
+ ++NumFunctionsConsidered;
+ const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const X86InstrInfo *TII = Subtarget->getInstrInfo();
+ unsigned ClobberReg = X86::NoRegister;
+ std::bitset<X86::NUM_TARGET_REGS> UnclobberableGR64s;
+ UnclobberableGR64s.set(X86::RSP); // can't clobber stack pointer
+ UnclobberableGR64s.set(X86::RIP); // can't clobber instruction pointer
+ UnclobberableGR64s.set(X86::RAX); // used for function return
+ UnclobberableGR64s.set(X86::RDX); // used for function return
+
+  // Mark the callee-saved registers as unclobberable; any other register
+  // allowed by the function's calling convention may be clobbered.
+ for (const MCPhysReg *PR = TRI->getCalleeSavedRegs(&MF); auto Reg = *PR; ++PR)
+ UnclobberableGR64s.set(Reg);
+ for (auto &Reg : X86::GR64RegClass) {
+ if (!UnclobberableGR64s.test(Reg)) {
+ ClobberReg = Reg;
+ break;
+ }
+ }
+
+ if (ClobberReg != X86::NoRegister) {
+ LLVM_DEBUG(dbgs() << "Selected register "
+ << Subtarget->getRegisterInfo()->getRegAsmName(ClobberReg)
+ << " to clobber\n");
+ } else {
+ LLVM_DEBUG(dbgs() << "Could not find a register to clobber\n");
+ }
+
+ bool Modified = false;
+ for (auto &MBB : MF) {
+ if (MBB.empty())
+ continue;
+
+ MachineInstr &MI = MBB.back();
+ if (MI.getOpcode() != X86::RETQ)
+ continue;
+
+ if (ClobberReg != X86::NoRegister) {
+ MBB.erase_instr(&MI);
+ BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::POP64r))
+ .addReg(ClobberReg, RegState::Define)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::LFENCE));
+ BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::JMP64r))
+ .addReg(ClobberReg);
+ } else {
+ // In case there is no available scratch register, we can still read from
+ // RSP to assert that RSP points to a valid page. The write to RSP is
+ // also helpful because it verifies that the stack's write permissions
+ // are intact.
+ MachineInstr *Fence = BuildMI(MBB, MI, DebugLoc(), TII->get(X86::LFENCE));
+ addRegOffset(BuildMI(MBB, Fence, DebugLoc(), TII->get(X86::SHL64mi)),
+ X86::RSP, false, 0)
+ .addImm(0)
+ ->addRegisterDead(X86::EFLAGS, TRI);
+ }
+
+ ++NumFences;
+ Modified = true;
+ }
+
+ if (Modified)
+ ++NumFunctionsMitigated;
+ return Modified;
+}
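[Editor's note] When no scratch register is available, the fallback in the else branch above keeps the RET in place and emits, in effect, `shlq $0, (%rsp)` followed by `lfence` immediately before it. The shift-by-zero is a no-op read-modify-write of the return-address slot (EFLAGS is marked dead), which is what the preceding comment means by asserting that RSP points to valid, writable stack memory.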
+
+INITIALIZE_PASS(X86LoadValueInjectionRetHardeningPass, PASS_KEY,
+ "X86 LVI ret hardener", false, false)
+
+FunctionPass *llvm::createX86LoadValueInjectionRetHardeningPass() {
+ return new X86LoadValueInjectionRetHardeningPass();
+}
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 7f49c6e861d4..f5caaaae4d84 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1220,8 +1220,8 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
break;
case MachineOperand::MO_Register:
// FIXME: Add retpoline support and remove this.
- if (Subtarget->useRetpolineIndirectCalls())
- report_fatal_error("Lowering register statepoints with retpoline not "
+ if (Subtarget->useIndirectThunkCalls())
+ report_fatal_error("Lowering register statepoints with thunks not "
"yet implemented.");
CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
CallOpcode = X86::CALL64r;
@@ -1399,9 +1399,9 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
EmitAndCountInstruction(
MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
// FIXME: Add retpoline support and remove this.
- if (Subtarget->useRetpolineIndirectCalls())
+ if (Subtarget->useIndirectThunkCalls())
report_fatal_error(
- "Lowering patchpoint with retpoline not yet implemented.");
+ "Lowering patchpoint with thunks not yet implemented.");
EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
}
diff --git a/llvm/lib/Target/X86/X86RetpolineThunks.cpp b/llvm/lib/Target/X86/X86RetpolineThunks.cpp
deleted file mode 100644
index 9085d7f068ac..000000000000
--- a/llvm/lib/Target/X86/X86RetpolineThunks.cpp
+++ /dev/null
@@ -1,286 +0,0 @@
-//======- X86RetpolineThunks.cpp - Construct retpoline thunks for x86 --=====//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// Pass that injects an MI thunk implementing a "retpoline". This is
-/// a RET-implemented trampoline that is used to lower indirect calls in a way
-/// that prevents speculation on some x86 processors and can be used to mitigate
-/// security vulnerabilities due to targeted speculative execution and side
-/// channels such as CVE-2017-5715.
-///
-/// TODO(chandlerc): All of this code could use better comments and
-/// documentation.
-///
-//===----------------------------------------------------------------------===//
-
-#include "X86.h"
-#include "X86InstrBuilder.h"
-#include "X86Subtarget.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "x86-retpoline-thunks"
-
-static const char ThunkNamePrefix[] = "__llvm_retpoline_";
-static const char R11ThunkName[] = "__llvm_retpoline_r11";
-static const char EAXThunkName[] = "__llvm_retpoline_eax";
-static const char ECXThunkName[] = "__llvm_retpoline_ecx";
-static const char EDXThunkName[] = "__llvm_retpoline_edx";
-static const char EDIThunkName[] = "__llvm_retpoline_edi";
-
-namespace {
-class X86RetpolineThunks : public MachineFunctionPass {
-public:
- static char ID;
-
- X86RetpolineThunks() : MachineFunctionPass(ID) {}
-
- StringRef getPassName() const override { return "X86 Retpoline Thunks"; }
-
- bool doInitialization(Module &M) override;
- bool runOnMachineFunction(MachineFunction &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<MachineModuleInfoWrapperPass>();
- AU.addPreserved<MachineModuleInfoWrapperPass>();
- }
-
-private:
- MachineModuleInfo *MMI = nullptr;
- const TargetMachine *TM = nullptr;
- bool Is64Bit = false;
- const X86Subtarget *STI = nullptr;
- const X86InstrInfo *TII = nullptr;
-
- bool InsertedThunks = false;
-
- void createThunkFunction(Module &M, StringRef Name);
- void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
- void populateThunk(MachineFunction &MF, unsigned Reg);
-};
-
-} // end anonymous namespace
-
-FunctionPass *llvm::createX86RetpolineThunksPass() {
- return new X86RetpolineThunks();
-}
-
-char X86RetpolineThunks::ID = 0;
-
-bool X86RetpolineThunks::doInitialization(Module &M) {
- InsertedThunks = false;
- return false;
-}
-
-bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
- LLVM_DEBUG(dbgs() << getPassName() << '\n');
-
- TM = &MF.getTarget();;
- STI = &MF.getSubtarget<X86Subtarget>();
- TII = STI->getInstrInfo();
- Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64;
-
- MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
- Module &M = const_cast<Module &>(*MMI->getModule());
-
- // If this function is not a thunk, check to see if we need to insert
- // a thunk.
- if (!MF.getName().startswith(ThunkNamePrefix)) {
- // If we've already inserted a thunk, nothing else to do.
- if (InsertedThunks)
- return false;
-
- // Only add a thunk if one of the functions has the retpoline feature
- // enabled in its subtarget, and doesn't enable external thunks.
- // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
- // nothing will end up calling it.
- // FIXME: It's a little silly to look at every function just to enumerate
- // the subtargets, but eventually we'll want to look at them for indirect
- // calls, so maybe this is OK.
- if ((!STI->useRetpolineIndirectCalls() &&
- !STI->useRetpolineIndirectBranches()) ||
- STI->useRetpolineExternalThunk())
- return false;
-
- // Otherwise, we need to insert the thunk.
- // WARNING: This is not really a well behaving thing to do in a function
- // pass. We extract the module and insert a new function (and machine
- // function) directly into the module.
- if (Is64Bit)
- createThunkFunction(M, R11ThunkName);
- else
- for (StringRef Name :
- {EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
- createThunkFunction(M, Name);
- InsertedThunks = true;
- return true;
- }
-
- // If this *is* a thunk function, we need to populate it with the correct MI.
- if (Is64Bit) {
- assert(MF.getName() == "__llvm_retpoline_r11" &&
- "Should only have an r11 thunk on 64-bit targets");
-
- // __llvm_retpoline_r11:
- // callq .Lr11_call_target
- // .Lr11_capture_spec:
- // pause
- // lfence
- // jmp .Lr11_capture_spec
- // .align 16
- // .Lr11_call_target:
- // movq %r11, (%rsp)
- // retq
- populateThunk(MF, X86::R11);
- } else {
- // For 32-bit targets we need to emit a collection of thunks for various
- // possible scratch registers as well as a fallback that uses EDI, which is
- // normally callee saved.
- // __llvm_retpoline_eax:
- // calll .Leax_call_target
- // .Leax_capture_spec:
- // pause
- // jmp .Leax_capture_spec
- // .align 16
- // .Leax_call_target:
- // movl %eax, (%esp) # Clobber return addr
- // retl
- //
- // __llvm_retpoline_ecx:
- // ... # Same setup
- // movl %ecx, (%esp)
- // retl
- //
- // __llvm_retpoline_edx:
- // ... # Same setup
- // movl %edx, (%esp)
- // retl
- //
- // __llvm_retpoline_edi:
- // ... # Same setup
- // movl %edi, (%esp)
- // retl
- if (MF.getName() == EAXThunkName)
- populateThunk(MF, X86::EAX);
- else if (MF.getName() == ECXThunkName)
- populateThunk(MF, X86::ECX);
- else if (MF.getName() == EDXThunkName)
- populateThunk(MF, X86::EDX);
- else if (MF.getName() == EDIThunkName)
- populateThunk(MF, X86::EDI);
- else
- llvm_unreachable("Invalid thunk name on x86-32!");
- }
-
- return true;
-}
-
-void X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
- assert(Name.startswith(ThunkNamePrefix) &&
- "Created a thunk with an unexpected prefix!");
-
- LLVMContext &Ctx = M.getContext();
- auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
- Function *F =
- Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
- F->setVisibility(GlobalValue::HiddenVisibility);
- F->setComdat(M.getOrInsertComdat(Name));
-
- // Add Attributes so that we don't create a frame, unwind information, or
- // inline.
- AttrBuilder B;
- B.addAttribute(llvm::Attribute::NoUnwind);
- B.addAttribute(llvm::Attribute::Naked);
- F->addAttributes(llvm::AttributeList::FunctionIndex, B);
-
- // Populate our function a bit so that we can verify.
- BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
- IRBuilder<> Builder(Entry);
-
- Builder.CreateRetVoid();
-
- // MachineFunctions/MachineBasicBlocks aren't created automatically for the
- // IR-level constructs we already made. Create them and insert them into the
- // module.
- MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
- MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry);
-
- // Insert EntryMBB into MF. It's not in the module until we do this.
- MF.insert(MF.end(), EntryMBB);
-}
-
-void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
- unsigned Reg) {
- const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
- const unsigned SPReg = Is64Bit ? X86::RSP : X86::ESP;
- addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(MovOpc)), SPReg, false, 0)
- .addReg(Reg);
-}
-
-void X86RetpolineThunks::populateThunk(MachineFunction &MF,
- unsigned Reg) {
- // Set MF properties. We never use vregs...
- MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
-
- // Grab the entry MBB and erase any other blocks. O0 codegen appears to
- // generate two bbs for the entry block.
- MachineBasicBlock *Entry = &MF.front();
- Entry->clear();
- while (MF.size() > 1)
- MF.erase(std::next(MF.begin()));
-
- MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
- MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
- MCSymbol *TargetSym = MF.getContext().createTempSymbol();
- MF.push_back(CaptureSpec);
- MF.push_back(CallTarget);
-
- const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
- const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
-
- Entry->addLiveIn(Reg);
- BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
-
- // The MIR verifier thinks that the CALL in the entry block will fall through
- // to CaptureSpec, so mark it as the successor. Technically, CaptureTarget is
- // the successor, but the MIR verifier doesn't know how to cope with that.
- Entry->addSuccessor(CaptureSpec);
-
- // In the capture loop for speculation, we want to stop the processor from
- // speculating as fast as possible. On Intel processors, the PAUSE instruction
- // will block speculation without consuming any execution resources. On AMD
- // processors, the PAUSE instruction is (essentially) a nop, so we also use an
- // LFENCE instruction which they have advised will stop speculation as well
- // with minimal resource utilization. We still end the capture with a jump to
- // form an infinite loop to fully guarantee that no matter what implementation
- // of the x86 ISA, speculating this code path never escapes.
- BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
- BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
- BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
- CaptureSpec->setHasAddressTaken();
- CaptureSpec->addSuccessor(CaptureSpec);
-
- CallTarget->addLiveIn(Reg);
- CallTarget->setHasAddressTaken();
- CallTarget->setAlignment(Align(16));
- insertRegReturnAddrClobber(*CallTarget, Reg);
- CallTarget->back().setPreInstrSymbol(MF, TargetSym);
- BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
-}
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index f4e8d30328ca..af5153243c8b 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -421,6 +421,16 @@ protected:
/// than emitting one inside the compiler.
bool UseRetpolineExternalThunk = false;
+ /// Prevent generation of indirect call/branch instructions from memory,
+ /// and force all indirect call/branch instructions from a register to be
+ /// preceded by an LFENCE. Also decompose RET instructions into a
+ /// POP+LFENCE+JMP sequence.
+ bool UseLVIControlFlowIntegrity = false;
+
+ /// Insert LFENCE instructions to prevent data speculatively injected into
+ /// loads from being used maliciously.
+ bool UseLVILoadHardening = false;
+
/// Use software floating point for code generation.
bool UseSoftFloat = false;
@@ -707,8 +717,21 @@ public:
return UseRetpolineIndirectBranches;
}
bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
+
+ // These are generic getters that OR together all of the thunk types
+ // supported by the subtarget. Therefore useIndirectThunk*() will return true
+ // if any respective thunk feature is enabled.
+ bool useIndirectThunkCalls() const {
+ return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
+ }
+ bool useIndirectThunkBranches() const {
+ return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
+ }
+
bool preferMaskRegisters() const { return PreferMaskRegisters; }
bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
+ bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
+ bool useLVILoadHardening() const { return UseLVILoadHardening; }
unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
@@ -853,10 +876,10 @@ public:
/// Return true if the subtarget allows calls to immediate address.
bool isLegalToCallImmediateAddr() const;
- /// If we are using retpolines, we need to expand indirectbr to avoid it
+ /// If we are using indirect thunks, we need to expand indirectbr to avoid it
/// lowering to an actual indirect jump.
bool enableIndirectBrExpand() const override {
- return useRetpolineIndirectBranches();
+ return useIndirectThunkBranches();
}
/// Enable the MachineScheduler pass for all X86 subtargets.
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 7176e46f07b1..9f639ffa22ec 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -82,6 +82,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
initializeX86SpeculativeLoadHardeningPassPass(PR);
initializeX86FlagsCopyLoweringPassPass(PR);
initializeX86CondBrFoldingPassPass(PR);
+ initializeX86LoadValueInjectionLoadHardeningPassPass(PR);
+ initializeX86LoadValueInjectionRetHardeningPassPass(PR);
initializeX86OptimizeLEAPassPass(PR);
}
@@ -496,6 +498,10 @@ void X86PassConfig::addMachineSSAOptimization() {
void X86PassConfig::addPostRegAlloc() {
addPass(createX86FloatingPointStackifierPass());
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createX86LoadValueInjectionLoadHardeningPass());
+ else
+ addPass(createX86LoadValueInjectionLoadHardeningUnoptimizedPass());
}
void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }
@@ -525,7 +531,7 @@ void X86PassConfig::addPreEmitPass2() {
const Triple &TT = TM->getTargetTriple();
const MCAsmInfo *MAI = TM->getMCAsmInfo();
- addPass(createX86RetpolineThunksPass());
+ addPass(createX86IndirectThunksPass());
// Insert extra int3 instructions after trailing call instructions to avoid
// issues in the unwinder.
@@ -542,6 +548,7 @@ void X86PassConfig::addPreEmitPass2() {
// Identify valid longjmp targets for Windows Control Flow Guard.
if (TT.isOSWindows())
addPass(createCFGuardLongjmpPass());
+ addPass(createX86LoadValueInjectionRetHardeningPass());
}
std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index ec976a971e3c..23561c25c50a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1768,7 +1768,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Constant *C2;
// C-(C2-X) --> X+(C-C2)
- if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))))
+ if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))) && !isa<ConstantExpr>(C2))
return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2));
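[Editor's note] A concrete instance of the fold above: with C = 10 and C2 = 3, `10 - (3 - x)` becomes `x + (10 - 3)`, i.e. `x + 7`. The added `!isa<ConstantExpr>(C2)` guard presumably skips the case where C2 is a constant expression, since ConstantExpr::getSub(C, C2) would then not fold to a simple constant and the rewrite would not simplify anything.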
// C-(X+C2) --> (C-C2)-X