Diffstat (limited to 'llvm/lib')
41 files changed, 2129 insertions, 1754 deletions
| diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index e852d663c6b4..e439c94a7325 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -2059,12 +2059,13 @@ char BasicAAWrapperPass::ID = 0;  void BasicAAWrapperPass::anchor() {}  INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa", -                      "Basic Alias Analysis (stateless AA impl)", false, true) +                      "Basic Alias Analysis (stateless AA impl)", true, true)  INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)  INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PhiValuesWrapperPass)  INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa", -                    "Basic Alias Analysis (stateless AA impl)", false, true) +                    "Basic Alias Analysis (stateless AA impl)", true, true)  FunctionPass *llvm::createBasicAAWrapperPass() {    return new BasicAAWrapperPass(); diff --git a/llvm/lib/Target/Hexagon/RDFGraph.cpp b/llvm/lib/CodeGen/RDFGraph.cpp index 0cb35dc98819..437a6b030096 100644 --- a/llvm/lib/Target/Hexagon/RDFGraph.cpp +++ b/llvm/lib/CodeGen/RDFGraph.cpp @@ -8,8 +8,6 @@  //  // Target-independent, SSA-based data flow graph for register data flow (RDF).  // -#include "RDFGraph.h" -#include "RDFRegisters.h"  #include "llvm/ADT/BitVector.h"  #include "llvm/ADT/STLExtras.h"  #include "llvm/ADT/SetVector.h" @@ -20,6 +18,8 @@  #include "llvm/CodeGen/MachineInstr.h"  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFRegisters.h"  #include "llvm/CodeGen/TargetInstrInfo.h"  #include "llvm/CodeGen/TargetLowering.h"  #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -753,8 +753,10 @@ RegisterSet DataFlowGraph::getLandingPadLiveIns() const {    const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();    if (RegisterId R = TLI.getExceptionPointerRegister(PF))      LR.insert(RegisterRef(R)); -  if (RegisterId R = TLI.getExceptionSelectorRegister(PF)) -    LR.insert(RegisterRef(R)); +  if (!isFuncletEHPersonality(classifyEHPersonality(PF))) { +    if (RegisterId R = TLI.getExceptionSelectorRegister(PF)) +      LR.insert(RegisterRef(R)); +  }    return LR;  } diff --git a/llvm/lib/Target/Hexagon/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp index e2c007c9d01a..0bcd27f8ea45 100644 --- a/llvm/lib/Target/Hexagon/RDFLiveness.cpp +++ b/llvm/lib/CodeGen/RDFLiveness.cpp @@ -22,9 +22,6 @@  // and Embedded Architectures and Compilers", 8 (4),  // <10.1145/2086696.2086706>. 
<hal-00647369>  // -#include "RDFLiveness.h" -#include "RDFGraph.h" -#include "RDFRegisters.h"  #include "llvm/ADT/BitVector.h"  #include "llvm/ADT/STLExtras.h"  #include "llvm/ADT/SetVector.h" @@ -33,6 +30,9 @@  #include "llvm/CodeGen/MachineDominators.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFRegisters.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/MC/LaneBitmask.h"  #include "llvm/MC/MCRegisterInfo.h" diff --git a/llvm/lib/Target/Hexagon/RDFRegisters.cpp b/llvm/lib/CodeGen/RDFRegisters.cpp index b5675784e34b..bd8661816e71 100644 --- a/llvm/lib/Target/Hexagon/RDFRegisters.cpp +++ b/llvm/lib/CodeGen/RDFRegisters.cpp @@ -6,11 +6,11 @@  //  //===----------------------------------------------------------------------===// -#include "RDFRegisters.h"  #include "llvm/ADT/BitVector.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineInstr.h"  #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/RDFRegisters.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/MC/LaneBitmask.h"  #include "llvm/MC/MCRegisterInfo.h" diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 297b11de17a9..fa2f0777897b 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -147,8 +147,17 @@ void llvm::computeLTOCacheKey(    // Include the hash for the current module    auto ModHash = Index.getModuleHash(ModuleID);    Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash))); + +  std::vector<uint64_t> ExportsGUID; +  ExportsGUID.reserve(ExportList.size());    for (const auto &VI : ExportList) {      auto GUID = VI.getGUID(); +    ExportsGUID.push_back(GUID); +  } + +  // Sort the export list elements GUIDs. +  llvm::sort(ExportsGUID); +  for (uint64_t GUID : ExportsGUID) {      // The export list can impact the internalization, be conservative here      Hasher.update(ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));    } @@ -156,12 +165,23 @@ void llvm::computeLTOCacheKey(    // Include the hash for every module we import functions from. The set of    // imported symbols for each module may affect code generation and is    // sensitive to link order, so include that as well. 
-  for (auto &Entry : ImportList) { -    auto ModHash = Index.getModuleHash(Entry.first()); +  using ImportMapIteratorTy = FunctionImporter::ImportMapTy::const_iterator; +  std::vector<ImportMapIteratorTy> ImportModulesVector; +  ImportModulesVector.reserve(ImportList.size()); + +  for (ImportMapIteratorTy It = ImportList.begin(); It != ImportList.end(); +       ++It) { +    ImportModulesVector.push_back(It); +  } +  llvm::sort(ImportModulesVector, +             [](const ImportMapIteratorTy &Lhs, const ImportMapIteratorTy &Rhs) +                 -> bool { return Lhs->getKey() < Rhs->getKey(); }); +  for (const ImportMapIteratorTy &EntryIt : ImportModulesVector) { +    auto ModHash = Index.getModuleHash(EntryIt->first());      Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash))); -    AddUint64(Entry.second.size()); -    for (auto &Fn : Entry.second) +    AddUint64(EntryIt->second.size()); +    for (auto &Fn : EntryIt->second)        AddUint64(Fn);    } diff --git a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp index 6f5f58554d09..d407edfbd966 100644 --- a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -304,7 +304,7 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node,    LLVM_DEBUG(dbgs() << "Replacing load of size " << size << " with constant "                      << val << '\n'); -  SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64); +  SDValue NVal = CurDAG->getConstant(val, DL, LD->getValueType(0));    // After replacement, the current node is dead, we need to    // go backward one step to make iterator still work diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index a9fb04f20d1c..6daeb3b4b63b 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -600,6 +600,38 @@ void BTFDebug::visitTypeEntry(const DIType *Ty, uint32_t &TypeId,                                bool CheckPointer, bool SeenPointer) {    if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end()) {      TypeId = DIToIdMap[Ty]; + +    // To handle the case like the following: +    //    struct t; +    //    typedef struct t _t; +    //    struct s1 { _t *c; }; +    //    int test1(struct s1 *arg) { ... } +    // +    //    struct t { int a; int b; }; +    //    struct s2 { _t c; } +    //    int test2(struct s2 *arg) { ... } +    // +    // During traversing test1() argument, "_t" is recorded +    // in DIToIdMap and a forward declaration fixup is created +    // for "struct t" to avoid pointee type traversal. +    // +    // During traversing test2() argument, even if we see "_t" is +    // already defined, we should keep moving to eventually +    // bring in types for "struct t". Otherwise, the "struct s2" +    // definition won't be correct. 
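(Aside on the llvm/lib/LTO/LTO.cpp hunk above: computeLTOCacheKey now copies the export GUIDs and the import-map iterators into vectors and sorts them before hashing, so the cache key no longer depends on hash-map iteration order or link order. The standalone sketch below shows the same sort-then-hash pattern with ordinary STL containers; the Hasher type and its FNV-1a mixing are illustrative stand-ins, not the hasher LTO actually uses.)

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

// Illustrative stand-in for the incremental hasher (not llvm::SHA1).
struct Hasher {
  uint64_t State = 1469598103934665603ULL;        // FNV-1a offset basis
  void addBytes(const void *Data, size_t Size) {
    const unsigned char *P = static_cast<const unsigned char *>(Data);
    for (size_t I = 0; I != Size; ++I) {
      State ^= P[I];
      State *= 1099511628211ULL;                  // FNV-1a prime
    }
  }
};

// Hash a map's contents independently of its iteration order: collect
// iterators, sort them by key, then feed key/value pairs to the hasher.
void hashMapDeterministically(
    const std::unordered_map<std::string, uint64_t> &ModuleHashes, Hasher &H) {
  using It = std::unordered_map<std::string, uint64_t>::const_iterator;
  std::vector<It> Entries;
  Entries.reserve(ModuleHashes.size());
  for (It I = ModuleHashes.begin(), E = ModuleHashes.end(); I != E; ++I)
    Entries.push_back(I);
  std::sort(Entries.begin(), Entries.end(),
            [](It L, It R) { return L->first < R->first; });
  for (It I : Entries) {
    H.addBytes(I->first.data(), I->first.size());
    H.addBytes(&I->second, sizeof(I->second));
  }
}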
+    if (Ty && (!CheckPointer || !SeenPointer)) { +      if (const auto *DTy = dyn_cast<DIDerivedType>(Ty)) { +        unsigned Tag = DTy->getTag(); +        if (Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type || +            Tag == dwarf::DW_TAG_volatile_type || +            Tag == dwarf::DW_TAG_restrict_type) { +          uint32_t TmpTypeId; +          visitTypeEntry(DTy->getBaseType(), TmpTypeId, CheckPointer, +                         SeenPointer); +        } +      } +    } +      return;    } diff --git a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp index 886034d9601a..f1fe51f5e54f 100644 --- a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -12,9 +12,6 @@  #include "HexagonInstrInfo.h"  #include "HexagonSubtarget.h"  #include "MCTargetDesc/HexagonBaseInfo.h" -#include "RDFGraph.h" -#include "RDFLiveness.h" -#include "RDFRegisters.h"  #include "llvm/ADT/DenseMap.h"  #include "llvm/ADT/DenseSet.h"  #include "llvm/ADT/StringRef.h" @@ -27,6 +24,9 @@  #include "llvm/CodeGen/MachineInstrBuilder.h"  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/CodeGen/RDFRegisters.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/InitializePasses.h"  #include "llvm/MC/MCInstrDesc.h" diff --git a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp index 517ad1c6ee7b..f26e23befde2 100644 --- a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp +++ b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp @@ -11,9 +11,6 @@  #include "MCTargetDesc/HexagonBaseInfo.h"  #include "RDFCopy.h"  #include "RDFDeadCode.h" -#include "RDFGraph.h" -#include "RDFLiveness.h" -#include "RDFRegisters.h"  #include "llvm/ADT/DenseMap.h"  #include "llvm/ADT/STLExtras.h"  #include "llvm/ADT/SetVector.h" @@ -24,6 +21,9 @@  #include "llvm/CodeGen/MachineInstr.h"  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/CodeGen/RDFRegisters.h"  #include "llvm/InitializePasses.h"  #include "llvm/Pass.h"  #include "llvm/Support/CommandLine.h" diff --git a/llvm/lib/Target/Hexagon/RDFCopy.cpp b/llvm/lib/Target/Hexagon/RDFCopy.cpp index a9d39fd4b2dc..34d58f0a7a23 100644 --- a/llvm/lib/Target/Hexagon/RDFCopy.cpp +++ b/llvm/lib/Target/Hexagon/RDFCopy.cpp @@ -11,13 +11,13 @@  //===----------------------------------------------------------------------===//  #include "RDFCopy.h" -#include "RDFGraph.h" -#include "RDFLiveness.h" -#include "RDFRegisters.h"  #include "llvm/CodeGen/MachineDominators.h"  #include "llvm/CodeGen/MachineInstr.h"  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/CodeGen/RDFRegisters.h"  #include "llvm/CodeGen/TargetOpcodes.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/MC/MCRegisterInfo.h" diff --git a/llvm/lib/Target/Hexagon/RDFCopy.h b/llvm/lib/Target/Hexagon/RDFCopy.h index 1450ab884849..99b18a75d8c2 100644 --- a/llvm/lib/Target/Hexagon/RDFCopy.h +++ b/llvm/lib/Target/Hexagon/RDFCopy.h @@ -9,9 +9,9 @@  #ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H  #define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H -#include "RDFGraph.h" -#include "RDFLiveness.h" -#include "RDFRegisters.h" +#include 
"llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/CodeGen/RDFRegisters.h"  #include "llvm/CodeGen/MachineFunction.h"  #include <map>  #include <vector> diff --git a/llvm/lib/Target/Hexagon/RDFDeadCode.cpp b/llvm/lib/Target/Hexagon/RDFDeadCode.cpp index af86c7b1956b..5a98debd3c00 100644 --- a/llvm/lib/Target/Hexagon/RDFDeadCode.cpp +++ b/llvm/lib/Target/Hexagon/RDFDeadCode.cpp @@ -9,13 +9,13 @@  // RDF-based generic dead code elimination.  #include "RDFDeadCode.h" -#include "RDFGraph.h" -#include "RDFLiveness.h"  #include "llvm/ADT/SetVector.h"  #include "llvm/CodeGen/MachineBasicBlock.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h"  #include "llvm/Support/Debug.h"  #include <queue> diff --git a/llvm/lib/Target/Hexagon/RDFDeadCode.h b/llvm/lib/Target/Hexagon/RDFDeadCode.h index 7f91977e1d6c..859c8161d355 100644 --- a/llvm/lib/Target/Hexagon/RDFDeadCode.h +++ b/llvm/lib/Target/Hexagon/RDFDeadCode.h @@ -23,8 +23,8 @@  #ifndef RDF_DEADCODE_H  #define RDF_DEADCODE_H -#include "RDFGraph.h" -#include "RDFLiveness.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h"  #include "llvm/ADT/SetVector.h"  namespace llvm { diff --git a/llvm/lib/Target/Hexagon/RDFGraph.h b/llvm/lib/Target/Hexagon/RDFGraph.h deleted file mode 100644 index 585f43e116f9..000000000000 --- a/llvm/lib/Target/Hexagon/RDFGraph.h +++ /dev/null @@ -1,968 +0,0 @@ -//===- RDFGraph.h -----------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Target-independent, SSA-based data flow graph for register data flow (RDF) -// for a non-SSA program representation (e.g. post-RA machine code). -// -// -// *** Introduction -// -// The RDF graph is a collection of nodes, each of which denotes some element -// of the program. There are two main types of such elements: code and refe- -// rences. Conceptually, "code" is something that represents the structure -// of the program, e.g. basic block or a statement, while "reference" is an -// instance of accessing a register, e.g. a definition or a use. Nodes are -// connected with each other based on the structure of the program (such as -// blocks, instructions, etc.), and based on the data flow (e.g. reaching -// definitions, reached uses, etc.). The single-reaching-definition principle -// of SSA is generally observed, although, due to the non-SSA representation -// of the program, there are some differences between the graph and a "pure" -// SSA representation. -// -// -// *** Implementation remarks -// -// Since the graph can contain a large number of nodes, memory consumption -// was one of the major design considerations. As a result, there is a single -// base class NodeBase which defines all members used by all possible derived -// classes. The members are arranged in a union, and a derived class cannot -// add any data members of its own. Each derived class only defines the -// functional interface, i.e. member functions. NodeBase must be a POD, -// which implies that all of its members must also be PODs. 
-// Since nodes need to be connected with other nodes, pointers have been -// replaced with 32-bit identifiers: each node has an id of type NodeId. -// There are mapping functions in the graph that translate between actual -// memory addresses and the corresponding identifiers. -// A node id of 0 is equivalent to nullptr. -// -// -// *** Structure of the graph -// -// A code node is always a collection of other nodes. For example, a code -// node corresponding to a basic block will contain code nodes corresponding -// to instructions. In turn, a code node corresponding to an instruction will -// contain a list of reference nodes that correspond to the definitions and -// uses of registers in that instruction. The members are arranged into a -// circular list, which is yet another consequence of the effort to save -// memory: for each member node it should be possible to obtain its owner, -// and it should be possible to access all other members. There are other -// ways to accomplish that, but the circular list seemed the most natural. -// -// +- CodeNode -+ -// |            | <---------------------------------------------------+ -// +-+--------+-+                                                     | -//   |FirstM  |LastM                                                  | -//   |        +-------------------------------------+                 | -//   |                                              |                 | -//   V                                              V                 | -//  +----------+ Next +----------+ Next       Next +----------+ Next  | -//  |          |----->|          |-----> ... ----->|          |----->-+ -//  +- Member -+      +- Member -+                 +- Member -+ -// -// The order of members is such that related reference nodes (see below) -// should be contiguous on the member list. -// -// A reference node is a node that encapsulates an access to a register, -// in other words, data flowing into or out of a register. There are two -// major kinds of reference nodes: defs and uses. A def node will contain -// the id of the first reached use, and the id of the first reached def. -// Each def and use will contain the id of the reaching def, and also the -// id of the next reached def (for def nodes) or use (for use nodes). -// The "next node sharing the same reaching def" is denoted as "sibling". -// In summary: -// - Def node contains: reaching def, sibling, first reached def, and first -// reached use. -// - Use node contains: reaching def and sibling. -// -// +-- DefNode --+ -// | R2 = ...    | <---+--------------------+ -// ++---------+--+     |                    | -//  |Reached  |Reached |                    | -//  |Def      |Use     |                    | -//  |         |        |Reaching            |Reaching -//  |         V        |Def                 |Def -//  |      +-- UseNode --+ Sib  +-- UseNode --+ Sib       Sib -//  |      | ... = R2    |----->| ... = R2    |----> ... ----> 0 -//  |      +-------------+      +-------------+ -//  V -// +-- DefNode --+ Sib -// | R2 = ...    |----> ... -// ++---------+--+ -//  |         | -//  |         | -// ...       ... -// -// To get a full picture, the circular lists connecting blocks within a -// function, instructions within a block, etc. should be superimposed with -// the def-def, def-use links shown above. 
-// To illustrate this, consider a small example in a pseudo-assembly: -// foo: -//   add r2, r0, r1   ; r2 = r0+r1 -//   addi r0, r2, 1   ; r0 = r2+1 -//   ret r0           ; return value in r0 -// -// The graph (in a format used by the debugging functions) would look like: -// -//   DFG dump:[ -//   f1: Function foo -//   b2: === %bb.0 === preds(0), succs(0): -//   p3: phi [d4<r0>(,d12,u9):] -//   p5: phi [d6<r1>(,,u10):] -//   s7: add [d8<r2>(,,u13):, u9<r0>(d4):, u10<r1>(d6):] -//   s11: addi [d12<r0>(d4,,u15):, u13<r2>(d8):] -//   s14: ret [u15<r0>(d12):] -//   ] -// -// The f1, b2, p3, etc. are node ids. The letter is prepended to indicate the -// kind of the node (i.e. f - function, b - basic block, p - phi, s - state- -// ment, d - def, u - use). -// The format of a def node is: -//   dN<R>(rd,d,u):sib, -// where -//   N   - numeric node id, -//   R   - register being defined -//   rd  - reaching def, -//   d   - reached def, -//   u   - reached use, -//   sib - sibling. -// The format of a use node is: -//   uN<R>[!](rd):sib, -// where -//   N   - numeric node id, -//   R   - register being used, -//   rd  - reaching def, -//   sib - sibling. -// Possible annotations (usually preceding the node id): -//   +   - preserving def, -//   ~   - clobbering def, -//   "   - shadow ref (follows the node id), -//   !   - fixed register (appears after register name). -// -// The circular lists are not explicit in the dump. -// -// -// *** Node attributes -// -// NodeBase has a member "Attrs", which is the primary way of determining -// the node's characteristics. The fields in this member decide whether -// the node is a code node or a reference node (i.e. node's "type"), then -// within each type, the "kind" determines what specifically this node -// represents. The remaining bits, "flags", contain additional information -// that is even more detailed than the "kind". -// CodeNode's kinds are: -// - Phi:   Phi node, members are reference nodes. -// - Stmt:  Statement, members are reference nodes. -// - Block: Basic block, members are instruction nodes (i.e. Phi or Stmt). -// - Func:  The whole function. The members are basic block nodes. -// RefNode's kinds are: -// - Use. -// - Def. -// -// Meaning of flags: -// - Preserving: applies only to defs. A preserving def is one that can -//   preserve some of the original bits among those that are included in -//   the register associated with that def. For example, if R0 is a 32-bit -//   register, but a def can only change the lower 16 bits, then it will -//   be marked as preserving. -// - Shadow: a reference that has duplicates holding additional reaching -//   defs (see more below). -// - Clobbering: applied only to defs, indicates that the value generated -//   by this def is unspecified. A typical example would be volatile registers -//   after function calls. -// - Fixed: the register in this def/use cannot be replaced with any other -//   register. A typical case would be a parameter register to a call, or -//   the register with the return value from a function. -// - Undef: the register in this reference the register is assumed to have -//   no pre-existing value, even if it appears to be reached by some def. -//   This is typically used to prevent keeping registers artificially live -//   in cases when they are defined via predicated instructions. For example: -//     r0 = add-if-true cond, r10, r11                (1) -//     r0 = add-if-false cond, r12, r13, implicit r0  (2) -//     ... 
= r0                                       (3) -//   Before (1), r0 is not intended to be live, and the use of r0 in (3) is -//   not meant to be reached by any def preceding (1). However, since the -//   defs in (1) and (2) are both preserving, these properties alone would -//   imply that the use in (3) may indeed be reached by some prior def. -//   Adding Undef flag to the def in (1) prevents that. The Undef flag -//   may be applied to both defs and uses. -// - Dead: applies only to defs. The value coming out of a "dead" def is -//   assumed to be unused, even if the def appears to be reaching other defs -//   or uses. The motivation for this flag comes from dead defs on function -//   calls: there is no way to determine if such a def is dead without -//   analyzing the target's ABI. Hence the graph should contain this info, -//   as it is unavailable otherwise. On the other hand, a def without any -//   uses on a typical instruction is not the intended target for this flag. -// -// *** Shadow references -// -// It may happen that a super-register can have two (or more) non-overlapping -// sub-registers. When both of these sub-registers are defined and followed -// by a use of the super-register, the use of the super-register will not -// have a unique reaching def: both defs of the sub-registers need to be -// accounted for. In such cases, a duplicate use of the super-register is -// added and it points to the extra reaching def. Both uses are marked with -// a flag "shadow". Example: -// Assume t0 is a super-register of r0 and r1, r0 and r1 do not overlap: -//   set r0, 1        ; r0 = 1 -//   set r1, 1        ; r1 = 1 -//   addi t1, t0, 1   ; t1 = t0+1 -// -// The DFG: -//   s1: set [d2<r0>(,,u9):] -//   s3: set [d4<r1>(,,u10):] -//   s5: addi [d6<t1>(,,):, u7"<t0>(d2):, u8"<t0>(d4):] -// -// The statement s5 has two use nodes for t0: u7" and u9". The quotation -// mark " indicates that the node is a shadow. -// - -#ifndef LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H -#define LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H - -#include "RDFRegisters.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/LaneBitmask.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/MathExtras.h" -#include <cassert> -#include <cstdint> -#include <cstring> -#include <map> -#include <set> -#include <unordered_map> -#include <utility> -#include <vector> - -// RDF uses uint32_t to refer to registers. This is to ensure that the type -// size remains specific. In other places, registers are often stored using -// unsigned. 
-static_assert(sizeof(uint32_t) == sizeof(unsigned), "Those should be equal"); - -namespace llvm { - -class MachineBasicBlock; -class MachineDominanceFrontier; -class MachineDominatorTree; -class MachineFunction; -class MachineInstr; -class MachineOperand; -class raw_ostream; -class TargetInstrInfo; -class TargetRegisterInfo; - -namespace rdf { - -  using NodeId = uint32_t; - -  struct DataFlowGraph; - -  struct NodeAttrs { -    enum : uint16_t { -      None          = 0x0000,   // Nothing - -      // Types: 2 bits -      TypeMask      = 0x0003, -      Code          = 0x0001,   // 01, Container -      Ref           = 0x0002,   // 10, Reference - -      // Kind: 3 bits -      KindMask      = 0x0007 << 2, -      Def           = 0x0001 << 2,  // 001 -      Use           = 0x0002 << 2,  // 010 -      Phi           = 0x0003 << 2,  // 011 -      Stmt          = 0x0004 << 2,  // 100 -      Block         = 0x0005 << 2,  // 101 -      Func          = 0x0006 << 2,  // 110 - -      // Flags: 7 bits for now -      FlagMask      = 0x007F << 5, -      Shadow        = 0x0001 << 5,  // 0000001, Has extra reaching defs. -      Clobbering    = 0x0002 << 5,  // 0000010, Produces unspecified values. -      PhiRef        = 0x0004 << 5,  // 0000100, Member of PhiNode. -      Preserving    = 0x0008 << 5,  // 0001000, Def can keep original bits. -      Fixed         = 0x0010 << 5,  // 0010000, Fixed register. -      Undef         = 0x0020 << 5,  // 0100000, Has no pre-existing value. -      Dead          = 0x0040 << 5,  // 1000000, Does not define a value. -    }; - -    static uint16_t type(uint16_t T)  { return T & TypeMask; } -    static uint16_t kind(uint16_t T)  { return T & KindMask; } -    static uint16_t flags(uint16_t T) { return T & FlagMask; } - -    static uint16_t set_type(uint16_t A, uint16_t T) { -      return (A & ~TypeMask) | T; -    } - -    static uint16_t set_kind(uint16_t A, uint16_t K) { -      return (A & ~KindMask) | K; -    } - -    static uint16_t set_flags(uint16_t A, uint16_t F) { -      return (A & ~FlagMask) | F; -    } - -    // Test if A contains B. -    static bool contains(uint16_t A, uint16_t B) { -      if (type(A) != Code) -        return false; -      uint16_t KB = kind(B); -      switch (kind(A)) { -        case Func: -          return KB == Block; -        case Block: -          return KB == Phi || KB == Stmt; -        case Phi: -        case Stmt: -          return type(B) == Ref; -      } -      return false; -    } -  }; - -  struct BuildOptions { -    enum : unsigned { -      None          = 0x00, -      KeepDeadPhis  = 0x01,   // Do not remove dead phis during build. -    }; -  }; - -  template <typename T> struct NodeAddr { -    NodeAddr() = default; -    NodeAddr(T A, NodeId I) : Addr(A), Id(I) {} - -    // Type cast (casting constructor). The reason for having this class -    // instead of std::pair. -    template <typename S> NodeAddr(const NodeAddr<S> &NA) -      : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {} - -    bool operator== (const NodeAddr<T> &NA) const { -      assert((Addr == NA.Addr) == (Id == NA.Id)); -      return Addr == NA.Addr; -    } -    bool operator!= (const NodeAddr<T> &NA) const { -      return !operator==(NA); -    } - -    T Addr = nullptr; -    NodeId Id = 0; -  }; - -  struct NodeBase; - -  // Fast memory allocation and translation between node id and node address. -  // This is really the same idea as the one underlying the "bump pointer -  // allocator", the difference being in the translation. 
A node id is -  // composed of two components: the index of the block in which it was -  // allocated, and the index within the block. With the default settings, -  // where the number of nodes per block is 4096, the node id (minus 1) is: -  // -  // bit position:                11             0 -  // +----------------------------+--------------+ -  // | Index of the block         |Index in block| -  // +----------------------------+--------------+ -  // -  // The actual node id is the above plus 1, to avoid creating a node id of 0. -  // -  // This method significantly improved the build time, compared to using maps -  // (std::unordered_map or DenseMap) to translate between pointers and ids. -  struct NodeAllocator { -    // Amount of storage for a single node. -    enum { NodeMemSize = 32 }; - -    NodeAllocator(uint32_t NPB = 4096) -        : NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)), -          IndexMask((1 << BitsPerIndex)-1) { -      assert(isPowerOf2_32(NPB)); -    } - -    NodeBase *ptr(NodeId N) const { -      uint32_t N1 = N-1; -      uint32_t BlockN = N1 >> BitsPerIndex; -      uint32_t Offset = (N1 & IndexMask) * NodeMemSize; -      return reinterpret_cast<NodeBase*>(Blocks[BlockN]+Offset); -    } - -    NodeId id(const NodeBase *P) const; -    NodeAddr<NodeBase*> New(); -    void clear(); - -  private: -    void startNewBlock(); -    bool needNewBlock(); - -    uint32_t makeId(uint32_t Block, uint32_t Index) const { -      // Add 1 to the id, to avoid the id of 0, which is treated as "null". -      return ((Block << BitsPerIndex) | Index) + 1; -    } - -    const uint32_t NodesPerBlock; -    const uint32_t BitsPerIndex; -    const uint32_t IndexMask; -    char *ActiveEnd = nullptr; -    std::vector<char*> Blocks; -    using AllocatorTy = BumpPtrAllocatorImpl<MallocAllocator, 65536>; -    AllocatorTy MemPool; -  }; - -  using RegisterSet = std::set<RegisterRef>; - -  struct TargetOperandInfo { -    TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {} -    virtual ~TargetOperandInfo() = default; - -    virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const; -    virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const; -    virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const; - -    const TargetInstrInfo &TII; -  }; - -  // Packed register reference. Only used for storage. -  struct PackedRegisterRef { -    RegisterId Reg; -    uint32_t MaskId; -  }; - -  struct LaneMaskIndex : private IndexedSet<LaneBitmask> { -    LaneMaskIndex() = default; - -    LaneBitmask getLaneMaskForIndex(uint32_t K) const { -      return K == 0 ? LaneBitmask::getAll() : get(K); -    } - -    uint32_t getIndexForLaneMask(LaneBitmask LM) { -      assert(LM.any()); -      return LM.all() ? 0 : insert(LM); -    } - -    uint32_t getIndexForLaneMask(LaneBitmask LM) const { -      assert(LM.any()); -      return LM.all() ? 0 : find(LM); -    } -  }; - -  struct NodeBase { -  public: -    // Make sure this is a POD. -    NodeBase() = default; - -    uint16_t getType()  const { return NodeAttrs::type(Attrs); } -    uint16_t getKind()  const { return NodeAttrs::kind(Attrs); } -    uint16_t getFlags() const { return NodeAttrs::flags(Attrs); } -    NodeId   getNext()  const { return Next; } - -    uint16_t getAttrs() const { return Attrs; } -    void setAttrs(uint16_t A) { Attrs = A; } -    void setFlags(uint16_t F) { setAttrs(NodeAttrs::set_flags(getAttrs(), F)); } - -    // Insert node NA after "this" in the circular chain. 
-    void append(NodeAddr<NodeBase*> NA); - -    // Initialize all members to 0. -    void init() { memset(this, 0, sizeof *this); } - -    void setNext(NodeId N) { Next = N; } - -  protected: -    uint16_t Attrs; -    uint16_t Reserved; -    NodeId Next;                // Id of the next node in the circular chain. -    // Definitions of nested types. Using anonymous nested structs would make -    // this class definition clearer, but unnamed structs are not a part of -    // the standard. -    struct Def_struct  { -      NodeId DD, DU;          // Ids of the first reached def and use. -    }; -    struct PhiU_struct  { -      NodeId PredB;           // Id of the predecessor block for a phi use. -    }; -    struct Code_struct { -      void *CP;               // Pointer to the actual code. -      NodeId FirstM, LastM;   // Id of the first member and last. -    }; -    struct Ref_struct { -      NodeId RD, Sib;         // Ids of the reaching def and the sibling. -      union { -        Def_struct Def; -        PhiU_struct PhiU; -      }; -      union { -        MachineOperand *Op;   // Non-phi refs point to a machine operand. -        PackedRegisterRef PR; // Phi refs store register info directly. -      }; -    }; - -    // The actual payload. -    union { -      Ref_struct Ref; -      Code_struct Code; -    }; -  }; -  // The allocator allocates chunks of 32 bytes for each node. The fact that -  // each node takes 32 bytes in memory is used for fast translation between -  // the node id and the node address. -  static_assert(sizeof(NodeBase) <= NodeAllocator::NodeMemSize, -        "NodeBase must be at most NodeAllocator::NodeMemSize bytes"); - -  using NodeList = SmallVector<NodeAddr<NodeBase *>, 4>; -  using NodeSet = std::set<NodeId>; - -  struct RefNode : public NodeBase { -    RefNode() = default; - -    RegisterRef getRegRef(const DataFlowGraph &G) const; - -    MachineOperand &getOp() { -      assert(!(getFlags() & NodeAttrs::PhiRef)); -      return *Ref.Op; -    } - -    void setRegRef(RegisterRef RR, DataFlowGraph &G); -    void setRegRef(MachineOperand *Op, DataFlowGraph &G); - -    NodeId getReachingDef() const { -      return Ref.RD; -    } -    void setReachingDef(NodeId RD) { -      Ref.RD = RD; -    } - -    NodeId getSibling() const { -      return Ref.Sib; -    } -    void setSibling(NodeId Sib) { -      Ref.Sib = Sib; -    } - -    bool isUse() const { -      assert(getType() == NodeAttrs::Ref); -      return getKind() == NodeAttrs::Use; -    } - -    bool isDef() const { -      assert(getType() == NodeAttrs::Ref); -      return getKind() == NodeAttrs::Def; -    } - -    template <typename Predicate> -    NodeAddr<RefNode*> getNextRef(RegisterRef RR, Predicate P, bool NextOnly, -        const DataFlowGraph &G); -    NodeAddr<NodeBase*> getOwner(const DataFlowGraph &G); -  }; - -  struct DefNode : public RefNode { -    NodeId getReachedDef() const { -      return Ref.Def.DD; -    } -    void setReachedDef(NodeId D) { -      Ref.Def.DD = D; -    } -    NodeId getReachedUse() const { -      return Ref.Def.DU; -    } -    void setReachedUse(NodeId U) { -      Ref.Def.DU = U; -    } - -    void linkToDef(NodeId Self, NodeAddr<DefNode*> DA); -  }; - -  struct UseNode : public RefNode { -    void linkToDef(NodeId Self, NodeAddr<DefNode*> DA); -  }; - -  struct PhiUseNode : public UseNode { -    NodeId getPredecessor() const { -      assert(getFlags() & NodeAttrs::PhiRef); -      return Ref.PhiU.PredB; -    } -    void setPredecessor(NodeId B) { -      assert(getFlags() & 
NodeAttrs::PhiRef); -      Ref.PhiU.PredB = B; -    } -  }; - -  struct CodeNode : public NodeBase { -    template <typename T> T getCode() const { -      return static_cast<T>(Code.CP); -    } -    void setCode(void *C) { -      Code.CP = C; -    } - -    NodeAddr<NodeBase*> getFirstMember(const DataFlowGraph &G) const; -    NodeAddr<NodeBase*> getLastMember(const DataFlowGraph &G) const; -    void addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G); -    void addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA, -        const DataFlowGraph &G); -    void removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G); - -    NodeList members(const DataFlowGraph &G) const; -    template <typename Predicate> -    NodeList members_if(Predicate P, const DataFlowGraph &G) const; -  }; - -  struct InstrNode : public CodeNode { -    NodeAddr<NodeBase*> getOwner(const DataFlowGraph &G); -  }; - -  struct PhiNode : public InstrNode { -    MachineInstr *getCode() const { -      return nullptr; -    } -  }; - -  struct StmtNode : public InstrNode { -    MachineInstr *getCode() const { -      return CodeNode::getCode<MachineInstr*>(); -    } -  }; - -  struct BlockNode : public CodeNode { -    MachineBasicBlock *getCode() const { -      return CodeNode::getCode<MachineBasicBlock*>(); -    } - -    void addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G); -  }; - -  struct FuncNode : public CodeNode { -    MachineFunction *getCode() const { -      return CodeNode::getCode<MachineFunction*>(); -    } - -    NodeAddr<BlockNode*> findBlock(const MachineBasicBlock *BB, -        const DataFlowGraph &G) const; -    NodeAddr<BlockNode*> getEntryBlock(const DataFlowGraph &G); -  }; - -  struct DataFlowGraph { -    DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii, -        const TargetRegisterInfo &tri, const MachineDominatorTree &mdt, -        const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi); - -    NodeBase *ptr(NodeId N) const; -    template <typename T> T ptr(NodeId N) const { -      return static_cast<T>(ptr(N)); -    } - -    NodeId id(const NodeBase *P) const; - -    template <typename T> NodeAddr<T> addr(NodeId N) const { -      return { ptr<T>(N), N }; -    } - -    NodeAddr<FuncNode*> getFunc() const { return Func; } -    MachineFunction &getMF() const { return MF; } -    const TargetInstrInfo &getTII() const { return TII; } -    const TargetRegisterInfo &getTRI() const { return TRI; } -    const PhysicalRegisterInfo &getPRI() const { return PRI; } -    const MachineDominatorTree &getDT() const { return MDT; } -    const MachineDominanceFrontier &getDF() const { return MDF; } -    const RegisterAggr &getLiveIns() const { return LiveIns; } - -    struct DefStack { -      DefStack() = default; - -      bool empty() const { return Stack.empty() || top() == bottom(); } - -    private: -      using value_type = NodeAddr<DefNode *>; -      struct Iterator { -        using value_type = DefStack::value_type; - -        Iterator &up() { Pos = DS.nextUp(Pos); return *this; } -        Iterator &down() { Pos = DS.nextDown(Pos); return *this; } - -        value_type operator*() const { -          assert(Pos >= 1); -          return DS.Stack[Pos-1]; -        } -        const value_type *operator->() const { -          assert(Pos >= 1); -          return &DS.Stack[Pos-1]; -        } -        bool operator==(const Iterator &It) const { return Pos == It.Pos; } -        bool operator!=(const Iterator &It) const { return Pos != It.Pos; } - -      private: -        friend 
struct DefStack; - -        Iterator(const DefStack &S, bool Top); - -        // Pos-1 is the index in the StorageType object that corresponds to -        // the top of the DefStack. -        const DefStack &DS; -        unsigned Pos; -      }; - -    public: -      using iterator = Iterator; - -      iterator top() const { return Iterator(*this, true); } -      iterator bottom() const { return Iterator(*this, false); } -      unsigned size() const; - -      void push(NodeAddr<DefNode*> DA) { Stack.push_back(DA); } -      void pop(); -      void start_block(NodeId N); -      void clear_block(NodeId N); - -    private: -      friend struct Iterator; - -      using StorageType = std::vector<value_type>; - -      bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const { -        return (P.Addr == nullptr) && (N == 0 || P.Id == N); -      } - -      unsigned nextUp(unsigned P) const; -      unsigned nextDown(unsigned P) const; - -      StorageType Stack; -    }; - -    // Make this std::unordered_map for speed of accessing elements. -    // Map: Register (physical or virtual) -> DefStack -    using DefStackMap = std::unordered_map<RegisterId, DefStack>; - -    void build(unsigned Options = BuildOptions::None); -    void pushAllDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM); -    void markBlock(NodeId B, DefStackMap &DefM); -    void releaseBlock(NodeId B, DefStackMap &DefM); - -    PackedRegisterRef pack(RegisterRef RR) { -      return { RR.Reg, LMI.getIndexForLaneMask(RR.Mask) }; -    } -    PackedRegisterRef pack(RegisterRef RR) const { -      return { RR.Reg, LMI.getIndexForLaneMask(RR.Mask) }; -    } -    RegisterRef unpack(PackedRegisterRef PR) const { -      return RegisterRef(PR.Reg, LMI.getLaneMaskForIndex(PR.MaskId)); -    } - -    RegisterRef makeRegRef(unsigned Reg, unsigned Sub) const; -    RegisterRef makeRegRef(const MachineOperand &Op) const; -    RegisterRef restrictRef(RegisterRef AR, RegisterRef BR) const; - -    NodeAddr<RefNode*> getNextRelated(NodeAddr<InstrNode*> IA, -        NodeAddr<RefNode*> RA) const; -    NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA, -        NodeAddr<RefNode*> RA, bool Create); -    NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA, -        NodeAddr<RefNode*> RA) const; -    NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA, -        NodeAddr<RefNode*> RA, bool Create); -    NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA, -        NodeAddr<RefNode*> RA) const; - -    NodeList getRelatedRefs(NodeAddr<InstrNode*> IA, -        NodeAddr<RefNode*> RA) const; - -    NodeAddr<BlockNode*> findBlock(MachineBasicBlock *BB) const { -      return BlockNodes.at(BB); -    } - -    void unlinkUse(NodeAddr<UseNode*> UA, bool RemoveFromOwner) { -      unlinkUseDF(UA); -      if (RemoveFromOwner) -        removeFromOwner(UA); -    } - -    void unlinkDef(NodeAddr<DefNode*> DA, bool RemoveFromOwner) { -      unlinkDefDF(DA); -      if (RemoveFromOwner) -        removeFromOwner(DA); -    } - -    // Some useful filters. 
-    template <uint16_t Kind> -    static bool IsRef(const NodeAddr<NodeBase*> BA) { -      return BA.Addr->getType() == NodeAttrs::Ref && -             BA.Addr->getKind() == Kind; -    } - -    template <uint16_t Kind> -    static bool IsCode(const NodeAddr<NodeBase*> BA) { -      return BA.Addr->getType() == NodeAttrs::Code && -             BA.Addr->getKind() == Kind; -    } - -    static bool IsDef(const NodeAddr<NodeBase*> BA) { -      return BA.Addr->getType() == NodeAttrs::Ref && -             BA.Addr->getKind() == NodeAttrs::Def; -    } - -    static bool IsUse(const NodeAddr<NodeBase*> BA) { -      return BA.Addr->getType() == NodeAttrs::Ref && -             BA.Addr->getKind() == NodeAttrs::Use; -    } - -    static bool IsPhi(const NodeAddr<NodeBase*> BA) { -      return BA.Addr->getType() == NodeAttrs::Code && -             BA.Addr->getKind() == NodeAttrs::Phi; -    } - -    static bool IsPreservingDef(const NodeAddr<DefNode*> DA) { -      uint16_t Flags = DA.Addr->getFlags(); -      return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef); -    } - -  private: -    void reset(); - -    RegisterSet getLandingPadLiveIns() const; - -    NodeAddr<NodeBase*> newNode(uint16_t Attrs); -    NodeAddr<NodeBase*> cloneNode(const NodeAddr<NodeBase*> B); -    NodeAddr<UseNode*> newUse(NodeAddr<InstrNode*> Owner, -        MachineOperand &Op, uint16_t Flags = NodeAttrs::None); -    NodeAddr<PhiUseNode*> newPhiUse(NodeAddr<PhiNode*> Owner, -        RegisterRef RR, NodeAddr<BlockNode*> PredB, -        uint16_t Flags = NodeAttrs::PhiRef); -    NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner, -        MachineOperand &Op, uint16_t Flags = NodeAttrs::None); -    NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner, -        RegisterRef RR, uint16_t Flags = NodeAttrs::PhiRef); -    NodeAddr<PhiNode*> newPhi(NodeAddr<BlockNode*> Owner); -    NodeAddr<StmtNode*> newStmt(NodeAddr<BlockNode*> Owner, -        MachineInstr *MI); -    NodeAddr<BlockNode*> newBlock(NodeAddr<FuncNode*> Owner, -        MachineBasicBlock *BB); -    NodeAddr<FuncNode*> newFunc(MachineFunction *MF); - -    template <typename Predicate> -    std::pair<NodeAddr<RefNode*>,NodeAddr<RefNode*>> -    locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA, -        Predicate P) const; - -    using BlockRefsMap = std::map<NodeId, RegisterSet>; - -    void buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In); -    void recordDefsForDF(BlockRefsMap &PhiM, NodeAddr<BlockNode*> BA); -    void buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs, -        NodeAddr<BlockNode*> BA); -    void removeUnusedPhis(); - -    void pushClobbers(NodeAddr<InstrNode*> IA, DefStackMap &DM); -    void pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM); -    template <typename T> void linkRefUp(NodeAddr<InstrNode*> IA, -        NodeAddr<T> TA, DefStack &DS); -    template <typename Predicate> void linkStmtRefs(DefStackMap &DefM, -        NodeAddr<StmtNode*> SA, Predicate P); -    void linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA); - -    void unlinkUseDF(NodeAddr<UseNode*> UA); -    void unlinkDefDF(NodeAddr<DefNode*> DA); - -    void removeFromOwner(NodeAddr<RefNode*> RA) { -      NodeAddr<InstrNode*> IA = RA.Addr->getOwner(*this); -      IA.Addr->removeMember(RA, *this); -    } - -    MachineFunction &MF; -    const TargetInstrInfo &TII; -    const TargetRegisterInfo &TRI; -    const PhysicalRegisterInfo PRI; -    const MachineDominatorTree &MDT; -    const MachineDominanceFrontier &MDF; -    const TargetOperandInfo &TOI; - 
-    RegisterAggr LiveIns; -    NodeAddr<FuncNode*> Func; -    NodeAllocator Memory; -    // Local map:  MachineBasicBlock -> NodeAddr<BlockNode*> -    std::map<MachineBasicBlock*,NodeAddr<BlockNode*>> BlockNodes; -    // Lane mask map. -    LaneMaskIndex LMI; -  };  // struct DataFlowGraph - -  template <typename Predicate> -  NodeAddr<RefNode*> RefNode::getNextRef(RegisterRef RR, Predicate P, -        bool NextOnly, const DataFlowGraph &G) { -    // Get the "Next" reference in the circular list that references RR and -    // satisfies predicate "Pred". -    auto NA = G.addr<NodeBase*>(getNext()); - -    while (NA.Addr != this) { -      if (NA.Addr->getType() == NodeAttrs::Ref) { -        NodeAddr<RefNode*> RA = NA; -        if (RA.Addr->getRegRef(G) == RR && P(NA)) -          return NA; -        if (NextOnly) -          break; -        NA = G.addr<NodeBase*>(NA.Addr->getNext()); -      } else { -        // We've hit the beginning of the chain. -        assert(NA.Addr->getType() == NodeAttrs::Code); -        NodeAddr<CodeNode*> CA = NA; -        NA = CA.Addr->getFirstMember(G); -      } -    } -    // Return the equivalent of "nullptr" if such a node was not found. -    return NodeAddr<RefNode*>(); -  } - -  template <typename Predicate> -  NodeList CodeNode::members_if(Predicate P, const DataFlowGraph &G) const { -    NodeList MM; -    auto M = getFirstMember(G); -    if (M.Id == 0) -      return MM; - -    while (M.Addr != this) { -      if (P(M)) -        MM.push_back(M); -      M = G.addr<NodeBase*>(M.Addr->getNext()); -    } -    return MM; -  } - -  template <typename T> -  struct Print { -    Print(const T &x, const DataFlowGraph &g) : Obj(x), G(g) {} - -    const T &Obj; -    const DataFlowGraph &G; -  }; - -  template <typename T> -  struct PrintNode : Print<NodeAddr<T>> { -    PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g) -      : Print<NodeAddr<T>>(x, g) {} -  }; - -  raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterRef> &P); -  raw_ostream &operator<<(raw_ostream &OS, const Print<NodeId> &P); -  raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<DefNode *>> &P); -  raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<UseNode *>> &P); -  raw_ostream &operator<<(raw_ostream &OS, -                          const Print<NodeAddr<PhiUseNode *>> &P); -  raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<RefNode *>> &P); -  raw_ostream &operator<<(raw_ostream &OS, const Print<NodeList> &P); -  raw_ostream &operator<<(raw_ostream &OS, const Print<NodeSet> &P); -  raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<PhiNode *>> &P); -  raw_ostream &operator<<(raw_ostream &OS, -                          const Print<NodeAddr<StmtNode *>> &P); -  raw_ostream &operator<<(raw_ostream &OS, -                          const Print<NodeAddr<InstrNode *>> &P); -  raw_ostream &operator<<(raw_ostream &OS, -                          const Print<NodeAddr<BlockNode *>> &P); -  raw_ostream &operator<<(raw_ostream &OS, -                          const Print<NodeAddr<FuncNode *>> &P); -  raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterSet> &P); -  raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterAggr> &P); -  raw_ostream &operator<<(raw_ostream &OS, -                          const Print<DataFlowGraph::DefStack> &P); - -} // end namespace rdf - -} // end namespace llvm - -#endif // LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H diff --git a/llvm/lib/Target/Hexagon/RDFLiveness.h b/llvm/lib/Target/Hexagon/RDFLiveness.h deleted file 
mode 100644 index ea4890271726..000000000000 --- a/llvm/lib/Target/Hexagon/RDFLiveness.h +++ /dev/null @@ -1,151 +0,0 @@ -//===- RDFLiveness.h --------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Recalculate the liveness information given a data flow graph. -// This includes block live-ins and kill flags. - -#ifndef LLVM_LIB_TARGET_HEXAGON_RDFLIVENESS_H -#define LLVM_LIB_TARGET_HEXAGON_RDFLIVENESS_H - -#include "RDFGraph.h" -#include "RDFRegisters.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/MC/LaneBitmask.h" -#include <map> -#include <set> -#include <utility> - -namespace llvm { - -class MachineBasicBlock; -class MachineDominanceFrontier; -class MachineDominatorTree; -class MachineRegisterInfo; -class TargetRegisterInfo; - -namespace rdf { - -  struct Liveness { -  public: -    // This is really a std::map, except that it provides a non-trivial -    // default constructor to the element accessed via []. -    struct LiveMapType { -      LiveMapType(const PhysicalRegisterInfo &pri) : Empty(pri) {} - -      RegisterAggr &operator[] (MachineBasicBlock *B) { -        return Map.emplace(B, Empty).first->second; -      } - -    private: -      RegisterAggr Empty; -      std::map<MachineBasicBlock*,RegisterAggr> Map; -    }; - -    using NodeRef = std::pair<NodeId, LaneBitmask>; -    using NodeRefSet = std::set<NodeRef>; -    // RegisterId in RefMap must be normalized. -    using RefMap = std::map<RegisterId, NodeRefSet>; - -    Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g) -        : DFG(g), TRI(g.getTRI()), PRI(g.getPRI()), MDT(g.getDT()), -          MDF(g.getDF()), LiveMap(g.getPRI()), Empty(), NoRegs(g.getPRI()) {} - -    NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA, -        bool TopShadows, bool FullChain, const RegisterAggr &DefRRs); - -    NodeList getAllReachingDefs(NodeAddr<RefNode*> RefA) { -      return getAllReachingDefs(RefA.Addr->getRegRef(DFG), RefA, false, -                                false, NoRegs); -    } - -    NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA) { -      return getAllReachingDefs(RefRR, RefA, false, false, NoRegs); -    } - -    NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA, -        const RegisterAggr &DefRRs); - -    NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA) { -      return getAllReachedUses(RefRR, DefA, NoRegs); -    } - -    std::pair<NodeSet,bool> getAllReachingDefsRec(RegisterRef RefRR, -        NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs); - -    NodeAddr<RefNode*> getNearestAliasedRef(RegisterRef RefRR, -        NodeAddr<InstrNode*> IA); - -    LiveMapType &getLiveMap() { return LiveMap; } -    const LiveMapType &getLiveMap() const { return LiveMap; } - -    const RefMap &getRealUses(NodeId P) const { -      auto F = RealUseMap.find(P); -      return F == RealUseMap.end() ? 
Empty : F->second; -    } - -    void computePhiInfo(); -    void computeLiveIns(); -    void resetLiveIns(); -    void resetKills(); -    void resetKills(MachineBasicBlock *B); - -    void trace(bool T) { Trace = T; } - -  private: -    const DataFlowGraph &DFG; -    const TargetRegisterInfo &TRI; -    const PhysicalRegisterInfo &PRI; -    const MachineDominatorTree &MDT; -    const MachineDominanceFrontier &MDF; -    LiveMapType LiveMap; -    const RefMap Empty; -    const RegisterAggr NoRegs; -    bool Trace = false; - -    // Cache of mapping from node ids (for RefNodes) to the containing -    // basic blocks. Not computing it each time for each node reduces -    // the liveness calculation time by a large fraction. -    using NodeBlockMap = DenseMap<NodeId, MachineBasicBlock *>; -    NodeBlockMap NBMap; - -    // Phi information: -    // -    // RealUseMap -    // map: NodeId -> (map: RegisterId -> NodeRefSet) -    //      phi id -> (map: register -> set of reached non-phi uses) -    std::map<NodeId, RefMap> RealUseMap; - -    // Inverse iterated dominance frontier. -    std::map<MachineBasicBlock*,std::set<MachineBasicBlock*>> IIDF; - -    // Live on entry. -    std::map<MachineBasicBlock*,RefMap> PhiLON; - -    // Phi uses are considered to be located at the end of the block that -    // they are associated with. The reaching def of a phi use dominates the -    // block that the use corresponds to, but not the block that contains -    // the phi itself. To include these uses in the liveness propagation (up -    // the dominator tree), create a map: block -> set of uses live on exit. -    std::map<MachineBasicBlock*,RefMap> PhiLOX; - -    MachineBasicBlock *getBlockWithRef(NodeId RN) const; -    void traverse(MachineBasicBlock *B, RefMap &LiveIn); -    void emptify(RefMap &M); - -    std::pair<NodeSet,bool> getAllReachingDefsRecImpl(RegisterRef RefRR, -        NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs, -        unsigned Nest, unsigned MaxNest); -  }; - -  raw_ostream &operator<<(raw_ostream &OS, const Print<Liveness::RefMap> &P); - -} // end namespace rdf - -} // end namespace llvm - -#endif // LLVM_LIB_TARGET_HEXAGON_RDFLIVENESS_H diff --git a/llvm/lib/Target/Hexagon/RDFRegisters.h b/llvm/lib/Target/Hexagon/RDFRegisters.h deleted file mode 100644 index 4afaf80e4659..000000000000 --- a/llvm/lib/Target/Hexagon/RDFRegisters.h +++ /dev/null @@ -1,240 +0,0 @@ -//===- RDFRegisters.h -------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H -#define LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H - -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/MC/LaneBitmask.h" -#include <cassert> -#include <cstdint> -#include <map> -#include <set> -#include <vector> - -namespace llvm { - -class MachineFunction; -class raw_ostream; - -namespace rdf { - -  using RegisterId = uint32_t; - -  // Template class for a map translating uint32_t into arbitrary types. -  // The map will act like an indexed set: upon insertion of a new object, -  // it will automatically assign a new index to it. Index of 0 is treated -  // as invalid and is never allocated. 
-  template <typename T, unsigned N = 32> -  struct IndexedSet { -    IndexedSet() { Map.reserve(N); } - -    T get(uint32_t Idx) const { -      // Index Idx corresponds to Map[Idx-1]. -      assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size()); -      return Map[Idx-1]; -    } - -    uint32_t insert(T Val) { -      // Linear search. -      auto F = llvm::find(Map, Val); -      if (F != Map.end()) -        return F - Map.begin() + 1; -      Map.push_back(Val); -      return Map.size();  // Return actual_index + 1. -    } - -    uint32_t find(T Val) const { -      auto F = llvm::find(Map, Val); -      assert(F != Map.end()); -      return F - Map.begin() + 1; -    } - -    uint32_t size() const { return Map.size(); } - -    using const_iterator = typename std::vector<T>::const_iterator; - -    const_iterator begin() const { return Map.begin(); } -    const_iterator end() const { return Map.end(); } - -  private: -    std::vector<T> Map; -  }; - -  struct RegisterRef { -    RegisterId Reg = 0; -    LaneBitmask Mask = LaneBitmask::getNone(); - -    RegisterRef() = default; -    explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll()) -      : Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {} - -    operator bool() const { -      return Reg != 0 && Mask.any(); -    } - -    bool operator== (const RegisterRef &RR) const { -      return Reg == RR.Reg && Mask == RR.Mask; -    } - -    bool operator!= (const RegisterRef &RR) const { -      return !operator==(RR); -    } - -    bool operator< (const RegisterRef &RR) const { -      return Reg < RR.Reg || (Reg == RR.Reg && Mask < RR.Mask); -    } -  }; - - -  struct PhysicalRegisterInfo { -    PhysicalRegisterInfo(const TargetRegisterInfo &tri, -                         const MachineFunction &mf); - -    static bool isRegMaskId(RegisterId R) { -      return Register::isStackSlot(R); -    } - -    RegisterId getRegMaskId(const uint32_t *RM) const { -      return Register::index2StackSlot(RegMasks.find(RM)); -    } - -    const uint32_t *getRegMaskBits(RegisterId R) const { -      return RegMasks.get(Register::stackSlot2Index(R)); -    } - -    RegisterRef normalize(RegisterRef RR) const; - -    bool alias(RegisterRef RA, RegisterRef RB) const { -      if (!isRegMaskId(RA.Reg)) -        return !isRegMaskId(RB.Reg) ? aliasRR(RA, RB) : aliasRM(RA, RB); -      return !isRegMaskId(RB.Reg) ? 
aliasRM(RB, RA) : aliasMM(RA, RB); -    } - -    std::set<RegisterId> getAliasSet(RegisterId Reg) const; - -    RegisterRef getRefForUnit(uint32_t U) const { -      return RegisterRef(UnitInfos[U].Reg, UnitInfos[U].Mask); -    } - -    const BitVector &getMaskUnits(RegisterId MaskId) const { -      return MaskInfos[Register::stackSlot2Index(MaskId)].Units; -    } - -    RegisterRef mapTo(RegisterRef RR, unsigned R) const; -    const TargetRegisterInfo &getTRI() const { return TRI; } - -  private: -    struct RegInfo { -      const TargetRegisterClass *RegClass = nullptr; -    }; -    struct UnitInfo { -      RegisterId Reg = 0; -      LaneBitmask Mask; -    }; -    struct MaskInfo { -      BitVector Units; -    }; - -    const TargetRegisterInfo &TRI; -    IndexedSet<const uint32_t*> RegMasks; -    std::vector<RegInfo> RegInfos; -    std::vector<UnitInfo> UnitInfos; -    std::vector<MaskInfo> MaskInfos; - -    bool aliasRR(RegisterRef RA, RegisterRef RB) const; -    bool aliasRM(RegisterRef RR, RegisterRef RM) const; -    bool aliasMM(RegisterRef RM, RegisterRef RN) const; -  }; - -  struct RegisterAggr { -    RegisterAggr(const PhysicalRegisterInfo &pri) -        : Units(pri.getTRI().getNumRegUnits()), PRI(pri) {} -    RegisterAggr(const RegisterAggr &RG) = default; - -    bool empty() const { return Units.none(); } -    bool hasAliasOf(RegisterRef RR) const; -    bool hasCoverOf(RegisterRef RR) const; - -    static bool isCoverOf(RegisterRef RA, RegisterRef RB, -                          const PhysicalRegisterInfo &PRI) { -      return RegisterAggr(PRI).insert(RA).hasCoverOf(RB); -    } - -    RegisterAggr &insert(RegisterRef RR); -    RegisterAggr &insert(const RegisterAggr &RG); -    RegisterAggr &intersect(RegisterRef RR); -    RegisterAggr &intersect(const RegisterAggr &RG); -    RegisterAggr &clear(RegisterRef RR); -    RegisterAggr &clear(const RegisterAggr &RG); - -    RegisterRef intersectWith(RegisterRef RR) const; -    RegisterRef clearIn(RegisterRef RR) const; -    RegisterRef makeRegRef() const; - -    void print(raw_ostream &OS) const; - -    struct rr_iterator { -      using MapType = std::map<RegisterId, LaneBitmask>; - -    private: -      MapType Masks; -      MapType::iterator Pos; -      unsigned Index; -      const RegisterAggr *Owner; - -    public: -      rr_iterator(const RegisterAggr &RG, bool End); - -      RegisterRef operator*() const { -        return RegisterRef(Pos->first, Pos->second); -      } - -      rr_iterator &operator++() { -        ++Pos; -        ++Index; -        return *this; -      } - -      bool operator==(const rr_iterator &I) const { -        assert(Owner == I.Owner); -        (void)Owner; -        return Index == I.Index; -      } - -      bool operator!=(const rr_iterator &I) const { -        return !(*this == I); -      } -    }; - -    rr_iterator rr_begin() const { -      return rr_iterator(*this, false); -    } -    rr_iterator rr_end() const { -      return rr_iterator(*this, true); -    } - -  private: -    BitVector Units; -    const PhysicalRegisterInfo &PRI; -  }; - -  // Optionally print the lane mask, if it is not ~0. 
-  struct PrintLaneMaskOpt { -    PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {} -    LaneBitmask Mask; -  }; -  raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P); - -} // end namespace rdf - -} // end namespace llvm - -#endif // LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td index 9b3d13989ee2..d7e3519d5539 100644 --- a/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -373,6 +373,7 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],      VMSUMSHS,      VMSUMUBM,      VMSUMUHM, +    VMSUMUDM,      VMSUMUHS,      VMULESB,      VMULESH, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 00f59bba52e8..ca1649fae258 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -167,6 +167,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);    } +  if (Subtarget.isISA3_0()) { +    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal); +    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal); +    setTruncStoreAction(MVT::f64, MVT::f16, Legal); +    setTruncStoreAction(MVT::f32, MVT::f16, Legal); +  } else { +    // No extending loads from f16 or HW conversions back and forth. +    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); +    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); +    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); +    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); +    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); +    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); +    setTruncStoreAction(MVT::f64, MVT::f16, Expand); +    setTruncStoreAction(MVT::f32, MVT::f16, Expand); +  } +    setTruncStoreAction(MVT::f64, MVT::f32, Expand);    // PowerPC has pre-inc load and store's. @@ -677,6 +694,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,          setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);        }      } +    setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);      if (!Subtarget.hasP8Vector()) {        setOperationAction(ISD::SMAX, MVT::v2i64, Expand);        setOperationAction(ISD::SMIN, MVT::v2i64, Expand); @@ -10361,6 +10379,7 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {    assert(Op.getOpcode() == ISD::FP_EXTEND &&           "Should only be called for ISD::FP_EXTEND"); +  // FIXME: handle extends from half precision float vectors on P9.    // We only want to custom lower an extend from v2f32 to v2f64.    if (Op.getValueType() != MVT::v2f64 ||        Op.getOperand(0).getValueType() != MVT::v2f32) @@ -10574,6 +10593,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,    case ISD::BITCAST:      // Don't handle bitcast here.      
return; +  case ISD::FP_EXTEND: +    SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG); +    if (Lowered) +      Results.push_back(Lowered); +    return;    }  } @@ -15255,7 +15279,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,    if (!VT.isSimple())      return false; -  if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess()) +  if (VT.isFloatingPoint() && !VT.isVector() && +      !Subtarget.allowsUnalignedFPAccess())      return false;    if (VT.getSimpleVT().isVector()) { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index e0c381827b87..2e1485373d19 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -637,7 +637,7 @@ namespace llvm {      /// then the VPERM for the shuffle. All in all a very slow sequence.      TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)        const override { -      if (VT.getScalarSizeInBits() % 8 == 0) +      if (VT.getVectorNumElements() != 1 && VT.getScalarSizeInBits() % 8 == 0)          return TypeWidenVector;        return TargetLoweringBase::getPreferredVectorAction(VT);      } diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index f94816a35f79..6e8635f2413c 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -1342,6 +1342,10 @@ def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;  def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">;  let Predicates = [HasP9Altivec] in { +// Vector Multiply-Sum +def VMSUMUDM : VA1a_Int_Ty3<35, "vmsumudm", int_ppc_altivec_vmsumudm, +                            v1i128, v2i64, v1i128>; +  // i8 element comparisons.  def VCMPNEB   : VCMP   <  7, "vcmpneb $vD, $vA, $vB"  , v16i8>;  def VCMPNEB_rec  : VCMPo  <  7, "vcmpneb. $vD, $vA, $vB" , v16i8>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 30906a32b00c..d7925befcd37 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2631,6 +2631,10 @@ bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI,    if (Opc != PPC::ADDI && Opc != PPC::ADDI8)      return false; +  // The operand may not necessarily be an immediate - it could be a relocation. +  if (!ADDIMI.getOperand(2).isImm()) +    return false; +    Imm = ADDIMI.getOperand(2).getImm();    return true; diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index be6b30ffa08b..95e5ff6b130d 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -3343,6 +3343,23 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {    def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),              (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>; +  // Load/convert and convert/store patterns for f16. 
+  def : Pat<(f64 (extloadf16 xoaddr:$src)), +            (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>; +  def : Pat<(truncstoref16 f64:$src, xoaddr:$dst), +            (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>; +  def : Pat<(f32 (extloadf16 xoaddr:$src)), +            (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>; +  def : Pat<(truncstoref16 f32:$src, xoaddr:$dst), +            (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>; +  def : Pat<(f64 (f16_to_fp i32:$A)), +            (f64 (XSCVHPDP (MTVSRWZ $A)))>; +  def : Pat<(f32 (f16_to_fp i32:$A)), +            (f32 (COPY_TO_REGCLASS (XSCVHPDP (MTVSRWZ $A)), VSSRC))>; +  def : Pat<(i32 (fp_to_f16 f32:$A)), +            (i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>; +  def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>; +    let Predicates = [IsBigEndian, HasP9Vector] in {    // Scalar stores of i8    def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst), diff --git a/llvm/lib/Target/X86/ImmutableGraph.h b/llvm/lib/Target/X86/ImmutableGraph.h new file mode 100644 index 000000000000..5833017037a5 --- /dev/null +++ b/llvm/lib/Target/X86/ImmutableGraph.h @@ -0,0 +1,446 @@ +//==========-- ImmutableGraph.h - A fast DAG implementation ---------=========// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Description: ImmutableGraph is a fast DAG implementation that cannot be +/// modified, except by creating a new ImmutableGraph. ImmutableGraph is +/// implemented as two arrays: one containing nodes, and one containing edges. +/// The advantages to this implementation are two-fold: +/// 1. Iteration and traversal operations benefit from cache locality. +/// 2. Operations on sets of nodes/edges are efficient, and representations of +///    those sets in memory are compact. For instance, a set of edges is +///    implemented as a bit vector, wherein each bit corresponds to one edge in +///    the edge array. This implies a lower bound of 64x spatial improvement +///    over, e.g., an llvm::DenseSet or llvm::SmallSet. It also means that +///    insert/erase/contains operations complete in negligible constant time: +///    insert and erase require one load and one store, and contains requires +///    just one load. 
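The two flat arrays plus bit-vector sets suggest the following rough usage sketch of the API this header introduces; DemoGraph, the node/edge values, and the include path are illustrative assumptions rather than part of the patch. A graph is assembled once through ImmutableGraphBuilder (defined later in this header) and afterwards only read.

    #include "ImmutableGraph.h" // assuming compilation inside llvm/lib/Target/X86
    #include <memory>

    // A concrete graph type with int-valued nodes and edges. The builder needs a
    // public constructor forwarding to the protected ImmutableGraph constructor.
    struct DemoGraph : llvm::ImmutableGraph<int, int> {
      DemoGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges,
                size_type NumNodes, size_type NumEdges)
          : ImmutableGraph(std::move(Nodes), std::move(Edges), NumNodes, NumEdges) {}
    };

    std::unique_ptr<DemoGraph> buildAndQuery() {
      llvm::ImmutableGraphBuilder<DemoGraph> Builder;
      auto A = Builder.addVertex(1);
      auto B = Builder.addVertex(2);
      Builder.addEdge(/*Value=*/7, A, B);           // adjacency lists, still mutable
      std::unique_ptr<DemoGraph> G = Builder.get(); // frozen into the two flat arrays

      DemoGraph::NodeSet Seen(*G);                  // one bit per node in the graph
      for (const DemoGraph::Node &N : G->nodes())
        Seen.insert(N);                             // sets a single bit
      (void)Seen.contains(G->nodes()[0]);           // tests a single bit
      return G;
    }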
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H +#define LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <iterator> +#include <utility> +#include <vector> + +namespace llvm { + +template <typename NodeValueT, typename EdgeValueT> class ImmutableGraph { +  using Traits = GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *>; +  template <typename> friend class ImmutableGraphBuilder; + +public: +  using node_value_type = NodeValueT; +  using edge_value_type = EdgeValueT; +  using size_type = int; +  class Node; +  class Edge { +    friend class ImmutableGraph; +    template <typename> friend class ImmutableGraphBuilder; + +    const Node *Dest; +    edge_value_type Value; + +  public: +    const Node *getDest() const { return Dest; }; +    const edge_value_type &getValue() const { return Value; } +  }; +  class Node { +    friend class ImmutableGraph; +    template <typename> friend class ImmutableGraphBuilder; + +    const Edge *Edges; +    node_value_type Value; + +  public: +    const node_value_type &getValue() const { return Value; } + +    const Edge *edges_begin() const { return Edges; } +    // Nodes are allocated sequentially. Edges for a node are stored together. +    // The end of this Node's edges is the beginning of the next node's edges. +    // An extra node was allocated to hold the end pointer for the last real +    // node. +    const Edge *edges_end() const { return (this + 1)->Edges; } +    ArrayRef<Edge> edges() const { +      return makeArrayRef(edges_begin(), edges_end()); +    } +  }; + +protected: +  ImmutableGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges, +                 size_type NodesSize, size_type EdgesSize) +      : Nodes(std::move(Nodes)), Edges(std::move(Edges)), NodesSize(NodesSize), +        EdgesSize(EdgesSize) {} +  ImmutableGraph(const ImmutableGraph &) = delete; +  ImmutableGraph(ImmutableGraph &&) = delete; +  ImmutableGraph &operator=(const ImmutableGraph &) = delete; +  ImmutableGraph &operator=(ImmutableGraph &&) = delete; + +public: +  ArrayRef<Node> nodes() const { return makeArrayRef(Nodes.get(), NodesSize); } +  const Node *nodes_begin() const { return nodes().begin(); } +  const Node *nodes_end() const { return nodes().end(); } + +  ArrayRef<Edge> edges() const { return makeArrayRef(Edges.get(), EdgesSize); } +  const Edge *edges_begin() const { return edges().begin(); } +  const Edge *edges_end() const { return edges().end(); } + +  size_type nodes_size() const { return NodesSize; } +  size_type edges_size() const { return EdgesSize; } + +  // Node N must belong to this ImmutableGraph. +  size_type getNodeIndex(const Node &N) const { +    return std::distance(nodes_begin(), &N); +  } +  // Edge E must belong to this ImmutableGraph. +  size_type getEdgeIndex(const Edge &E) const { +    return std::distance(edges_begin(), &E); +  } + +  // FIXME: Could NodeSet and EdgeSet be templated to share code? 
+  class NodeSet { +    const ImmutableGraph &G; +    BitVector V; + +  public: +    NodeSet(const ImmutableGraph &G, bool ContainsAll = false) +        : G{G}, V{static_cast<unsigned>(G.nodes_size()), ContainsAll} {} +    bool insert(const Node &N) { +      size_type Idx = G.getNodeIndex(N); +      bool AlreadyExists = V.test(Idx); +      V.set(Idx); +      return !AlreadyExists; +    } +    void erase(const Node &N) { +      size_type Idx = G.getNodeIndex(N); +      V.reset(Idx); +    } +    bool contains(const Node &N) const { +      size_type Idx = G.getNodeIndex(N); +      return V.test(Idx); +    } +    void clear() { V.reset(); } +    size_type empty() const { return V.none(); } +    /// Return the number of elements in the set +    size_type count() const { return V.count(); } +    /// Return the size of the set's domain +    size_type size() const { return V.size(); } +    /// Set union +    NodeSet &operator|=(const NodeSet &RHS) { +      assert(&this->G == &RHS.G); +      V |= RHS.V; +      return *this; +    } +    /// Set intersection +    NodeSet &operator&=(const NodeSet &RHS) { +      assert(&this->G == &RHS.G); +      V &= RHS.V; +      return *this; +    } +    /// Set disjoint union +    NodeSet &operator^=(const NodeSet &RHS) { +      assert(&this->G == &RHS.G); +      V ^= RHS.V; +      return *this; +    } + +    using index_iterator = typename BitVector::const_set_bits_iterator; +    index_iterator index_begin() const { return V.set_bits_begin(); } +    index_iterator index_end() const { return V.set_bits_end(); } +    void set(size_type Idx) { V.set(Idx); } +    void reset(size_type Idx) { V.reset(Idx); } + +    class iterator { +      const NodeSet &Set; +      size_type Current; + +      void advance() { +        assert(Current != -1); +        Current = Set.V.find_next(Current); +      } + +    public: +      iterator(const NodeSet &Set, size_type Begin) +          : Set{Set}, Current{Begin} {} +      iterator operator++(int) { +        iterator Tmp = *this; +        advance(); +        return Tmp; +      } +      iterator &operator++() { +        advance(); +        return *this; +      } +      Node *operator*() const { +        assert(Current != -1); +        return Set.G.nodes_begin() + Current; +      } +      bool operator==(const iterator &other) const { +        assert(&this->Set == &other.Set); +        return this->Current == other.Current; +      } +      bool operator!=(const iterator &other) const { return !(*this == other); } +    }; + +    iterator begin() const { return iterator{*this, V.find_first()}; } +    iterator end() const { return iterator{*this, -1}; } +  }; + +  class EdgeSet { +    const ImmutableGraph &G; +    BitVector V; + +  public: +    EdgeSet(const ImmutableGraph &G, bool ContainsAll = false) +        : G{G}, V{static_cast<unsigned>(G.edges_size()), ContainsAll} {} +    bool insert(const Edge &E) { +      size_type Idx = G.getEdgeIndex(E); +      bool AlreadyExists = V.test(Idx); +      V.set(Idx); +      return !AlreadyExists; +    } +    void erase(const Edge &E) { +      size_type Idx = G.getEdgeIndex(E); +      V.reset(Idx); +    } +    bool contains(const Edge &E) const { +      size_type Idx = G.getEdgeIndex(E); +      return V.test(Idx); +    } +    void clear() { V.reset(); } +    bool empty() const { return V.none(); } +    /// Return the number of elements in the set +    size_type count() const { return V.count(); } +    /// Return the size of the set's domain +    size_type size() const { return V.size(); } +    /// 
Set union +    EdgeSet &operator|=(const EdgeSet &RHS) { +      assert(&this->G == &RHS.G); +      V |= RHS.V; +      return *this; +    } +    /// Set intersection +    EdgeSet &operator&=(const EdgeSet &RHS) { +      assert(&this->G == &RHS.G); +      V &= RHS.V; +      return *this; +    } +    /// Set disjoint union +    EdgeSet &operator^=(const EdgeSet &RHS) { +      assert(&this->G == &RHS.G); +      V ^= RHS.V; +      return *this; +    } + +    using index_iterator = typename BitVector::const_set_bits_iterator; +    index_iterator index_begin() const { return V.set_bits_begin(); } +    index_iterator index_end() const { return V.set_bits_end(); } +    void set(size_type Idx) { V.set(Idx); } +    void reset(size_type Idx) { V.reset(Idx); } + +    class iterator { +      const EdgeSet &Set; +      size_type Current; + +      void advance() { +        assert(Current != -1); +        Current = Set.V.find_next(Current); +      } + +    public: +      iterator(const EdgeSet &Set, size_type Begin) +          : Set{Set}, Current{Begin} {} +      iterator operator++(int) { +        iterator Tmp = *this; +        advance(); +        return Tmp; +      } +      iterator &operator++() { +        advance(); +        return *this; +      } +      Edge *operator*() const { +        assert(Current != -1); +        return Set.G.edges_begin() + Current; +      } +      bool operator==(const iterator &other) const { +        assert(&this->Set == &other.Set); +        return this->Current == other.Current; +      } +      bool operator!=(const iterator &other) const { return !(*this == other); } +    }; + +    iterator begin() const { return iterator{*this, V.find_first()}; } +    iterator end() const { return iterator{*this, -1}; } +  }; + +private: +  std::unique_ptr<Node[]> Nodes; +  std::unique_ptr<Edge[]> Edges; +  size_type NodesSize; +  size_type EdgesSize; +}; + +template <typename GraphT> class ImmutableGraphBuilder { +  using node_value_type = typename GraphT::node_value_type; +  using edge_value_type = typename GraphT::edge_value_type; +  static_assert( +      std::is_base_of<ImmutableGraph<node_value_type, edge_value_type>, +                      GraphT>::value, +      "Template argument to ImmutableGraphBuilder must derive from " +      "ImmutableGraph<>"); +  using size_type = typename GraphT::size_type; +  using NodeSet = typename GraphT::NodeSet; +  using Node = typename GraphT::Node; +  using EdgeSet = typename GraphT::EdgeSet; +  using Edge = typename GraphT::Edge; +  using BuilderEdge = std::pair<edge_value_type, size_type>; +  using EdgeList = std::vector<BuilderEdge>; +  using BuilderVertex = std::pair<node_value_type, EdgeList>; +  using VertexVec = std::vector<BuilderVertex>; + +public: +  using BuilderNodeRef = size_type; + +  BuilderNodeRef addVertex(const node_value_type &V) { +    auto I = AdjList.emplace(AdjList.end(), V, EdgeList{}); +    return std::distance(AdjList.begin(), I); +  } + +  void addEdge(const edge_value_type &E, BuilderNodeRef From, +               BuilderNodeRef To) { +    AdjList[From].second.emplace_back(E, To); +  } + +  bool empty() const { return AdjList.empty(); } + +  template <typename... ArgT> std::unique_ptr<GraphT> get(ArgT &&... 
Args) { +    size_type VertexSize = AdjList.size(), EdgeSize = 0; +    for (const auto &V : AdjList) { +      EdgeSize += V.second.size(); +    } +    auto VertexArray = +        std::make_unique<Node[]>(VertexSize + 1 /* terminator node */); +    auto EdgeArray = std::make_unique<Edge[]>(EdgeSize); +    size_type VI = 0, EI = 0; +    for (; VI < VertexSize; ++VI) { +      VertexArray[VI].Value = std::move(AdjList[VI].first); +      VertexArray[VI].Edges = &EdgeArray[EI]; +      auto NumEdges = static_cast<size_type>(AdjList[VI].second.size()); +      for (size_type VEI = 0; VEI < NumEdges; ++VEI, ++EI) { +        auto &E = AdjList[VI].second[VEI]; +        EdgeArray[EI].Value = std::move(E.first); +        EdgeArray[EI].Dest = &VertexArray[E.second]; +      } +    } +    assert(VI == VertexSize && EI == EdgeSize && "ImmutableGraph malformed"); +    VertexArray[VI].Edges = &EdgeArray[EdgeSize]; // terminator node +    return std::make_unique<GraphT>(std::move(VertexArray), +                                    std::move(EdgeArray), VertexSize, EdgeSize, +                                    std::forward<ArgT>(Args)...); +  } + +  template <typename... ArgT> +  static std::unique_ptr<GraphT> trim(const GraphT &G, const NodeSet &TrimNodes, +                                      const EdgeSet &TrimEdges, +                                      ArgT &&... Args) { +    size_type NewVertexSize = G.nodes_size() - TrimNodes.count(); +    size_type NewEdgeSize = G.edges_size() - TrimEdges.count(); +    auto NewVertexArray = +        std::make_unique<Node[]>(NewVertexSize + 1 /* terminator node */); +    auto NewEdgeArray = std::make_unique<Edge[]>(NewEdgeSize); + +    // Walk the nodes and determine the new index for each node. +    size_type NewNodeIndex = 0; +    std::vector<size_type> RemappedNodeIndex(G.nodes_size()); +    for (const Node &N : G.nodes()) { +      if (TrimNodes.contains(N)) +        continue; +      RemappedNodeIndex[G.getNodeIndex(N)] = NewNodeIndex++; +    } +    assert(NewNodeIndex == NewVertexSize && +           "Should have assigned NewVertexSize indices"); + +    size_type VertexI = 0, EdgeI = 0; +    for (const Node &N : G.nodes()) { +      if (TrimNodes.contains(N)) +        continue; +      NewVertexArray[VertexI].Value = N.getValue(); +      NewVertexArray[VertexI].Edges = &NewEdgeArray[EdgeI]; +      for (const Edge &E : N.edges()) { +        if (TrimEdges.contains(E)) +          continue; +        NewEdgeArray[EdgeI].Value = E.getValue(); +        size_type DestIdx = G.getNodeIndex(*E.getDest()); +        size_type NewIdx = RemappedNodeIndex[DestIdx]; +        assert(NewIdx < NewVertexSize); +        NewEdgeArray[EdgeI].Dest = &NewVertexArray[NewIdx]; +        ++EdgeI; +      } +      ++VertexI; +    } +    assert(VertexI == NewVertexSize && EdgeI == NewEdgeSize && +           "Gadget graph malformed"); +    NewVertexArray[VertexI].Edges = &NewEdgeArray[NewEdgeSize]; // terminator +    return std::make_unique<GraphT>(std::move(NewVertexArray), +                                    std::move(NewEdgeArray), NewVertexSize, +                                    NewEdgeSize, std::forward<ArgT>(Args)...); +  } + +private: +  VertexVec AdjList; +}; + +template <typename NodeValueT, typename EdgeValueT> +struct GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *> { +  using GraphT = ImmutableGraph<NodeValueT, EdgeValueT>; +  using NodeRef = typename GraphT::Node const *; +  using EdgeRef = typename GraphT::Edge const &; + +  static NodeRef edge_dest(EdgeRef E) { return 
E.getDest(); } +  using ChildIteratorType = +      mapped_iterator<typename GraphT::Edge const *, decltype(&edge_dest)>; + +  static NodeRef getEntryNode(GraphT *G) { return G->nodes_begin(); } +  static ChildIteratorType child_begin(NodeRef N) { +    return {N->edges_begin(), &edge_dest}; +  } +  static ChildIteratorType child_end(NodeRef N) { +    return {N->edges_end(), &edge_dest}; +  } + +  static NodeRef getNode(typename GraphT::Node const &N) { return NodeRef{&N}; } +  using nodes_iterator = +      mapped_iterator<typename GraphT::Node const *, decltype(&getNode)>; +  static nodes_iterator nodes_begin(GraphT *G) { +    return {G->nodes_begin(), &getNode}; +  } +  static nodes_iterator nodes_end(GraphT *G) { +    return {G->nodes_end(), &getNode}; +  } + +  using ChildEdgeIteratorType = typename GraphT::Edge const *; + +  static ChildEdgeIteratorType child_edge_begin(NodeRef N) { +    return N->edges_begin(); +  } +  static ChildEdgeIteratorType child_edge_end(NodeRef N) { +    return N->edges_end(); +  } +  static typename GraphT::size_type size(GraphT *G) { return G->nodes_size(); } +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 0481a40d462a..a0ab5c3a5b3c 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -120,7 +120,7 @@ FunctionPass *createX86DomainReassignmentPass();  FunctionPass *createX86EvexToVexInsts();  /// This pass creates the thunks for the retpoline feature. -FunctionPass *createX86RetpolineThunksPass(); +FunctionPass *createX86IndirectThunksPass();  /// This pass ensures instructions featuring a memory operand  /// have distinctive <LineNumber, Discriminator> (with respect to eachother) @@ -133,6 +133,9 @@ InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,                                                    X86Subtarget &,                                                    X86RegisterBankInfo &); +FunctionPass *createX86LoadValueInjectionLoadHardeningPass(); +FunctionPass *createX86LoadValueInjectionLoadHardeningUnoptimizedPass(); +FunctionPass *createX86LoadValueInjectionRetHardeningPass();  FunctionPass *createX86SpeculativeLoadHardeningPass();  void initializeEvexToVexInstPassPass(PassRegistry &); @@ -148,6 +151,9 @@ void initializeX86DomainReassignmentPass(PassRegistry &);  void initializeX86ExecutionDomainFixPass(PassRegistry &);  void initializeX86ExpandPseudoPass(PassRegistry &);  void initializeX86FlagsCopyLoweringPassPass(PassRegistry &); +void initializeX86LoadValueInjectionLoadHardeningUnoptimizedPassPass(PassRegistry &); +void initializeX86LoadValueInjectionLoadHardeningPassPass(PassRegistry &); +void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &);  void initializeX86OptimizeLEAPassPass(PassRegistry &);  void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &); diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index a2b11d55f650..bb8952f54e3a 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -426,6 +426,22 @@ def FeatureRetpolineExternalThunk            "ourselves. 
Only has effect when combined with some other retpoline "            "feature", [FeatureRetpolineIndirectCalls]>; +// Mitigate LVI attacks against indirect calls/branches and call returns +def FeatureLVIControlFlowIntegrity +    : SubtargetFeature< +          "lvi-cfi", "UseLVIControlFlowIntegrity", "true", +          "Prevent indirect calls/branches from using a memory operand, and " +          "precede all indirect calls/branches from a register with an " +          "LFENCE instruction to serialize control flow. Also decompose RET " +          "instructions into a POP+LFENCE+JMP sequence.">; + +// Mitigate LVI attacks against data loads +def FeatureLVILoadHardening +    : SubtargetFeature< +          "lvi-load-hardening", "UseLVILoadHardening", "true", +          "Insert LFENCE instructions to prevent data speculatively injected " +          "into loads from being used maliciously.">; +  // Direct Move instructions.  def FeatureMOVDIRI  : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",                                         "Support movdiri instruction">; diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 1dbf40683564..a1d256ea872d 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -3202,8 +3202,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {        (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))      return false; -  // Functions using retpoline for indirect calls need to use SDISel. -  if (Subtarget->useRetpolineIndirectCalls()) +  // Functions using thunks for indirect calls need to use SDISel. +  if (Subtarget->useIndirectThunkCalls())      return false;    // Handle only C, fastcc, and webkit_js calling conventions for now. diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 799c1f5d1285..1da20371caf5 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -765,10 +765,10 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,                                            bool InProlog) const {    bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; -  // FIXME: Add retpoline support and remove this. -  if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls()) +  // FIXME: Add indirect thunk support and remove this. +  if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())      report_fatal_error("Emitting stack probe calls on 64-bit with the large " -                       "code model and retpoline not yet implemented."); +                       "code model and indirect thunks not yet implemented.");    unsigned CallOp;    if (Is64Bit) @@ -2493,9 +2493,9 @@ void X86FrameLowering::adjustForSegmentedStacks(      // is laid out within 2^31 bytes of each function body, but this seems      // to be sufficient for JIT.      // FIXME: Add retpoline support and remove the error here.. 
-    if (STI.useRetpolineIndirectCalls()) +    if (STI.useIndirectThunkCalls())        report_fatal_error("Emitting morestack calls on 64-bit with the large " -                         "code model and retpoline not yet implemented."); +                         "code model and thunks not yet implemented.");      BuildMI(allocMBB, DL, TII.get(X86::CALL64m))          .addReg(X86::RIP)          .addImm(0) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index bf33f399db28..88af0ebcfd0e 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -987,7 +987,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {      if (OptLevel != CodeGenOpt::None &&          // Only do this when the target can fold the load into the call or          // jmp. -        !Subtarget->useRetpolineIndirectCalls() && +        !Subtarget->useIndirectThunkCalls() &&          ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||           (N->getOpcode() == X86ISD::TC_RETURN &&            (Subtarget->is64Bit() || diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1523d56cc4e7..c8720d9ae3a6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -30221,8 +30221,8 @@ bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask,  }  bool X86TargetLowering::areJTsAllowed(const Function *Fn) const { -  // If the subtarget is using retpolines, we need to not generate jump tables. -  if (Subtarget.useRetpolineIndirectBranches()) +  // If the subtarget is using thunks, we need to not generate jump tables. +  if (Subtarget.useIndirectThunkBranches())      return false;    // Otherwise, fallback on the generic logic. @@ -31345,22 +31345,22 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,    return BB;  } -static unsigned getOpcodeForRetpoline(unsigned RPOpc) { +static unsigned getOpcodeForIndirectThunk(unsigned RPOpc) {    switch (RPOpc) { -  case X86::RETPOLINE_CALL32: +  case X86::INDIRECT_THUNK_CALL32:      return X86::CALLpcrel32; -  case X86::RETPOLINE_CALL64: +  case X86::INDIRECT_THUNK_CALL64:      return X86::CALL64pcrel32; -  case X86::RETPOLINE_TCRETURN32: +  case X86::INDIRECT_THUNK_TCRETURN32:      return X86::TCRETURNdi; -  case X86::RETPOLINE_TCRETURN64: +  case X86::INDIRECT_THUNK_TCRETURN64:      return X86::TCRETURNdi64;    } -  llvm_unreachable("not retpoline opcode"); +  llvm_unreachable("not indirect thunk opcode");  } -static const char *getRetpolineSymbol(const X86Subtarget &Subtarget, -                                      unsigned Reg) { +static const char *getIndirectThunkSymbol(const X86Subtarget &Subtarget, +                                          unsigned Reg) {    if (Subtarget.useRetpolineExternalThunk()) {      // When using an external thunk for retpolines, we pick names that match the      // names GCC happens to use as well. This helps simplify the implementation @@ -31392,39 +31392,48 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,        assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");        return "__x86_indirect_thunk_r11";      } +    llvm_unreachable("unexpected reg for external indirect thunk"); +  } + +  if (Subtarget.useRetpolineIndirectCalls() || +      Subtarget.useRetpolineIndirectBranches()) { +    // When targeting an internal COMDAT thunk use an LLVM-specific name. 
+    switch (Reg) { +    case X86::EAX: +      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); +      return "__llvm_retpoline_eax"; +    case X86::ECX: +      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); +      return "__llvm_retpoline_ecx"; +    case X86::EDX: +      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); +      return "__llvm_retpoline_edx"; +    case X86::EDI: +      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); +      return "__llvm_retpoline_edi"; +    case X86::R11: +      assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); +      return "__llvm_retpoline_r11"; +    }      llvm_unreachable("unexpected reg for retpoline");    } -  // When targeting an internal COMDAT thunk use an LLVM-specific name. -  switch (Reg) { -  case X86::EAX: -    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); -    return "__llvm_retpoline_eax"; -  case X86::ECX: -    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); -    return "__llvm_retpoline_ecx"; -  case X86::EDX: -    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); -    return "__llvm_retpoline_edx"; -  case X86::EDI: -    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); -    return "__llvm_retpoline_edi"; -  case X86::R11: +  if (Subtarget.useLVIControlFlowIntegrity()) {      assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); -    return "__llvm_retpoline_r11"; +    return "__llvm_lvi_thunk_r11";    } -  llvm_unreachable("unexpected reg for retpoline"); +  llvm_unreachable("getIndirectThunkSymbol() invoked without thunk feature");  }  MachineBasicBlock * -X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI, -                                        MachineBasicBlock *BB) const { +X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI, +                                            MachineBasicBlock *BB) const {    // Copy the virtual register into the R11 physical register and    // call the retpoline thunk.    DebugLoc DL = MI.getDebugLoc();    const X86InstrInfo *TII = Subtarget.getInstrInfo();    Register CalleeVReg = MI.getOperand(0).getReg(); -  unsigned Opc = getOpcodeForRetpoline(MI.getOpcode()); +  unsigned Opc = getOpcodeForIndirectThunk(MI.getOpcode());    // Find an available scratch register to hold the callee. 
On 64-bit, we can    // just use R11, but we scan for uses anyway to ensure we don't generate @@ -31458,7 +31467,7 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,      report_fatal_error("calling convention incompatible with retpoline, no "                         "available registers"); -  const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg); +  const char *Symbol = getIndirectThunkSymbol(Subtarget, AvailableReg);    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)        .addReg(CalleeVReg); @@ -32234,11 +32243,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,    case X86::TLS_base_addr32:    case X86::TLS_base_addr64:      return EmitLoweredTLSAddr(MI, BB); -  case X86::RETPOLINE_CALL32: -  case X86::RETPOLINE_CALL64: -  case X86::RETPOLINE_TCRETURN32: -  case X86::RETPOLINE_TCRETURN64: -    return EmitLoweredRetpoline(MI, BB); +  case X86::INDIRECT_THUNK_CALL32: +  case X86::INDIRECT_THUNK_CALL64: +  case X86::INDIRECT_THUNK_TCRETURN32: +  case X86::INDIRECT_THUNK_TCRETURN64: +    return EmitLoweredIndirectThunk(MI, BB);    case X86::CATCHRET:      return EmitLoweredCatchRet(MI, BB);    case X86::CATCHPAD: diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 3a17099da38f..830cdfc79c0a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1482,8 +1482,8 @@ namespace llvm {      MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,                                            MachineBasicBlock *BB) const; -    MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI, -                                            MachineBasicBlock *BB) const; +    MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI, +                                                MachineBasicBlock *BB) const;      MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,                                          MachineBasicBlock *MBB) const; diff --git a/llvm/lib/Target/X86/X86IndirectThunks.cpp b/llvm/lib/Target/X86/X86IndirectThunks.cpp new file mode 100644 index 000000000000..36b9c3ccc959 --- /dev/null +++ b/llvm/lib/Target/X86/X86IndirectThunks.cpp @@ -0,0 +1,364 @@ +//==- X86IndirectThunks.cpp - Construct indirect call/jump thunks for x86  --=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Pass that injects an MI thunk that is used to lower indirect calls in a way +/// that prevents speculation on some x86 processors and can be used to mitigate +/// security vulnerabilities due to targeted speculative execution and side +/// channels such as CVE-2017-5715. +/// +/// Currently supported thunks include: +/// - Retpoline -- A RET-implemented trampoline that lowers indirect calls +/// - LVI Thunk -- A CALL/JMP-implemented thunk that forces load serialization +///   before making an indirect call/jump +/// +/// Note that the reason that this is implemented as a MachineFunctionPass and +/// not a ModulePass is that ModulePasses at this point in the LLVM X86 pipeline +/// serialize all transformations, which can consume lots of memory. +/// +/// TODO(chandlerc): All of this code could use better comments and +/// documentation. 
+/// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrBuilder.h" +#include "X86Subtarget.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "x86-retpoline-thunks" + +static const char RetpolineNamePrefix[] = "__llvm_retpoline_"; +static const char R11RetpolineName[] = "__llvm_retpoline_r11"; +static const char EAXRetpolineName[] = "__llvm_retpoline_eax"; +static const char ECXRetpolineName[] = "__llvm_retpoline_ecx"; +static const char EDXRetpolineName[] = "__llvm_retpoline_edx"; +static const char EDIRetpolineName[] = "__llvm_retpoline_edi"; + +static const char LVIThunkNamePrefix[] = "__llvm_lvi_thunk_"; +static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11"; + +namespace { +template <typename Derived> class ThunkInserter { +  Derived &getDerived() { return *static_cast<Derived *>(this); } + +protected: +  bool InsertedThunks; +  void doInitialization(Module &M) {} +  void createThunkFunction(MachineModuleInfo &MMI, StringRef Name); + +public: +  void init(Module &M) { +    InsertedThunks = false; +    getDerived().doInitialization(M); +  } +  // return `true` if `MMI` or `MF` was modified +  bool run(MachineModuleInfo &MMI, MachineFunction &MF); +}; + +struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> { +  const char *getThunkPrefix() { return RetpolineNamePrefix; } +  bool mayUseThunk(const MachineFunction &MF) { +    const auto &STI = MF.getSubtarget<X86Subtarget>(); +    return (STI.useRetpolineIndirectCalls() || +            STI.useRetpolineIndirectBranches()) && +           !STI.useRetpolineExternalThunk(); +  } +  void insertThunks(MachineModuleInfo &MMI); +  void populateThunk(MachineFunction &MF); +}; + +struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> { +  const char *getThunkPrefix() { return LVIThunkNamePrefix; } +  bool mayUseThunk(const MachineFunction &MF) { +    return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity(); +  } +  void insertThunks(MachineModuleInfo &MMI) { +    createThunkFunction(MMI, R11LVIThunkName); +  } +  void populateThunk(MachineFunction &MF) { +    // Grab the entry MBB and erase any other blocks. O0 codegen appears to +    // generate two bbs for the entry block. +    MachineBasicBlock *Entry = &MF.front(); +    Entry->clear(); +    while (MF.size() > 1) +      MF.erase(std::next(MF.begin())); + +    // This code mitigates LVI by replacing each indirect call/jump with a +    // direct call/jump to a thunk that looks like: +    // ``` +    // lfence +    // jmpq *%r11 +    // ``` +    // This ensures that if the value in register %r11 was loaded from memory, +    // then the value in %r11 is (architecturally) correct prior to the jump. 
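    // For illustration (a sketch of the caller side, not code emitted by this
    // thunk): with lvi-cfi on a 64-bit target, EmitLoweredIndirectThunk() shown
    // earlier in this diff copies the callee into R11 and calls this thunk, so
    // an indirect call such as
    //   callq *%rax
    // becomes roughly
    //   movq  %rax, %r11
    //   callq __llvm_lvi_thunk_r11
    // which guarantees the LFENCE below executes before the indirect transfer.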
+    const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); +    BuildMI(&MF.front(), DebugLoc(), TII->get(X86::LFENCE)); +    BuildMI(&MF.front(), DebugLoc(), TII->get(X86::JMP64r)).addReg(X86::R11); +    MF.front().addLiveIn(X86::R11); +    return; +  } +}; + +class X86IndirectThunks : public MachineFunctionPass { +public: +  static char ID; + +  X86IndirectThunks() : MachineFunctionPass(ID) {} + +  StringRef getPassName() const override { return "X86 Indirect Thunks"; } + +  bool doInitialization(Module &M) override; +  bool runOnMachineFunction(MachineFunction &MF) override; + +  void getAnalysisUsage(AnalysisUsage &AU) const override { +    MachineFunctionPass::getAnalysisUsage(AU); +    AU.addRequired<MachineModuleInfoWrapperPass>(); +    AU.addPreserved<MachineModuleInfoWrapperPass>(); +  } + +private: +  std::tuple<RetpolineThunkInserter, LVIThunkInserter> TIs; + +  // FIXME: When LLVM moves to C++17, these can become folds +  template <typename... ThunkInserterT> +  static void initTIs(Module &M, +                      std::tuple<ThunkInserterT...> &ThunkInserters) { +    (void)std::initializer_list<int>{ +        (std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...}; +  } +  template <typename... ThunkInserterT> +  static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF, +                     std::tuple<ThunkInserterT...> &ThunkInserters) { +    bool Modified = false; +    (void)std::initializer_list<int>{ +        Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...}; +    return Modified; +  } +}; + +} // end anonymous namespace + +void RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI) { +  if (MMI.getTarget().getTargetTriple().getArch() == Triple::x86_64) +    createThunkFunction(MMI, R11RetpolineName); +  else +    for (StringRef Name : {EAXRetpolineName, ECXRetpolineName, EDXRetpolineName, +                           EDIRetpolineName}) +      createThunkFunction(MMI, Name); +} + +void RetpolineThunkInserter::populateThunk(MachineFunction &MF) { +  bool Is64Bit = MF.getTarget().getTargetTriple().getArch() == Triple::x86_64; +  Register ThunkReg; +  if (Is64Bit) { +    assert(MF.getName() == "__llvm_retpoline_r11" && +           "Should only have an r11 thunk on 64-bit targets"); + +    // __llvm_retpoline_r11: +    //   callq .Lr11_call_target +    // .Lr11_capture_spec: +    //   pause +    //   lfence +    //   jmp .Lr11_capture_spec +    // .align 16 +    // .Lr11_call_target: +    //   movq %r11, (%rsp) +    //   retq +    ThunkReg = X86::R11; +  } else { +    // For 32-bit targets we need to emit a collection of thunks for various +    // possible scratch registers as well as a fallback that uses EDI, which is +    // normally callee saved. +    //   __llvm_retpoline_eax: +    //         calll .Leax_call_target +    //   .Leax_capture_spec: +    //         pause +    //         jmp .Leax_capture_spec +    //   .align 16 +    //   .Leax_call_target: +    //         movl %eax, (%esp)  # Clobber return addr +    //         retl +    // +    //   __llvm_retpoline_ecx: +    //   ... # Same setup +    //         movl %ecx, (%esp) +    //         retl +    // +    //   __llvm_retpoline_edx: +    //   ... # Same setup +    //         movl %edx, (%esp) +    //         retl +    // +    //   __llvm_retpoline_edi: +    //   ... 
# Same setup +    //         movl %edi, (%esp) +    //         retl +    if (MF.getName() == EAXRetpolineName) +      ThunkReg = X86::EAX; +    else if (MF.getName() == ECXRetpolineName) +      ThunkReg = X86::ECX; +    else if (MF.getName() == EDXRetpolineName) +      ThunkReg = X86::EDX; +    else if (MF.getName() == EDIRetpolineName) +      ThunkReg = X86::EDI; +    else +      llvm_unreachable("Invalid thunk name on x86-32!"); +  } + +  const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); +  // Grab the entry MBB and erase any other blocks. O0 codegen appears to +  // generate two bbs for the entry block. +  MachineBasicBlock *Entry = &MF.front(); +  Entry->clear(); +  while (MF.size() > 1) +    MF.erase(std::next(MF.begin())); + +  MachineBasicBlock *CaptureSpec = +      MF.CreateMachineBasicBlock(Entry->getBasicBlock()); +  MachineBasicBlock *CallTarget = +      MF.CreateMachineBasicBlock(Entry->getBasicBlock()); +  MCSymbol *TargetSym = MF.getContext().createTempSymbol(); +  MF.push_back(CaptureSpec); +  MF.push_back(CallTarget); + +  const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; +  const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL; + +  Entry->addLiveIn(ThunkReg); +  BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym); + +  // The MIR verifier thinks that the CALL in the entry block will fall through +  // to CaptureSpec, so mark it as the successor. Technically, CaptureTarget is +  // the successor, but the MIR verifier doesn't know how to cope with that. +  Entry->addSuccessor(CaptureSpec); + +  // In the capture loop for speculation, we want to stop the processor from +  // speculating as fast as possible. On Intel processors, the PAUSE instruction +  // will block speculation without consuming any execution resources. On AMD +  // processors, the PAUSE instruction is (essentially) a nop, so we also use an +  // LFENCE instruction which they have advised will stop speculation as well +  // with minimal resource utilization. We still end the capture with a jump to +  // form an infinite loop to fully guarantee that no matter what implementation +  // of the x86 ISA, speculating this code path never escapes. +  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE)); +  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE)); +  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec); +  CaptureSpec->setHasAddressTaken(); +  CaptureSpec->addSuccessor(CaptureSpec); + +  CallTarget->addLiveIn(ThunkReg); +  CallTarget->setHasAddressTaken(); +  CallTarget->setAlignment(Align(16)); + +  // Insert return address clobber +  const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr; +  const Register SPReg = Is64Bit ? 
X86::RSP : X86::ESP; +  addRegOffset(BuildMI(CallTarget, DebugLoc(), TII->get(MovOpc)), SPReg, false, +               0) +      .addReg(ThunkReg); + +  CallTarget->back().setPreInstrSymbol(MF, TargetSym); +  BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc)); +} + +template <typename Derived> +void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI, +                                                 StringRef Name) { +  assert(Name.startswith(getDerived().getThunkPrefix()) && +         "Created a thunk with an unexpected prefix!"); + +  Module &M = const_cast<Module &>(*MMI.getModule()); +  LLVMContext &Ctx = M.getContext(); +  auto Type = FunctionType::get(Type::getVoidTy(Ctx), false); +  Function *F = +      Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M); +  F->setVisibility(GlobalValue::HiddenVisibility); +  F->setComdat(M.getOrInsertComdat(Name)); + +  // Add Attributes so that we don't create a frame, unwind information, or +  // inline. +  AttrBuilder B; +  B.addAttribute(llvm::Attribute::NoUnwind); +  B.addAttribute(llvm::Attribute::Naked); +  F->addAttributes(llvm::AttributeList::FunctionIndex, B); + +  // Populate our function a bit so that we can verify. +  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F); +  IRBuilder<> Builder(Entry); + +  Builder.CreateRetVoid(); + +  // MachineFunctions/MachineBasicBlocks aren't created automatically for the +  // IR-level constructs we already made. Create them and insert them into the +  // module. +  MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); +  MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry); + +  // Insert EntryMBB into MF. It's not in the module until we do this. +  MF.insert(MF.end(), EntryMBB); +  // Set MF properties. We never use vregs... +  MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); +} + +template <typename Derived> +bool ThunkInserter<Derived>::run(MachineModuleInfo &MMI, MachineFunction &MF) { +  // If MF is not a thunk, check to see if we need to insert a thunk. +  if (!MF.getName().startswith(getDerived().getThunkPrefix())) { +    // If we've already inserted a thunk, nothing else to do. +    if (InsertedThunks) +      return false; + +    // Only add a thunk if one of the functions has the corresponding feature +    // enabled in its subtarget, and doesn't enable external thunks. +    // FIXME: Conditionalize on indirect calls so we don't emit a thunk when +    // nothing will end up calling it. +    // FIXME: It's a little silly to look at every function just to enumerate +    // the subtargets, but eventually we'll want to look at them for indirect +    // calls, so maybe this is OK. +    if (!getDerived().mayUseThunk(MF)) +      return false; + +    getDerived().insertThunks(MMI); +    InsertedThunks = true; +    return true; +  } + +  // If this *is* a thunk function, we need to populate it with the correct MI. 
+  getDerived().populateThunk(MF); +  return true; +} + +FunctionPass *llvm::createX86IndirectThunksPass() { +  return new X86IndirectThunks(); +} + +char X86IndirectThunks::ID = 0; + +bool X86IndirectThunks::doInitialization(Module &M) { +  initTIs(M, TIs); +  return false; +} + +bool X86IndirectThunks::runOnMachineFunction(MachineFunction &MF) { +  LLVM_DEBUG(dbgs() << getPassName() << '\n'); +  auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); +  return runTIs(MMI, MF, TIs); +} diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 78d8dd3c0d03..1fdac104cb73 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1213,14 +1213,14 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),  def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),            (TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>, -          Requires<[Not64BitMode, NotUseRetpolineIndirectCalls]>; +          Requires<[Not64BitMode, NotUseIndirectThunkCalls]>;  // FIXME: This is disabled for 32-bit PIC mode because the global base  // register which is part of the address mode may be assigned a  // callee-saved register.  def : Pat<(X86tcret (load addr:$dst), imm:$off),            (TCRETURNmi addr:$dst, imm:$off)>, -          Requires<[Not64BitMode, IsNotPIC, NotUseRetpolineIndirectCalls]>; +          Requires<[Not64BitMode, IsNotPIC, NotUseIndirectThunkCalls]>;  def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),            (TCRETURNdi tglobaladdr:$dst, imm:$off)>, @@ -1232,21 +1232,21 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),  def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),            (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>, -          Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>; +          Requires<[In64BitMode, NotUseIndirectThunkCalls]>;  // Don't fold loads into X86tcret requiring more than 6 regs.  // There wouldn't be enough scratch registers for base+index.  
def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),            (TCRETURNmi64 addr:$dst, imm:$off)>, -          Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>; +          Requires<[In64BitMode, NotUseIndirectThunkCalls]>;  def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), -          (RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>, -          Requires<[In64BitMode, UseRetpolineIndirectCalls]>; +          (INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>, +          Requires<[In64BitMode, UseIndirectThunkCalls]>;  def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), -          (RETPOLINE_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>, -          Requires<[Not64BitMode, UseRetpolineIndirectCalls]>; +          (INDIRECT_THUNK_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>, +          Requires<[Not64BitMode, UseIndirectThunkCalls]>;  def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),            (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>, diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td index 32faeb1a86f2..1842dc19ec2e 100644 --- a/llvm/lib/Target/X86/X86InstrControl.td +++ b/llvm/lib/Target/X86/X86InstrControl.td @@ -237,13 +237,13 @@ let isCall = 1 in                          Sched<[WriteJumpLd]>;      def CALL32r     : I<0xFF, MRM2r, (outs), (ins GR32:$dst),                          "call{l}\t{*}$dst", [(X86call GR32:$dst)]>, OpSize32, -                        Requires<[Not64BitMode,NotUseRetpolineIndirectCalls]>, +                        Requires<[Not64BitMode,NotUseIndirectThunkCalls]>,                          Sched<[WriteJump]>;      def CALL32m     : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),                          "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,                          OpSize32,                          Requires<[Not64BitMode,FavorMemIndirectCall, -                                  NotUseRetpolineIndirectCalls]>, +                                  NotUseIndirectThunkCalls]>,                          Sched<[WriteJumpLd]>;      // Non-tracking calls for IBT, use with caution. @@ -334,11 +334,11 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {                        Requires<[In64BitMode]>;    def CALL64r       : I<0xFF, MRM2r, (outs), (ins GR64:$dst),                          "call{q}\t{*}$dst", [(X86call GR64:$dst)]>, -                      Requires<[In64BitMode,NotUseRetpolineIndirectCalls]>; +                      Requires<[In64BitMode,NotUseIndirectThunkCalls]>;    def CALL64m       : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),                          "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,                        Requires<[In64BitMode,FavorMemIndirectCall, -                                NotUseRetpolineIndirectCalls]>; +                                NotUseIndirectThunkCalls]>;    // Non-tracking calls for IBT, use with caution.    
let isCodeGenOnly = 1 in { @@ -393,19 +393,19 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,      Uses = [RSP, SSP],      usesCustomInserter = 1,      SchedRW = [WriteJump] in { -  def RETPOLINE_CALL32 : +  def INDIRECT_THUNK_CALL32 :      PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>, -            Requires<[Not64BitMode,UseRetpolineIndirectCalls]>; +            Requires<[Not64BitMode,UseIndirectThunkCalls]>; -  def RETPOLINE_CALL64 : +  def INDIRECT_THUNK_CALL64 :      PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>, -            Requires<[In64BitMode,UseRetpolineIndirectCalls]>; +            Requires<[In64BitMode,UseIndirectThunkCalls]>; -  // Retpoline variant of indirect tail calls. +  // Indirect thunk variant of indirect tail calls.    let isTerminator = 1, isReturn = 1, isBarrier = 1 in { -    def RETPOLINE_TCRETURN64 : +    def INDIRECT_THUNK_TCRETURN64 :        PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>; -    def RETPOLINE_TCRETURN32 : +    def INDIRECT_THUNK_TCRETURN32 :        PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>;    }  } diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index ca5425e8b89f..93f40c8ec996 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -996,8 +996,8 @@ def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;  def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;  def HasERMSB : Predicate<"Subtarget->hasERMSB()">;  def HasMFence    : Predicate<"Subtarget->hasMFence()">; -def UseRetpolineIndirectCalls : Predicate<"Subtarget->useRetpolineIndirectCalls()">; -def NotUseRetpolineIndirectCalls : Predicate<"!Subtarget->useRetpolineIndirectCalls()">; +def UseIndirectThunkCalls : Predicate<"Subtarget->useIndirectThunkCalls()">; +def NotUseIndirectThunkCalls : Predicate<"!Subtarget->useIndirectThunkCalls()">;  //===----------------------------------------------------------------------===//  // X86 Instruction Format Definitions. diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp new file mode 100644 index 000000000000..35fc439998f9 --- /dev/null +++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp @@ -0,0 +1,900 @@ +//==-- X86LoadValueInjectionLoadHardening.cpp - LVI load hardening for x86 --=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Description: This pass finds Load Value Injection (LVI) gadgets consisting +/// of a load from memory (i.e., SOURCE), and any operation that may transmit +/// the value loaded from memory over a covert channel, or use the value loaded +/// from memory to determine a branch/call target (i.e., SINK). After finding +/// all such gadgets in a given function, the pass minimally inserts LFENCE +/// instructions in such a manner that the following property is satisfied: for +/// all SOURCE+SINK pairs, all paths in the CFG from SOURCE to SINK contain at +/// least one LFENCE instruction. 
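In graph terms, a gadget is mitigated exactly when its SINK can no longer be reached from its SOURCE along CFG edges that avoid every fence, which is how the pass later decides whether an existing LFENCE already covers a gadget. A minimal standalone sketch of that check, using a plain adjacency list rather than the pass's own ImmutableGraph types (all names here are illustrative, not the pass's API):

    #include <queue>
    #include <vector>

    // A gadget (Source, Sink) is mitigated iff every CFG path from Source to
    // Sink crosses a fence, i.e. Sink becomes unreachable once the traversal
    // refuses to walk through fence nodes.
    bool isMitigated(const std::vector<std::vector<int>> &CFGSucc,
                     const std::vector<bool> &IsFence, int Source, int Sink) {
      std::vector<bool> Visited(CFGSucc.size(), false);
      std::queue<int> Worklist;
      Worklist.push(Source);
      Visited[Source] = true;
      while (!Worklist.empty()) {
        int N = Worklist.front();
        Worklist.pop();
        for (int Succ : CFGSucc[N]) {
          if (Succ == Sink)
            return false;           // reached the sink without crossing a fence
          if (!Visited[Succ] && !IsFence[Succ]) {
            Visited[Succ] = true;   // keep walking only through non-fence nodes
            Worklist.push(Succ);
          }
        }
      }
      return true;                  // every path to the sink is cut by a fence
    }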
The algorithm that implements this minimal +/// insertion is influenced by an academic paper that minimally inserts memory +/// fences for high-performance concurrent programs: +///         http://www.cs.ucr.edu/~lesani/companion/oopsla15/OOPSLA15.pdf +/// The algorithm implemented in this pass is as follows: +/// 1. Build a condensed CFG (i.e., a GadgetGraph) consisting only of the +/// following components: +///    - SOURCE instructions (also includes function arguments) +///    - SINK instructions +///    - Basic block entry points +///    - Basic block terminators +///    - LFENCE instructions +/// 2. Analyze the GadgetGraph to determine which SOURCE+SINK pairs (i.e., +/// gadgets) are already mitigated by existing LFENCEs. If all gadgets have been +/// mitigated, go to step 6. +/// 3. Use a heuristic or plugin to approximate minimal LFENCE insertion. +/// 4. Insert one LFENCE along each CFG edge that was cut in step 3. +/// 5. Go to step 2. +/// 6. If any LFENCEs were inserted, return `true` from runOnMachineFunction() +/// to tell LLVM that the function was modified. +/// +//===----------------------------------------------------------------------===// + +#include "ImmutableGraph.h" +#include "X86.h" +#include "X86Subtarget.h" +#include "X86TargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominanceFrontier.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RDFGraph.h" +#include "llvm/CodeGen/RDFLiveness.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/DOTGraphTraits.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define PASS_KEY "x86-lvi-load" +#define DEBUG_TYPE PASS_KEY + +STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation"); +STATISTIC(NumFunctionsConsidered, "Number of functions analyzed"); +STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations " +                                 "were deployed"); +STATISTIC(NumGadgets, "Number of LVI gadgets detected during analysis"); + +static cl::opt<std::string> OptimizePluginPath( +    PASS_KEY "-opt-plugin", +    cl::desc("Specify a plugin to optimize LFENCE insertion"), cl::Hidden); + +static cl::opt<bool> NoConditionalBranches( +    PASS_KEY "-no-cbranch", +    cl::desc("Don't treat conditional branches as disclosure gadgets. 
This " +             "may improve performance, at the cost of security."), +    cl::init(false), cl::Hidden); + +static cl::opt<bool> EmitDot( +    PASS_KEY "-dot", +    cl::desc( +        "For each function, emit a dot graph depicting potential LVI gadgets"), +    cl::init(false), cl::Hidden); + +static cl::opt<bool> EmitDotOnly( +    PASS_KEY "-dot-only", +    cl::desc("For each function, emit a dot graph depicting potential LVI " +             "gadgets, and do not insert any fences"), +    cl::init(false), cl::Hidden); + +static cl::opt<bool> EmitDotVerify( +    PASS_KEY "-dot-verify", +    cl::desc("For each function, emit a dot graph to stdout depicting " +             "potential LVI gadgets, used for testing purposes only"), +    cl::init(false), cl::Hidden); + +static llvm::sys::DynamicLibrary OptimizeDL; +typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size, +                            unsigned int *edges, int *edge_values, +                            int *cut_edges /* out */, unsigned int edges_size); +static OptimizeCutT OptimizeCut = nullptr; + +namespace { + +struct MachineGadgetGraph : ImmutableGraph<MachineInstr *, int> { +  static constexpr int GadgetEdgeSentinel = -1; +  static constexpr MachineInstr *const ArgNodeSentinel = nullptr; + +  using GraphT = ImmutableGraph<MachineInstr *, int>; +  using Node = typename GraphT::Node; +  using Edge = typename GraphT::Edge; +  using size_type = typename GraphT::size_type; +  MachineGadgetGraph(std::unique_ptr<Node[]> Nodes, +                     std::unique_ptr<Edge[]> Edges, size_type NodesSize, +                     size_type EdgesSize, int NumFences = 0, int NumGadgets = 0) +      : GraphT(std::move(Nodes), std::move(Edges), NodesSize, EdgesSize), +        NumFences(NumFences), NumGadgets(NumGadgets) {} +  static inline bool isCFGEdge(const Edge &E) { +    return E.getValue() != GadgetEdgeSentinel; +  } +  static inline bool isGadgetEdge(const Edge &E) { +    return E.getValue() == GadgetEdgeSentinel; +  } +  int NumFences; +  int NumGadgets; +}; + +class X86LoadValueInjectionLoadHardeningPass : public MachineFunctionPass { +public: +  X86LoadValueInjectionLoadHardeningPass() : MachineFunctionPass(ID) {} + +  StringRef getPassName() const override { +    return "X86 Load Value Injection (LVI) Load Hardening"; +  } +  void getAnalysisUsage(AnalysisUsage &AU) const override; +  bool runOnMachineFunction(MachineFunction &MF) override; + +  static char ID; + +private: +  using GraphBuilder = ImmutableGraphBuilder<MachineGadgetGraph>; +  using EdgeSet = MachineGadgetGraph::EdgeSet; +  using NodeSet = MachineGadgetGraph::NodeSet; +  using Gadget = std::pair<MachineInstr *, MachineInstr *>; + +  const X86Subtarget *STI; +  const TargetInstrInfo *TII; +  const TargetRegisterInfo *TRI; + +  std::unique_ptr<MachineGadgetGraph> +  getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI, +                 const MachineDominatorTree &MDT, +                 const MachineDominanceFrontier &MDF) const; +  int hardenLoadsWithPlugin(MachineFunction &MF, +                            std::unique_ptr<MachineGadgetGraph> Graph) const; +  int hardenLoadsWithGreedyHeuristic( +      MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const; +  int elimMitigatedEdgesAndNodes(MachineGadgetGraph &G, +                                 EdgeSet &ElimEdges /* in, out */, +                                 NodeSet &ElimNodes /* in, out */) const; +  std::unique_ptr<MachineGadgetGraph> +  
trimMitigatedEdges(std::unique_ptr<MachineGadgetGraph> Graph) const; +  void findAndCutEdges(MachineGadgetGraph &G, +                       EdgeSet &CutEdges /* out */) const; +  int insertFences(MachineFunction &MF, MachineGadgetGraph &G, +                   EdgeSet &CutEdges /* in, out */) const; +  bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const; +  bool instrUsesRegToBranch(const MachineInstr &I, unsigned Reg) const; +  inline bool isFence(const MachineInstr *MI) const { +    return MI && (MI->getOpcode() == X86::LFENCE || +                  (STI->useLVIControlFlowIntegrity() && MI->isCall())); +  } +}; + +} // end anonymous namespace + +namespace llvm { + +template <> +struct GraphTraits<MachineGadgetGraph *> +    : GraphTraits<ImmutableGraph<MachineInstr *, int> *> {}; + +template <> +struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits { +  using GraphType = MachineGadgetGraph; +  using Traits = llvm::GraphTraits<GraphType *>; +  using NodeRef = typename Traits::NodeRef; +  using EdgeRef = typename Traits::EdgeRef; +  using ChildIteratorType = typename Traits::ChildIteratorType; +  using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType; + +  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} + +  std::string getNodeLabel(NodeRef Node, GraphType *) { +    if (Node->getValue() == MachineGadgetGraph::ArgNodeSentinel) +      return "ARGS"; + +    std::string Str; +    raw_string_ostream OS(Str); +    OS << *Node->getValue(); +    return OS.str(); +  } + +  static std::string getNodeAttributes(NodeRef Node, GraphType *) { +    MachineInstr *MI = Node->getValue(); +    if (MI == MachineGadgetGraph::ArgNodeSentinel) +      return "color = blue"; +    if (MI->getOpcode() == X86::LFENCE) +      return "color = green"; +    return ""; +  } + +  static std::string getEdgeAttributes(NodeRef, ChildIteratorType E, +                                       GraphType *) { +    int EdgeVal = (*E.getCurrent()).getValue(); +    return EdgeVal >= 0 ? "label = " + std::to_string(EdgeVal) +                        : "color = red, style = \"dashed\""; +  } +}; + +} // end namespace llvm + +constexpr MachineInstr *MachineGadgetGraph::ArgNodeSentinel; +constexpr int MachineGadgetGraph::GadgetEdgeSentinel; + +char X86LoadValueInjectionLoadHardeningPass::ID = 0; + +void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage( +    AnalysisUsage &AU) const { +  MachineFunctionPass::getAnalysisUsage(AU); +  AU.addRequired<MachineLoopInfo>(); +  AU.addRequired<MachineDominatorTree>(); +  AU.addRequired<MachineDominanceFrontier>(); +  AU.setPreservesCFG(); +} + +static void WriteGadgetGraph(raw_ostream &OS, MachineFunction &MF, +                             MachineGadgetGraph *G) { +  WriteGraph(OS, G, /*ShortNames*/ false, +             "Speculative gadgets for \"" + MF.getName() + "\" function"); +} + +bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction( +    MachineFunction &MF) { +  LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName() +                    << " *****\n"); +  STI = &MF.getSubtarget<X86Subtarget>(); +  if (!STI->useLVILoadHardening()) +    return false; + +  // FIXME: support 32-bit +  if (!STI->is64Bit()) +    report_fatal_error("LVI load hardening is only supported on 64-bit", false); + +  // Don't skip functions with the "optnone" attr but participate in opt-bisect. 
+  const Function &F = MF.getFunction(); +  if (!F.hasOptNone() && skipFunction(F)) +    return false; + +  ++NumFunctionsConsidered; +  TII = STI->getInstrInfo(); +  TRI = STI->getRegisterInfo(); +  LLVM_DEBUG(dbgs() << "Building gadget graph...\n"); +  const auto &MLI = getAnalysis<MachineLoopInfo>(); +  const auto &MDT = getAnalysis<MachineDominatorTree>(); +  const auto &MDF = getAnalysis<MachineDominanceFrontier>(); +  std::unique_ptr<MachineGadgetGraph> Graph = getGadgetGraph(MF, MLI, MDT, MDF); +  LLVM_DEBUG(dbgs() << "Building gadget graph... Done\n"); +  if (Graph == nullptr) +    return false; // didn't find any gadgets + +  if (EmitDotVerify) { +    WriteGadgetGraph(outs(), MF, Graph.get()); +    return false; +  } + +  if (EmitDot || EmitDotOnly) { +    LLVM_DEBUG(dbgs() << "Emitting gadget graph...\n"); +    std::error_code FileError; +    std::string FileName = "lvi."; +    FileName += MF.getName(); +    FileName += ".dot"; +    raw_fd_ostream FileOut(FileName, FileError); +    if (FileError) +      errs() << FileError.message(); +    WriteGadgetGraph(FileOut, MF, Graph.get()); +    FileOut.close(); +    LLVM_DEBUG(dbgs() << "Emitting gadget graph... Done\n"); +    if (EmitDotOnly) +      return false; +  } + +  int FencesInserted; +  if (!OptimizePluginPath.empty()) { +    if (!OptimizeDL.isValid()) { +      std::string ErrorMsg; +      OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary( +          OptimizePluginPath.c_str(), &ErrorMsg); +      if (!ErrorMsg.empty()) +        report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"'); +      OptimizeCut = (OptimizeCutT)OptimizeDL.getAddressOfSymbol("optimize_cut"); +      if (!OptimizeCut) +        report_fatal_error("Invalid optimization plugin"); +    } +    FencesInserted = hardenLoadsWithPlugin(MF, std::move(Graph)); +  } else { // Use the default greedy heuristic +    FencesInserted = hardenLoadsWithGreedyHeuristic(MF, std::move(Graph)); +  } + +  if (FencesInserted > 0) +    ++NumFunctionsMitigated; +  NumFences += FencesInserted; +  return (FencesInserted > 0); +} + +std::unique_ptr<MachineGadgetGraph> +X86LoadValueInjectionLoadHardeningPass::getGadgetGraph( +    MachineFunction &MF, const MachineLoopInfo &MLI, +    const MachineDominatorTree &MDT, +    const MachineDominanceFrontier &MDF) const { +  using namespace rdf; + +  // Build the Register Dataflow Graph using the RDF framework +  TargetOperandInfo TOI{*TII}; +  DataFlowGraph DFG{MF, *TII, *TRI, MDT, MDF, TOI}; +  DFG.build(); +  Liveness L{MF.getRegInfo(), DFG}; +  L.computePhiInfo(); + +  GraphBuilder Builder; +  using GraphIter = typename GraphBuilder::BuilderNodeRef; +  DenseMap<MachineInstr *, GraphIter> NodeMap; +  int FenceCount = 0, GadgetCount = 0; +  auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) { +    auto Ref = NodeMap.find(MI); +    if (Ref == NodeMap.end()) { +      auto I = Builder.addVertex(MI); +      NodeMap[MI] = I; +      return std::pair<GraphIter, bool>{I, true}; +    } +    return std::pair<GraphIter, bool>{Ref->getSecond(), false}; +  }; + +  // The `Transmitters` map memoizes transmitters found for each def. If a def +  // has not yet been analyzed, then it will not appear in the map. If a def +  // has been analyzed and was determined not to have any transmitters, then +  // its list of transmitters will be empty. 
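The same memoization idiom, stripped of RDF details: absence from the map means "not yet analyzed", while a present-but-empty entry means "analyzed, nothing found", which is what lets the recursive walk terminate cheaply on revisits. A simplified sketch with plain STL containers (hypothetical NodeId and relations, duplicate removal omitted):

    #include <unordered_map>
    #include <vector>

    using NodeId = unsigned;

    // Memoized transitive search for "transmitters" reachable from Def.
    const std::vector<NodeId> &
    findTransmitters(NodeId Def,
                     const std::unordered_map<NodeId, std::vector<NodeId>> &ChildDefs,
                     const std::unordered_map<NodeId, bool> &IsTransmitter,
                     std::unordered_map<NodeId, std::vector<NodeId>> &Memo) {
      if (auto It = Memo.find(Def); It != Memo.end())
        return It->second;              // already analyzed (possibly empty)
      auto &Result = Memo[Def];         // mark Def as analyzed up front
      if (auto T = IsTransmitter.find(Def); T != IsTransmitter.end() && T->second)
        Result.push_back(Def);
      if (auto C = ChildDefs.find(Def); C != ChildDefs.end())
        for (NodeId Child : C->second) {
          if (Child == Def)
            continue;                   // a def that feeds itself adds nothing new
          for (NodeId T : findTransmitters(Child, ChildDefs, IsTransmitter, Memo))
            Result.push_back(T);        // inherit the child's transmitters
        }
      return Result;
    }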
+  DenseMap<NodeId, std::vector<NodeId>> Transmitters; + +  // Analyze all machine instructions to find gadgets and LFENCEs, adding +  // each interesting value to `Nodes` +  auto AnalyzeDef = [&](NodeAddr<DefNode *> SourceDef) { +    SmallSet<NodeId, 8> UsesVisited, DefsVisited; +    std::function<void(NodeAddr<DefNode *>)> AnalyzeDefUseChain = +        [&](NodeAddr<DefNode *> Def) { +          if (Transmitters.find(Def.Id) != Transmitters.end()) +            return; // Already analyzed `Def` + +          // Use RDF to find all the uses of `Def` +          rdf::NodeSet Uses; +          RegisterRef DefReg = DFG.getPRI().normalize(Def.Addr->getRegRef(DFG)); +          for (auto UseID : L.getAllReachedUses(DefReg, Def)) { +            auto Use = DFG.addr<UseNode *>(UseID); +            if (Use.Addr->getFlags() & NodeAttrs::PhiRef) { // phi node +              NodeAddr<PhiNode *> Phi = Use.Addr->getOwner(DFG); +              for (auto I : L.getRealUses(Phi.Id)) { +                if (DFG.getPRI().alias(RegisterRef(I.first), DefReg)) { +                  for (auto UA : I.second) +                    Uses.emplace(UA.first); +                } +              } +            } else { // not a phi node +              Uses.emplace(UseID); +            } +          } + +          // For each use of `Def`, we want to know whether: +          // (1) The use can leak the Def'ed value, +          // (2) The use can further propagate the Def'ed value to more defs +          for (auto UseID : Uses) { +            if (!UsesVisited.insert(UseID).second) +              continue; // Already visited this use of `Def` + +            auto Use = DFG.addr<UseNode *>(UseID); +            assert(!(Use.Addr->getFlags() & NodeAttrs::PhiRef)); +            MachineOperand &UseMO = Use.Addr->getOp(); +            MachineInstr &UseMI = *UseMO.getParent(); +            assert(UseMO.isReg()); + +            // We naively assume that an instruction propagates any loaded +            // uses to all defs unless the instruction is a call, in which +            // case all arguments will be treated as gadget sources during +            // analysis of the callee function. +            if (UseMI.isCall()) +              continue; + +            // Check whether this use can transmit (leak) its value. +            if (instrUsesRegToAccessMemory(UseMI, UseMO.getReg()) || +                (!NoConditionalBranches && +                 instrUsesRegToBranch(UseMI, UseMO.getReg()))) { +              Transmitters[Def.Id].push_back(Use.Addr->getOwner(DFG).Id); +              if (UseMI.mayLoad()) +                continue; // Found a transmitting load -- no need to continue +                          // traversing its defs (i.e., this load will become +                          // a new gadget source anyways). +            } + +            // Check whether the use propagates to more defs. +            NodeAddr<InstrNode *> Owner{Use.Addr->getOwner(DFG)}; +            rdf::NodeList AnalyzedChildDefs; +            for (auto &ChildDef : +                 Owner.Addr->members_if(DataFlowGraph::IsDef, DFG)) { +              if (!DefsVisited.insert(ChildDef.Id).second) +                continue; // Already visited this def +              if (Def.Addr->getAttrs() & NodeAttrs::Dead) +                continue; +              if (Def.Id == ChildDef.Id) +                continue; // `Def` uses itself (e.g., increment loop counter) + +              AnalyzeDefUseChain(ChildDef); + +              // `Def` inherits all of its child defs' transmitters. 
+              for (auto TransmitterId : Transmitters[ChildDef.Id]) +                Transmitters[Def.Id].push_back(TransmitterId); +            } +          } + +          // Note that this statement adds `Def.Id` to the map if no +          // transmitters were found for `Def`. +          auto &DefTransmitters = Transmitters[Def.Id]; + +          // Remove duplicate transmitters +          llvm::sort(DefTransmitters); +          DefTransmitters.erase( +              std::unique(DefTransmitters.begin(), DefTransmitters.end()), +              DefTransmitters.end()); +        }; + +    // Find all of the transmitters +    AnalyzeDefUseChain(SourceDef); +    auto &SourceDefTransmitters = Transmitters[SourceDef.Id]; +    if (SourceDefTransmitters.empty()) +      return; // No transmitters for `SourceDef` + +    MachineInstr *Source = SourceDef.Addr->getFlags() & NodeAttrs::PhiRef +                               ? MachineGadgetGraph::ArgNodeSentinel +                               : SourceDef.Addr->getOp().getParent(); +    auto GadgetSource = MaybeAddNode(Source); +    // Each transmitter is a sink for `SourceDef`. +    for (auto TransmitterId : SourceDefTransmitters) { +      MachineInstr *Sink = DFG.addr<StmtNode *>(TransmitterId).Addr->getCode(); +      auto GadgetSink = MaybeAddNode(Sink); +      // Add the gadget edge to the graph. +      Builder.addEdge(MachineGadgetGraph::GadgetEdgeSentinel, +                      GadgetSource.first, GadgetSink.first); +      ++GadgetCount; +    } +  }; + +  LLVM_DEBUG(dbgs() << "Analyzing def-use chains to find gadgets\n"); +  // Analyze function arguments +  NodeAddr<BlockNode *> EntryBlock = DFG.getFunc().Addr->getEntryBlock(DFG); +  for (NodeAddr<PhiNode *> ArgPhi : +       EntryBlock.Addr->members_if(DataFlowGraph::IsPhi, DFG)) { +    NodeList Defs = ArgPhi.Addr->members_if(DataFlowGraph::IsDef, DFG); +    llvm::for_each(Defs, AnalyzeDef); +  } +  // Analyze every instruction in MF +  for (NodeAddr<BlockNode *> BA : DFG.getFunc().Addr->members(DFG)) { +    for (NodeAddr<StmtNode *> SA : +         BA.Addr->members_if(DataFlowGraph::IsCode<NodeAttrs::Stmt>, DFG)) { +      MachineInstr *MI = SA.Addr->getCode(); +      if (isFence(MI)) { +        MaybeAddNode(MI); +        ++FenceCount; +      } else if (MI->mayLoad()) { +        NodeList Defs = SA.Addr->members_if(DataFlowGraph::IsDef, DFG); +        llvm::for_each(Defs, AnalyzeDef); +      } +    } +  } +  LLVM_DEBUG(dbgs() << "Found " << FenceCount << " fences\n"); +  LLVM_DEBUG(dbgs() << "Found " << GadgetCount << " gadgets\n"); +  if (GadgetCount == 0) +    return nullptr; +  NumGadgets += GadgetCount; + +  // Traverse CFG to build the rest of the graph +  SmallSet<MachineBasicBlock *, 8> BlocksVisited; +  std::function<void(MachineBasicBlock *, GraphIter, unsigned)> TraverseCFG = +      [&](MachineBasicBlock *MBB, GraphIter GI, unsigned ParentDepth) { +        unsigned LoopDepth = MLI.getLoopDepth(MBB); +        if (!MBB->empty()) { +          // Always add the first instruction in each block +          auto NI = MBB->begin(); +          auto BeginBB = MaybeAddNode(&*NI); +          Builder.addEdge(ParentDepth, GI, BeginBB.first); +          if (!BlocksVisited.insert(MBB).second) +            return; + +          // Add any instructions within the block that are gadget components +          GI = BeginBB.first; +          while (++NI != MBB->end()) { +            auto Ref = NodeMap.find(&*NI); +            if (Ref != NodeMap.end()) { +              Builder.addEdge(LoopDepth, GI, 
Ref->getSecond()); +              GI = Ref->getSecond(); +            } +          } + +          // Always add the terminator instruction, if one exists +          auto T = MBB->getFirstTerminator(); +          if (T != MBB->end()) { +            auto EndBB = MaybeAddNode(&*T); +            if (EndBB.second) +              Builder.addEdge(LoopDepth, GI, EndBB.first); +            GI = EndBB.first; +          } +        } +        for (MachineBasicBlock *Succ : MBB->successors()) +          TraverseCFG(Succ, GI, LoopDepth); +      }; +  // ArgNodeSentinel is a pseudo-instruction that represents MF args in the +  // GadgetGraph +  GraphIter ArgNode = MaybeAddNode(MachineGadgetGraph::ArgNodeSentinel).first; +  TraverseCFG(&MF.front(), ArgNode, 0); +  std::unique_ptr<MachineGadgetGraph> G{Builder.get(FenceCount, GadgetCount)}; +  LLVM_DEBUG(dbgs() << "Found " << G->nodes_size() << " nodes\n"); +  return G; +} + +// Returns the number of remaining gadget edges that could not be eliminated +int X86LoadValueInjectionLoadHardeningPass::elimMitigatedEdgesAndNodes( +    MachineGadgetGraph &G, MachineGadgetGraph::EdgeSet &ElimEdges /* in, out */, +    MachineGadgetGraph::NodeSet &ElimNodes /* in, out */) const { +  if (G.NumFences > 0) { +    // Eliminate fences and CFG edges that ingress and egress the fence, as +    // they are trivially mitigated. +    for (const auto &E : G.edges()) { +      const MachineGadgetGraph::Node *Dest = E.getDest(); +      if (isFence(Dest->getValue())) { +        ElimNodes.insert(*Dest); +        ElimEdges.insert(E); +        for (const auto &DE : Dest->edges()) +          ElimEdges.insert(DE); +      } +    } +  } + +  // Find and eliminate gadget edges that have been mitigated. +  int MitigatedGadgets = 0, RemainingGadgets = 0; +  MachineGadgetGraph::NodeSet ReachableNodes{G}; +  for (const auto &RootN : G.nodes()) { +    if (llvm::none_of(RootN.edges(), MachineGadgetGraph::isGadgetEdge)) +      continue; // skip this node if it isn't a gadget source + +    // Find all of the nodes that are CFG-reachable from RootN using DFS +    ReachableNodes.clear(); +    std::function<void(const MachineGadgetGraph::Node *, bool)> +        FindReachableNodes = +            [&](const MachineGadgetGraph::Node *N, bool FirstNode) { +              if (!FirstNode) +                ReachableNodes.insert(*N); +              for (const auto &E : N->edges()) { +                const MachineGadgetGraph::Node *Dest = E.getDest(); +                if (MachineGadgetGraph::isCFGEdge(E) && +                    !ElimEdges.contains(E) && !ReachableNodes.contains(*Dest)) +                  FindReachableNodes(Dest, false); +              } +            }; +    FindReachableNodes(&RootN, true); + +    // Any gadget whose sink is unreachable has been mitigated +    for (const auto &E : RootN.edges()) { +      if (MachineGadgetGraph::isGadgetEdge(E)) { +        if (ReachableNodes.contains(*E.getDest())) { +          // This gadget's sink is reachable +          ++RemainingGadgets; +        } else { // This gadget's sink is unreachable, and therefore mitigated +          ++MitigatedGadgets; +          ElimEdges.insert(E); +        } +      } +    } +  } +  return RemainingGadgets; +} + +std::unique_ptr<MachineGadgetGraph> +X86LoadValueInjectionLoadHardeningPass::trimMitigatedEdges( +    std::unique_ptr<MachineGadgetGraph> Graph) const { +  MachineGadgetGraph::NodeSet ElimNodes{*Graph}; +  MachineGadgetGraph::EdgeSet ElimEdges{*Graph}; +  int RemainingGadgets = +      elimMitigatedEdgesAndNodes(*Graph, 
ElimEdges, ElimNodes); +  if (ElimEdges.empty() && ElimNodes.empty()) { +    Graph->NumFences = 0; +    Graph->NumGadgets = RemainingGadgets; +  } else { +    Graph = GraphBuilder::trim(*Graph, ElimNodes, ElimEdges, 0 /* NumFences */, +                               RemainingGadgets); +  } +  return Graph; +} + +int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithPlugin( +    MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const { +  int FencesInserted = 0; + +  do { +    LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n"); +    Graph = trimMitigatedEdges(std::move(Graph)); +    LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n"); +    if (Graph->NumGadgets == 0) +      break; + +    LLVM_DEBUG(dbgs() << "Cutting edges...\n"); +    EdgeSet CutEdges{*Graph}; +    auto Nodes = std::make_unique<unsigned int[]>(Graph->nodes_size() + +                                                  1 /* terminator node */); +    auto Edges = std::make_unique<unsigned int[]>(Graph->edges_size()); +    auto EdgeCuts = std::make_unique<int[]>(Graph->edges_size()); +    auto EdgeValues = std::make_unique<int[]>(Graph->edges_size()); +    for (const auto &N : Graph->nodes()) { +      Nodes[Graph->getNodeIndex(N)] = Graph->getEdgeIndex(*N.edges_begin()); +    } +    Nodes[Graph->nodes_size()] = Graph->edges_size(); // terminator node +    for (const auto &E : Graph->edges()) { +      Edges[Graph->getEdgeIndex(E)] = Graph->getNodeIndex(*E.getDest()); +      EdgeValues[Graph->getEdgeIndex(E)] = E.getValue(); +    } +    OptimizeCut(Nodes.get(), Graph->nodes_size(), Edges.get(), EdgeValues.get(), +                EdgeCuts.get(), Graph->edges_size()); +    for (int I = 0; I < Graph->edges_size(); ++I) +      if (EdgeCuts[I]) +        CutEdges.set(I); +    LLVM_DEBUG(dbgs() << "Cutting edges... Done\n"); +    LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n"); + +    LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n"); +    FencesInserted += insertFences(MF, *Graph, CutEdges); +    LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n"); +    LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n"); + +    Graph = GraphBuilder::trim(*Graph, MachineGadgetGraph::NodeSet{*Graph}, +                               CutEdges); +  } while (true); + +  return FencesInserted; +} + +int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithGreedyHeuristic( +    MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const { +  LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n"); +  Graph = trimMitigatedEdges(std::move(Graph)); +  LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n"); +  if (Graph->NumGadgets == 0) +    return 0; + +  LLVM_DEBUG(dbgs() << "Cutting edges...\n"); +  MachineGadgetGraph::NodeSet ElimNodes{*Graph}, GadgetSinks{*Graph}; +  MachineGadgetGraph::EdgeSet ElimEdges{*Graph}, CutEdges{*Graph}; +  auto IsCFGEdge = [&ElimEdges, &CutEdges](const MachineGadgetGraph::Edge &E) { +    return !ElimEdges.contains(E) && !CutEdges.contains(E) && +           MachineGadgetGraph::isCFGEdge(E); +  }; +  auto IsGadgetEdge = [&ElimEdges, +                       &CutEdges](const MachineGadgetGraph::Edge &E) { +    return !ElimEdges.contains(E) && !CutEdges.contains(E) && +           MachineGadgetGraph::isGadgetEdge(E); +  }; + +  // FIXME: this is O(E^2), we could probably do better. +  do { +    // Find the cheapest CFG edge that will eliminate a gadget (by being +    // egress from a SOURCE node or ingress to a SINK node), and cut it. 
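One round of that greedy choice can be pictured on a toy edge list; a simplified sketch with plain structs standing in for the graph's edges, CFG edge weights playing the role of loop depth, and the already-eliminated/already-cut bookkeeping omitted:

    #include <set>
    #include <vector>

    struct ToyEdge {
      int From, To;
      int Weight;    // CFG edges carry loop depth; gadget edges use a sentinel
      bool IsGadget;
    };

    // Pick the cheapest CFG edge that leaves a gadget SOURCE or enters a gadget
    // SINK, the candidates the heuristic above considers.
    const ToyEdge *pickCheapestCut(const std::vector<ToyEdge> &Edges) {
      std::set<int> Sources, Sinks;
      for (const ToyEdge &E : Edges)
        if (E.IsGadget) {
          Sources.insert(E.From);
          Sinks.insert(E.To);
        }
      const ToyEdge *Cheapest = nullptr;
      for (const ToyEdge &E : Edges) {
        if (E.IsGadget || (!Sources.count(E.From) && !Sinks.count(E.To)))
          continue;
        if (!Cheapest || E.Weight < Cheapest->Weight)
          Cheapest = &E;
      }
      return Cheapest;   // null only once no gadget edges remain
    }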
+    const MachineGadgetGraph::Edge *CheapestSoFar = nullptr; + +    // First, collect all gadget source and sink nodes. +    MachineGadgetGraph::NodeSet GadgetSources{*Graph}, GadgetSinks{*Graph}; +    for (const auto &N : Graph->nodes()) { +      if (ElimNodes.contains(N)) +        continue; +      for (const auto &E : N.edges()) { +        if (IsGadgetEdge(E)) { +          GadgetSources.insert(N); +          GadgetSinks.insert(*E.getDest()); +        } +      } +    } + +    // Next, look for the cheapest CFG edge which, when cut, is guaranteed to +    // mitigate at least one gadget by either: +    // (a) being egress from a gadget source, or +    // (b) being ingress to a gadget sink. +    for (const auto &N : Graph->nodes()) { +      if (ElimNodes.contains(N)) +        continue; +      for (const auto &E : N.edges()) { +        if (IsCFGEdge(E)) { +          if (GadgetSources.contains(N) || GadgetSinks.contains(*E.getDest())) { +            if (!CheapestSoFar || E.getValue() < CheapestSoFar->getValue()) +              CheapestSoFar = &E; +          } +        } +      } +    } + +    assert(CheapestSoFar && "Failed to cut an edge"); +    CutEdges.insert(*CheapestSoFar); +    ElimEdges.insert(*CheapestSoFar); +  } while (elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes)); +  LLVM_DEBUG(dbgs() << "Cutting edges... Done\n"); +  LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n"); + +  LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n"); +  int FencesInserted = insertFences(MF, *Graph, CutEdges); +  LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n"); +  LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n"); + +  return FencesInserted; +} + +int X86LoadValueInjectionLoadHardeningPass::insertFences( +    MachineFunction &MF, MachineGadgetGraph &G, +    EdgeSet &CutEdges /* in, out */) const { +  int FencesInserted = 0; +  for (const auto &N : G.nodes()) { +    for (const auto &E : N.edges()) { +      if (CutEdges.contains(E)) { +        MachineInstr *MI = N.getValue(), *Prev; +        MachineBasicBlock *MBB;                  // Insert an LFENCE in this MBB +        MachineBasicBlock::iterator InsertionPt; // ...at this point +        if (MI == MachineGadgetGraph::ArgNodeSentinel) { +          // insert LFENCE at beginning of entry block +          MBB = &MF.front(); +          InsertionPt = MBB->begin(); +          Prev = nullptr; +        } else if (MI->isBranch()) { // insert the LFENCE before the branch +          MBB = MI->getParent(); +          InsertionPt = MI; +          Prev = MI->getPrevNode(); +          // Remove all egress CFG edges from this branch because the inserted +          // LFENCE prevents gadgets from crossing the branch. +          for (const auto &E : N.edges()) { +            if (MachineGadgetGraph::isCFGEdge(E)) +              CutEdges.insert(E); +          } +        } else { // insert the LFENCE after the instruction +          MBB = MI->getParent(); +          InsertionPt = MI->getNextNode() ? MI->getNextNode() : MBB->end(); +          Prev = InsertionPt == MBB->end() +                     ? (MBB->empty() ? nullptr : &MBB->back()) +                     : InsertionPt->getPrevNode(); +        } +        // Ensure this insertion is not redundant (two LFENCEs in sequence). 
+        if ((InsertionPt == MBB->end() || !isFence(&*InsertionPt)) && +            (!Prev || !isFence(Prev))) { +          BuildMI(*MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE)); +          ++FencesInserted; +        } +      } +    } +  } +  return FencesInserted; +} + +bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToAccessMemory( +    const MachineInstr &MI, unsigned Reg) const { +  if (!MI.mayLoadOrStore() || MI.getOpcode() == X86::MFENCE || +      MI.getOpcode() == X86::SFENCE || MI.getOpcode() == X86::LFENCE) +    return false; + +  // FIXME: This does not handle pseudo loading instruction like TCRETURN* +  const MCInstrDesc &Desc = MI.getDesc(); +  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags); +  if (MemRefBeginIdx < 0) { +    LLVM_DEBUG(dbgs() << "Warning: unable to obtain memory operand for loading " +                         "instruction:\n"; +               MI.print(dbgs()); dbgs() << '\n';); +    return false; +  } +  MemRefBeginIdx += X86II::getOperandBias(Desc); + +  const MachineOperand &BaseMO = +      MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg); +  const MachineOperand &IndexMO = +      MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg); +  return (BaseMO.isReg() && BaseMO.getReg() != X86::NoRegister && +          TRI->regsOverlap(BaseMO.getReg(), Reg)) || +         (IndexMO.isReg() && IndexMO.getReg() != X86::NoRegister && +          TRI->regsOverlap(IndexMO.getReg(), Reg)); +} + +bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToBranch( +    const MachineInstr &MI, unsigned Reg) const { +  if (!MI.isConditionalBranch()) +    return false; +  for (const MachineOperand &Use : MI.uses()) +    if (Use.isReg() && Use.getReg() == Reg) +      return true; +  return false; +} + +INITIALIZE_PASS_BEGIN(X86LoadValueInjectionLoadHardeningPass, PASS_KEY, +                      "X86 LVI load hardening", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier) +INITIALIZE_PASS_END(X86LoadValueInjectionLoadHardeningPass, PASS_KEY, +                    "X86 LVI load hardening", false, false) + +FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningPass() { +  return new X86LoadValueInjectionLoadHardeningPass(); +} + +namespace { + +/// The `X86LoadValueInjectionLoadHardeningPass` above depends on expensive +/// analysis passes that add complexity to the pipeline. This complexity +/// can cause noticable overhead when no optimizations are enabled, i.e., -O0. +/// The purpose of `X86LoadValueInjectionLoadHardeningUnoptimizedPass` is to +/// provide the same security as the optimized pass, but without adding +/// unnecessary complexity to the LLVM pipeline. +/// +/// The behavior of this pass is simply to insert an LFENCE after every load +/// instruction. 
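In effect, every instruction that may load is immediately followed by an LFENCE, with no attempt to prove a particular load harmless. A toy illustration on plain mnemonic strings (the load test is an ad-hoc stand-in for MachineInstr::mayLoad(), not the pass's real logic):

    #include <string>
    #include <vector>

    std::vector<std::string> hardenAllLoads(const std::vector<std::string> &Insts) {
      std::vector<std::string> Out;
      for (const std::string &I : Insts) {
        Out.push_back(I);
        // Crude stand-in for mayLoad(): an AT&T-syntax mov with a memory
        // source operand, e.g. "mov (%rdi), %rax".
        if (I.rfind("mov (", 0) == 0)
          Out.push_back("lfence");
      }
      return Out;
    }
    // {"mov (%rdi), %rax", "add $1, %rax"} becomes
    // {"mov (%rdi), %rax", "lfence", "add $1, %rax"}.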
+class X86LoadValueInjectionLoadHardeningUnoptimizedPass +    : public MachineFunctionPass { +public: +  X86LoadValueInjectionLoadHardeningUnoptimizedPass() +      : MachineFunctionPass(ID) {} + +  StringRef getPassName() const override { +    return "X86 Load Value Injection (LVI) Load Hardening (Unoptimized)"; +  } +  bool runOnMachineFunction(MachineFunction &MF) override; +  static char ID; +}; + +} // end anonymous namespace + +char X86LoadValueInjectionLoadHardeningUnoptimizedPass::ID = 0; + +bool X86LoadValueInjectionLoadHardeningUnoptimizedPass::runOnMachineFunction( +    MachineFunction &MF) { +  LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName() +                    << " *****\n"); +  const X86Subtarget *STI = &MF.getSubtarget<X86Subtarget>(); +  if (!STI->useLVILoadHardening()) +    return false; + +  // FIXME: support 32-bit +  if (!STI->is64Bit()) +    report_fatal_error("LVI load hardening is only supported on 64-bit", false); + +  // Don't skip functions with the "optnone" attr but participate in opt-bisect. +  const Function &F = MF.getFunction(); +  if (!F.hasOptNone() && skipFunction(F)) +    return false; + +  bool Modified = false; +  ++NumFunctionsConsidered; + +  const TargetInstrInfo *TII = STI->getInstrInfo(); +  for (auto &MBB : MF) { +    for (auto &MI : MBB) { +      if (!MI.mayLoad() || MI.getOpcode() == X86::LFENCE || +          MI.getOpcode() == X86::MFENCE) +        continue; + +      MachineBasicBlock::iterator InsertionPt = +          MI.getNextNode() ? MI.getNextNode() : MBB.end(); +      BuildMI(MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE)); +      ++NumFences; +      Modified = true; +    } +  } + +  if (Modified) +    ++NumFunctionsMitigated; + +  return Modified; +} + +INITIALIZE_PASS(X86LoadValueInjectionLoadHardeningUnoptimizedPass, PASS_KEY, +                "X86 LVI load hardening", false, false) + +FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningUnoptimizedPass() { +  return new X86LoadValueInjectionLoadHardeningUnoptimizedPass(); +} diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp new file mode 100644 index 000000000000..6e1134a25950 --- /dev/null +++ b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp @@ -0,0 +1,143 @@ +//===-- X86LoadValueInjectionRetHardening.cpp - LVI RET hardening for x86 --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Description: Replaces every `ret` instruction with the sequence: +/// ``` +/// pop <scratch-reg> +/// lfence +/// jmp *<scratch-reg> +/// ``` +/// where `<scratch-reg>` is some available scratch register, according to the +/// calling convention of the function being mitigated. 
+/// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrBuilder.h" +#include "X86Subtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include <bitset> + +using namespace llvm; + +#define PASS_KEY "x86-lvi-ret" +#define DEBUG_TYPE PASS_KEY + +STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation"); +STATISTIC(NumFunctionsConsidered, "Number of functions analyzed"); +STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations " +                                 "were deployed"); + +namespace { + +class X86LoadValueInjectionRetHardeningPass : public MachineFunctionPass { +public: +  X86LoadValueInjectionRetHardeningPass() : MachineFunctionPass(ID) {} +  StringRef getPassName() const override { +    return "X86 Load Value Injection (LVI) Ret-Hardening"; +  } +  bool runOnMachineFunction(MachineFunction &MF) override; + +  static char ID; +}; + +} // end anonymous namespace + +char X86LoadValueInjectionRetHardeningPass::ID = 0; + +bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction( +    MachineFunction &MF) { +  LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName() +                    << " *****\n"); +  const X86Subtarget *Subtarget = &MF.getSubtarget<X86Subtarget>(); +  if (!Subtarget->useLVIControlFlowIntegrity() || !Subtarget->is64Bit()) +    return false; // FIXME: support 32-bit + +  // Don't skip functions with the "optnone" attr but participate in opt-bisect. +  const Function &F = MF.getFunction(); +  if (!F.hasOptNone() && skipFunction(F)) +    return false; + +  ++NumFunctionsConsidered; +  const X86RegisterInfo *TRI = Subtarget->getRegisterInfo(); +  const X86InstrInfo *TII = Subtarget->getInstrInfo(); +  unsigned ClobberReg = X86::NoRegister; +  std::bitset<X86::NUM_TARGET_REGS> UnclobberableGR64s; +  UnclobberableGR64s.set(X86::RSP); // can't clobber stack pointer +  UnclobberableGR64s.set(X86::RIP); // can't clobber instruction pointer +  UnclobberableGR64s.set(X86::RAX); // used for function return +  UnclobberableGR64s.set(X86::RDX); // used for function return + +  // We can clobber any register allowed by the function's calling convention. 
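Once a usable scratch register is found (below), the rewrite described in this file's header comment amounts to replacing each ret with a pop into that register, an LFENCE, and an indirect jump through it, so the return address loaded from the stack is fenced before it is consumed. A toy sketch at the level of textual assembly (the helper and the register name are purely illustrative):

    #include <string>
    #include <vector>

    // ret-hardening rewrite: "retq" -> pop / lfence / jmp through a scratch reg.
    std::vector<std::string> hardenReturn(const std::string &ScratchReg) {
      return {
          "popq  %" + ScratchReg,  // pop the return address into the scratch reg
          "lfence",                // fence before the loaded value can be used
          "jmpq  *%" + ScratchReg, // return via an indirect jump
      };
    }
    // hardenReturn("rcx") yields {"popq  %rcx", "lfence", "jmpq  *%rcx"}.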
+  for (const MCPhysReg *PR = TRI->getCalleeSavedRegs(&MF); auto Reg = *PR; ++PR) +    UnclobberableGR64s.set(Reg); +  for (auto &Reg : X86::GR64RegClass) { +    if (!UnclobberableGR64s.test(Reg)) { +      ClobberReg = Reg; +      break; +    } +  } + +  if (ClobberReg != X86::NoRegister) { +    LLVM_DEBUG(dbgs() << "Selected register " +                      << Subtarget->getRegisterInfo()->getRegAsmName(ClobberReg) +                      << " to clobber\n"); +  } else { +    LLVM_DEBUG(dbgs() << "Could not find a register to clobber\n"); +  } + +  bool Modified = false; +  for (auto &MBB : MF) { +    if (MBB.empty()) +      continue; + +    MachineInstr &MI = MBB.back(); +    if (MI.getOpcode() != X86::RETQ) +      continue; + +    if (ClobberReg != X86::NoRegister) { +      MBB.erase_instr(&MI); +      BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::POP64r)) +          .addReg(ClobberReg, RegState::Define) +          .setMIFlag(MachineInstr::FrameDestroy); +      BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::LFENCE)); +      BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::JMP64r)) +          .addReg(ClobberReg); +    } else { +      // In case there is no available scratch register, we can still read from +      // RSP to assert that RSP points to a valid page. The write to RSP is +      // also helpful because it verifies that the stack's write permissions +      // are intact. +      MachineInstr *Fence = BuildMI(MBB, MI, DebugLoc(), TII->get(X86::LFENCE)); +      addRegOffset(BuildMI(MBB, Fence, DebugLoc(), TII->get(X86::SHL64mi)), +                   X86::RSP, false, 0) +          .addImm(0) +          ->addRegisterDead(X86::EFLAGS, TRI); +    } + +    ++NumFences; +    Modified = true; +  } + +  if (Modified) +    ++NumFunctionsMitigated; +  return Modified; +} + +INITIALIZE_PASS(X86LoadValueInjectionRetHardeningPass, PASS_KEY, +                "X86 LVI ret hardener", false, false) + +FunctionPass *llvm::createX86LoadValueInjectionRetHardeningPass() { +  return new X86LoadValueInjectionRetHardeningPass(); +} diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 7f49c6e861d4..f5caaaae4d84 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1220,8 +1220,8 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,        break;      case MachineOperand::MO_Register:        // FIXME: Add retpoline support and remove this. -      if (Subtarget->useRetpolineIndirectCalls()) -        report_fatal_error("Lowering register statepoints with retpoline not " +      if (Subtarget->useIndirectThunkCalls()) +        report_fatal_error("Lowering register statepoints with thunks not "                             "yet implemented.");        CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());        CallOpcode = X86::CALL64r; @@ -1399,9 +1399,9 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,      EmitAndCountInstruction(          MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));      // FIXME: Add retpoline support and remove this. 
-    if (Subtarget->useRetpolineIndirectCalls()) +    if (Subtarget->useIndirectThunkCalls())        report_fatal_error( -          "Lowering patchpoint with retpoline not yet implemented."); +          "Lowering patchpoint with thunks not yet implemented.");      EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));    } diff --git a/llvm/lib/Target/X86/X86RetpolineThunks.cpp b/llvm/lib/Target/X86/X86RetpolineThunks.cpp deleted file mode 100644 index 9085d7f068ac..000000000000 --- a/llvm/lib/Target/X86/X86RetpolineThunks.cpp +++ /dev/null @@ -1,286 +0,0 @@ -//======- X86RetpolineThunks.cpp - Construct retpoline thunks for x86  --=====// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// Pass that injects an MI thunk implementing a "retpoline". This is -/// a RET-implemented trampoline that is used to lower indirect calls in a way -/// that prevents speculation on some x86 processors and can be used to mitigate -/// security vulnerabilities due to targeted speculative execution and side -/// channels such as CVE-2017-5715. -/// -/// TODO(chandlerc): All of this code could use better comments and -/// documentation. -/// -//===----------------------------------------------------------------------===// - -#include "X86.h" -#include "X86InstrBuilder.h" -#include "X86Subtarget.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "x86-retpoline-thunks" - -static const char ThunkNamePrefix[] = "__llvm_retpoline_"; -static const char R11ThunkName[]    = "__llvm_retpoline_r11"; -static const char EAXThunkName[]    = "__llvm_retpoline_eax"; -static const char ECXThunkName[]    = "__llvm_retpoline_ecx"; -static const char EDXThunkName[]    = "__llvm_retpoline_edx"; -static const char EDIThunkName[]    = "__llvm_retpoline_edi"; - -namespace { -class X86RetpolineThunks : public MachineFunctionPass { -public: -  static char ID; - -  X86RetpolineThunks() : MachineFunctionPass(ID) {} - -  StringRef getPassName() const override { return "X86 Retpoline Thunks"; } - -  bool doInitialization(Module &M) override; -  bool runOnMachineFunction(MachineFunction &F) override; - -  void getAnalysisUsage(AnalysisUsage &AU) const override { -    MachineFunctionPass::getAnalysisUsage(AU); -    AU.addRequired<MachineModuleInfoWrapperPass>(); -    AU.addPreserved<MachineModuleInfoWrapperPass>(); -  } - -private: -  MachineModuleInfo *MMI = nullptr; -  const TargetMachine *TM = nullptr; -  bool Is64Bit = false; -  const X86Subtarget *STI = nullptr; -  const X86InstrInfo *TII = nullptr; - -  bool InsertedThunks = false; - -  void createThunkFunction(Module &M, StringRef Name); -  void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg); -  void populateThunk(MachineFunction &MF, unsigned Reg); -}; - -} // end anonymous namespace - -FunctionPass *llvm::createX86RetpolineThunksPass() { -  return new X86RetpolineThunks(); -} 
- -char X86RetpolineThunks::ID = 0; - -bool X86RetpolineThunks::doInitialization(Module &M) { -  InsertedThunks = false; -  return false; -} - -bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) { -  LLVM_DEBUG(dbgs() << getPassName() << '\n'); - -  TM = &MF.getTarget();; -  STI = &MF.getSubtarget<X86Subtarget>(); -  TII = STI->getInstrInfo(); -  Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64; - -  MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); -  Module &M = const_cast<Module &>(*MMI->getModule()); - -  // If this function is not a thunk, check to see if we need to insert -  // a thunk. -  if (!MF.getName().startswith(ThunkNamePrefix)) { -    // If we've already inserted a thunk, nothing else to do. -    if (InsertedThunks) -      return false; - -    // Only add a thunk if one of the functions has the retpoline feature -    // enabled in its subtarget, and doesn't enable external thunks. -    // FIXME: Conditionalize on indirect calls so we don't emit a thunk when -    // nothing will end up calling it. -    // FIXME: It's a little silly to look at every function just to enumerate -    // the subtargets, but eventually we'll want to look at them for indirect -    // calls, so maybe this is OK. -    if ((!STI->useRetpolineIndirectCalls() && -         !STI->useRetpolineIndirectBranches()) || -        STI->useRetpolineExternalThunk()) -      return false; - -    // Otherwise, we need to insert the thunk. -    // WARNING: This is not really a well behaving thing to do in a function -    // pass. We extract the module and insert a new function (and machine -    // function) directly into the module. -    if (Is64Bit) -      createThunkFunction(M, R11ThunkName); -    else -      for (StringRef Name : -           {EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName}) -        createThunkFunction(M, Name); -    InsertedThunks = true; -    return true; -  } - -  // If this *is* a thunk function, we need to populate it with the correct MI. -  if (Is64Bit) { -    assert(MF.getName() == "__llvm_retpoline_r11" && -           "Should only have an r11 thunk on 64-bit targets"); - -    // __llvm_retpoline_r11: -    //   callq .Lr11_call_target -    // .Lr11_capture_spec: -    //   pause -    //   lfence -    //   jmp .Lr11_capture_spec -    // .align 16 -    // .Lr11_call_target: -    //   movq %r11, (%rsp) -    //   retq -    populateThunk(MF, X86::R11); -  } else { -    // For 32-bit targets we need to emit a collection of thunks for various -    // possible scratch registers as well as a fallback that uses EDI, which is -    // normally callee saved. -    //   __llvm_retpoline_eax: -    //         calll .Leax_call_target -    //   .Leax_capture_spec: -    //         pause -    //         jmp .Leax_capture_spec -    //   .align 16 -    //   .Leax_call_target: -    //         movl %eax, (%esp)  # Clobber return addr -    //         retl -    // -    //   __llvm_retpoline_ecx: -    //   ... # Same setup -    //         movl %ecx, (%esp) -    //         retl -    // -    //   __llvm_retpoline_edx: -    //   ... # Same setup -    //         movl %edx, (%esp) -    //         retl -    // -    //   __llvm_retpoline_edi: -    //   ... 
# Same setup -    //         movl %edi, (%esp) -    //         retl -    if (MF.getName() == EAXThunkName) -      populateThunk(MF, X86::EAX); -    else if (MF.getName() == ECXThunkName) -      populateThunk(MF, X86::ECX); -    else if (MF.getName() == EDXThunkName) -      populateThunk(MF, X86::EDX); -    else if (MF.getName() == EDIThunkName) -      populateThunk(MF, X86::EDI); -    else -      llvm_unreachable("Invalid thunk name on x86-32!"); -  } - -  return true; -} - -void X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) { -  assert(Name.startswith(ThunkNamePrefix) && -         "Created a thunk with an unexpected prefix!"); - -  LLVMContext &Ctx = M.getContext(); -  auto Type = FunctionType::get(Type::getVoidTy(Ctx), false); -  Function *F = -      Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M); -  F->setVisibility(GlobalValue::HiddenVisibility); -  F->setComdat(M.getOrInsertComdat(Name)); - -  // Add Attributes so that we don't create a frame, unwind information, or -  // inline. -  AttrBuilder B; -  B.addAttribute(llvm::Attribute::NoUnwind); -  B.addAttribute(llvm::Attribute::Naked); -  F->addAttributes(llvm::AttributeList::FunctionIndex, B); - -  // Populate our function a bit so that we can verify. -  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F); -  IRBuilder<> Builder(Entry); - -  Builder.CreateRetVoid(); - -  // MachineFunctions/MachineBasicBlocks aren't created automatically for the -  // IR-level constructs we already made. Create them and insert them into the -  // module. -  MachineFunction &MF = MMI->getOrCreateMachineFunction(*F); -  MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry); - -  // Insert EntryMBB into MF. It's not in the module until we do this. -  MF.insert(MF.end(), EntryMBB); -} - -void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB, -                                                    unsigned Reg) { -  const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr; -  const unsigned SPReg = Is64Bit ? X86::RSP : X86::ESP; -  addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(MovOpc)), SPReg, false, 0) -      .addReg(Reg); -} - -void X86RetpolineThunks::populateThunk(MachineFunction &MF, -                                       unsigned Reg) { -  // Set MF properties. We never use vregs... -  MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); - -  // Grab the entry MBB and erase any other blocks. O0 codegen appears to -  // generate two bbs for the entry block. -  MachineBasicBlock *Entry = &MF.front(); -  Entry->clear(); -  while (MF.size() > 1) -    MF.erase(std::next(MF.begin())); - -  MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock()); -  MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock()); -  MCSymbol *TargetSym = MF.getContext().createTempSymbol(); -  MF.push_back(CaptureSpec); -  MF.push_back(CallTarget); - -  const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; -  const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL; - -  Entry->addLiveIn(Reg); -  BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym); - -  // The MIR verifier thinks that the CALL in the entry block will fall through -  // to CaptureSpec, so mark it as the successor. Technically, CaptureTarget is -  // the successor, but the MIR verifier doesn't know how to cope with that. 
-  Entry->addSuccessor(CaptureSpec); - -  // In the capture loop for speculation, we want to stop the processor from -  // speculating as fast as possible. On Intel processors, the PAUSE instruction -  // will block speculation without consuming any execution resources. On AMD -  // processors, the PAUSE instruction is (essentially) a nop, so we also use an -  // LFENCE instruction which they have advised will stop speculation as well -  // with minimal resource utilization. We still end the capture with a jump to -  // form an infinite loop to fully guarantee that no matter what implementation -  // of the x86 ISA, speculating this code path never escapes. -  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE)); -  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE)); -  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec); -  CaptureSpec->setHasAddressTaken(); -  CaptureSpec->addSuccessor(CaptureSpec); - -  CallTarget->addLiveIn(Reg); -  CallTarget->setHasAddressTaken(); -  CallTarget->setAlignment(Align(16)); -  insertRegReturnAddrClobber(*CallTarget, Reg); -  CallTarget->back().setPreInstrSymbol(MF, TargetSym); -  BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc)); -} diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index f4e8d30328ca..af5153243c8b 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -421,6 +421,16 @@ protected:    /// than emitting one inside the compiler.    bool UseRetpolineExternalThunk = false; +  /// Prevent generation of indirect call/branch instructions from memory, +  /// and force all indirect call/branch instructions from a register to be +  /// preceded by an LFENCE. Also decompose RET instructions into a +  /// POP+LFENCE+JMP sequence. +  bool UseLVIControlFlowIntegrity = false; + +  /// Insert LFENCE instructions to prevent data speculatively injected into +  /// loads from being used maliciously. +  bool UseLVILoadHardening = false; +    /// Use software floating point for code generation.    bool UseSoftFloat = false; @@ -707,8 +717,21 @@ public:      return UseRetpolineIndirectBranches;    }    bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; } + +  // These are generic getters that OR together all of the thunk types +  // supported by the subtarget. Therefore useIndirectThunk*() will return true +  // if any respective thunk feature is enabled. +  bool useIndirectThunkCalls() const { +    return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity(); +  } +  bool useIndirectThunkBranches() const { +    return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity(); +  } +    bool preferMaskRegisters() const { return PreferMaskRegisters; }    bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; } +  bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; } +  bool useLVILoadHardening() const { return UseLVILoadHardening; }    unsigned getPreferVectorWidth() const { return PreferVectorWidth; }    unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; } @@ -853,10 +876,10 @@ public:    /// Return true if the subtarget allows calls to immediate address.    bool isLegalToCallImmediateAddr() const; -  /// If we are using retpolines, we need to expand indirectbr to avoid it +  /// If we are using indirect thunks, we need to expand indirectbr to avoid it    /// lowering to an actual indirect jump.    
bool enableIndirectBrExpand() const override { -    return useRetpolineIndirectBranches(); +    return useIndirectThunkBranches();    }    /// Enable the MachineScheduler pass for all X86 subtargets. diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 7176e46f07b1..9f639ffa22ec 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -82,6 +82,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {    initializeX86SpeculativeLoadHardeningPassPass(PR);    initializeX86FlagsCopyLoweringPassPass(PR);    initializeX86CondBrFoldingPassPass(PR); +  initializeX86LoadValueInjectionLoadHardeningPassPass(PR); +  initializeX86LoadValueInjectionRetHardeningPassPass(PR);    initializeX86OptimizeLEAPassPass(PR);  } @@ -496,6 +498,10 @@ void X86PassConfig::addMachineSSAOptimization() {  void X86PassConfig::addPostRegAlloc() {    addPass(createX86FloatingPointStackifierPass()); +  if (getOptLevel() != CodeGenOpt::None) +    addPass(createX86LoadValueInjectionLoadHardeningPass()); +  else +    addPass(createX86LoadValueInjectionLoadHardeningUnoptimizedPass());  }  void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); } @@ -525,7 +531,7 @@ void X86PassConfig::addPreEmitPass2() {    const Triple &TT = TM->getTargetTriple();    const MCAsmInfo *MAI = TM->getMCAsmInfo(); -  addPass(createX86RetpolineThunksPass()); +  addPass(createX86IndirectThunksPass());    // Insert extra int3 instructions after trailing call instructions to avoid    // issues in the unwinder. @@ -542,6 +548,7 @@ void X86PassConfig::addPreEmitPass2() {    // Identify valid longjmp targets for Windows Control Flow Guard.    if (TT.isOSWindows())      addPass(createCFGuardLongjmpPass()); +  addPass(createX86LoadValueInjectionRetHardeningPass());  }  std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index ec976a971e3c..23561c25c50a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1768,7 +1768,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {      Constant *C2;      // C-(C2-X) --> X+(C-C2) -    if (match(Op1, m_Sub(m_Constant(C2), m_Value(X)))) +    if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))) && !isa<ConstantExpr>(C2))        return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2));      // C-(X+C2) --> (C-C2)-X | 
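On the InstCombine hunk above: C-(C2-X) --> X+(C-C2) is a plain two's-complement identity, and the new !isa<ConstantExpr>(C2) guard limits the fold to simple constants, presumably because ConstantExpr::getSub over a constant expression would just produce another opaque constant expression instead of a folded immediate. A quick numeric check of the identity itself (unsigned arithmetic to mirror wrap-around):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C = 10, C2 = 3;
      for (uint32_t X : {0u, 7u, 0xFFFFFFFFu})
        assert(C - (C2 - X) == X + (C - C2)); // e.g. 10 - (3 - 7) == 7 + 7 == 14
      return 0;
    }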
