Diffstat (limited to 'lib')
 lib/Analysis/ValueTracking.cpp                       | 63
 lib/CodeGen/BranchFolding.cpp                        |  5
 lib/CodeGen/MachineFunction.cpp                      | 14
 lib/CodeGen/MachineInstr.cpp                         |  6
 lib/CodeGen/ScheduleDAGInstrs.cpp                    | 63
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp     | 39
 lib/CodeGen/StackColoring.cpp                        | 58
 lib/IR/ConstantFold.cpp                              | 12
 lib/Object/COFFImportFile.cpp                        | 13
 lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp      | 12
 lib/Target/AArch64/AArch64FrameLowering.cpp          | 52
 lib/Target/AArch64/AArch64ISelLowering.cpp           |  4
 lib/Target/AArch64/AArch64InstrInfo.td               | 28
 lib/Target/AArch64/AArch64MacroFusion.cpp            |  4
 lib/Target/X86/X86ISelLowering.cpp                   | 11
 lib/Target/X86/X86InstrSSE.td                        |  3
 lib/Transforms/IPO/ArgumentPromotion.cpp             |  2
 lib/Transforms/Instrumentation/AddressSanitizer.cpp  |  3
 lib/Transforms/Scalar/SCCP.cpp                       |  3
 lib/Transforms/Utils/CloneModule.cpp                 |  3
 20 files changed, 264 insertions(+), 134 deletions(-)
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 9e042da8801d..439b21a81258 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -3277,6 +3277,69 @@ void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,    } while (!Worklist.empty());  } +/// This is the function that does the work of looking through basic +/// ptrtoint+arithmetic+inttoptr sequences. +static const Value *getUnderlyingObjectFromInt(const Value *V) { +  do { +    if (const Operator *U = dyn_cast<Operator>(V)) { +      // If we find a ptrtoint, we can transfer control back to the +      // regular getUnderlyingObjectFromInt. +      if (U->getOpcode() == Instruction::PtrToInt) +        return U->getOperand(0); +      // If we find an add of a constant, a multiplied value, or a phi, it's +      // likely that the other operand will lead us to the base +      // object. We don't have to worry about the case where the +      // object address is somehow being computed by the multiply, +      // because our callers only care when the result is an +      // identifiable object. +      if (U->getOpcode() != Instruction::Add || +          (!isa<ConstantInt>(U->getOperand(1)) && +           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul && +           !isa<PHINode>(U->getOperand(1)))) +        return V; +      V = U->getOperand(0); +    } else { +      return V; +    } +    assert(V->getType()->isIntegerTy() && "Unexpected operand type!"); +  } while (true); +} + +/// This is a wrapper around GetUnderlyingObjects and adds support for basic +/// ptrtoint+arithmetic+inttoptr sequences. +void llvm::getUnderlyingObjectsForCodeGen(const Value *V, +                          SmallVectorImpl<Value *> &Objects, +                          const DataLayout &DL) { +  SmallPtrSet<const Value *, 16> Visited; +  SmallVector<const Value *, 4> Working(1, V); +  do { +    V = Working.pop_back_val(); + +    SmallVector<Value *, 4> Objs; +    GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL); + +    for (Value *V : Objs) { +      if (!Visited.insert(V).second) +        continue; +      if (Operator::getOpcode(V) == Instruction::IntToPtr) { +        const Value *O = +          getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0)); +        if (O->getType()->isPointerTy()) { +          Working.push_back(O); +          continue; +        } +      } +      // If GetUnderlyingObjects fails to find an identifiable object, +      // getUnderlyingObjectsForCodeGen also fails for safety. +      if (!isIdentifiedObject(V)) { +        Objects.clear(); +        return; +      } +      Objects.push_back(const_cast<Value *>(V)); +    } +  } while (!Working.empty()); +} +  /// Return true if the only users of this pointer are lifetime markers.  bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {    for (const User *U : V->users()) { diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 530954976292..3c439e66944b 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -1475,13 +1475,14 @@ ReoptimizeBlock:        bool PredAnalyzable =            !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true); -      if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB) { +      if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB && +          PredTBB != PredFBB) {          // The predecessor has a conditional branch to this block which consists          // of only a tail call. 
Try to fold the tail call into the conditional          // branch.          if (TII->canMakeTailCallConditional(PredCond, TailCall)) {            // TODO: It would be nice if analyzeBranch() could provide a pointer -          // to the branch insturction so replaceBranchWithTailCall() doesn't +          // to the branch instruction so replaceBranchWithTailCall() doesn't            // have to search for it.            TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);            ++NumTailCalls; diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index f88e175a9776..742b095d955e 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -330,6 +330,20 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,                                 MMO->getOrdering(), MMO->getFailureOrdering());  } +MachineMemOperand * +MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, +                                      const AAMDNodes &AAInfo) { +  MachinePointerInfo MPI = MMO->getValue() ? +             MachinePointerInfo(MMO->getValue(), MMO->getOffset()) : +             MachinePointerInfo(MMO->getPseudoValue(), MMO->getOffset()); + +  return new (Allocator) +             MachineMemOperand(MPI, MMO->getFlags(), MMO->getSize(), +                               MMO->getBaseAlignment(), AAInfo, +                               MMO->getRanges(), MMO->getSyncScopeID(), +                               MMO->getOrdering(), MMO->getFailureOrdering()); +} +  MachineInstr::mmo_iterator  MachineFunction::allocateMemRefsArray(unsigned long Num) {    return Allocator.Allocate<MachineMemOperand *>(Num); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index afea5575a3ae..535757ed87c1 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -578,10 +578,8 @@ bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,    if (BasePtr == nullptr)      return false; -  return isDereferenceableAndAlignedPointer(BasePtr, 1, -                                            APInt(DL.getPointerSize(), -                                                  Offset + Size), -                                            DL); +  return isDereferenceableAndAlignedPointer( +      BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL);  }  /// getConstantPool - Return a MachinePointerInfo record that refers to the diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index ccd937950a74..99baa07390eb 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -121,63 +121,6 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,    SchedModel.init(ST.getSchedModel(), &ST, TII);  } -/// This is the function that does the work of looking through basic -/// ptrtoint+arithmetic+inttoptr sequences. -static const Value *getUnderlyingObjectFromInt(const Value *V) { -  do { -    if (const Operator *U = dyn_cast<Operator>(V)) { -      // If we find a ptrtoint, we can transfer control back to the -      // regular getUnderlyingObjectFromInt. -      if (U->getOpcode() == Instruction::PtrToInt) -        return U->getOperand(0); -      // If we find an add of a constant, a multiplied value, or a phi, it's -      // likely that the other operand will lead us to the base -      // object. 
We don't have to worry about the case where the -      // object address is somehow being computed by the multiply, -      // because our callers only care when the result is an -      // identifiable object. -      if (U->getOpcode() != Instruction::Add || -          (!isa<ConstantInt>(U->getOperand(1)) && -           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul && -           !isa<PHINode>(U->getOperand(1)))) -        return V; -      V = U->getOperand(0); -    } else { -      return V; -    } -    assert(V->getType()->isIntegerTy() && "Unexpected operand type!"); -  } while (true); -} - -/// This is a wrapper around GetUnderlyingObjects and adds support for basic -/// ptrtoint+arithmetic+inttoptr sequences. -static void getUnderlyingObjects(const Value *V, -                                 SmallVectorImpl<Value *> &Objects, -                                 const DataLayout &DL) { -  SmallPtrSet<const Value *, 16> Visited; -  SmallVector<const Value *, 4> Working(1, V); -  do { -    V = Working.pop_back_val(); - -    SmallVector<Value *, 4> Objs; -    GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL); - -    for (Value *V : Objs) { -      if (!Visited.insert(V).second) -        continue; -      if (Operator::getOpcode(V) == Instruction::IntToPtr) { -        const Value *O = -          getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0)); -        if (O->getType()->isPointerTy()) { -          Working.push_back(O); -          continue; -        } -      } -      Objects.push_back(const_cast<Value *>(V)); -    } -  } while (!Working.empty()); -} -  /// If this machine instr has memory reference information and it can be tracked  /// to a normal reference to a known object, return the Value for that object.  static void getUnderlyingObjectsForInstr(const MachineInstr *MI, @@ -208,12 +151,10 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI,          Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));        } else if (const Value *V = MMO->getValue()) {          SmallVector<Value *, 4> Objs; -        getUnderlyingObjects(V, Objs, DL); +        getUnderlyingObjectsForCodeGen(V, Objs, DL);          for (Value *V : Objs) { -          if (!isIdentifiedObject(V)) -            return false; - +          assert(isIdentifiedObject(V));            Objects.push_back(UnderlyingObjectsVector::value_type(V, true));          }        } else diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 41c3f5f235ea..127312076207 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -99,6 +99,27 @@ LimitFPPrecision("limit-float-precision",  // store [4096 x i8] %data, [4096 x i8]* %buffer  static const unsigned MaxParallelChains = 64; +// True if the Value passed requires ABI mangling as it is a parameter to a +// function or a return value from a function which is not an intrinsic. 
+static bool isABIRegCopy(const Value * V) { +  const bool IsRetInst = V && isa<ReturnInst>(V); +  const bool IsCallInst = V && isa<CallInst>(V); +  const bool IsInLineAsm = +      IsCallInst && static_cast<const CallInst *>(V)->isInlineAsm(); +  const bool IsIndirectFunctionCall = +      IsCallInst && !IsInLineAsm && +      !static_cast<const CallInst *>(V)->getCalledFunction(); +  // It is possible that the call instruction is an inline asm statement or an +  // indirect function call in which case the return value of +  // getCalledFunction() would be nullptr. +  const bool IsInstrinsicCall = +      IsCallInst && !IsInLineAsm && !IsIndirectFunctionCall && +      static_cast<const CallInst *>(V)->getCalledFunction()->getIntrinsicID() != +          Intrinsic::not_intrinsic; + +  return IsRetInst || (IsCallInst && (!IsInLineAsm && !IsInstrinsicCall)); +} +  static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,                                        const SDValue *Parts, unsigned NumParts,                                        MVT PartVT, EVT ValueVT, const Value *V, @@ -1026,13 +1047,9 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {    if (It != FuncInfo.ValueMap.end()) {      unsigned InReg = It->second; -    bool IsABIRegCopy = -        V && ((isa<CallInst>(V) && -               !(static_cast<const CallInst *>(V))->isInlineAsm()) || -              isa<ReturnInst>(V));      RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), -                     DAG.getDataLayout(), InReg, Ty, IsABIRegCopy); +                     DAG.getDataLayout(), InReg, Ty, isABIRegCopy(V));      SDValue Chain = DAG.getEntryNode();      Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,                                   V); @@ -1221,13 +1238,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {    // If this is an instruction which fast-isel has deferred, select it now.    if (const Instruction *Inst = dyn_cast<Instruction>(V)) {      unsigned InReg = FuncInfo.InitializeRegForValue(Inst); -    bool IsABIRegCopy = -        V && ((isa<CallInst>(V) && -               !(static_cast<const CallInst *>(V))->isInlineAsm()) || -              isa<ReturnInst>(V));      RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, -                     Inst->getType(), IsABIRegCopy); +                     Inst->getType(), isABIRegCopy(V));      SDValue Chain = DAG.getEntryNode();      return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);    } @@ -8281,13 +8294,9 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {    const TargetLowering &TLI = DAG.getTargetLoweringInfo();    // If this is an InlineAsm we have to match the registers required, not the    // notional registers required by the type. 
-  bool IsABIRegCopy = -    V && ((isa<CallInst>(V) && -           !(static_cast<const CallInst *>(V))->isInlineAsm()) || -          isa<ReturnInst>(V));    RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, -                   V->getType(), IsABIRegCopy); +                   V->getType(), isABIRegCopy(V));    SDValue Chain = DAG.getEntryNode();    ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 6bac39c7ee77..e5fc5402cb41 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -37,6 +37,7 @@  #include "llvm/CodeGen/MachineRegisterInfo.h"  #include "llvm/CodeGen/Passes.h"  #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAGNodes.h"  #include "llvm/CodeGen/SlotIndexes.h"  #include "llvm/CodeGen/StackProtector.h"  #include "llvm/CodeGen/WinEHFuncInfo.h" @@ -889,6 +890,10 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {    // Keep a list of *allocas* which need to be remapped.    DenseMap<const AllocaInst*, const AllocaInst*> Allocas; + +  // Keep a list of allocas which has been affected by the remap. +  SmallPtrSet<const AllocaInst*, 32> MergedAllocas; +    for (const std::pair<int, int> &SI : SlotRemap) {      const AllocaInst *From = MFI->getObjectAllocation(SI.first);      const AllocaInst *To = MFI->getObjectAllocation(SI.second); @@ -908,6 +913,10 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {        Inst = Cast;      } +    // We keep both slots to maintain AliasAnalysis metadata later. +    MergedAllocas.insert(From); +    MergedAllocas.insert(To); +      // Allow the stack protector to adjust its value map to account for the      // upcoming replacement.      SP->adjustForColoring(From, To); @@ -939,13 +948,6 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {        // Update the MachineMemOperand to use the new alloca.        for (MachineMemOperand *MMO : I.memoperands()) { -        // FIXME: In order to enable the use of TBAA when using AA in CodeGen, -        // we'll also need to update the TBAA nodes in MMOs with values -        // derived from the merged allocas. When doing this, we'll need to use -        // the same variant of GetUnderlyingObjects that is used by the -        // instruction scheduler (that can look through ptrtoint/inttoptr -        // pairs). -          // We've replaced IR-level uses of the remapped allocas, so we only          // need to replace direct uses here.          const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue()); @@ -997,6 +999,48 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {          MO.setIndex(ToSlot);          FixedInstr++;        } + +      // We adjust AliasAnalysis information for merged stack slots. +      MachineSDNode::mmo_iterator NewMemOps = +          MF->allocateMemRefsArray(I.getNumMemOperands()); +      unsigned MemOpIdx = 0; +      bool ReplaceMemOps = false; +      for (MachineMemOperand *MMO : I.memoperands()) { +        // If this memory location can be a slot remapped here, +        // we remove AA information. 
+        bool MayHaveConflictingAAMD = false; +        if (MMO->getAAInfo()) { +          if (const Value *MMOV = MMO->getValue()) { +            SmallVector<Value *, 4> Objs; +            getUnderlyingObjectsForCodeGen(MMOV, Objs, MF->getDataLayout()); + +            if (Objs.empty()) +              MayHaveConflictingAAMD = true; +            else +              for (Value *V : Objs) { +                // If this memory location comes from a known stack slot +                // that is not remapped, we continue checking. +                // Otherwise, we need to invalidate AA infomation. +                const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V); +                if (AI && MergedAllocas.count(AI)) { +                  MayHaveConflictingAAMD = true; +                  break; +                } +              } +          } +        } +        if (MayHaveConflictingAAMD) { +          NewMemOps[MemOpIdx++] = MF->getMachineMemOperand(MMO, AAMDNodes()); +          ReplaceMemOps = true; +        } +        else +          NewMemOps[MemOpIdx++] = MMO; +      } + +      // If any memory operand is updated, set memory references of +      // this instruction. +      if (ReplaceMemOps) +        I.setMemRefs(std::make_pair(NewMemOps, I.getNumMemOperands()));      }    // Update the location of C++ catch objects for the MSVC personality routine. diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index 23ccd8d4cf42..311b0a76ce8a 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -2097,15 +2097,19 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,        // Subsequent evaluation would get confused and produce erroneous results.        //        // The following prohibits such a GEP from being formed by checking to see -      // if the index is in-range with respect to an array or vector. +      // if the index is in-range with respect to an array. +      // TODO: This code may be extended to handle vectors as well.        bool PerformFold = false;        if (Idx0->isNullValue())          PerformFold = true;        else if (LastI.isSequential())          if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx0)) -          PerformFold = -              !LastI.isBoundedSequential() || -              isIndexInRangeOfArrayType(LastI.getSequentialNumElements(), CI); +          PerformFold = (!LastI.isBoundedSequential() || +                         isIndexInRangeOfArrayType( +                             LastI.getSequentialNumElements(), CI)) && +                        !CE->getOperand(CE->getNumOperands() - 1) +                             ->getType() +                             ->isVectorTy();        if (PerformFold) {          SmallVector<Value*, 16> NewIndices; diff --git a/lib/Object/COFFImportFile.cpp b/lib/Object/COFFImportFile.cpp index d1f46fdfa292..a515bc8ad16d 100644 --- a/lib/Object/COFFImportFile.cpp +++ b/lib/Object/COFFImportFile.cpp @@ -542,15 +542,12 @@ NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym,    SymbolTable[2].Name.Offset.Offset = sizeof(uint32_t);    //__imp_ String Table -  if (Imp) { -    SymbolTable[3].Name.Offset.Offset = sizeof(uint32_t) + Sym.size() + 7; -    writeStringTable(Buffer, {std::string("__imp_").append(Sym), -                              std::string("__imp_").append(Weak)}); -  } else { -    SymbolTable[3].Name.Offset.Offset = sizeof(uint32_t) + Sym.size() + 1; -    writeStringTable(Buffer, {Sym, Weak}); -  } +  StringRef Prefix = Imp ? 
"__imp_" : ""; +  SymbolTable[3].Name.Offset.Offset = +      sizeof(uint32_t) + Sym.size() + Prefix.size() + 1;    append(Buffer, SymbolTable); +  writeStringTable(Buffer, {(Prefix + Sym).str(), +                            (Prefix + Weak).str()});    // Copied here so we can still use writeStringTable    char *Buf = Alloc.Allocate<char>(Buffer.size()); diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 160107cd7e2b..d52cd84246a1 100644 --- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -946,6 +946,18 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,    case AArch64::CMP_SWAP_128:      return expandCMP_SWAP_128(MBB, MBBI, NextMBBI); +  case AArch64::AESMCrrTied: +  case AArch64::AESIMCrrTied: { +    MachineInstrBuilder MIB = +    BuildMI(MBB, MBBI, MI.getDebugLoc(), +            TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr : +                                                      AArch64::AESIMCrr)) +      .add(MI.getOperand(0)) +      .add(MI.getOperand(1)); +    transferImpOps(MI, MIB, MIB); +    MI.eraseFromParent(); +    return true; +   }    }    return false;  } diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 4907d082eda0..7c6a99990406 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -506,19 +506,23 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,      return;    } -  auto CSStackSize = AFI->getCalleeSavedStackSize(); +  bool IsWin64 = +      Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()); +  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + +  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;    // All of the remaining stack allocations are for locals. -  AFI->setLocalStackSize(NumBytes - CSStackSize); +  AFI->setLocalStackSize(NumBytes - PrologueSaveSize);    bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);    if (CombineSPBump) {      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,                      MachineInstr::FrameSetup);      NumBytes = 0; -  } else if (CSStackSize != 0) { +  } else if (PrologueSaveSize != 0) {      MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII, -                                                     -CSStackSize); -    NumBytes -= CSStackSize; +                                                     -PrologueSaveSize); +    NumBytes -= PrologueSaveSize;    }    assert(NumBytes >= 0 && "Negative stack allocation size!?"); @@ -532,8 +536,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,      ++MBBI;    }    if (HasFP) { -    // Only set up FP if we actually need to. Frame pointer is fp = sp - 16. -    int FPOffset = CSStackSize - 16; +    // Only set up FP if we actually need to. Frame pointer is fp = +    // sp - fixedobject - 16. +    int FPOffset = AFI->getCalleeSavedStackSize() - 16;      if (CombineSPBump)        FPOffset += AFI->getLocalStackSize(); @@ -672,8 +677,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,      if (HasFP) {        // Define the current CFA rule to use the provided FP.        
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true); -      unsigned CFIIndex = MF.addFrameInst( -          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth)); +      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( +          nullptr, Reg, 2 * StackGrowth - FixedObject));        BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))            .addCFIIndex(CFIIndex)            .setMIFlags(MachineInstr::FrameSetup); @@ -759,12 +764,16 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,    // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps    // it as the 2nd argument of AArch64ISD::TC_RETURN. -  auto CSStackSize = AFI->getCalleeSavedStackSize(); +  bool IsWin64 = +      Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()); +  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + +  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;    bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); -  if (!CombineSPBump && CSStackSize != 0) +  if (!CombineSPBump && PrologueSaveSize != 0)      convertCalleeSaveRestoreToSPPrePostIncDec( -        MBB, std::prev(MBB.getFirstTerminator()), DL, TII, CSStackSize); +        MBB, std::prev(MBB.getFirstTerminator()), DL, TII, PrologueSaveSize);    // Move past the restores of the callee-saved registers.    MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator(); @@ -786,7 +795,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,      return;    } -  NumBytes -= CSStackSize; +  NumBytes -= PrologueSaveSize;    assert(NumBytes >= 0 && "Negative stack allocation size!?");    if (!hasFP(MF)) { @@ -796,7 +805,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,      if (RedZone && ArgumentPopSize == 0)        return; -    bool NoCalleeSaveRestore = CSStackSize == 0; +    bool NoCalleeSaveRestore = PrologueSaveSize == 0;      int StackRestoreBytes = RedZone ? 0 : NumBytes;      if (NoCalleeSaveRestore)        StackRestoreBytes += ArgumentPopSize; @@ -815,7 +824,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,    // be able to save any instructions.    if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())      emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, -                    -CSStackSize + 16, TII, MachineInstr::FrameDestroy); +                    -AFI->getCalleeSavedStackSize() + 16, TII, +                    MachineInstr::FrameDestroy);    else if (NumBytes)      emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,                      MachineInstr::FrameDestroy); @@ -845,7 +855,11 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,    const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(        MF.getSubtarget().getRegisterInfo());    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); -  int FPOffset = MFI.getObjectOffset(FI) + 16; +  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); +  bool IsWin64 = +      Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()); +  unsigned FixedObject = IsWin64 ? 
alignTo(AFI->getVarArgsGPRSize(), 16) : 0; +  int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;    int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();    bool isFixed = MFI.isFixedObjectIndex(FI); @@ -956,12 +970,6 @@ static void computeCalleeSaveRegisterPairs(           "Odd number of callee-saved regs to spill!");    int Offset = AFI->getCalleeSavedStackSize(); -  unsigned GPRSaveSize = AFI->getVarArgsGPRSize(); -  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); -  bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()); -  if (IsWin64) -    Offset -= alignTo(GPRSaveSize, 16); -    for (unsigned i = 0; i < Count; ++i) {      RegPairInfo RPI;      RPI.Reg1 = CSI[i].getReg(); diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 8c30c4410c09..9d879886d39d 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9586,8 +9586,8 @@ static bool performTBISimplification(SDValue Addr,                                       SelectionDAG &DAG) {    APInt DemandedMask = APInt::getLowBitsSet(64, 56);    KnownBits Known; -  TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(), -                                        DCI.isBeforeLegalizeOps()); +  TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), +                                        !DCI.isBeforeLegalizeOps());    const TargetLowering &TLI = DAG.getTargetLoweringInfo();    if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) {      DCI.CommitTargetLoweringOpt(TLO); diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 0dcf07f98412..5049a39814f1 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -37,6 +37,9 @@ def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,                                   AssemblerPredicate<"FeatureFullFP16", "fullfp16">;  def HasSPE           : Predicate<"Subtarget->hasSPE()">,                                   AssemblerPredicate<"FeatureSPE", "spe">; +def HasFuseAES       : Predicate<"Subtarget->hasFuseAES()">, +                                 AssemblerPredicate<"FeatureFuseAES", +                                 "fuse-aes">;  def HasSVE           : Predicate<"Subtarget->hasSVE()">,                                   AssemblerPredicate<"FeatureSVE", "sve">; @@ -5304,6 +5307,31 @@ def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;  def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;  def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>; +// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required +// for AES fusion on some CPUs. +let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in { +def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">, +                        Sched<[WriteV]>; +def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">, +                         Sched<[WriteV]>; +} + +// Only use constrained versions of AES(I)MC instructions if they are paired with +// AESE/AESD. 
+def : Pat<(v16i8 (int_aarch64_crypto_aesmc +            (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1), +                                            (v16i8 V128:$src2))))), +          (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1), +                                             (v16i8 V128:$src2)))))>, +          Requires<[HasFuseAES]>; + +def : Pat<(v16i8 (int_aarch64_crypto_aesimc +            (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1), +                                            (v16i8 V128:$src2))))), +          (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1), +                                              (v16i8 V128:$src2)))))>, +          Requires<[HasFuseAES]>; +  def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;  def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;  def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>; diff --git a/lib/Target/AArch64/AArch64MacroFusion.cpp b/lib/Target/AArch64/AArch64MacroFusion.cpp index ccc9d2ad1b48..963cfadc54fd 100644 --- a/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -118,11 +118,13 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,      // Fuse AES crypto operations.      switch(SecondOpcode) {      // AES encode. -    case AArch64::AESMCrr : +    case AArch64::AESMCrr: +    case AArch64::AESMCrrTied:        return FirstOpcode == AArch64::AESErr ||               FirstOpcode == AArch64::INSTRUCTION_LIST_END;      // AES decode.      case AArch64::AESIMCrr: +    case AArch64::AESIMCrrTied:        return FirstOpcode == AArch64::AESDrr ||               FirstOpcode == AArch64::INSTRUCTION_LIST_END;      } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ba8eb8656585..7563bffd8f87 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3984,6 +3984,13 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,    if (Offset != MFI.getObjectOffset(FI))      return false; +  // If this is not byval, check that the argument stack object is immutable. +  // inalloca and argument copy elision can create mutable argument stack +  // objects. Byval objects can be mutated, but a byval call intends to pass the +  // mutated memory. +  if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI)) +    return false; +    if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {      // If the argument location is wider than the argument type, check that any      // extension flags match. 
@@ -30605,8 +30612,8 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,      assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");      APInt DemandedMask(APInt::getSignMask(BitWidth));      KnownBits Known; -    TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(), -                                          DCI.isBeforeLegalizeOps()); +    TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), +                                          !DCI.isBeforeLegalizeOps());      if (TLI.ShrinkDemandedConstant(Cond, DemandedMask, TLO) ||          TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO)) {        // If we changed the computation somewhere in the DAG, this change will diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index fe87bbd99473..650e4fc8716c 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3697,8 +3697,7 @@ let SchedRW = [WriteNop] in {  // Pause. This "instruction" is encoded as "rep; nop", so even though it  // was introduced with SSE2, it's backward compatible.  def PAUSE : I<0x90, RawFrm, (outs), (ins), -              "pause", [(int_x86_sse2_pause)], IIC_SSE_PAUSE>, -              OBXS, Requires<[HasSSE2]>; +              "pause", [(int_x86_sse2_pause)], IIC_SSE_PAUSE>, OBXS;  }  let SchedRW = [WriteFence] in { diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 53223ab44316..72bae203ee94 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -356,7 +356,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,        // Just add all the struct element types.        Type *AgTy = cast<PointerType>(I->getType())->getElementType();        Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr, -                                        "", InsertPt); +                                        I->getParamAlignment(), "", InsertPt);        StructType *STy = cast<StructType>(AgTy);        Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0),                          nullptr}; diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 057f746e052d..f8d255273b2a 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -756,7 +756,8 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {    bool runOnFunction() {      if (!ClStack) return false; -    if (ClRedzoneByvalArgs) copyArgsPassedByValToAllocas(); +    if (ClRedzoneByvalArgs && Mapping.Offset != kDynamicShadowSentinel) +      copyArgsPassedByValToAllocas();      // Collect alloca, ret, lifetime instructions etc.      for (BasicBlock *BB : depth_first(&F.getEntryBlock())) visit(*BB); diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index a738ebb4607e..4822cf7cce0f 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -1790,7 +1790,8 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,    // variables that do not have their 'addresses taken'.  If they don't have    // their addresses taken, we can propagate constants through them.    
for (GlobalVariable &G : M.globals()) -    if (!G.isConstant() && G.hasLocalLinkage() && !AddressIsTaken(&G)) +    if (!G.isConstant() && G.hasLocalLinkage() && +        G.hasDefinitiveInitializer() && !AddressIsTaken(&G))        Solver.TrackValueOfGlobalVariable(&G);    // Solve for constants. diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index d27cb45c7d7f..e5392b53050d 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -132,7 +132,8 @@ std::unique_ptr<Module> llvm::CloneModule(      SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;      I->getAllMetadata(MDs);      for (auto MD : MDs) -      GV->addMetadata(MD.first, *MapMetadata(MD.second, VMap)); +      GV->addMetadata(MD.first, +                      *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs));      copyComdat(GV, &*I);    }  | 
