diff options
Diffstat (limited to 'contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp')
| -rw-r--r-- | contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp | 612 | 
1 files changed, 611 insertions, 1 deletions
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp index 4e85708efafc..568b278dd47c 100644 --- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -24,12 +24,13 @@  #include "llvm/Analysis/CFG.h"  #include "llvm/Analysis/InstructionSimplify.h"  #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h"  #include "llvm/Analysis/ProfileSummaryInfo.h"  #include "llvm/Analysis/TargetLibraryInfo.h"  #include "llvm/Analysis/TargetTransformInfo.h"  #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/MemoryBuiltins.h"  #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/Passes.h"  #include "llvm/IR/CallSite.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/DataLayout.h" @@ -60,6 +61,7 @@  #include "llvm/Transforms/Utils/Local.h"  #include "llvm/Transforms/Utils/SimplifyLibCalls.h"  #include "llvm/Transforms/Utils/ValueMapper.h" +  using namespace llvm;  using namespace llvm::PatternMatch; @@ -84,6 +86,12 @@ STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");  STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");  STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed"); +STATISTIC(NumMemCmpCalls, "Number of memcmp calls"); +STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size"); +STATISTIC(NumMemCmpGreaterThanMax, +          "Number of memcmp calls with size greater than max size"); +STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls"); +  static cl::opt<bool> DisableBranchOpts(    "disable-cgp-branch-opts", cl::Hidden, cl::init(false),    cl::desc("Disable branch optimizations in CodeGenPrepare")); @@ -144,6 +152,11 @@ EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,      cl::desc("Enable merging of redundant sexts when one is dominating"      " the other."), cl::init(true)); +static cl::opt<unsigned> MemCmpNumLoadsPerBlock( +    "memcmp-num-loads-per-block", cl::Hidden, cl::init(1), +    cl::desc("The number of loads per basic block for inline expansion of " +             "memcmp that is only being compared against zero.")); +  namespace {  typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;  typedef PointerIntPair<Type *, 1, bool> TypeIsSExt; @@ -1629,6 +1642,593 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,    return true;  } +// This class provides helper functions to expand a memcmp library call into an +// inline expansion. +class MemCmpExpansion { +  struct ResultBlock { +    BasicBlock *BB; +    PHINode *PhiSrc1; +    PHINode *PhiSrc2; +    ResultBlock(); +  }; + +  CallInst *CI; +  ResultBlock ResBlock; +  unsigned MaxLoadSize; +  unsigned NumBlocks; +  unsigned NumBlocksNonOneByte; +  unsigned NumLoadsPerBlock; +  std::vector<BasicBlock *> LoadCmpBlocks; +  BasicBlock *EndBlock; +  PHINode *PhiRes; +  bool IsUsedForZeroCmp; +  int calculateNumBlocks(unsigned Size); +  void createLoadCmpBlocks(); +  void createResultBlock(); +  void setupResultBlockPHINodes(); +  void setupEndBlockPHINodes(); +  void emitLoadCompareBlock(unsigned Index, int LoadSize, int GEPIndex, +                            bool IsLittleEndian); +  void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size, +                                         unsigned &NumBytesProcessed); +  void emitLoadCompareByteBlock(unsigned Index, int GEPIndex); +  void emitMemCmpResultBlock(bool IsLittleEndian); +  Value *getMemCmpExpansionZeroCase(unsigned Size, bool IsLittleEndian); +  unsigned getLoadSize(unsigned Size); +  unsigned getNumLoads(unsigned Size); + +public: +  MemCmpExpansion(CallInst *CI, unsigned MaxLoadSize, +                  unsigned NumLoadsPerBlock); +  Value *getMemCmpExpansion(bool IsLittleEndian); +}; + +MemCmpExpansion::ResultBlock::ResultBlock() +    : BB(nullptr), PhiSrc1(nullptr), PhiSrc2(nullptr) {} + +// Initialize the basic block structure required for expansion of memcmp call +// with given maximum load size and memcmp size parameter. +// This structure includes: +// 1. A list of load compare blocks - LoadCmpBlocks. +// 2. An EndBlock, split from original instruction point, which is the block to +// return from. +// 3. ResultBlock, block to branch to for early exit when a +// LoadCmpBlock finds a difference. +MemCmpExpansion::MemCmpExpansion(CallInst *CI, unsigned MaxLoadSize, +                                 unsigned NumLoadsPerBlock) +    : CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(NumLoadsPerBlock) { + +  IRBuilder<> Builder(CI->getContext()); + +  BasicBlock *StartBlock = CI->getParent(); +  EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); +  setupEndBlockPHINodes(); +  IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); + +  ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2)); +  uint64_t Size = SizeCast->getZExtValue(); + +  // Calculate how many load compare blocks are required for an expansion of +  // given Size. +  NumBlocks = calculateNumBlocks(Size); +  createResultBlock(); + +  // If return value of memcmp is not used in a zero equality, we need to +  // calculate which source was larger. The calculation requires the +  // two loaded source values of each load compare block. +  // These will be saved in the phi nodes created by setupResultBlockPHINodes. +  if (!IsUsedForZeroCmp) +    setupResultBlockPHINodes(); + +  // Create the number of required load compare basic blocks. +  createLoadCmpBlocks(); + +  // Update the terminator added by splitBasicBlock to branch to the first +  // LoadCmpBlock. +  Builder.SetCurrentDebugLocation(CI->getDebugLoc()); +  StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); +} + +void MemCmpExpansion::createLoadCmpBlocks() { +  for (unsigned i = 0; i < NumBlocks; i++) { +    BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb", +                                        EndBlock->getParent(), EndBlock); +    LoadCmpBlocks.push_back(BB); +  } +} + +void MemCmpExpansion::createResultBlock() { +  ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block", +                                   EndBlock->getParent(), EndBlock); +} + +// This function creates the IR instructions for loading and comparing 1 byte. +// It loads 1 byte from each source of the memcmp paramters with the given +// GEPIndex. It then subtracts the two loaded values and adds this result to the +// final phi node for selecting the memcmp result. +void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, int GEPIndex) { +  IRBuilder<> Builder(CI->getContext()); + +  Value *Source1 = CI->getArgOperand(0); +  Value *Source2 = CI->getArgOperand(1); + +  Builder.SetInsertPoint(LoadCmpBlocks[Index]); +  Type *LoadSizeType = Type::getInt8Ty(CI->getContext()); +  // Cast source to LoadSizeType* +  if (Source1->getType() != LoadSizeType) +    Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); +  if (Source2->getType() != LoadSizeType) +    Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + +  // Get the base address using the GEPIndex +  if (GEPIndex != 0) { +    Source1 = Builder.CreateGEP(LoadSizeType, Source1, +                                ConstantInt::get(LoadSizeType, GEPIndex)); +    Source2 = Builder.CreateGEP(LoadSizeType, Source2, +                                ConstantInt::get(LoadSizeType, GEPIndex)); +  } + +  Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); +  Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + +  LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext())); +  LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext())); +  Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); + +  PhiRes->addIncoming(Diff, LoadCmpBlocks[Index]); + +  if (Index < (LoadCmpBlocks.size() - 1)) { +    // Early exit branch if difference found to EndBlock, otherwise continue to +    // next LoadCmpBlock + +    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, +                                    ConstantInt::get(Diff->getType(), 0)); +    BranchInst *CmpBr = +        BranchInst::Create(EndBlock, LoadCmpBlocks[Index + 1], Cmp); +    Builder.Insert(CmpBr); +  } else { +    // The last block has an unconditional branch to EndBlock +    BranchInst *CmpBr = BranchInst::Create(EndBlock); +    Builder.Insert(CmpBr); +  } +} + +unsigned MemCmpExpansion::getNumLoads(unsigned Size) { +  return (Size / MaxLoadSize) + countPopulation(Size % MaxLoadSize); +} + +unsigned MemCmpExpansion::getLoadSize(unsigned Size) { +  return MinAlign(PowerOf2Floor(Size), MaxLoadSize); +} + +void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( +    unsigned Index, unsigned Size, unsigned &NumBytesProcessed) { + +  IRBuilder<> Builder(CI->getContext()); + +  std::vector<Value *> XorList, OrList; +  Value *Diff; + +  unsigned RemainingBytes = Size - NumBytesProcessed; +  unsigned NumLoadsRemaining = getNumLoads(RemainingBytes); +  unsigned NumLoads = std::min(NumLoadsRemaining, NumLoadsPerBlock); + +  Builder.SetInsertPoint(LoadCmpBlocks[Index]); + +  for (unsigned i = 0; i < NumLoads; ++i) { +    unsigned LoadSize = getLoadSize(RemainingBytes); +    unsigned GEPIndex = NumBytesProcessed / LoadSize; +    NumBytesProcessed += LoadSize; +    RemainingBytes -= LoadSize; + +    Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); +    Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + +    Value *Source1 = CI->getArgOperand(0); +    Value *Source2 = CI->getArgOperand(1); + +    // Cast source to LoadSizeType* +    if (Source1->getType() != LoadSizeType) +      Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); +    if (Source2->getType() != LoadSizeType) +      Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + +    // Get the base address using the GEPIndex +    if (GEPIndex != 0) { +      Source1 = Builder.CreateGEP(LoadSizeType, Source1, +                                  ConstantInt::get(LoadSizeType, GEPIndex)); +      Source2 = Builder.CreateGEP(LoadSizeType, Source2, +                                  ConstantInt::get(LoadSizeType, GEPIndex)); +    } + +    // Load LoadSizeType from the base address +    Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); +    Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); +    if (LoadSizeType != MaxLoadType) { +      LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType); +      LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType); +    } +    Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); +    Diff = Builder.CreateZExtOrTrunc(Diff, MaxLoadType); +    XorList.push_back(Diff); +  } + +  auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> { +    std::vector<Value *> OutList; +    for (unsigned i = 0; i < InList.size() - 1; i = i + 2) { +      Value *Or = Builder.CreateOr(InList[i], InList[i + 1]); +      OutList.push_back(Or); +    } +    if (InList.size() % 2 != 0) +      OutList.push_back(InList.back()); +    return OutList; +  }; + +  // Pair wise OR the XOR results +  OrList = pairWiseOr(XorList); + +  // Pair wise OR the OR results until one result left +  while (OrList.size() != 1) { +    OrList = pairWiseOr(OrList); +  } + +  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, OrList[0], +                                  ConstantInt::get(Diff->getType(), 0)); +  BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) +                           ? EndBlock +                           : LoadCmpBlocks[Index + 1]; +  // Early exit branch if difference found to ResultBlock, otherwise continue to +  // next LoadCmpBlock or EndBlock. +  BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); +  Builder.Insert(CmpBr); + +  // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 +  // since early exit to ResultBlock was not taken (no difference was found in +  // any of the bytes) +  if (Index == LoadCmpBlocks.size() - 1) { +    Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); +    PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]); +  } +} + +// This function creates the IR intructions for loading and comparing using the +// given LoadSize. It loads the number of bytes specified by LoadSize from each +// source of the memcmp parameters. It then does a subtract to see if there was +// a difference in the loaded values. If a difference is found, it branches +// with an early exit to the ResultBlock for calculating which source was +// larger. Otherwise, it falls through to the either the next LoadCmpBlock or +// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with +// a special case through emitLoadCompareByteBlock. The special handling can +// simply subtract the loaded values and add it to the result phi node. +void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize, +                                           int GEPIndex, bool IsLittleEndian) { +  if (LoadSize == 1) { +    MemCmpExpansion::emitLoadCompareByteBlock(Index, GEPIndex); +    return; +  } + +  IRBuilder<> Builder(CI->getContext()); + +  Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); +  Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + +  Value *Source1 = CI->getArgOperand(0); +  Value *Source2 = CI->getArgOperand(1); + +  Builder.SetInsertPoint(LoadCmpBlocks[Index]); +  // Cast source to LoadSizeType* +  if (Source1->getType() != LoadSizeType) +    Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); +  if (Source2->getType() != LoadSizeType) +    Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + +  // Get the base address using the GEPIndex +  if (GEPIndex != 0) { +    Source1 = Builder.CreateGEP(LoadSizeType, Source1, +                                ConstantInt::get(LoadSizeType, GEPIndex)); +    Source2 = Builder.CreateGEP(LoadSizeType, Source2, +                                ConstantInt::get(LoadSizeType, GEPIndex)); +  } + +  // Load LoadSizeType from the base address +  Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); +  Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + +  if (IsLittleEndian) { +    Function *F = LoadCmpBlocks[Index]->getParent(); + +    Function *Bswap = Intrinsic::getDeclaration(F->getParent(), +                                                Intrinsic::bswap, LoadSizeType); +    LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); +    LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); +  } + +  if (LoadSizeType != MaxLoadType) { +    LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType); +    LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType); +  } + +  // Add the loaded values to the phi nodes for calculating memcmp result only +  // if result is not used in a zero equality. +  if (!IsUsedForZeroCmp) { +    ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[Index]); +    ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[Index]); +  } + +  Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); + +  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, +                                  ConstantInt::get(Diff->getType(), 0)); +  BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) +                           ? EndBlock +                           : LoadCmpBlocks[Index + 1]; +  // Early exit branch if difference found to ResultBlock, otherwise continue to +  // next LoadCmpBlock or EndBlock. +  BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); +  Builder.Insert(CmpBr); + +  // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 +  // since early exit to ResultBlock was not taken (no difference was found in +  // any of the bytes) +  if (Index == LoadCmpBlocks.size() - 1) { +    Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); +    PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]); +  } +} + +// This function populates the ResultBlock with a sequence to calculate the +// memcmp result. It compares the two loaded source values and returns -1 if +// src1 < src2 and 1 if src1 > src2. +void MemCmpExpansion::emitMemCmpResultBlock(bool IsLittleEndian) { +  IRBuilder<> Builder(CI->getContext()); + +  // Special case: if memcmp result is used in a zero equality, result does not +  // need to be calculated and can simply return 1. +  if (IsUsedForZeroCmp) { +    BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); +    Builder.SetInsertPoint(ResBlock.BB, InsertPt); +    Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1); +    PhiRes->addIncoming(Res, ResBlock.BB); +    BranchInst *NewBr = BranchInst::Create(EndBlock); +    Builder.Insert(NewBr); +    return; +  } +  BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); +  Builder.SetInsertPoint(ResBlock.BB, InsertPt); + +  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1, +                                  ResBlock.PhiSrc2); + +  Value *Res = +      Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1), +                           ConstantInt::get(Builder.getInt32Ty(), 1)); + +  BranchInst *NewBr = BranchInst::Create(EndBlock); +  Builder.Insert(NewBr); +  PhiRes->addIncoming(Res, ResBlock.BB); +} + +int MemCmpExpansion::calculateNumBlocks(unsigned Size) { +  int NumBlocks = 0; +  bool haveOneByteLoad = false; +  unsigned RemainingSize = Size; +  unsigned LoadSize = MaxLoadSize; +  while (RemainingSize) { +    if (LoadSize == 1) +      haveOneByteLoad = true; +    NumBlocks += RemainingSize / LoadSize; +    RemainingSize = RemainingSize % LoadSize; +    LoadSize = LoadSize / 2; +  } +  NumBlocksNonOneByte = haveOneByteLoad ? (NumBlocks - 1) : NumBlocks; + +  if (IsUsedForZeroCmp) +    NumBlocks = NumBlocks / NumLoadsPerBlock + +                (NumBlocks % NumLoadsPerBlock != 0 ? 1 : 0); + +  return NumBlocks; +} + +void MemCmpExpansion::setupResultBlockPHINodes() { +  IRBuilder<> Builder(CI->getContext()); +  Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); +  Builder.SetInsertPoint(ResBlock.BB); +  ResBlock.PhiSrc1 = +      Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src1"); +  ResBlock.PhiSrc2 = +      Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src2"); +} + +void MemCmpExpansion::setupEndBlockPHINodes() { +  IRBuilder<> Builder(CI->getContext()); + +  Builder.SetInsertPoint(&EndBlock->front()); +  PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res"); +} + +Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size, +                                                   bool IsLittleEndian) { +  unsigned NumBytesProcessed = 0; +  // This loop populates each of the LoadCmpBlocks with IR sequence to handle +  // multiple loads per block +  for (unsigned i = 0; i < NumBlocks; ++i) { +    emitLoadCompareBlockMultipleLoads(i, Size, NumBytesProcessed); +  } + +  emitMemCmpResultBlock(IsLittleEndian); +  return PhiRes; +} + +// This function expands the memcmp call into an inline expansion and returns +// the memcmp result. +Value *MemCmpExpansion::getMemCmpExpansion(bool IsLittleEndian) { + +  ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2)); +  uint64_t Size = SizeCast->getZExtValue(); + +  int LoadSize = MaxLoadSize; +  int NumBytesToBeProcessed = Size; + +  if (IsUsedForZeroCmp) { +    return getMemCmpExpansionZeroCase(Size, IsLittleEndian); +  } + +  unsigned Index = 0; +  // This loop calls emitLoadCompareBlock for comparing SizeVal bytes of the two +  // memcmp source. It starts with loading using the maximum load size set by +  // the target. It processes any remaining bytes using a load size which is the +  // next smallest power of 2. +  while (NumBytesToBeProcessed) { +    // Calculate how many blocks we can create with the current load size +    int NumBlocks = NumBytesToBeProcessed / LoadSize; +    int GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize; +    NumBytesToBeProcessed = NumBytesToBeProcessed % LoadSize; + +    // For each NumBlocks, populate the instruction sequence for loading and +    // comparing LoadSize bytes +    while (NumBlocks--) { +      emitLoadCompareBlock(Index, LoadSize, GEPIndex, IsLittleEndian); +      Index++; +      GEPIndex++; +    } +    // Get the next LoadSize to use +    LoadSize = LoadSize / 2; +  } + +  emitMemCmpResultBlock(IsLittleEndian); +  return PhiRes; +} + +// This function checks to see if an expansion of memcmp can be generated. +// It checks for constant compare size that is less than the max inline size. +// If an expansion cannot occur, returns false to leave as a library call. +// Otherwise, the library call is replaced wtih new IR instruction sequence. +/// We want to transform: +/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15) +/// To: +/// loadbb: +///  %0 = bitcast i32* %buffer2 to i8* +///  %1 = bitcast i32* %buffer1 to i8* +///  %2 = bitcast i8* %1 to i64* +///  %3 = bitcast i8* %0 to i64* +///  %4 = load i64, i64* %2 +///  %5 = load i64, i64* %3 +///  %6 = call i64 @llvm.bswap.i64(i64 %4) +///  %7 = call i64 @llvm.bswap.i64(i64 %5) +///  %8 = sub i64 %6, %7 +///  %9 = icmp ne i64 %8, 0 +///  br i1 %9, label %res_block, label %loadbb1 +/// res_block:                                        ; preds = %loadbb2, +/// %loadbb1, %loadbb +///  %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ] +///  %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ] +///  %10 = icmp ult i64 %phi.src1, %phi.src2 +///  %11 = select i1 %10, i32 -1, i32 1 +///  br label %endblock +/// loadbb1:                                          ; preds = %loadbb +///  %12 = bitcast i32* %buffer2 to i8* +///  %13 = bitcast i32* %buffer1 to i8* +///  %14 = bitcast i8* %13 to i32* +///  %15 = bitcast i8* %12 to i32* +///  %16 = getelementptr i32, i32* %14, i32 2 +///  %17 = getelementptr i32, i32* %15, i32 2 +///  %18 = load i32, i32* %16 +///  %19 = load i32, i32* %17 +///  %20 = call i32 @llvm.bswap.i32(i32 %18) +///  %21 = call i32 @llvm.bswap.i32(i32 %19) +///  %22 = zext i32 %20 to i64 +///  %23 = zext i32 %21 to i64 +///  %24 = sub i64 %22, %23 +///  %25 = icmp ne i64 %24, 0 +///  br i1 %25, label %res_block, label %loadbb2 +/// loadbb2:                                          ; preds = %loadbb1 +///  %26 = bitcast i32* %buffer2 to i8* +///  %27 = bitcast i32* %buffer1 to i8* +///  %28 = bitcast i8* %27 to i16* +///  %29 = bitcast i8* %26 to i16* +///  %30 = getelementptr i16, i16* %28, i16 6 +///  %31 = getelementptr i16, i16* %29, i16 6 +///  %32 = load i16, i16* %30 +///  %33 = load i16, i16* %31 +///  %34 = call i16 @llvm.bswap.i16(i16 %32) +///  %35 = call i16 @llvm.bswap.i16(i16 %33) +///  %36 = zext i16 %34 to i64 +///  %37 = zext i16 %35 to i64 +///  %38 = sub i64 %36, %37 +///  %39 = icmp ne i64 %38, 0 +///  br i1 %39, label %res_block, label %loadbb3 +/// loadbb3:                                          ; preds = %loadbb2 +///  %40 = bitcast i32* %buffer2 to i8* +///  %41 = bitcast i32* %buffer1 to i8* +///  %42 = getelementptr i8, i8* %41, i8 14 +///  %43 = getelementptr i8, i8* %40, i8 14 +///  %44 = load i8, i8* %42 +///  %45 = load i8, i8* %43 +///  %46 = zext i8 %44 to i32 +///  %47 = zext i8 %45 to i32 +///  %48 = sub i32 %46, %47 +///  br label %endblock +/// endblock:                                         ; preds = %res_block, +/// %loadbb3 +///  %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ] +///  ret i32 %phi.res +static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, +                         const TargetLowering *TLI, const DataLayout *DL) { +  NumMemCmpCalls++; +  IRBuilder<> Builder(CI->getContext()); + +  // TTI call to check if target would like to expand memcmp and get the +  // MaxLoadSize +  unsigned MaxLoadSize; +  if (!TTI->expandMemCmp(CI, MaxLoadSize)) +    return false; + +  // Early exit from expansion if -Oz +  if (CI->getParent()->getParent()->optForMinSize()) { +    return false; +  } + +  // Early exit from expansion if size is not a constant +  ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2)); +  if (!SizeCast) { +    NumMemCmpNotConstant++; +    return false; +  } + +  // Early exit from expansion if size greater than max bytes to load +  uint64_t SizeVal = SizeCast->getZExtValue(); + +  unsigned NumLoads = 0; +  unsigned RemainingSize = SizeVal; +  unsigned LoadSize = MaxLoadSize; +  while (RemainingSize) { +    NumLoads += RemainingSize / LoadSize; +    RemainingSize = RemainingSize % LoadSize; +    LoadSize = LoadSize / 2; +  } + +  if (NumLoads > +      TLI->getMaxExpandSizeMemcmp(CI->getParent()->getParent()->optForSize())) { +    NumMemCmpGreaterThanMax++; +    return false; +  } + +  NumMemCmpInlined++; + +  // MemCmpHelper object, creates and sets up basic blocks required for +  // expanding memcmp with size SizeVal +  unsigned NumLoadsPerBlock = MemCmpNumLoadsPerBlock; +  MemCmpExpansion MemCmpHelper(CI, MaxLoadSize, NumLoadsPerBlock); + +  Value *Res = MemCmpHelper.getMemCmpExpansion(DL->isLittleEndian()); + +  // Replace call with result of expansion and erarse call. +  CI->replaceAllUsesWith(Res); +  CI->eraseFromParent(); + +  return true; +} +  bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {    BasicBlock *BB = CI->getParent(); @@ -1780,6 +2380,15 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {      CI->eraseFromParent();      return true;    } + +  LibFunc Func; +  if (TLInfo->getLibFunc(*CI->getCalledFunction(), Func) && +      Func == LibFunc_memcmp) { +    if (expandMemCmp(CI, TTI, TLI, DL)) { +      ModifiedDT = true; +      return true; +    } +  }    return false;  } @@ -4927,6 +5536,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {    return true;  } +  namespace {  /// \brief Helper class to promote a scalar operation to a vector one.  /// This class is used to move downward extractelement transition.  | 
