diff options
Diffstat (limited to 'lib/Transforms')
| -rw-r--r-- | lib/Transforms/IPO/GlobalOpt.cpp | 142 | ||||
| -rw-r--r-- | lib/Transforms/Scalar/DeadStoreElimination.cpp | 19 | ||||
| -rw-r--r-- | lib/Transforms/Scalar/JumpThreading.cpp | 3 | ||||
| -rw-r--r-- | lib/Transforms/Scalar/SCCP.cpp | 37 | 
4 files changed, 109 insertions, 92 deletions
| diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 5dab9efab220..234d0ecc2e5e 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -822,32 +822,42 @@ static void ConstantPropUsersOf(Value *V, LLVMContext &Context) {  /// malloc into a global, and any loads of GV as uses of the new global.  static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,                                                       CallInst *CI, -                                                     BitCastInst *BCI, +                                                     const Type *AllocTy,                                                       Value* NElems,                                                       LLVMContext &Context,                                                       TargetData* TD) { -  DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV -               << "  CALL = " << *CI << "  BCI = " << *BCI << '\n'); +  DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI << '\n');    const Type *IntPtrTy = TD->getIntPtrType(Context); +  // CI has either 0 or 1 bitcast uses (getMallocType() would otherwise have +  // returned NULL and we would not be here). +  BitCastInst *BCI = NULL; +  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); UI != E; ) +    if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++)))) +      break; +    ConstantInt *NElements = cast<ConstantInt>(NElems);    if (NElements->getZExtValue() != 1) {      // If we have an array allocation, transform it to a single element      // allocation to make the code below simpler. -    Type *NewTy = ArrayType::get(getMallocAllocatedType(CI), -                                 NElements->getZExtValue()); -    Value* NewM = CallInst::CreateMalloc(CI, IntPtrTy, NewTy); -    Instruction* NewMI = cast<Instruction>(NewM); +    Type *NewTy = ArrayType::get(AllocTy, NElements->getZExtValue()); +    unsigned TypeSize = TD->getTypeAllocSize(NewTy); +    if (const StructType *ST = dyn_cast<StructType>(NewTy)) +      TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); +    Instruction *NewCI = CallInst::CreateMalloc(CI, IntPtrTy, NewTy, +                                         ConstantInt::get(IntPtrTy, TypeSize));      Value* Indices[2];      Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy); -    Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2, -                                              NewMI->getName()+".el0", CI); -    BCI->replaceAllUsesWith(NewGEP); -    BCI->eraseFromParent(); +    Value *NewGEP = GetElementPtrInst::Create(NewCI, Indices, Indices + 2, +                                              NewCI->getName()+".el0", CI); +    Value *Cast = new BitCastInst(NewGEP, CI->getType(), "el0", CI); +    if (BCI) BCI->replaceAllUsesWith(NewGEP); +    CI->replaceAllUsesWith(Cast); +    if (BCI) BCI->eraseFromParent();      CI->eraseFromParent(); -    BCI = cast<BitCastInst>(NewMI); -    CI = extractMallocCallFromBitCast(NewMI); +    BCI = dyn_cast<BitCastInst>(NewCI); +    CI = BCI ? extractMallocCallFromBitCast(BCI) : cast<CallInst>(NewCI);    }    // Create the new global variable.  The contents of the malloc'd memory is @@ -861,8 +871,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,                                               GV,                                               GV->isThreadLocal()); -  // Anything that used the malloc now uses the global directly. -  BCI->replaceAllUsesWith(NewGV); +  // Anything that used the malloc or its bitcast now uses the global directly. +  if (BCI) BCI->replaceAllUsesWith(NewGV); +  CI->replaceAllUsesWith(new BitCastInst(NewGV, CI->getType(), "newgv", CI));    Constant *RepValue = NewGV;    if (NewGV->getType() != GV->getType()->getElementType()) @@ -930,9 +941,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,      GV->getParent()->getGlobalList().insert(GV, InitBool); -  // Now the GV is dead, nuke it and the malloc. +  // Now the GV is dead, nuke it and the malloc (both CI and BCI).    GV->eraseFromParent(); -  BCI->eraseFromParent(); +  if (BCI) BCI->eraseFromParent();    CI->eraseFromParent();    // To further other optimizations, loop over all users of NewGV and try to @@ -1273,13 +1284,10 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,  /// PerformHeapAllocSRoA - CI is an allocation of an array of structures.  Break  /// it up into multiple allocations of arrays of the fields. -static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, -                                            CallInst *CI, BitCastInst* BCI, -                                            Value* NElems, -                                            LLVMContext &Context, +static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, +                                            Value* NElems, LLVMContext &Context,                                              TargetData *TD) { -  DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC CALL = " << *CI  -               << " BITCAST = " << *BCI << '\n'); +  DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *CI << '\n');    const Type* MAT = getMallocAllocatedType(CI);    const StructType *STy = cast<StructType>(MAT); @@ -1287,8 +1295,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,    // it into GV).  If there are other uses, change them to be uses of    // the global to simplify later code.  This also deletes the store    // into GV. -  ReplaceUsesOfMallocWithGlobal(BCI, GV); -   +  ReplaceUsesOfMallocWithGlobal(CI, GV); +    // Okay, at this point, there are no users of the malloc.  Insert N    // new mallocs at the same place as CI, and N globals.    std::vector<Value*> FieldGlobals; @@ -1306,11 +1314,16 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,                           GV->isThreadLocal());      FieldGlobals.push_back(NGV); -    Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), -                                        FieldTy, NElems, -                                        BCI->getName() + ".f" + Twine(FieldNo)); +    unsigned TypeSize = TD->getTypeAllocSize(FieldTy); +    if (const StructType* ST = dyn_cast<StructType>(FieldTy)) +      TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); +    const Type* IntPtrTy = TD->getIntPtrType(Context); +    Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, +                                        ConstantInt::get(IntPtrTy, TypeSize), +                                        NElems, +                                        CI->getName() + ".f" + Twine(FieldNo));      FieldMallocs.push_back(NMI); -    new StoreInst(NMI, NGV, BCI); +    new StoreInst(NMI, NGV, CI);    }    // The tricky aspect of this transformation is handling the case when malloc @@ -1327,18 +1340,18 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,    //    }    Value *RunningOr = 0;    for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { -    Value *Cond = new ICmpInst(BCI, ICmpInst::ICMP_EQ, FieldMallocs[i], -                              Constant::getNullValue(FieldMallocs[i]->getType()), -                                  "isnull"); +    Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i], +                             Constant::getNullValue(FieldMallocs[i]->getType()), +                               "isnull");      if (!RunningOr)        RunningOr = Cond;   // First seteq      else -      RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", BCI); +      RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);    }    // Split the basic block at the old malloc. -  BasicBlock *OrigBB = BCI->getParent(); -  BasicBlock *ContBB = OrigBB->splitBasicBlock(BCI, "malloc_cont"); +  BasicBlock *OrigBB = CI->getParent(); +  BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");    // Create the block to check the first condition.  Put all these blocks at the    // end of the function as they are unlikely to be executed. @@ -1374,9 +1387,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,    }    BranchInst::Create(ContBB, NullPtrBlock); -   -  // CI and BCI are no longer needed, remove them. -  BCI->eraseFromParent(); + +  // CI is no longer needed, remove it.    CI->eraseFromParent();    /// InsertedScalarizedLoads - As we process loads, if we can't immediately @@ -1463,14 +1475,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,  /// cast of malloc.  static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,                                                 CallInst *CI, -                                               BitCastInst *BCI, +                                               const Type *AllocTy,                                                 Module::global_iterator &GVI,                                                 TargetData *TD,                                                 LLVMContext &Context) { -  // If we can't figure out the type being malloced, then we can't optimize. -  const Type *AllocTy = getMallocAllocatedType(CI); -  assert(AllocTy); -    // If this is a malloc of an abstract type, don't touch it.    if (!AllocTy->isSized())      return false; @@ -1491,7 +1499,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,    // for.    {      SmallPtrSet<PHINode*, 8> PHIs; -    if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) +    if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))        return false;    }   @@ -1499,16 +1507,16 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,    // transform the program to use global memory instead of malloc'd memory.    // This eliminates dynamic allocation, avoids an indirection accessing the    // data, and exposes the resultant global to further GlobalOpt. -  Value *NElems = getMallocArraySize(CI, Context, TD);    // We cannot optimize the malloc if we cannot determine malloc array size. -  if (NElems) { +  if (Value *NElems = getMallocArraySize(CI, Context, TD)) {      if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))        // Restrict this transformation to only working on small allocations        // (2048 bytes currently), as we don't want to introduce a 16M global or        // something.        if (TD &&             NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { -        GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, NElems, Context, TD); +        GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems, +                                            Context, TD);          return true;        } @@ -1526,26 +1534,29 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,        // This the structure has an unreasonable number of fields, leave it        // alone.        if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && -          AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) { +          AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {          // If this is a fixed size array, transform the Malloc to be an alloc of          // structs.  malloc [100 x struct],1 -> malloc struct, 100          if (const ArrayType *AT =                                dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { -          Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context), -                                                AT->getNumElements()); -          Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), -                                                AllocSTy, NumElements, -                                                BCI->getName()); -          Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI); -          BCI->replaceAllUsesWith(Cast); -          BCI->eraseFromParent(); +          const Type *IntPtrTy = TD->getIntPtrType(Context); +          unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); +          Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); +          Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); +          Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, +                                                       AllocSize, NumElements, +                                                       CI->getName()); +          Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); +          CI->replaceAllUsesWith(Cast);            CI->eraseFromParent(); -          BCI = cast<BitCastInst>(NewMI); -          CI = extractMallocCallFromBitCast(NewMI); +          CI = dyn_cast<BitCastInst>(Malloc) ? +               extractMallocCallFromBitCast(Malloc): +               cast<CallInst>(Malloc);          } -        GVI = PerformHeapAllocSRoA(GV, CI, BCI, NElems, Context, TD); +        GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, Context, TD),  +                                   Context, TD);          return true;        }      } @@ -1577,15 +1588,10 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,        if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context))          return true;      } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { -      if (getMallocAllocatedType(CI)) { -        BitCastInst* BCI = NULL; -        for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); -             UI != E; ) -          BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++)); -        if (BCI && -            TryToOptimizeStoreOfMallocToGlobal(GV, CI, BCI, GVI, TD, Context)) -          return true; -      } +      const Type* MallocType = getMallocAllocatedType(CI); +      if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,  +                                                           GVI, TD, Context)) +        return true;      }    } diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 60b12fd8679e..90436f40661c 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -78,6 +78,21 @@ static RegisterPass<DSE> X("dse", "Dead Store Elimination");  FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } +/// isValueAtLeastAsBigAs - Return true if V1 is greater than or equal to the +/// stored size of V2.  This returns false if we don't know. +/// +static bool isValueAtLeastAsBigAs(Value *V1, Value *V2, const TargetData *TD) { +  const Type *V1Ty = V1->getType(), *V2Ty = V2->getType(); +   +  // Exactly the same type, must have exactly the same size. +  if (V1Ty == V2Ty) return true; +   +  // If we don't have target data, we don't know. +  if (TD == 0) return false; +   +  return TD->getTypeStoreSize(V1Ty) >= TD->getTypeStoreSize(V2Ty); +} +  bool DSE::runOnBasicBlock(BasicBlock &BB) {    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();    TD = getAnalysisIfAvailable<TargetData>(); @@ -118,9 +133,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {      // If this is a store-store dependence, then the previous store is dead so      // long as this store is at least as big as it.      if (StoreInst *DepStore = dyn_cast<StoreInst>(InstDep.getInst())) -      if (TD && -          TD->getTypeStoreSize(DepStore->getOperand(0)->getType()) <= -          TD->getTypeStoreSize(SI->getOperand(0)->getType())) { +      if (isValueAtLeastAsBigAs(SI->getOperand(0), DepStore->getOperand(0),TD)){          // Delete the store and now-dead instructions that feed it.          DeleteDeadInstruction(DepStore);          NumFastStores++; diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 8b11edd891fd..10c9ec6d5a4c 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -68,9 +68,6 @@ namespace {      static char ID; // Pass identification      JumpThreading() : FunctionPass(&ID) {} -    virtual void getAnalysisUsage(AnalysisUsage &AU) const { -    } -      bool runOnFunction(Function &F);      void FindLoopHeaders(Function &F); diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 05a0eeef2d37..509a6dbc12f0 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -370,13 +370,13 @@ private:    /// by properly seeding constants etc.    LatticeVal &getValueState(Value *V) {      assert(!isa<StructType>(V->getType()) && "Should use getStructValueState"); -     -    // TODO: Change to do insert+find in one operation. -    DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(V); -    if (I != ValueState.end()) -      return I->second;  // Common case, already in the map. -    LatticeVal &LV = ValueState[V]; +    std::pair<DenseMap<Value*, LatticeVal>::iterator, bool> I = +      ValueState.insert(std::make_pair(V, LatticeVal())); +    LatticeVal &LV = I.first->second; + +    if (!I.second) +      return LV;  // Common case, already in the map.      if (Constant *C = dyn_cast<Constant>(V)) {        // Undef values remain undefined. @@ -395,15 +395,15 @@ private:      assert(isa<StructType>(V->getType()) && "Should use getValueState");      assert(i < cast<StructType>(V->getType())->getNumElements() &&             "Invalid element #"); -     -    // TODO: Change to do insert+find in one operation. -    DenseMap<std::pair<Value*, unsigned>, LatticeVal>::iterator -      I = StructValueState.find(std::make_pair(V, i)); -    if (I != StructValueState.end()) -      return I->second;  // Common case, already in the map. -     -    LatticeVal &LV = StructValueState[std::make_pair(V, i)]; -     + +    std::pair<DenseMap<std::pair<Value*, unsigned>, LatticeVal>::iterator, +              bool> I = StructValueState.insert( +                        std::make_pair(std::make_pair(V, i), LatticeVal())); +    LatticeVal &LV = I.first->second; + +    if (!I.second) +      return LV;  // Common case, already in the map. +      if (Constant *C = dyn_cast<Constant>(V)) {        if (isa<UndefValue>(C))          ; // Undef values remain undefined. @@ -1280,9 +1280,10 @@ CallOverdefined:        }        if (const StructType *STy = dyn_cast<StructType>(AI->getType())) { -        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) -          mergeInValue(getStructValueState(AI, i), AI, -                       getStructValueState(*CAI, i)); +        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { +          LatticeVal CallArg = getStructValueState(*CAI, i); +          mergeInValue(getStructValueState(AI, i), AI, CallArg); +        }        } else {          mergeInValue(AI, getValueState(*CAI));        } | 
