diff options
Diffstat (limited to 'lib/Transforms/IPO/GlobalOpt.cpp')
| -rw-r--r-- | lib/Transforms/IPO/GlobalOpt.cpp | 505 | 
1 files changed, 57 insertions, 448 deletions
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index a44386e6c15f..9ced2e89a7e6 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -822,140 +822,18 @@ static void ConstantPropUsersOf(Value *V, LLVMContext &Context) {  /// malloc, there is no reason to actually DO the malloc.  Instead, turn the  /// malloc into a global, and any loads of GV as uses of the new global.  static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, -                                                     MallocInst *MI, -                                                     LLVMContext &Context) { -  DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << "  MALLOC = " << *MI); -  ConstantInt *NElements = cast<ConstantInt>(MI->getArraySize()); - -  if (NElements->getZExtValue() != 1) { -    // If we have an array allocation, transform it to a single element -    // allocation to make the code below simpler. -    Type *NewTy = ArrayType::get(MI->getAllocatedType(), -                                 NElements->getZExtValue()); -    MallocInst *NewMI = -      new MallocInst(NewTy, Constant::getNullValue(Type::getInt32Ty(Context)), -                     MI->getAlignment(), MI->getName(), MI); -    Value* Indices[2]; -    Indices[0] = Indices[1] = Constant::getNullValue(Type::getInt32Ty(Context)); -    Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2, -                                              NewMI->getName()+".el0", MI); -    MI->replaceAllUsesWith(NewGEP); -    MI->eraseFromParent(); -    MI = NewMI; -  } - -  // Create the new global variable.  The contents of the malloc'd memory is -  // undefined, so initialize with an undef value. -  // FIXME: This new global should have the alignment returned by malloc.  Code -  // could depend on malloc returning large alignment (on the mac, 16 bytes) but -  // this would only guarantee some lower alignment. -  Constant *Init = UndefValue::get(MI->getAllocatedType()); -  GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),  -                                             MI->getAllocatedType(), false, -                                             GlobalValue::InternalLinkage, Init, -                                             GV->getName()+".body", -                                             GV, -                                             GV->isThreadLocal()); -   -  // Anything that used the malloc now uses the global directly. -  MI->replaceAllUsesWith(NewGV); - -  Constant *RepValue = NewGV; -  if (NewGV->getType() != GV->getType()->getElementType()) -    RepValue = ConstantExpr::getBitCast(RepValue,  -                                        GV->getType()->getElementType()); - -  // If there is a comparison against null, we will insert a global bool to -  // keep track of whether the global was initialized yet or not. -  GlobalVariable *InitBool = -    new GlobalVariable(Context, Type::getInt1Ty(Context), false, -                       GlobalValue::InternalLinkage, -                       ConstantInt::getFalse(Context), GV->getName()+".init", -                       GV->isThreadLocal()); -  bool InitBoolUsed = false; - -  // Loop over all uses of GV, processing them in turn. -  std::vector<StoreInst*> Stores; -  while (!GV->use_empty()) -    if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) { -      while (!LI->use_empty()) { -        Use &LoadUse = LI->use_begin().getUse(); -        if (!isa<ICmpInst>(LoadUse.getUser())) -          LoadUse = RepValue; -        else { -          ICmpInst *CI = cast<ICmpInst>(LoadUse.getUser()); -          // Replace the cmp X, 0 with a use of the bool value. -          Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", CI); -          InitBoolUsed = true; -          switch (CI->getPredicate()) { -          default: llvm_unreachable("Unknown ICmp Predicate!"); -          case ICmpInst::ICMP_ULT: -          case ICmpInst::ICMP_SLT: -            LV = ConstantInt::getFalse(Context);   // X < null -> always false -            break; -          case ICmpInst::ICMP_ULE: -          case ICmpInst::ICMP_SLE: -          case ICmpInst::ICMP_EQ: -            LV = BinaryOperator::CreateNot(LV, "notinit", CI); -            break; -          case ICmpInst::ICMP_NE: -          case ICmpInst::ICMP_UGE: -          case ICmpInst::ICMP_SGE: -          case ICmpInst::ICMP_UGT: -          case ICmpInst::ICMP_SGT: -            break;  // no change. -          } -          CI->replaceAllUsesWith(LV); -          CI->eraseFromParent(); -        } -      } -      LI->eraseFromParent(); -    } else { -      StoreInst *SI = cast<StoreInst>(GV->use_back()); -      // The global is initialized when the store to it occurs. -      new StoreInst(ConstantInt::getTrue(Context), InitBool, SI); -      SI->eraseFromParent(); -    } - -  // If the initialization boolean was used, insert it, otherwise delete it. -  if (!InitBoolUsed) { -    while (!InitBool->use_empty())  // Delete initializations -      cast<Instruction>(InitBool->use_back())->eraseFromParent(); -    delete InitBool; -  } else -    GV->getParent()->getGlobalList().insert(GV, InitBool); - - -  // Now the GV is dead, nuke it and the malloc. -  GV->eraseFromParent(); -  MI->eraseFromParent(); - -  // To further other optimizations, loop over all users of NewGV and try to -  // constant prop them.  This will promote GEP instructions with constant -  // indices into GEP constant-exprs, which will allow global-opt to hack on it. -  ConstantPropUsersOf(NewGV, Context); -  if (RepValue != NewGV) -    ConstantPropUsersOf(RepValue, Context); - -  return NewGV; -} - -/// OptimizeGlobalAddressOfMalloc - This function takes the specified global -/// variable, and transforms the program as if it always contained the result of -/// the specified malloc.  Because it is always the result of the specified -/// malloc, there is no reason to actually DO the malloc.  Instead, turn the -/// malloc into a global, and any loads of GV as uses of the new global. -static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,                                                       CallInst *CI,                                                       BitCastInst *BCI,                                                       LLVMContext &Context,                                                       TargetData* TD) { +  DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV +               << "  CALL = " << *CI << "  BCI = " << *BCI << '\n'); +    const Type *IntPtrTy = TD->getIntPtrType(Context); -  DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << "  MALLOC = " << *CI); - -  ConstantInt *NElements = cast<ConstantInt>(getMallocArraySize(CI, -                                                                Context, TD)); +  Value* ArraySize = getMallocArraySize(CI, Context, TD); +  assert(ArraySize && "not a malloc whose array size can be determined"); +  ConstantInt *NElements = cast<ConstantInt>(ArraySize);    if (NElements->getZExtValue() != 1) {      // If we have an array allocation, transform it to a single element      // allocation to make the code below simpler. @@ -976,9 +854,6 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,    // Create the new global variable.  The contents of the malloc'd memory is    // undefined, so initialize with an undef value. -  // FIXME: This new global should have the alignment returned by malloc.  Code -  // could depend on malloc returning large alignment (on the mac, 16 bytes) but -  // this would only guarantee some lower alignment.    const Type *MAT = getMallocAllocatedType(CI);    Constant *Init = UndefValue::get(MAT);    GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),  @@ -1398,185 +1273,6 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,    }  } -/// PerformHeapAllocSRoA - MI is an allocation of an array of structures.  Break -/// it up into multiple allocations of arrays of the fields. -static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI, -                                            LLVMContext &Context){ -  DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *MI); -  const StructType *STy = cast<StructType>(MI->getAllocatedType()); - -  // There is guaranteed to be at least one use of the malloc (storing -  // it into GV).  If there are other uses, change them to be uses of -  // the global to simplify later code.  This also deletes the store -  // into GV. -  ReplaceUsesOfMallocWithGlobal(MI, GV); -   -  // Okay, at this point, there are no users of the malloc.  Insert N -  // new mallocs at the same place as MI, and N globals. -  std::vector<Value*> FieldGlobals; -  std::vector<MallocInst*> FieldMallocs; -   -  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ -    const Type *FieldTy = STy->getElementType(FieldNo); -    const Type *PFieldTy = PointerType::getUnqual(FieldTy); -     -    GlobalVariable *NGV = -      new GlobalVariable(*GV->getParent(), -                         PFieldTy, false, GlobalValue::InternalLinkage, -                         Constant::getNullValue(PFieldTy), -                         GV->getName() + ".f" + Twine(FieldNo), GV, -                         GV->isThreadLocal()); -    FieldGlobals.push_back(NGV); -     -    MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(), -                                     MI->getName() + ".f" + Twine(FieldNo), MI); -    FieldMallocs.push_back(NMI); -    new StoreInst(NMI, NGV, MI); -  } -   -  // The tricky aspect of this transformation is handling the case when malloc -  // fails.  In the original code, malloc failing would set the result pointer -  // of malloc to null.  In this case, some mallocs could succeed and others -  // could fail.  As such, we emit code that looks like this: -  //    F0 = malloc(field0) -  //    F1 = malloc(field1) -  //    F2 = malloc(field2) -  //    if (F0 == 0 || F1 == 0 || F2 == 0) { -  //      if (F0) { free(F0); F0 = 0; } -  //      if (F1) { free(F1); F1 = 0; } -  //      if (F2) { free(F2); F2 = 0; } -  //    } -  Value *RunningOr = 0; -  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { -    Value *Cond = new ICmpInst(MI, ICmpInst::ICMP_EQ, FieldMallocs[i], -                              Constant::getNullValue(FieldMallocs[i]->getType()), -                                  "isnull"); -    if (!RunningOr) -      RunningOr = Cond;   // First seteq -    else -      RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", MI); -  } - -  // Split the basic block at the old malloc. -  BasicBlock *OrigBB = MI->getParent(); -  BasicBlock *ContBB = OrigBB->splitBasicBlock(MI, "malloc_cont"); -   -  // Create the block to check the first condition.  Put all these blocks at the -  // end of the function as they are unlikely to be executed. -  BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null", -                                                OrigBB->getParent()); -   -  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond -  // branch on RunningOr. -  OrigBB->getTerminator()->eraseFromParent(); -  BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); -   -  // Within the NullPtrBlock, we need to emit a comparison and branch for each -  // pointer, because some may be null while others are not. -  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { -    Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); -    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,  -                              Constant::getNullValue(GVVal->getType()), -                              "tmp"); -    BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it",  -                                               OrigBB->getParent()); -    BasicBlock *NextBlock = BasicBlock::Create(Context, "next",  -                                               OrigBB->getParent()); -    BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock); - -    // Fill in FreeBlock. -    new FreeInst(GVVal, FreeBlock); -    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], -                  FreeBlock); -    BranchInst::Create(NextBlock, FreeBlock); -     -    NullPtrBlock = NextBlock; -  } -   -  BranchInst::Create(ContBB, NullPtrBlock); -   -  // MI is no longer needed, remove it. -  MI->eraseFromParent(); - -  /// InsertedScalarizedLoads - As we process loads, if we can't immediately -  /// update all uses of the load, keep track of what scalarized loads are -  /// inserted for a given load. -  DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues; -  InsertedScalarizedValues[GV] = FieldGlobals; -   -  std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite; -   -  // Okay, the malloc site is completely handled.  All of the uses of GV are now -  // loads, and all uses of those loads are simple.  Rewrite them to use loads -  // of the per-field globals instead. -  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) { -    Instruction *User = cast<Instruction>(*UI++); -     -    if (LoadInst *LI = dyn_cast<LoadInst>(User)) { -      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite, -                                   Context); -      continue; -    } -     -    // Must be a store of null. -    StoreInst *SI = cast<StoreInst>(User); -    assert(isa<ConstantPointerNull>(SI->getOperand(0)) && -           "Unexpected heap-sra user!"); -     -    // Insert a store of null into each global. -    for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { -      const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); -      Constant *Null = Constant::getNullValue(PT->getElementType()); -      new StoreInst(Null, FieldGlobals[i], SI); -    } -    // Erase the original store. -    SI->eraseFromParent(); -  } - -  // While we have PHIs that are interesting to rewrite, do it. -  while (!PHIsToRewrite.empty()) { -    PHINode *PN = PHIsToRewrite.back().first; -    unsigned FieldNo = PHIsToRewrite.back().second; -    PHIsToRewrite.pop_back(); -    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]); -    assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi"); - -    // Add all the incoming values.  This can materialize more phis. -    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { -      Value *InVal = PN->getIncomingValue(i); -      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, -                               PHIsToRewrite, Context); -      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); -    } -  } -   -  // Drop all inter-phi links and any loads that made it this far. -  for (DenseMap<Value*, std::vector<Value*> >::iterator -       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); -       I != E; ++I) { -    if (PHINode *PN = dyn_cast<PHINode>(I->first)) -      PN->dropAllReferences(); -    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) -      LI->dropAllReferences(); -  } -   -  // Delete all the phis and loads now that inter-references are dead. -  for (DenseMap<Value*, std::vector<Value*> >::iterator -       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); -       I != E; ++I) { -    if (PHINode *PN = dyn_cast<PHINode>(I->first)) -      PN->eraseFromParent(); -    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) -      LI->eraseFromParent(); -  } -   -  // The old global is now dead, remove it. -  GV->eraseFromParent(); - -  ++NumHeapSRA; -  return cast<GlobalVariable>(FieldGlobals[0]); -} -  /// PerformHeapAllocSRoA - CI is an allocation of an array of structures.  Break  /// it up into multiple allocations of arrays of the fields.  static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, @@ -1587,6 +1283,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,                 << " BITCAST = " << *BCI << '\n');    const Type* MAT = getMallocAllocatedType(CI);    const StructType *STy = cast<StructType>(MAT); +  Value* ArraySize = getMallocArraySize(CI, Context, TD); +  assert(ArraySize && "not a malloc whose array size can be determined");    // There is guaranteed to be at least one use of the malloc (storing    // it into GV).  If there are other uses, change them to be uses of @@ -1611,8 +1309,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,                           GV->isThreadLocal());      FieldGlobals.push_back(NGV); -    Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), FieldTy, -                                        getMallocArraySize(CI, Context, TD), +    Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), +                                        FieldTy, ArraySize,                                          BCI->getName() + ".f" + Twine(FieldNo));      FieldMallocs.push_back(NMI);      new StoreInst(NMI, NGV, BCI); @@ -1766,95 +1464,6 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,  /// pointer global variable with a single value stored it that is a malloc or  /// cast of malloc.  static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, -                                               MallocInst *MI, -                                               Module::global_iterator &GVI, -                                               TargetData *TD, -                                               LLVMContext &Context) { -  // If this is a malloc of an abstract type, don't touch it. -  if (!MI->getAllocatedType()->isSized()) -    return false; -   -  // We can't optimize this global unless all uses of it are *known* to be -  // of the malloc value, not of the null initializer value (consider a use -  // that compares the global's value against zero to see if the malloc has -  // been reached).  To do this, we check to see if all uses of the global -  // would trap if the global were null: this proves that they must all -  // happen after the malloc. -  if (!AllUsesOfLoadedValueWillTrapIfNull(GV)) -    return false; -   -  // We can't optimize this if the malloc itself is used in a complex way, -  // for example, being stored into multiple globals.  This allows the -  // malloc to be stored into the specified global, loaded setcc'd, and -  // GEP'd.  These are all things we could transform to using the global -  // for. -  { -    SmallPtrSet<PHINode*, 8> PHIs; -    if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV, PHIs)) -      return false; -  } -   -   -  // If we have a global that is only initialized with a fixed size malloc, -  // transform the program to use global memory instead of malloc'd memory. -  // This eliminates dynamic allocation, avoids an indirection accessing the -  // data, and exposes the resultant global to further GlobalOpt. -  if (ConstantInt *NElements = dyn_cast<ConstantInt>(MI->getArraySize())) { -    // Restrict this transformation to only working on small allocations -    // (2048 bytes currently), as we don't want to introduce a 16M global or -    // something. -    if (TD && -        NElements->getZExtValue()* -        TD->getTypeAllocSize(MI->getAllocatedType()) < 2048) { -      GVI = OptimizeGlobalAddressOfMalloc(GV, MI, Context); -      return true; -    } -  } -   -  // If the allocation is an array of structures, consider transforming this -  // into multiple malloc'd arrays, one for each field.  This is basically -  // SRoA for malloc'd memory. -  const Type *AllocTy = MI->getAllocatedType(); -   -  // If this is an allocation of a fixed size array of structs, analyze as a -  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100 -  if (!MI->isArrayAllocation()) -    if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) -      AllocTy = AT->getElementType(); -   -  if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) { -    // This the structure has an unreasonable number of fields, leave it -    // alone. -    if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && -        AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, MI)) { -       -      // If this is a fixed size array, transform the Malloc to be an alloc of -      // structs.  malloc [100 x struct],1 -> malloc struct, 100 -      if (const ArrayType *AT = dyn_cast<ArrayType>(MI->getAllocatedType())) { -        MallocInst *NewMI =  -          new MallocInst(AllocSTy,  -                  ConstantInt::get(Type::getInt32Ty(Context), -                  AT->getNumElements()), -                         "", MI); -        NewMI->takeName(MI); -        Value *Cast = new BitCastInst(NewMI, MI->getType(), "tmp", MI); -        MI->replaceAllUsesWith(Cast); -        MI->eraseFromParent(); -        MI = NewMI; -      } -       -      GVI = PerformHeapAllocSRoA(GV, MI, Context); -      return true; -    } -  } -   -  return false; -}   - -/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a -/// pointer global variable with a single value stored it that is a malloc or -/// cast of malloc. -static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,                                                 CallInst *CI,                                                 BitCastInst *BCI,                                                 Module::global_iterator &GVI, @@ -1892,52 +1501,55 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,    // transform the program to use global memory instead of malloc'd memory.    // This eliminates dynamic allocation, avoids an indirection accessing the    // data, and exposes the resultant global to further GlobalOpt. -  if (ConstantInt *NElements = -              dyn_cast<ConstantInt>(getMallocArraySize(CI, Context, TD))) { -    // Restrict this transformation to only working on small allocations -    // (2048 bytes currently), as we don't want to introduce a 16M global or -    // something. -    if (TD &&  -        NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { -      GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD); -      return true; -    } -  } -   -  // If the allocation is an array of structures, consider transforming this -  // into multiple malloc'd arrays, one for each field.  This is basically -  // SRoA for malloc'd memory. - -  // If this is an allocation of a fixed size array of structs, analyze as a -  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100 -  if (!isArrayMalloc(CI, Context, TD)) -    if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) -      AllocTy = AT->getElementType(); -   -  if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) { -    // This the structure has an unreasonable number of fields, leave it -    // alone. -    if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && -        AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) { - -      // If this is a fixed size array, transform the Malloc to be an alloc of -      // structs.  malloc [100 x struct],1 -> malloc struct, 100 -      if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { -        Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context), -                                              AT->getNumElements()); -        Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), -                                              AllocSTy, NumElements, -                                              BCI->getName()); -        Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI); -        BCI->replaceAllUsesWith(Cast); -        BCI->eraseFromParent(); -        CI->eraseFromParent(); -        BCI = cast<BitCastInst>(NewMI); -        CI = extractMallocCallFromBitCast(NewMI); +  Value *NElems = getMallocArraySize(CI, Context, TD); +  // We cannot optimize the malloc if we cannot determine malloc array size. +  if (NElems) { +    if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) +      // Restrict this transformation to only working on small allocations +      // (2048 bytes currently), as we don't want to introduce a 16M global or +      // something. +      if (TD &&  +          NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { +        GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD); +        return true;        } +   +    // If the allocation is an array of structures, consider transforming this +    // into multiple malloc'd arrays, one for each field.  This is basically +    // SRoA for malloc'd memory. + +    // If this is an allocation of a fixed size array of structs, analyze as a +    // variable size array.  malloc [100 x struct],1 -> malloc struct, 100 +    if (!isArrayMalloc(CI, Context, TD)) +      if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) +        AllocTy = AT->getElementType(); +   +    if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) { +      // This the structure has an unreasonable number of fields, leave it +      // alone. +      if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && +          AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) { + +        // If this is a fixed size array, transform the Malloc to be an alloc of +        // structs.  malloc [100 x struct],1 -> malloc struct, 100 +        if (const ArrayType *AT = +                              dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { +          Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context), +                                                AT->getNumElements()); +          Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), +                                                AllocSTy, NumElements, +                                                BCI->getName()); +          Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI); +          BCI->replaceAllUsesWith(Cast); +          BCI->eraseFromParent(); +          CI->eraseFromParent(); +          BCI = cast<BitCastInst>(NewMI); +          CI = extractMallocCallFromBitCast(NewMI); +        } -      GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD); -      return true; +        GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD); +        return true; +      }      }    } @@ -1966,9 +1578,6 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,        // Optimize away any trapping uses of the loaded value.        if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context))          return true; -    } else if (MallocInst *MI = dyn_cast<MallocInst>(StoredOnceVal)) { -      if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD, Context)) -        return true;      } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {        if (getMallocAllocatedType(CI)) {          BitCastInst* BCI = NULL;  | 
