vendor/llvm/llvm-r84176

author: Roman Divacky <rdivacky@FreeBSD.org> 2009-10-15 07:47:49 +0000
committer: Roman Divacky <rdivacky@FreeBSD.org> 2009-10-15 07:47:49 +0000
commit: 26137f35958fa2237eceb06ef7003e3c19159129 (patch)
tree: 6441ce5f8a25ef18b4a8082f3cc834f7c8556f61
parent: 59850d0874429601812bc13408cb1f776649027c (diff)
28 files changed, 420 insertions, 377 deletions
diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h
index 632728bf0d17..52ac5f9c5dfb 100644
--- a/include/llvm/ADT/DenseMapInfo.h
+++ b/include/llvm/ADT/DenseMapInfo.h
@@ -89,7 +89,7 @@ template<> struct DenseMapInfo<unsigned long long> {
   static inline unsigned long long getEmptyKey() { return ~0ULL; }
   static inline unsigned long long getTombstoneKey() { return ~0ULL - 1ULL; }
   static unsigned getHashValue(const unsigned long long& Val) {
-    return Val * 37ULL;
+    return (unsigned)Val * 37ULL;
   }
   static bool isPod() { return true; }
   static bool isEqual(const unsigned long long& LHS,
diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h
index 14f4ac8123d6..86279099e32d 100644
--- a/include/llvm/ADT/ImmutableSet.h
+++ b/include/llvm/ADT/ImmutableSet.h
@@ -988,8 +988,8 @@ public:
     BumpPtrAllocator& getAllocator() { return F.getAllocator(); }
 
   private:
-    Factory(const Factory& RHS) {};
-    void operator=(const Factory& RHS) {};
+    Factory(const Factory& RHS) {}
+    void operator=(const Factory& RHS) {}
   };
 
   friend class Factory;
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 39563f733068..fdbbb1ee20ea 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -500,8 +500,8 @@ namespace llvm {
     /// ComputeOperandLatency - Override dependence edge latency using
     /// operand use/def information
     ///
-    virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
-                                       SDep& dep) const { };
+    virtual void ComputeOperandLatency(SUnit *, SUnit *,
+                                       SDep&) const { }
 
     /// Schedule - Order nodes according to selected style, filling
     /// in the Sequence member.
diff --git a/include/llvm/CompilerDriver/CompilationGraph.h b/include/llvm/CompilerDriver/CompilationGraph.h
index 3daafd58a7c2..ba6ff4714ce4 100644
--- a/include/llvm/CompilerDriver/CompilationGraph.h
+++ b/include/llvm/CompilerDriver/CompilationGraph.h
@@ -43,7 +43,7 @@ namespace llvmc {
   class Edge : public llvm::RefCountedBaseVPTR<Edge> {
   public:
     Edge(const std::string& T) : ToolName_(T) {}
-    virtual ~Edge() {};
+    virtual ~Edge() {}
 
     const std::string& ToolName() const { return ToolName_; }
     virtual unsigned Weight(const InputLanguagesSet& InLangs) const = 0;
diff --git a/include/llvm/ExecutionEngine/JITMemoryManager.h b/include/llvm/ExecutionEngine/JITMemoryManager.h
index 21dee553474c..9f6fb63fbe56 100644
--- a/include/llvm/ExecutionEngine/JITMemoryManager.h
+++ b/include/llvm/ExecutionEngine/JITMemoryManager.h
@@ -149,7 +149,7 @@ public:
   /// CheckInvariants - For testing only.  Return true if all internal
   /// invariants are preserved, or return false and set ErrorStr to a helpful
   /// error message.
-  virtual bool CheckInvariants(std::string &ErrorStr) {
+  virtual bool CheckInvariants(std::string &) {
     return true;
   }
 
diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h
index 63c2da2e7dfd..13b97b992ce2 100644
--- a/include/llvm/Metadata.h
+++ b/include/llvm/Metadata.h
@@ -181,7 +181,7 @@ public:
   /// duplicates
   void Profile(FoldingSetNodeID &ID) const;
 
-  virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
+  virtual void replaceUsesOfWithOnConstant(Value *, Value *, Use *) {
     llvm_unreachable("This should never be called because MDNodes have no ops");
   }
 
@@ -291,7 +291,7 @@ public:
     return false;
   }
 
-  virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
+  virtual void replaceUsesOfWithOnConstant(Value *, Value *, Use *) {
     llvm_unreachable(
                 "This should never be called because NamedMDNodes have no ops");
   }
@@ -361,7 +361,7 @@ public:
 
   /// ValueIsDeleted - This handler is used to update metadata store
   /// when a value is deleted.
-  void ValueIsDeleted(const Value *V) {}
+  void ValueIsDeleted(const Value *) {}
   void ValueIsDeleted(const Instruction *Inst) {
     removeMDs(Inst);
   }
diff --git a/include/llvm/Operator.h b/include/llvm/Operator.h
index 2b5cc57e75dd..60865aa8ad45 100644
--- a/include/llvm/Operator.h
+++ b/include/llvm/Operator.h
@@ -57,8 +57,8 @@ public:
   }
 
   static inline bool classof(const Operator *) { return true; }
-  static inline bool classof(const Instruction *I) { return true; }
-  static inline bool classof(const ConstantExpr *I) { return true; }
+  static inline bool classof(const Instruction *) { return true; }
+  static inline bool classof(const ConstantExpr *) { return true; }
   static inline bool classof(const Value *V) {
     return isa<Instruction>(V) || isa<ConstantExpr>(V);
   }
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index eb4c92281c9b..27919365227c 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -276,7 +276,7 @@ public:
   /// doInitialization - Virtual method overridden by subclasses to do
   /// any necessary per-module initialization.
   ///
-  virtual bool doInitialization(Module &M) { return false; }
+  virtual bool doInitialization(Module &) { return false; }
   
   /// runOnFunction - Virtual method overriden by subclasses to do the
   /// per-function processing of the pass.
@@ -328,7 +328,7 @@ public:
   /// doInitialization - Virtual method overridden by subclasses to do
   /// any necessary per-module initialization.
   ///
-  virtual bool doInitialization(Module &M) { return false; }
+  virtual bool doInitialization(Module &) { return false; }
 
   /// doInitialization - Virtual method overridden by BasicBlockPass subclasses
   /// to do any necessary per-function initialization.
diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h
index dc73979bb09b..ca32f75b2888 100644
--- a/include/llvm/Support/CommandLine.h
+++ b/include/llvm/Support/CommandLine.h
@@ -660,7 +660,7 @@ template<>
 class parser<std::string> : public basic_parser<std::string> {
 public:
   // parse - Return true on error.
-  bool parse(Option &, StringRef ArgName, StringRef Arg, std::string &Value) {
+  bool parse(Option &, StringRef, StringRef Arg, std::string &Value) {
     Value = Arg.str();
     return false;
   }
@@ -681,7 +681,7 @@ template<>
 class parser<char> : public basic_parser<char> {
 public:
   // parse - Return true on error.
-  bool parse(Option &, StringRef ArgName, StringRef Arg, char &Value) {
+  bool parse(Option &, StringRef, StringRef Arg, char &Value) {
     Value = Arg[0];
     return false;
   }
diff --git a/include/llvm/Support/DebugLoc.h b/include/llvm/Support/DebugLoc.h
index 55c3c4ffbd76..362390f62876 100644
--- a/include/llvm/Support/DebugLoc.h
+++ b/include/llvm/Support/DebugLoc.h
@@ -29,10 +29,10 @@ namespace llvm {
     unsigned Line, Col;
 
     DebugLocTuple()
-      : Scope(0), InlinedAtLoc(0), Line(~0U), Col(~0U) {};
+      : Scope(0), InlinedAtLoc(0), Line(~0U), Col(~0U) {}
 
     DebugLocTuple(MDNode *n, MDNode *i, unsigned l, unsigned c)
-      : Scope(n), InlinedAtLoc(i), Line(l), Col(c) {};
+      : Scope(n), InlinedAtLoc(i), Line(l), Col(c) {}
 
     bool operator==(const DebugLocTuple &DLT) const {
       return Scope == DLT.Scope &&
diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h
index 7827dd83804b..8012b6fdfc08 100644
--- a/include/llvm/Support/raw_ostream.h
+++ b/include/llvm/Support/raw_ostream.h
@@ -51,7 +51,7 @@ private:
   /// for a \see write_impl() call to handle the data which has been put into
   /// this buffer.
   char *OutBufStart, *OutBufEnd, *OutBufCur;
-  
+
   enum BufferKind {
     Unbuffered = 0,
     InternalBuffer,
@@ -211,7 +211,7 @@ public:
     return *this;
   }
 
-  raw_ostream &operator<<(double N);  
+  raw_ostream &operator<<(double N);
 
   /// write_hex - Output \arg N in hexadecimal, without any prefix or padding.
   raw_ostream &write_hex(unsigned long long N);
@@ -224,8 +224,8 @@ public:
 
   /// indent - Insert 'NumSpaces' spaces.
   raw_ostream &indent(unsigned NumSpaces);
-  
-  
+
+
   /// Changes the foreground color of text that will be output from this point
   /// forward.
   /// @param colors ANSI color to use, the special SAVEDCOLOR can be used to
@@ -233,8 +233,8 @@ public:
   /// @param bold bold/brighter text, default false
   /// @param bg if true change the background, default: change foreground
   /// @returns itself so it can be used within << invocations
-  virtual raw_ostream &changeColor(enum Colors colors, bool bold=false,
-                                   bool  bg=false) { return *this; }
+  virtual raw_ostream &changeColor(enum Colors, bool = false,
+				   bool = false) { return *this; }
 
   /// Resets the colors to terminal defaults. Call this when you are done
   /// outputting colored text, or before program exit.
@@ -253,7 +253,7 @@ private:
   /// write_impl - The is the piece of the class that is implemented
   /// by subclasses.  This writes the \args Size bytes starting at
   /// \arg Ptr to the underlying stream.
-  /// 
+  ///
   /// This function is guaranteed to only be called at a point at which it is
   /// safe for the subclass to install a new buffer via SetBuffer.
   ///
@@ -331,7 +331,7 @@ class raw_fd_ostream : public raw_ostream {
   virtual size_t preferred_buffer_size();
 
 public:
-  
+
   enum {
     /// F_Excl - When opening a file, this flag makes raw_fd_ostream
     /// report an error if the file already exists.
@@ -346,7 +346,7 @@ public:
     /// make this distinction.
     F_Binary = 4
   };
-  
+
   /// raw_fd_ostream - Open the specified file for writing. If an error occurs,
   /// information about the error is put into ErrorInfo, and the stream should
   /// be immediately destroyed; the string will be empty if no error occurred.
@@ -359,10 +359,10 @@ public:
 
   /// raw_fd_ostream ctor - FD is the file descriptor that this writes to.  If
   /// ShouldClose is true, this closes the file when the stream is destroyed.
-  raw_fd_ostream(int fd, bool shouldClose, 
-                 bool unbuffered=false) : raw_ostream(unbuffered), FD(fd), 
+  raw_fd_ostream(int fd, bool shouldClose,
+                 bool unbuffered=false) : raw_ostream(unbuffered), FD(fd),
                                           ShouldClose(shouldClose) {}
-  
+
   ~raw_fd_ostream();
 
   /// close - Manually flush the stream and close the file.
@@ -465,7 +465,7 @@ public:
 class raw_null_ostream : public raw_ostream {
   /// write_impl - See raw_ostream::write_impl.
   virtual void write_impl(const char *Ptr, size_t size);
-  
+
   /// current_pos - Return the current position within the stream, not
   /// counting the bytes currently in the buffer.
   virtual uint64_t current_pos();
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 2c4efc4985b3..c5be6167e060 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -310,6 +310,28 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
       switch (II->getIntrinsicID()) {
       default: break;
+      case Intrinsic::memcpy:
+      case Intrinsic::memmove: {
+        unsigned Len = ~0U;
+        if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getOperand(3)))
+          Len = LenCI->getZExtValue();
+        Value *Dest = II->getOperand(1);
+        Value *Src = II->getOperand(2);
+        if (alias(Dest, Len, P, Size) == NoAlias) {
+          if (alias(Src, Len, P, Size) == NoAlias)
+            return NoModRef;
+          return Ref;
+        }
+        }
+        break;
+      case Intrinsic::memset:
+        if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getOperand(3))) {
+          unsigned Len = LenCI->getZExtValue();
+          Value *Dest = II->getOperand(1);
+          if (alias(Dest, Len, P, Size) == NoAlias)
+            return NoModRef;
+        }
+        break;
       case Intrinsic::atomic_cmp_swap:
       case Intrinsic::atomic_swap:
       case Intrinsic::atomic_load_add:
@@ -322,9 +344,25 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
       case Intrinsic::atomic_load_min:
       case Intrinsic::atomic_load_umax:
       case Intrinsic::atomic_load_umin:
-        if (alias(II->getOperand(1), Size, P, Size) == NoAlias)
-          return NoModRef;
+        if (TD) {
+          Value *Op1 = II->getOperand(1);
+          unsigned Op1Size = TD->getTypeStoreSize(Op1->getType());
+          if (alias(Op1, Op1Size, P, Size) == NoAlias)
+            return NoModRef;
+        }
         break;
+      case Intrinsic::lifetime_start:
+      case Intrinsic::lifetime_end:
+      case Intrinsic::invariant_start: {
+        unsigned PtrSize = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
+        if (alias(II->getOperand(2), PtrSize, P, Size) == NoAlias)
+          return NoModRef;
+      }
+      case Intrinsic::invariant_end: {
+        unsigned PtrSize = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
+        if (alias(II->getOperand(3), PtrSize, P, Size) == NoAlias)
+          return NoModRef;
+      }
       }
     }
   }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 4394ec08ef22..291485109562 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -145,7 +145,10 @@ class DbgConcreteScope;
 class VISIBILITY_HIDDEN DbgScope {
   DbgScope *Parent;                   // Parent to this scope.
   DIDescriptor Desc;                  // Debug info descriptor for scope.
-                                      // Either subprogram or block.
+                                      // FIXME use WeakVH for Desc.
+  WeakVH InlinedAt;                   // If this scope represents inlined
+                                      // function body then this is the location
+                                      // where this body is inlined.
   unsigned StartLabelID;              // Label ID of the beginning of scope.
   unsigned EndLabelID;                // Label ID of the end of scope.
   const MachineInstr *LastInsn;       // Last instruction of this scope.
@@ -157,14 +160,17 @@ class VISIBILITY_HIDDEN DbgScope {
   // Private state for dump()
   mutable unsigned IndentLevel;
 public:
-  DbgScope(DbgScope *P, DIDescriptor D)
-    : Parent(P), Desc(D), StartLabelID(0), EndLabelID(0), LastInsn(0),
-      FirstInsn(0), IndentLevel(0) {}
+  DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0)
+    : Parent(P), Desc(D), InlinedAt(I), StartLabelID(0), EndLabelID(0), 
+      LastInsn(0), FirstInsn(0), IndentLevel(0) {}
   virtual ~DbgScope();
 
   // Accessors.
   DbgScope *getParent()          const { return Parent; }
   DIDescriptor getDesc()         const { return Desc; }
+  MDNode *getInlinedAt()   const { 
+    return dyn_cast_or_null<MDNode>(InlinedAt); 
+  }
   unsigned getStartLabelID()     const { return StartLabelID; }
   unsigned getEndLabelID()       const { return EndLabelID; }
   SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
@@ -1296,29 +1302,39 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
 
 /// getOrCreateScope - Returns the scope associated with the given descriptor.
 ///
-DbgScope *DwarfDebug::getDbgScope(MDNode *N, const MachineInstr *MI) {
+DbgScope *DwarfDebug::getDbgScope(MDNode *N, const MachineInstr *MI,
+                                  MDNode *InlinedAt) {
   DbgScope *&Slot = DbgScopeMap[N];
   if (Slot) return Slot;
 
   DbgScope *Parent = NULL;
 
-  DIDescriptor Scope(N);
-  if (Scope.isCompileUnit()) {
-    return NULL;
-  } else if (Scope.isSubprogram()) {
-    DISubprogram SP(N);
-    DIDescriptor ParentDesc = SP.getContext();
-    if (!ParentDesc.isNull() && !ParentDesc.isCompileUnit())
-      Parent = getDbgScope(ParentDesc.getNode(), MI);
-  } else if (Scope.isLexicalBlock()) {
-    DILexicalBlock DB(N);
-    DIDescriptor ParentDesc = DB.getContext();
-    if (!ParentDesc.isNull())
-      Parent = getDbgScope(ParentDesc.getNode(), MI);
-  } else
-    assert (0 && "Unexpected scope info");
-
-  Slot = new DbgScope(Parent, DIDescriptor(N));
+  if (InlinedAt) {
+    DILocation IL(InlinedAt);
+    assert (!IL.isNull() && "Invalid InlindAt location!");
+    DenseMap<MDNode *, DbgScope *>::iterator DSI = 
+      DbgScopeMap.find(IL.getScope().getNode());
+    assert (DSI != DbgScopeMap.end() && "Unable to find InlineAt scope!");
+    Parent = DSI->second;
+  } else {
+    DIDescriptor Scope(N);
+    if (Scope.isCompileUnit()) {
+      return NULL;
+    } else if (Scope.isSubprogram()) {
+      DISubprogram SP(N);
+      DIDescriptor ParentDesc = SP.getContext();
+      if (!ParentDesc.isNull() && !ParentDesc.isCompileUnit())
+        Parent = getDbgScope(ParentDesc.getNode(), MI, InlinedAt);
+    } else if (Scope.isLexicalBlock()) {
+      DILexicalBlock DB(N);
+      DIDescriptor ParentDesc = DB.getContext();
+      if (!ParentDesc.isNull())
+        Parent = getDbgScope(ParentDesc.getNode(), MI, InlinedAt);
+    } else
+      assert (0 && "Unexpected scope info");
+  }
+
+  Slot = new DbgScope(Parent, DIDescriptor(N), InlinedAt);
   Slot->setFirstInsn(MI);
 
   if (Parent)
@@ -1795,7 +1811,10 @@ void DwarfDebug::CollectVariableInfo() {
     DIVariable DV (Var);
     if (DV.isNull()) continue;
     unsigned VSlot = VI->second;
-    DbgScope *Scope = getDbgScope(DV.getContext().getNode(),  NULL);
+    DenseMap<MDNode *, DbgScope *>::iterator DSI = 
+      DbgScopeMap.find(DV.getContext().getNode());
+    assert (DSI != DbgScopeMap.end() && "Unable to find variable scope!");
+    DbgScope *Scope = DSI->second;
     Scope->AddVariable(new DbgVariable(DV, VSlot, false));
   }
 }
@@ -1849,7 +1868,7 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
       // into a scope DIE at the end.
       DIDescriptor D(DLT.Scope);
       if (!D.isCompileUnit()) {
-        DbgScope *Scope = getDbgScope(DLT.Scope, MInsn);
+        DbgScope *Scope = getDbgScope(DLT.Scope, MInsn, DLT.InlinedAtLoc);
         Scope->setLastInsn(MInsn);
       }
     }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index bd377c5593cc..7f711042c071 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -364,7 +364,7 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
   /// getDbgScope - Returns the scope associated with the given descriptor.
   ///
   DbgScope *getOrCreateScope(MDNode *N);
-  DbgScope *getDbgScope(MDNode *N, const MachineInstr *MI);
+  DbgScope *getDbgScope(MDNode *N, const MachineInstr *MI, MDNode *InlinedAt);
 
   /// ConstructDbgScope - Construct the components of a scope.
   ///
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index 932fae4f316c..42b24a65b450 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -24,7 +24,7 @@
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
-STATISTIC(NumHeaderAligned, "Number of loop header aligned");
+STATISTIC(NumLoopsAligned,  "Number of loops aligned");
 STATISTIC(NumIntraElim,     "Number of intra loop branches eliminated");
 STATISTIC(NumIntraMoved,    "Number of intra loop branches moved");
 
@@ -42,9 +42,6 @@ namespace {
     SmallVector<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 4>
     UncondJmpMBBs;
 
-    /// LoopHeaders - A list of BBs which are loop headers.
-    SmallVector<MachineBasicBlock*, 4> LoopHeaders;
-
   public:
     static char ID;
     CodePlacementOpt() : MachineFunctionPass(&ID) {}
@@ -62,9 +59,8 @@ namespace {
 
   private:
     bool OptimizeIntraLoopEdges();
-    bool HeaderShouldBeAligned(MachineBasicBlock *MBB, MachineLoop *L,
-                               SmallPtrSet<MachineBasicBlock*, 4> &DoNotAlign);
     bool AlignLoops(MachineFunction &MF);
+    bool AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align);
   };
 
   char CodePlacementOpt::ID = 0;
@@ -233,57 +229,12 @@ bool CodePlacementOpt::OptimizeIntraLoopEdges() {
       ChangedMBBs.insert(FtMBB);
     }
     Changed = true;
-
-    // If BB is the loop latch, we may have a new loop headr.
-    if (MBB == L->getLoopLatch()) {
-      assert(MLI->isLoopHeader(SuccMBB) &&
-             "Only succ of loop latch is not the header?");
-      if (HasOneIntraSucc && IntraSucc)
-        std::replace(LoopHeaders.begin(),LoopHeaders.end(), SuccMBB, IntraSucc);
-    }
   }
 
   ++NumIntraMoved;
   return Changed;
 }
 
-/// HeaderShouldBeAligned - Return true if the specified loop header block
-/// should be aligned. For now, we will not align it if all the predcessors
-/// (i.e. loop back edges) are laid out above the header. FIXME: Do not
-/// align small loops.
-bool
-CodePlacementOpt::HeaderShouldBeAligned(MachineBasicBlock *MBB, MachineLoop *L,
-                               SmallPtrSet<MachineBasicBlock*, 4> &DoNotAlign) {
-  if (DoNotAlign.count(MBB))
-    return false;
-
-  bool BackEdgeBelow = false;
-  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
-         PE = MBB->pred_end(); PI != PE; ++PI) {
-    MachineBasicBlock *PredMBB = *PI;
-    if (PredMBB == MBB || PredMBB->getNumber() > MBB->getNumber()) {
-      BackEdgeBelow = true;
-      break;
-    }
-  }
-
-  if (!BackEdgeBelow)
-    return false;
-
-  // Ok, we are going to align this loop header. If it's an inner loop,
-  // do not align its outer loop.
-  MachineBasicBlock *PreHeader = L->getLoopPreheader();
-  if (PreHeader) {
-    MachineLoop *L = MLI->getLoopFor(PreHeader);
-    if (L) {
-      MachineBasicBlock *HeaderBlock = L->getHeader();
-      HeaderBlock->setAlignment(0);
-      DoNotAlign.insert(HeaderBlock);
-    }
-  }
-  return true;
-}
-
 /// AlignLoops - Align loop headers to target preferred alignments.
 ///
 bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
@@ -295,26 +246,37 @@ bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
   if (!Align)
     return false;  // Don't care about loop alignment.
 
-  // Make sure blocks are numbered in order
-  MF.RenumberBlocks();
+  bool Changed = false;
+
+  for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+       I != E; ++I)
+    Changed |= AlignLoop(MF, *I, Align);
 
+  return Changed;
+}
+
+bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L,
+                                 unsigned Align) {
   bool Changed = false;
-  SmallPtrSet<MachineBasicBlock*, 4> DoNotAlign;
-  for (unsigned i = 0, e = LoopHeaders.size(); i != e; ++i) {
-    MachineBasicBlock *HeaderMBB = LoopHeaders[i];
-    MachineBasicBlock *PredMBB = prior(MachineFunction::iterator(HeaderMBB));
-    MachineLoop *L = MLI->getLoopFor(HeaderMBB);
-    if (L == MLI->getLoopFor(PredMBB))
-      // If previously BB is in the same loop, don't align this BB. We want
-      // to prevent adding noop's inside a loop.
-      continue;
-    if (HeaderShouldBeAligned(HeaderMBB, L, DoNotAlign)) {
-      HeaderMBB->setAlignment(Align);
-      Changed = true;
-      ++NumHeaderAligned;
-    }
+
+  // Do alignment for nested loops.
+  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+    Changed |= AlignLoop(MF, *I, Align);
+
+  MachineBasicBlock *TopMBB = L->getHeader();
+  if (TopMBB == MF.begin()) return Changed;
+
+  MachineBasicBlock *PredMBB = prior(MachineFunction::iterator(TopMBB));
+  while (MLI->getLoopFor(PredMBB) == L) {
+    TopMBB = PredMBB;
+    if (TopMBB == MF.begin()) return Changed;
+    PredMBB = prior(MachineFunction::iterator(TopMBB));
   }
 
+  TopMBB->setAlignment(Align);
+  Changed = true;
+  ++NumLoopsAligned;
+
   return Changed;
 }
 
@@ -326,7 +288,7 @@ bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
   TLI = MF.getTarget().getTargetLowering();
   TII = MF.getTarget().getInstrInfo();
 
-  // Analyze the BBs first and keep track of loop headers and BBs that
+  // Analyze the BBs first and keep track of BBs that
   // end with an unconditional jmp to another block in the same loop.
   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
     MachineBasicBlock *MBB = I;
@@ -335,8 +297,6 @@ bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
     MachineLoop *L = MLI->getLoopFor(MBB);
     if (!L)
       continue;
-    if (MLI->isLoopHeader(MBB))
-      LoopHeaders.push_back(MBB);
 
     MachineBasicBlock *TBB = 0, *FBB = 0;
     SmallVector<MachineOperand, 4> Cond;
@@ -352,7 +312,6 @@ bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
 
   ChangedMBBs.clear();
   UncondJmpMBBs.clear();
-  LoopHeaders.clear();
 
   return Changed;
 }
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 139e0291ea7a..96c655c1a9b8 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -323,10 +323,21 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
       // The last partial def kills the register.
       LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
                                                 true/*IsImp*/, true/*IsKill*/));
-    else
+    else {
+      MachineOperand *MO =
+        LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
+      bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
       // If the last reference is the last def, then it's not used at all.
       // That is, unless we are currently processing the last reference itself.
       LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+      if (NeedEC) {
+        // If we are adding a subreg def and the superreg def is marked early
+        // clobber, add an early clobber marker to the subreg def.
+        MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
+        if (MO)
+          MO->setIsEarlyClobber();
+      }
+    }
   } else if (!PhysRegUse[Reg]) {
     // Partial uses. Mark register def dead and add implicit def of
     // sub-registers which are used.
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index cbe5c7cb51e3..b9d3ba78794e 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -212,17 +212,17 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
         isEarlyClobber()) {
       OS << '<';
       bool NeedComma = false;
-      if (isImplicit()) {
-        if (NeedComma) OS << ',';
-        OS << (isDef() ? "imp-def" : "imp-use");
-        NeedComma = true;
-      } else if (isDef()) {
+      if (isDef()) {
         if (NeedComma) OS << ',';
         if (isEarlyClobber())
           OS << "earlyclobber,";
+        if (isImplicit())
+          OS << "imp-";
         OS << "def";
         NeedComma = true;
-      }
+      } else if (isImplicit())
+          OS << "imp-use";
+
       if (isKill() || isDead() || isUndef()) {
         if (NeedComma) OS << ',';
         if (isKill())  OS << "kill";
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 8793df7705fa..7af0bba19735 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -767,7 +767,7 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
     unsigned CurrentScratchReg = 0;
     bool havePrevValue = false;
     unsigned PrevScratchReg = 0;
-    int PrevValue;
+    int PrevValue = 0;
     MachineInstr *PrevLastUseMI = NULL;
     unsigned PrevLastUseOp = 0;
     bool trackingCurrentValue = false;
@@ -778,9 +778,7 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
     // directly.
     for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
       MachineInstr *MI = I;
-      // Likewise, call getNumOperands() each iteration, as the MI may change
-      // inside the loop (with 'i' updated accordingly).
-      for (unsigned i = 0; i != MI->getNumOperands(); ++i)
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
         if (MI->getOperand(i).isReg()) {
           MachineOperand &MO = MI->getOperand(i);
           unsigned Reg = MO.getReg();
@@ -853,6 +851,7 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
               // just calculating the value we already have.
               BB->erase(I, LastUseMI);
               MI = I = LastUseMI;
+              e = MI->getNumOperands();
 
               CurrentScratchReg = PrevScratchReg;
               // Extend the live range of the register
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 309e3ba2ac25..b0bd04bade40 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -74,16 +74,28 @@ namespace {
 
     /// CPUser - One user of a constant pool, keeping the machine instruction
     /// pointer, the constant pool being referenced, and the max displacement
-    /// allowed from the instruction to the CP.
+    /// allowed from the instruction to the CP.  The HighWaterMark records the
+    /// highest basic block where a new CPEntry can be placed.  To ensure this
+    /// pass terminates, the CP entries are initially placed at the end of the
+    /// function and then move monotonically to lower addresses.  The
+    /// exception to this rule is when the current CP entry for a particular
+    /// CPUser is out of range, but there is another CP entry for the same
+    /// constant value in range.  We want to use the existing in-range CP
+    /// entry, but if it later moves out of range, the search for new water
+    /// should resume where it left off.  The HighWaterMark is used to record
+    /// that point.
     struct CPUser {
       MachineInstr *MI;
       MachineInstr *CPEMI;
+      MachineBasicBlock *HighWaterMark;
       unsigned MaxDisp;
       bool NegOk;
       bool IsSoImm;
       CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
              bool neg, bool soimm)
-        : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {}
+        : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {
+        HighWaterMark = CPEMI->getParent();
+      }
     };
 
     /// CPUsers - Keep track of all of the machine instructions that use various
@@ -962,8 +974,8 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
          B = WaterList.begin();; --IP) {
     MachineBasicBlock* WaterBB = *IP;
     // Check if water is in range and at a lower address than the current one.
-    if (WaterIsInRange(UserOffset, WaterBB, U) &&
-        WaterBB->getNumber() < U.CPEMI->getParent()->getNumber()) {
+    if (WaterBB->getNumber() < U.HighWaterMark->getNumber() &&
+        WaterIsInRange(UserOffset, WaterBB, U)) {
       unsigned WBBId = WaterBB->getNumber();
       if (isThumb &&
           (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) {
@@ -1006,14 +1018,12 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
                                BBSizes[UserMBB->getNumber()];
   assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]);
 
-  // If the use is at the end of the block, or the end of the block
-  // is within range, make new water there.  (The addition below is
-  // for the unconditional branch we will be adding:  4 bytes on ARM + Thumb2,
-  // 2 on Thumb1.  Possible Thumb1 alignment padding is allowed for
+  // If the block does not end in an unconditional branch already, and if the
+  // end of the block is within range, make new water there.  (The addition
+  // below is for the unconditional branch we will be adding: 4 bytes on ARM +
+  // Thumb2, 2 on Thumb1.  Possible Thumb1 alignment padding is allowed for
   // inside OffsetIsInRange.
-  // If the block ends in an unconditional branch already, it is water,
-  // and is known to be out of range, so we'll always be adding a branch.)
-  if (&UserMBB->back() == UserMI ||
+  if (BBHasFallthrough(UserMBB) &&
       OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4),
                       U.MaxDisp, U.NegOk, U.IsSoImm)) {
     DEBUG(errs() << "Split at end of block\n");
@@ -1131,6 +1141,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
 
   // Now that we have an island to add the CPE to, clone the original CPE and
   // add it to the island.
+  U.HighWaterMark = NewIsland;
   U.CPEMI = BuildMI(NewIsland, DebugLoc::getUnknownLoc(),
                     TII->get(ARM::CONSTPOOL_ENTRY))
                 .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index bebf4e839994..c39de0a12b8f 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -133,6 +133,13 @@ private:
   SDNode *SelectVLD(SDValue Op, unsigned NumVecs, unsigned *DOpcodes,
                     unsigned *QOpcodes0, unsigned *QOpcodes1);
 
+  /// SelectVST - Select NEON store intrinsics.  NumVecs should
+  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
+  /// stores of D registers and even subregs and odd subregs of Q registers.
+  /// For NumVecs == 2, QOpcodes1 is not used.
+  SDNode *SelectVST(SDValue Op, unsigned NumVecs, unsigned *DOpcodes,
+                    unsigned *QOpcodes0, unsigned *QOpcodes1);
+
   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
   /// load/store of D registers and even subregs and odd subregs of Q registers.
@@ -1063,13 +1070,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
     ResTys.push_back(MemAddr.getValueType());
     ResTys.push_back(MVT::Other);
 
-    // Load the even subreg.
+    // Load the even subregs.
     unsigned Opc = QOpcodes0[OpcodeIndex];
     const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Chain };
     SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 4);
     Chain = SDValue(VLdA, NumVecs+1);
 
-    // Load the odd subreg.
+    // Load the odd subregs.
     Opc = QOpcodes1[OpcodeIndex];
     const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, Chain };
     SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 4);
@@ -1085,6 +1092,95 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
   return NULL;
 }
 
+SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
+                                   unsigned *DOpcodes, unsigned *QOpcodes0,
+                                   unsigned *QOpcodes1) {
+  assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range");
+  SDNode *N = Op.getNode();
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue MemAddr, MemUpdate, MemOpc;
+  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+    return NULL;
+
+  SDValue Chain = N->getOperand(0);
+  EVT VT = N->getOperand(3).getValueType();
+  bool is64BitVector = VT.is64BitVector();
+
+  unsigned OpcodeIndex;
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("unhandled vst type");
+    // Double-register operations:
+  case MVT::v8i8:  OpcodeIndex = 0; break;
+  case MVT::v4i16: OpcodeIndex = 1; break;
+  case MVT::v2f32:
+  case MVT::v2i32: OpcodeIndex = 2; break;
+  case MVT::v1i64: OpcodeIndex = 3; break;
+    // Quad-register operations:
+  case MVT::v16i8: OpcodeIndex = 0; break;
+  case MVT::v8i16: OpcodeIndex = 1; break;
+  case MVT::v4f32:
+  case MVT::v4i32: OpcodeIndex = 2; break;
+  }
+
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(MemAddr);
+  Ops.push_back(MemUpdate);
+  Ops.push_back(MemOpc);
+
+  if (is64BitVector) {
+    unsigned Opc = DOpcodes[OpcodeIndex];
+    for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+      Ops.push_back(N->getOperand(Vec+3));
+    Ops.push_back(Chain);
+    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+4);
+  }
+
+  EVT RegVT = GetNEONSubregVT(VT);
+  if (NumVecs == 2) {
+    // Quad registers are directly supported for VST2,
+    // storing 2 pairs of D regs.
+    unsigned Opc = QOpcodes0[OpcodeIndex];
+    for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
+                                                   N->getOperand(Vec+3)));
+      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+                                                   N->getOperand(Vec+3)));
+    }
+    Ops.push_back(Chain);
+    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 8);
+  }
+
+  // Otherwise, quad registers are stored with two separate instructions,
+  // where one stores the even registers and the other stores the odd registers.
+
+  // Enable writeback to the address register.
+  MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
+
+  // Store the even subregs.
+  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+    Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
+                                                 N->getOperand(Vec+3)));
+  Ops.push_back(Chain);
+  unsigned Opc = QOpcodes0[OpcodeIndex];
+  SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+                                        MVT::Other, Ops.data(), NumVecs+4);
+  Chain = SDValue(VStA, 1);
+
+  // Store the odd subregs.
+  Ops[0] = SDValue(VStA, 0); // MemAddr
+  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+    Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+                                                N->getOperand(Vec+3));
+  Ops[NumVecs+3] = Chain;
+  Opc = QOpcodes1[OpcodeIndex];
+  SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+                                        MVT::Other, Ops.data(), NumVecs+4);
+  Chain = SDValue(VStB, 1);
+  ReplaceUses(SDValue(N, 0), Chain);
+  return NULL;
+}
+
 SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
                                          unsigned NumVecs, unsigned *DOpcodes,
                                          unsigned *QOpcodes0,
@@ -1612,9 +1708,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
   case ISD::INTRINSIC_VOID:
   case ISD::INTRINSIC_W_CHAIN: {
     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
-    EVT VT = N->getValueType(0);
-    unsigned Opc = 0;
-
     switch (IntNo) {
     default:
       break;
@@ -1664,178 +1757,26 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
     }
 
     case Intrinsic::arm_neon_vst2: {
-      SDValue MemAddr, MemUpdate, MemOpc;
-      if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
-        return NULL;
-      SDValue Chain = N->getOperand(0);
-      VT = N->getOperand(3).getValueType();
-      if (VT.is64BitVector()) {
-        switch (VT.getSimpleVT().SimpleTy) {
-        default: llvm_unreachable("unhandled vst2 type");
-        case MVT::v8i8:  Opc = ARM::VST2d8; break;
-        case MVT::v4i16: Opc = ARM::VST2d16; break;
-        case MVT::v2f32:
-        case MVT::v2i32: Opc = ARM::VST2d32; break;
-        case MVT::v1i64: Opc = ARM::VST2d64; break;
-        }
-        const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
-                                N->getOperand(3), N->getOperand(4), Chain };
-        return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6);
-      }
-      // Quad registers are stored as pairs of double registers.
-      EVT RegVT;
-      switch (VT.getSimpleVT().SimpleTy) {
-      default: llvm_unreachable("unhandled vst2 type");
-      case MVT::v16i8: Opc = ARM::VST2q8; RegVT = MVT::v8i8; break;
-      case MVT::v8i16: Opc = ARM::VST2q16; RegVT = MVT::v4i16; break;
-      case MVT::v4f32: Opc = ARM::VST2q32; RegVT = MVT::v2f32; break;
-      case MVT::v4i32: Opc = ARM::VST2q32; RegVT = MVT::v2i32; break;
-      }
-      SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
-                                                  N->getOperand(3));
-      SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
-                                                  N->getOperand(3));
-      SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
-                                                  N->getOperand(4));
-      SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
-                                                  N->getOperand(4));
-      const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
-                              D0, D1, D2, D3, Chain };
-      return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 8);
+      unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
+                              ARM::VST2d32, ARM::VST2d64 };
+      unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 };
+      return SelectVST(Op, 2, DOpcodes, QOpcodes, 0);
     }
 
     case Intrinsic::arm_neon_vst3: {
-      SDValue MemAddr, MemUpdate, MemOpc;
-      if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
-        return NULL;
-      SDValue Chain = N->getOperand(0);
-      VT = N->getOperand(3).getValueType();
-      if (VT.is64BitVector()) {
-        switch (VT.getSimpleVT().SimpleTy) {
-        default: llvm_unreachable("unhandled vst3 type");
-        case MVT::v8i8:  Opc = ARM::VST3d8; break;
-        case MVT::v4i16: Opc = ARM::VST3d16; break;
-        case MVT::v2f32:
-        case MVT::v2i32: Opc = ARM::VST3d32; break;
-        case MVT::v1i64: Opc = ARM::VST3d64; break;
-        }
-        const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
-                                N->getOperand(3), N->getOperand(4),
-                                N->getOperand(5), Chain };
-        return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7);
-      }
-      // Quad registers are stored with two separate instructions, where one
-      // stores the even registers and the other stores the odd registers.
-      EVT RegVT;
-      unsigned Opc2 = 0;
-      switch (VT.getSimpleVT().SimpleTy) {
-      default: llvm_unreachable("unhandled vst3 type");
-      case MVT::v16i8:
-        Opc = ARM::VST3q8a;  Opc2 = ARM::VST3q8b;  RegVT = MVT::v8i8; break;
-      case MVT::v8i16:
-        Opc = ARM::VST3q16a; Opc2 = ARM::VST3q16b; RegVT = MVT::v4i16; break;
-      case MVT::v4f32:
-        Opc = ARM::VST3q32a; Opc2 = ARM::VST3q32b; RegVT = MVT::v2f32; break;
-      case MVT::v4i32:
-        Opc = ARM::VST3q32a; Opc2 = ARM::VST3q32b; RegVT = MVT::v2i32; break;
-      }
-      // Enable writeback to the address register.
-      MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
-
-      SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
-                                                  N->getOperand(3));
-      SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
-                                                  N->getOperand(4));
-      SDValue D4 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
-                                                  N->getOperand(5));
-      const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, D0, D2, D4, Chain };
-      SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
-                                            MVT::Other, OpsA, 7);
-      Chain = SDValue(VStA, 1);
-
-      SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
-                                                  N->getOperand(3));
-      SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
-                                                  N->getOperand(4));
-      SDValue D5 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
-                                                  N->getOperand(5));
-      MemAddr = SDValue(VStA, 0);
-      const SDValue OpsB[] = { MemAddr, MemUpdate, MemOpc, D1, D3, D5, Chain };
-      SDNode *VStB = CurDAG->getMachineNode(Opc2, dl, MemAddr.getValueType(),
-                                            MVT::Other, OpsB, 7);
-      Chain = SDValue(VStB, 1);
-      ReplaceUses(SDValue(N, 0), Chain);
-      return NULL;
+      unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16,
+                              ARM::VST3d32, ARM::VST3d64 };
+      unsigned QOpcodes0[] = { ARM::VST3q8a, ARM::VST3q16a, ARM::VST3q32a };
+      unsigned QOpcodes1[] = { ARM::VST3q8b, ARM::VST3q16b, ARM::VST3q32b };
+      return SelectVST(Op, 3, DOpcodes, QOpcodes0, QOpcodes1);
     }
 
     case Intrinsic::arm_neon_vst4: {
-      SDValue MemAddr, MemUpdate, MemOpc;
-      if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
-        return NULL;
-      SDValue Chain = N->getOperand(0);
-      VT = N->getOperand(3).getValueType();
-      if (VT.is64BitVector()) {
-        switch (VT.getSimpleVT().SimpleTy) {
-        default: llvm_unreachable("unhandled vst4 type");
-        case MVT::v8i8:  Opc = ARM::VST4d8; break;
-        case MVT::v4i16: Opc = ARM::VST4d16; break;
-        case MVT::v2f32:
-        case MVT::v2i32: Opc = ARM::VST4d32; break;
-        case MVT::v1i64: Opc = ARM::VST4d64; break;
-        }
-        const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
-                                N->getOperand(3), N->getOperand(4),
-                                N->getOperand(5), N->getOperand(6), Chain };
-        return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 8);
-      }
-      // Quad registers are stored with two separate instructions, where one
-      // stores the even registers and the other stores the odd registers.
-      EVT RegVT;
-      unsigned Opc2 = 0;
-      switch (VT.getSimpleVT().SimpleTy) {
-      default: llvm_unreachable("unhandled vst4 type");
-      case MVT::v16i8:
-        Opc = ARM::VST4q8a;  Opc2 = ARM::VST4q8b;  RegVT = MVT::v8i8; break;
-      case MVT::v8i16:
-        Opc = ARM::VST4q16a; Opc2 = ARM::VST4q16b; RegVT = MVT::v4i16; break;
-      case MVT::v4f32:
-        Opc = ARM::VST4q32a; Opc2 = ARM::VST4q32b; RegVT = MVT::v2f32; break;
-      case MVT::v4i32:
-        Opc = ARM::VST4q32a; Opc2 = ARM::VST4q32b; RegVT = MVT::v2i32; break;
-      }
-      // Enable writeback to the address register.
-      MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
-
-      SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
-                                                  N->getOperand(3));
-      SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
-                                                  N->getOperand(4));
-      SDValue D4 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
-                                                  N->getOperand(5));
-      SDValue D6 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
-                                                  N->getOperand(6));
-      const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc,
-                               D0, D2, D4, D6, Chain };
-      SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
-                                            MVT::Other, OpsA, 8);
-      Chain = SDValue(VStA, 1);
-
-      SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
-                                                  N->getOperand(3));
-      SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
-                                                  N->getOperand(4));
-      SDValue D5 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
-                                                  N->getOperand(5));
-      SDValue D7 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
-                                                  N->getOperand(6));
-      MemAddr = SDValue(VStA, 0);
-      const SDValue OpsB[] = { MemAddr, MemUpdate, MemOpc,
-                               D1, D3, D5, D7, Chain };
-      SDNode *VStB = CurDAG->getMachineNode(Opc2, dl, MemAddr.getValueType(),
-                                            MVT::Other, OpsB, 8);
-      Chain = SDValue(VStB, 1);
-      ReplaceUses(SDValue(N, 0), Chain);
-      return NULL;
+      unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16,
+                              ARM::VST4d32, ARM::VST4d64 };
+      unsigned QOpcodes0[] = { ARM::VST4q8a, ARM::VST4q16a, ARM::VST4q32a };
+      unsigned QOpcodes1[] = { ARM::VST4q8b, ARM::VST4q16b, ARM::VST4q32b };
+      return SelectVST(Op, 4, DOpcodes, QOpcodes0, QOpcodes1);
     }
 
     case Intrinsic::arm_neon_vst2lane: {
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 8adfac3fb4c5..6ec78bc72ee7 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -377,12 +377,15 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
   def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
                IIC_iALUr, opc, " $dst, $a, $b",
                [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
+    let Inst{4} = 0;
     let Inst{25} = 0;
     let isCommutable = Commutable;
   }
   def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
                IIC_iALUsr, opc, " $dst, $a, $b",
                [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
+    let Inst{4} = 1;
+    let Inst{7} = 0;
     let Inst{25} = 0;
   }
 }
@@ -401,11 +404,14 @@ multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
                IIC_iALUr, opc, "s $dst, $a, $b",
                [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
     let isCommutable = Commutable;
+    let Inst{4} = 0;
     let Inst{25} = 0;
   }
   def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
                IIC_iALUsr, opc, "s $dst, $a, $b",
                [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
+    let Inst{4} = 1;
+    let Inst{7} = 0;
     let Inst{25} = 0;
   }
 }
@@ -426,6 +432,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
   def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm, IIC_iCMPr,
                opc, " $a, $b",
                [(opnode GPR:$a, GPR:$b)]> {
+    let Inst{4} = 0;
     let Inst{20} = 1;
     let Inst{25} = 0;
     let isCommutable = Commutable;
@@ -433,6 +440,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
   def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iCMPsr,
                opc, " $a, $b",
                [(opnode GPR:$a, so_reg:$b)]> {
+    let Inst{4} = 1;
+    let Inst{7} = 0;
     let Inst{20} = 1;
     let Inst{25} = 0;
   }
@@ -486,12 +495,15 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
                [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
                Requires<[IsARM, CarryDefIsUnused]> {
     let isCommutable = Commutable;
+    let Inst{4} = 0;
     let Inst{25} = 0;
   }
   def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
                 DPSoRegFrm, IIC_iALUsr, opc, " $dst, $a, $b",
                [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
                Requires<[IsARM, CarryDefIsUnused]> {
+    let Inst{4} = 1;
+    let Inst{7} = 0;
     let Inst{25} = 0;
   }
   // Carry setting variants
@@ -507,6 +519,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
                [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
                Requires<[IsARM, CarryDefIsUsed]> {
     let Defs = [CPSR];
+    let Inst{4} = 0;
     let Inst{25} = 0;
   }
   def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
@@ -514,6 +527,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
                [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
                Requires<[IsARM, CarryDefIsUsed]> {
     let Defs = [CPSR];
+    let Inst{4} = 1;
+    let Inst{7} = 0;
     let Inst{25} = 0;
   }
 }
@@ -924,10 +939,18 @@ def STM : AXI4st<(outs),
 
 let neverHasSideEffects = 1 in
 def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
-                "mov", " $dst, $src", []>, UnaryDP;
+                "mov", " $dst, $src", []>, UnaryDP {
+  let Inst{4} = 0;
+  let Inst{25} = 0;
+}
+
 def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), 
                 DPSoRegFrm, IIC_iMOVsr,
-                "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP;
+                "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP {
+  let Inst{4} = 1;
+  let Inst{7} = 0;
+  let Inst{25} = 0;
+}
 
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in
 def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, IIC_iMOVi,
@@ -1146,10 +1169,15 @@ def BFC    : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
 
 def  MVNr  : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
                   "mvn", " $dst, $src",
-                  [(set GPR:$dst, (not GPR:$src))]>, UnaryDP;
+                  [(set GPR:$dst, (not GPR:$src))]>, UnaryDP {
+  let Inst{4} = 0;
+}
 def  MVNs  : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
                   IIC_iMOVsr, "mvn", " $dst, $src",
-                  [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP;
+                  [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP {
+  let Inst{4} = 1;
+  let Inst{7} = 0;
+}
 let isReMaterializable = 1, isAsCheapAsAMove = 1 in
 def  MVNi  : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, 
                   IIC_iMOVi, "mvn", " $dst, $imm",
@@ -1461,20 +1489,27 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
 def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
                 IIC_iCMOVr, "mov", " $dst, $true",
       [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
-                RegConstraint<"$false = $dst">, UnaryDP;
+                RegConstraint<"$false = $dst">, UnaryDP {
+  let Inst{4} = 0;
+  let Inst{25} = 0;
+}
 
 def MOVCCs : AI1<0b1101, (outs GPR:$dst),
                         (ins GPR:$false, so_reg:$true), DPSoRegFrm, IIC_iCMOVsr,
                 "mov", " $dst, $true",
    [/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>,
-                RegConstraint<"$false = $dst">, UnaryDP;
+                RegConstraint<"$false = $dst">, UnaryDP {
+  let Inst{4} = 1;
+  let Inst{7} = 0;
+  let Inst{25} = 0;
+}
 
 def MOVCCi : AI1<0b1101, (outs GPR:$dst),
                         (ins GPR:$false, so_imm:$true), DPFrm, IIC_iCMOVi,
                 "mov", " $dst, $true",
    [/*(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
                 RegConstraint<"$false = $dst">, UnaryDP {
-    let Inst{25} = 1;
+  let Inst{25} = 1;
 }
 
 
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index cd370aa97adb..b34aff7dcb3e 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1951,9 +1951,9 @@ defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl.u",
                              int_arm_neon_vpaddlu>;
 
 //   VPADAL   : Vector Pairwise Add and Accumulate Long
-defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpadal.s",
+defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal.s",
                               int_arm_neon_vpadals>;
-defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpadal.u",
+defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal.u",
                               int_arm_neon_vpadalu>;
 
 //   VPMAX    : Vector Pairwise Maximum
@@ -2038,7 +2038,7 @@ defm VSHLs    : N3VInt_QHSD<0, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ,
 defm VSHLu    : N3VInt_QHSD<1, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ,
                             IIC_VSHLiQ, "vshl.u", int_arm_neon_vshiftu, 0>;
 //   VSHL     : Vector Shift Left (Immediate)
-defm VSHLi    : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLiD, "vshl.i", NEONvshl>;
+defm VSHLi    : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl.i", NEONvshl>;
 //   VSHR     : Vector Shift Right (Immediate)
 defm VSHRs    : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr.s", NEONvshrs>;
 defm VSHRu    : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr.u", NEONvshru>;
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 7c96c49a34b9..f635af3974d8 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -56,6 +56,7 @@
 #include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/TargetFolder.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
@@ -186,7 +187,7 @@ namespace {
 
     /// Builder - This is an IRBuilder that automatically inserts new
     /// instructions into the worklist when they are created.
-    typedef IRBuilder<true, ConstantFolder, InstCombineIRInserter> BuilderTy;
+    typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy;
     BuilderTy *Builder;
         
     static char ID; // Pass identification, replacement for typeid
@@ -12675,16 +12676,19 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
 /// many instructions are dead or constant).  Additionally, if we find a branch
 /// whose condition is a known constant, we only visit the reachable successors.
 ///
-static void AddReachableCodeToWorklist(BasicBlock *BB, 
+static bool AddReachableCodeToWorklist(BasicBlock *BB, 
                                        SmallPtrSet<BasicBlock*, 64> &Visited,
                                        InstCombiner &IC,
                                        const TargetData *TD) {
+  bool MadeIRChange = false;
   SmallVector<BasicBlock*, 256> Worklist;
   Worklist.push_back(BB);
   
   std::vector<Instruction*> InstrsForInstCombineWorklist;
   InstrsForInstCombineWorklist.reserve(128);
 
+  SmallPtrSet<ConstantExpr*, 64> FoldedConstants;
+  
   while (!Worklist.empty()) {
     BB = Worklist.back();
     Worklist.pop_back();
@@ -12704,14 +12708,38 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,
       }
       
       // ConstantProp instruction if trivially constant.
-      if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {
-        DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
-                     << *Inst << '\n');
-        Inst->replaceAllUsesWith(C);
-        ++NumConstProp;
-        Inst->eraseFromParent();
-        continue;
+      if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
+        if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {
+          DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
+                       << *Inst << '\n');
+          Inst->replaceAllUsesWith(C);
+          ++NumConstProp;
+          Inst->eraseFromParent();
+          continue;
+        }
+      
+      
+      
+      if (TD) {
+        // See if we can constant fold its operands.
+        for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
+             i != e; ++i) {
+          ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
+          if (CE == 0) continue;
+          
+          // If we already folded this constant, don't try again.
+          if (!FoldedConstants.insert(CE))
+            continue;
+          
+          Constant *NewC =
+            ConstantFoldConstantExpression(CE, BB->getContext(), TD);
+          if (NewC && NewC != CE) {
+            *i = NewC;
+            MadeIRChange = true;
+          }
+        }
       }
+      
 
       InstrsForInstCombineWorklist.push_back(Inst);
     }
@@ -12753,11 +12781,12 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,
   // some N^2 behavior in pathological cases.
   IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
                               InstrsForInstCombineWorklist.size());
+  
+  return MadeIRChange;
 }
 
 bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
   MadeIRChange = false;
-  TD = getAnalysisIfAvailable<TargetData>();
   
   DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
         << F.getNameStr() << "\n");
@@ -12767,7 +12796,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
     // the reachable instructions.  Ignore blocks that are not reachable.  Keep
     // track of which blocks we visit.
     SmallPtrSet<BasicBlock*, 64> Visited;
-    AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
+    MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
 
     // Do a quick scan over the function.  If we find any blocks that are
     // unreachable, remove any instructions inside of them.  This prevents
@@ -12786,7 +12815,6 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
             MadeIRChange = true;
           }
 
-
           // If I is not void type then replaceAllUsesWith undef.
           // This allows ValueHandlers and custom metadata to adjust itself.
           if (!I->getType()->isVoidTy())
@@ -12810,28 +12838,17 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
     }
 
     // Instruction isn't dead, see if we can constant propagate it.
-    if (Constant *C = ConstantFoldInstruction(I, F.getContext(), TD)) {
-      DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
+    if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
+      if (Constant *C = ConstantFoldInstruction(I, F.getContext(), TD)) {
+        DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
 
-      // Add operands to the worklist.
-      ReplaceInstUsesWith(*I, C);
-      ++NumConstProp;
-      EraseInstFromFunction(*I);
-      MadeIRChange = true;
-      continue;
-    }
-
-    if (TD) {
-      // See if we can constant fold its operands.
-      for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
-        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(i))
-          if (Constant *NewC = ConstantFoldConstantExpression(CE,   
-                                  F.getContext(), TD))
-            if (NewC != CE) {
-              *i = NewC;
-              MadeIRChange = true;
-            }
-    }
+        // Add operands to the worklist.
+        ReplaceInstUsesWith(*I, C);
+        ++NumConstProp;
+        EraseInstFromFunction(*I);
+        MadeIRChange = true;
+        continue;
+      }
 
     // See if we can trivially sink this instruction to a successor basic block.
     if (I->hasOneUse()) {
@@ -12927,12 +12944,13 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
 bool InstCombiner::runOnFunction(Function &F) {
   MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
   Context = &F.getContext();
-  
+  TD = getAnalysisIfAvailable<TargetData>();
+
   
   /// Builder - This is an IRBuilder that automatically inserts new
   /// instructions into the worklist when they are created.
-  IRBuilder<true, ConstantFolder, InstCombineIRInserter> 
-    TheBuilder(F.getContext(), ConstantFolder(F.getContext()),
+  IRBuilder<true, TargetFolder, InstCombineIRInserter> 
+    TheBuilder(F.getContext(), TargetFolder(TD, F.getContext()),
                InstCombineIRInserter(Worklist));
   Builder = &TheBuilder;
   
diff --git a/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll b/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll
index 3ccbc2f04f37..5ea26e76a6af 100644
--- a/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll
+++ b/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll
@@ -1,16 +1,16 @@
-; RUN: opt -gvn -S < %s | FileCheck %s
+; RUN: opt -gvn -instcombine -S < %s | FileCheck %s
 
 declare i8 @llvm.atomic.load.add.i8.p0i8(i8*, i8)
 
-define void @foo(i8* %ptr) {
+define i8 @foo(i8* %ptr) {
   %P = getelementptr i8* %ptr, i32 0
   %Q = getelementptr i8* %ptr, i32 1
 ; CHECK: getelementptr
   %X = load i8* %P
-; CHECK: = load
   %Y = call i8 @llvm.atomic.load.add.i8.p0i8(i8* %Q, i8 1)
   %Z = load i8* %P
 ; CHECK-NOT: = load
-  ret void
-; CHECK: ret void
+  %A = sub i8 %X, %Z
+  ret i8 %A
+; CHECK: ret i8 0
 }
diff --git a/test/CodeGen/X86/avoid-loop-align-2.ll b/test/CodeGen/X86/avoid-loop-align-2.ll
index 03e69e7a1a49..fc9d1f0428fb 100644
--- a/test/CodeGen/X86/avoid-loop-align-2.ll
+++ b/test/CodeGen/X86/avoid-loop-align-2.ll
@@ -1,4 +1,8 @@
-; RUN: llc < %s -march=x86 | grep align | count 3
+; RUN: llc < %s -march=x86 | grep align | count 4
+
+; TODO: Is it a good idea to align inner loops? It's hard to know without
+; knowing what their trip counts are, or other dynamic information. For
+; now, CodeGen aligns all loops.
 
 @x = external global i32*		; <i32**> [#uses=1]
 
diff --git a/test/CodeGen/X86/avoid-loop-align.ll b/test/CodeGen/X86/avoid-loop-align.ll
index 3e68f9486cfa..d4c5c6723243 100644
--- a/test/CodeGen/X86/avoid-loop-align.ll
+++ b/test/CodeGen/X86/avoid-loop-align.ll
@@ -1,4 +1,11 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | grep align | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+
+; CodeGen should align the top of the loop, which differs from the loop
+; header in this case.
+
+; CHECK: jmp LBB1_2
+; CHECK: .align
+; CHECK: LBB1_1:
 
 @A = common global [100 x i32] zeroinitializer, align 32		; <[100 x i32]*> [#uses=1]
 
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index fe0e03649ddc..5bf39e5b2946 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -66,8 +66,7 @@ NoOutput("disable-output",
          cl::desc("Do not write result bitcode file"), cl::Hidden);
 
 static cl::opt<bool>
-OutputAssembly("S",
-         cl::desc("Write output as LLVM assembly"), cl::Hidden);
+OutputAssembly("S", cl::desc("Write output as LLVM assembly"));
 
 static cl::opt<bool>
 NoVerify("disable-verify", cl::desc("Do not verify result module"), cl::Hidden);
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index 55d37493ea5a..d7aaea7134fc 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -166,6 +166,7 @@ TEST_F(JITTest, FarCallToKnownFunction) {
   EXPECT_EQ(8, TestFunctionPtr());
 }
 
+#if !defined(__arm__) && !defined(__ppc__)
 // Test a function C which calls A and B which call each other.
 TEST_F(JITTest, NonLazyCompilationStillNeedsStubs) {
   TheJIT->DisableLazyCompilation();
@@ -220,6 +221,7 @@ TEST_F(JITTest, NonLazyCompilationStillNeedsStubs) {
 
   F1Ptr();
 }
+#endif
 
 // Regression test for PR5162.  This used to trigger an AssertingVH inside the
 // JIT's Function to stub mapping.
author	Roman Divacky <rdivacky@FreeBSD.org>	2009-10-15 07:47:49 +0000
committer	Roman Divacky <rdivacky@FreeBSD.org>	2009-10-15 07:47:49 +0000
commit	26137f35958fa2237eceb06ef7003e3c19159129 (patch)
tree	6441ce5f8a25ef18b4a8082f3cc834f7c8556f61
parent	59850d0874429601812bc13408cb1f776649027c (diff)