summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoman Divacky <rdivacky@FreeBSD.org>2009-10-15 07:47:49 +0000
committerRoman Divacky <rdivacky@FreeBSD.org>2009-10-15 07:47:49 +0000
commit26137f35958fa2237eceb06ef7003e3c19159129 (patch)
tree6441ce5f8a25ef18b4a8082f3cc834f7c8556f61
parent59850d0874429601812bc13408cb1f776649027c (diff)
downloadsrc-test2-26137f35958fa2237eceb06ef7003e3c19159129.tar.gz
src-test2-26137f35958fa2237eceb06ef7003e3c19159129.zip
Notes
-rw-r--r--include/llvm/ADT/DenseMapInfo.h2
-rw-r--r--include/llvm/ADT/ImmutableSet.h4
-rw-r--r--include/llvm/CodeGen/ScheduleDAG.h4
-rw-r--r--include/llvm/CompilerDriver/CompilationGraph.h2
-rw-r--r--include/llvm/ExecutionEngine/JITMemoryManager.h2
-rw-r--r--include/llvm/Metadata.h6
-rw-r--r--include/llvm/Operator.h4
-rw-r--r--include/llvm/Pass.h4
-rw-r--r--include/llvm/Support/CommandLine.h4
-rw-r--r--include/llvm/Support/DebugLoc.h4
-rw-r--r--include/llvm/Support/raw_ostream.h26
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp42
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp67
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h2
-rw-r--r--lib/CodeGen/CodePlacementOpt.cpp101
-rw-r--r--lib/CodeGen/LiveVariables.cpp13
-rw-r--r--lib/CodeGen/MachineInstr.cpp12
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp7
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp33
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp283
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td49
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td6
-rw-r--r--lib/Transforms/Scalar/InstructionCombining.cpp90
-rw-r--r--test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll10
-rw-r--r--test/CodeGen/X86/avoid-loop-align-2.ll6
-rw-r--r--test/CodeGen/X86/avoid-loop-align.ll9
-rw-r--r--tools/opt/opt.cpp3
-rw-r--r--unittests/ExecutionEngine/JIT/JITTest.cpp2
28 files changed, 420 insertions, 377 deletions
diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h
index 632728bf0d17..52ac5f9c5dfb 100644
--- a/include/llvm/ADT/DenseMapInfo.h
+++ b/include/llvm/ADT/DenseMapInfo.h
@@ -89,7 +89,7 @@ template<> struct DenseMapInfo<unsigned long long> {
static inline unsigned long long getEmptyKey() { return ~0ULL; }
static inline unsigned long long getTombstoneKey() { return ~0ULL - 1ULL; }
static unsigned getHashValue(const unsigned long long& Val) {
- return Val * 37ULL;
+ return (unsigned)Val * 37ULL;
}
static bool isPod() { return true; }
static bool isEqual(const unsigned long long& LHS,
diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h
index 14f4ac8123d6..86279099e32d 100644
--- a/include/llvm/ADT/ImmutableSet.h
+++ b/include/llvm/ADT/ImmutableSet.h
@@ -988,8 +988,8 @@ public:
BumpPtrAllocator& getAllocator() { return F.getAllocator(); }
private:
- Factory(const Factory& RHS) {};
- void operator=(const Factory& RHS) {};
+ Factory(const Factory& RHS) {}
+ void operator=(const Factory& RHS) {}
};
friend class Factory;
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 39563f733068..fdbbb1ee20ea 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -500,8 +500,8 @@ namespace llvm {
/// ComputeOperandLatency - Override dependence edge latency using
/// operand use/def information
///
- virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
- SDep& dep) const { };
+ virtual void ComputeOperandLatency(SUnit *, SUnit *,
+ SDep&) const { }
/// Schedule - Order nodes according to selected style, filling
/// in the Sequence member.
diff --git a/include/llvm/CompilerDriver/CompilationGraph.h b/include/llvm/CompilerDriver/CompilationGraph.h
index 3daafd58a7c2..ba6ff4714ce4 100644
--- a/include/llvm/CompilerDriver/CompilationGraph.h
+++ b/include/llvm/CompilerDriver/CompilationGraph.h
@@ -43,7 +43,7 @@ namespace llvmc {
class Edge : public llvm::RefCountedBaseVPTR<Edge> {
public:
Edge(const std::string& T) : ToolName_(T) {}
- virtual ~Edge() {};
+ virtual ~Edge() {}
const std::string& ToolName() const { return ToolName_; }
virtual unsigned Weight(const InputLanguagesSet& InLangs) const = 0;
diff --git a/include/llvm/ExecutionEngine/JITMemoryManager.h b/include/llvm/ExecutionEngine/JITMemoryManager.h
index 21dee553474c..9f6fb63fbe56 100644
--- a/include/llvm/ExecutionEngine/JITMemoryManager.h
+++ b/include/llvm/ExecutionEngine/JITMemoryManager.h
@@ -149,7 +149,7 @@ public:
/// CheckInvariants - For testing only. Return true if all internal
/// invariants are preserved, or return false and set ErrorStr to a helpful
/// error message.
- virtual bool CheckInvariants(std::string &ErrorStr) {
+ virtual bool CheckInvariants(std::string &) {
return true;
}
diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h
index 63c2da2e7dfd..13b97b992ce2 100644
--- a/include/llvm/Metadata.h
+++ b/include/llvm/Metadata.h
@@ -181,7 +181,7 @@ public:
/// duplicates
void Profile(FoldingSetNodeID &ID) const;
- virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
+ virtual void replaceUsesOfWithOnConstant(Value *, Value *, Use *) {
llvm_unreachable("This should never be called because MDNodes have no ops");
}
@@ -291,7 +291,7 @@ public:
return false;
}
- virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
+ virtual void replaceUsesOfWithOnConstant(Value *, Value *, Use *) {
llvm_unreachable(
"This should never be called because NamedMDNodes have no ops");
}
@@ -361,7 +361,7 @@ public:
/// ValueIsDeleted - This handler is used to update metadata store
/// when a value is deleted.
- void ValueIsDeleted(const Value *V) {}
+ void ValueIsDeleted(const Value *) {}
void ValueIsDeleted(const Instruction *Inst) {
removeMDs(Inst);
}
diff --git a/include/llvm/Operator.h b/include/llvm/Operator.h
index 2b5cc57e75dd..60865aa8ad45 100644
--- a/include/llvm/Operator.h
+++ b/include/llvm/Operator.h
@@ -57,8 +57,8 @@ public:
}
static inline bool classof(const Operator *) { return true; }
- static inline bool classof(const Instruction *I) { return true; }
- static inline bool classof(const ConstantExpr *I) { return true; }
+ static inline bool classof(const Instruction *) { return true; }
+ static inline bool classof(const ConstantExpr *) { return true; }
static inline bool classof(const Value *V) {
return isa<Instruction>(V) || isa<ConstantExpr>(V);
}
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index eb4c92281c9b..27919365227c 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -276,7 +276,7 @@ public:
/// doInitialization - Virtual method overridden by subclasses to do
/// any necessary per-module initialization.
///
- virtual bool doInitialization(Module &M) { return false; }
+ virtual bool doInitialization(Module &) { return false; }
/// runOnFunction - Virtual method overriden by subclasses to do the
/// per-function processing of the pass.
@@ -328,7 +328,7 @@ public:
/// doInitialization - Virtual method overridden by subclasses to do
/// any necessary per-module initialization.
///
- virtual bool doInitialization(Module &M) { return false; }
+ virtual bool doInitialization(Module &) { return false; }
/// doInitialization - Virtual method overridden by BasicBlockPass subclasses
/// to do any necessary per-function initialization.
diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h
index dc73979bb09b..ca32f75b2888 100644
--- a/include/llvm/Support/CommandLine.h
+++ b/include/llvm/Support/CommandLine.h
@@ -660,7 +660,7 @@ template<>
class parser<std::string> : public basic_parser<std::string> {
public:
// parse - Return true on error.
- bool parse(Option &, StringRef ArgName, StringRef Arg, std::string &Value) {
+ bool parse(Option &, StringRef, StringRef Arg, std::string &Value) {
Value = Arg.str();
return false;
}
@@ -681,7 +681,7 @@ template<>
class parser<char> : public basic_parser<char> {
public:
// parse - Return true on error.
- bool parse(Option &, StringRef ArgName, StringRef Arg, char &Value) {
+ bool parse(Option &, StringRef, StringRef Arg, char &Value) {
Value = Arg[0];
return false;
}
diff --git a/include/llvm/Support/DebugLoc.h b/include/llvm/Support/DebugLoc.h
index 55c3c4ffbd76..362390f62876 100644
--- a/include/llvm/Support/DebugLoc.h
+++ b/include/llvm/Support/DebugLoc.h
@@ -29,10 +29,10 @@ namespace llvm {
unsigned Line, Col;
DebugLocTuple()
- : Scope(0), InlinedAtLoc(0), Line(~0U), Col(~0U) {};
+ : Scope(0), InlinedAtLoc(0), Line(~0U), Col(~0U) {}
DebugLocTuple(MDNode *n, MDNode *i, unsigned l, unsigned c)
- : Scope(n), InlinedAtLoc(i), Line(l), Col(c) {};
+ : Scope(n), InlinedAtLoc(i), Line(l), Col(c) {}
bool operator==(const DebugLocTuple &DLT) const {
return Scope == DLT.Scope &&
diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h
index 7827dd83804b..8012b6fdfc08 100644
--- a/include/llvm/Support/raw_ostream.h
+++ b/include/llvm/Support/raw_ostream.h
@@ -51,7 +51,7 @@ private:
/// for a \see write_impl() call to handle the data which has been put into
/// this buffer.
char *OutBufStart, *OutBufEnd, *OutBufCur;
-
+
enum BufferKind {
Unbuffered = 0,
InternalBuffer,
@@ -211,7 +211,7 @@ public:
return *this;
}
- raw_ostream &operator<<(double N);
+ raw_ostream &operator<<(double N);
/// write_hex - Output \arg N in hexadecimal, without any prefix or padding.
raw_ostream &write_hex(unsigned long long N);
@@ -224,8 +224,8 @@ public:
/// indent - Insert 'NumSpaces' spaces.
raw_ostream &indent(unsigned NumSpaces);
-
-
+
+
/// Changes the foreground color of text that will be output from this point
/// forward.
/// @param colors ANSI color to use, the special SAVEDCOLOR can be used to
@@ -233,8 +233,8 @@ public:
/// @param bold bold/brighter text, default false
/// @param bg if true change the background, default: change foreground
/// @returns itself so it can be used within << invocations
- virtual raw_ostream &changeColor(enum Colors colors, bool bold=false,
- bool bg=false) { return *this; }
+ virtual raw_ostream &changeColor(enum Colors, bool = false,
+ bool = false) { return *this; }
/// Resets the colors to terminal defaults. Call this when you are done
/// outputting colored text, or before program exit.
@@ -253,7 +253,7 @@ private:
/// write_impl - The is the piece of the class that is implemented
/// by subclasses. This writes the \args Size bytes starting at
/// \arg Ptr to the underlying stream.
- ///
+ ///
/// This function is guaranteed to only be called at a point at which it is
/// safe for the subclass to install a new buffer via SetBuffer.
///
@@ -331,7 +331,7 @@ class raw_fd_ostream : public raw_ostream {
virtual size_t preferred_buffer_size();
public:
-
+
enum {
/// F_Excl - When opening a file, this flag makes raw_fd_ostream
/// report an error if the file already exists.
@@ -346,7 +346,7 @@ public:
/// make this distinction.
F_Binary = 4
};
-
+
/// raw_fd_ostream - Open the specified file for writing. If an error occurs,
/// information about the error is put into ErrorInfo, and the stream should
/// be immediately destroyed; the string will be empty if no error occurred.
@@ -359,10 +359,10 @@ public:
/// raw_fd_ostream ctor - FD is the file descriptor that this writes to. If
/// ShouldClose is true, this closes the file when the stream is destroyed.
- raw_fd_ostream(int fd, bool shouldClose,
- bool unbuffered=false) : raw_ostream(unbuffered), FD(fd),
+ raw_fd_ostream(int fd, bool shouldClose,
+ bool unbuffered=false) : raw_ostream(unbuffered), FD(fd),
ShouldClose(shouldClose) {}
-
+
~raw_fd_ostream();
/// close - Manually flush the stream and close the file.
@@ -465,7 +465,7 @@ public:
class raw_null_ostream : public raw_ostream {
/// write_impl - See raw_ostream::write_impl.
virtual void write_impl(const char *Ptr, size_t size);
-
+
/// current_pos - Return the current position within the stream, not
/// counting the bytes currently in the buffer.
virtual uint64_t current_pos();
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 2c4efc4985b3..c5be6167e060 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -310,6 +310,28 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
switch (II->getIntrinsicID()) {
default: break;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove: {
+ unsigned Len = ~0U;
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getOperand(3)))
+ Len = LenCI->getZExtValue();
+ Value *Dest = II->getOperand(1);
+ Value *Src = II->getOperand(2);
+ if (alias(Dest, Len, P, Size) == NoAlias) {
+ if (alias(Src, Len, P, Size) == NoAlias)
+ return NoModRef;
+ return Ref;
+ }
+ }
+ break;
+ case Intrinsic::memset:
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getOperand(3))) {
+ unsigned Len = LenCI->getZExtValue();
+ Value *Dest = II->getOperand(1);
+ if (alias(Dest, Len, P, Size) == NoAlias)
+ return NoModRef;
+ }
+ break;
case Intrinsic::atomic_cmp_swap:
case Intrinsic::atomic_swap:
case Intrinsic::atomic_load_add:
@@ -322,9 +344,25 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
case Intrinsic::atomic_load_min:
case Intrinsic::atomic_load_umax:
case Intrinsic::atomic_load_umin:
- if (alias(II->getOperand(1), Size, P, Size) == NoAlias)
- return NoModRef;
+ if (TD) {
+ Value *Op1 = II->getOperand(1);
+ unsigned Op1Size = TD->getTypeStoreSize(Op1->getType());
+ if (alias(Op1, Op1Size, P, Size) == NoAlias)
+ return NoModRef;
+ }
break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start: {
+ unsigned PtrSize = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
+ if (alias(II->getOperand(2), PtrSize, P, Size) == NoAlias)
+ return NoModRef;
+ }
+ case Intrinsic::invariant_end: {
+ unsigned PtrSize = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
+ if (alias(II->getOperand(3), PtrSize, P, Size) == NoAlias)
+ return NoModRef;
+ }
}
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 4394ec08ef22..291485109562 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -145,7 +145,10 @@ class DbgConcreteScope;
class VISIBILITY_HIDDEN DbgScope {
DbgScope *Parent; // Parent to this scope.
DIDescriptor Desc; // Debug info descriptor for scope.
- // Either subprogram or block.
+ // FIXME use WeakVH for Desc.
+ WeakVH InlinedAt; // If this scope represents inlined
+ // function body then this is the location
+ // where this body is inlined.
unsigned StartLabelID; // Label ID of the beginning of scope.
unsigned EndLabelID; // Label ID of the end of scope.
const MachineInstr *LastInsn; // Last instruction of this scope.
@@ -157,14 +160,17 @@ class VISIBILITY_HIDDEN DbgScope {
// Private state for dump()
mutable unsigned IndentLevel;
public:
- DbgScope(DbgScope *P, DIDescriptor D)
- : Parent(P), Desc(D), StartLabelID(0), EndLabelID(0), LastInsn(0),
- FirstInsn(0), IndentLevel(0) {}
+ DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0)
+ : Parent(P), Desc(D), InlinedAt(I), StartLabelID(0), EndLabelID(0),
+ LastInsn(0), FirstInsn(0), IndentLevel(0) {}
virtual ~DbgScope();
// Accessors.
DbgScope *getParent() const { return Parent; }
DIDescriptor getDesc() const { return Desc; }
+ MDNode *getInlinedAt() const {
+ return dyn_cast_or_null<MDNode>(InlinedAt);
+ }
unsigned getStartLabelID() const { return StartLabelID; }
unsigned getEndLabelID() const { return EndLabelID; }
SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
@@ -1296,29 +1302,39 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
/// getOrCreateScope - Returns the scope associated with the given descriptor.
///
-DbgScope *DwarfDebug::getDbgScope(MDNode *N, const MachineInstr *MI) {
+DbgScope *DwarfDebug::getDbgScope(MDNode *N, const MachineInstr *MI,
+ MDNode *InlinedAt) {
DbgScope *&Slot = DbgScopeMap[N];
if (Slot) return Slot;
DbgScope *Parent = NULL;
- DIDescriptor Scope(N);
- if (Scope.isCompileUnit()) {
- return NULL;
- } else if (Scope.isSubprogram()) {
- DISubprogram SP(N);
- DIDescriptor ParentDesc = SP.getContext();
- if (!ParentDesc.isNull() && !ParentDesc.isCompileUnit())
- Parent = getDbgScope(ParentDesc.getNode(), MI);
- } else if (Scope.isLexicalBlock()) {
- DILexicalBlock DB(N);
- DIDescriptor ParentDesc = DB.getContext();
- if (!ParentDesc.isNull())
- Parent = getDbgScope(ParentDesc.getNode(), MI);
- } else
- assert (0 && "Unexpected scope info");
-
- Slot = new DbgScope(Parent, DIDescriptor(N));
+ if (InlinedAt) {
+ DILocation IL(InlinedAt);
+ assert (!IL.isNull() && "Invalid InlindAt location!");
+ DenseMap<MDNode *, DbgScope *>::iterator DSI =
+ DbgScopeMap.find(IL.getScope().getNode());
+ assert (DSI != DbgScopeMap.end() && "Unable to find InlineAt scope!");
+ Parent = DSI->second;
+ } else {
+ DIDescriptor Scope(N);
+ if (Scope.isCompileUnit()) {
+ return NULL;
+ } else if (Scope.isSubprogram()) {
+ DISubprogram SP(N);
+ DIDescriptor ParentDesc = SP.getContext();
+ if (!ParentDesc.isNull() && !ParentDesc.isCompileUnit())
+ Parent = getDbgScope(ParentDesc.getNode(), MI, InlinedAt);
+ } else if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(N);
+ DIDescriptor ParentDesc = DB.getContext();
+ if (!ParentDesc.isNull())
+ Parent = getDbgScope(ParentDesc.getNode(), MI, InlinedAt);
+ } else
+ assert (0 && "Unexpected scope info");
+ }
+
+ Slot = new DbgScope(Parent, DIDescriptor(N), InlinedAt);
Slot->setFirstInsn(MI);
if (Parent)
@@ -1795,7 +1811,10 @@ void DwarfDebug::CollectVariableInfo() {
DIVariable DV (Var);
if (DV.isNull()) continue;
unsigned VSlot = VI->second;
- DbgScope *Scope = getDbgScope(DV.getContext().getNode(), NULL);
+ DenseMap<MDNode *, DbgScope *>::iterator DSI =
+ DbgScopeMap.find(DV.getContext().getNode());
+ assert (DSI != DbgScopeMap.end() && "Unable to find variable scope!");
+ DbgScope *Scope = DSI->second;
Scope->AddVariable(new DbgVariable(DV, VSlot, false));
}
}
@@ -1849,7 +1868,7 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
// into a scope DIE at the end.
DIDescriptor D(DLT.Scope);
if (!D.isCompileUnit()) {
- DbgScope *Scope = getDbgScope(DLT.Scope, MInsn);
+ DbgScope *Scope = getDbgScope(DLT.Scope, MInsn, DLT.InlinedAtLoc);
Scope->setLastInsn(MInsn);
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index bd377c5593cc..7f711042c071 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -364,7 +364,7 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
/// getDbgScope - Returns the scope associated with the given descriptor.
///
DbgScope *getOrCreateScope(MDNode *N);
- DbgScope *getDbgScope(MDNode *N, const MachineInstr *MI);
+ DbgScope *getDbgScope(MDNode *N, const MachineInstr *MI, MDNode *InlinedAt);
/// ConstructDbgScope - Construct the components of a scope.
///
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index 932fae4f316c..42b24a65b450 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -24,7 +24,7 @@
#include "llvm/ADT/Statistic.h"
using namespace llvm;
-STATISTIC(NumHeaderAligned, "Number of loop header aligned");
+STATISTIC(NumLoopsAligned, "Number of loops aligned");
STATISTIC(NumIntraElim, "Number of intra loop branches eliminated");
STATISTIC(NumIntraMoved, "Number of intra loop branches moved");
@@ -42,9 +42,6 @@ namespace {
SmallVector<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 4>
UncondJmpMBBs;
- /// LoopHeaders - A list of BBs which are loop headers.
- SmallVector<MachineBasicBlock*, 4> LoopHeaders;
-
public:
static char ID;
CodePlacementOpt() : MachineFunctionPass(&ID) {}
@@ -62,9 +59,8 @@ namespace {
private:
bool OptimizeIntraLoopEdges();
- bool HeaderShouldBeAligned(MachineBasicBlock *MBB, MachineLoop *L,
- SmallPtrSet<MachineBasicBlock*, 4> &DoNotAlign);
bool AlignLoops(MachineFunction &MF);
+ bool AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align);
};
char CodePlacementOpt::ID = 0;
@@ -233,57 +229,12 @@ bool CodePlacementOpt::OptimizeIntraLoopEdges() {
ChangedMBBs.insert(FtMBB);
}
Changed = true;
-
- // If BB is the loop latch, we may have a new loop headr.
- if (MBB == L->getLoopLatch()) {
- assert(MLI->isLoopHeader(SuccMBB) &&
- "Only succ of loop latch is not the header?");
- if (HasOneIntraSucc && IntraSucc)
- std::replace(LoopHeaders.begin(),LoopHeaders.end(), SuccMBB, IntraSucc);
- }
}
++NumIntraMoved;
return Changed;
}
-/// HeaderShouldBeAligned - Return true if the specified loop header block
-/// should be aligned. For now, we will not align it if all the predcessors
-/// (i.e. loop back edges) are laid out above the header. FIXME: Do not
-/// align small loops.
-bool
-CodePlacementOpt::HeaderShouldBeAligned(MachineBasicBlock *MBB, MachineLoop *L,
- SmallPtrSet<MachineBasicBlock*, 4> &DoNotAlign) {
- if (DoNotAlign.count(MBB))
- return false;
-
- bool BackEdgeBelow = false;
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock *PredMBB = *PI;
- if (PredMBB == MBB || PredMBB->getNumber() > MBB->getNumber()) {
- BackEdgeBelow = true;
- break;
- }
- }
-
- if (!BackEdgeBelow)
- return false;
-
- // Ok, we are going to align this loop header. If it's an inner loop,
- // do not align its outer loop.
- MachineBasicBlock *PreHeader = L->getLoopPreheader();
- if (PreHeader) {
- MachineLoop *L = MLI->getLoopFor(PreHeader);
- if (L) {
- MachineBasicBlock *HeaderBlock = L->getHeader();
- HeaderBlock->setAlignment(0);
- DoNotAlign.insert(HeaderBlock);
- }
- }
- return true;
-}
-
/// AlignLoops - Align loop headers to target preferred alignments.
///
bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
@@ -295,26 +246,37 @@ bool CodePlacementOpt::AlignLoops(MachineFunction &MF) {
if (!Align)
return false; // Don't care about loop alignment.
- // Make sure blocks are numbered in order
- MF.RenumberBlocks();
+ bool Changed = false;
+
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+ I != E; ++I)
+ Changed |= AlignLoop(MF, *I, Align);
+ return Changed;
+}
+
+bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L,
+ unsigned Align) {
bool Changed = false;
- SmallPtrSet<MachineBasicBlock*, 4> DoNotAlign;
- for (unsigned i = 0, e = LoopHeaders.size(); i != e; ++i) {
- MachineBasicBlock *HeaderMBB = LoopHeaders[i];
- MachineBasicBlock *PredMBB = prior(MachineFunction::iterator(HeaderMBB));
- MachineLoop *L = MLI->getLoopFor(HeaderMBB);
- if (L == MLI->getLoopFor(PredMBB))
- // If previously BB is in the same loop, don't align this BB. We want
- // to prevent adding noop's inside a loop.
- continue;
- if (HeaderShouldBeAligned(HeaderMBB, L, DoNotAlign)) {
- HeaderMBB->setAlignment(Align);
- Changed = true;
- ++NumHeaderAligned;
- }
+
+ // Do alignment for nested loops.
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ Changed |= AlignLoop(MF, *I, Align);
+
+ MachineBasicBlock *TopMBB = L->getHeader();
+ if (TopMBB == MF.begin()) return Changed;
+
+ MachineBasicBlock *PredMBB = prior(MachineFunction::iterator(TopMBB));
+ while (MLI->getLoopFor(PredMBB) == L) {
+ TopMBB = PredMBB;
+ if (TopMBB == MF.begin()) return Changed;
+ PredMBB = prior(MachineFunction::iterator(TopMBB));
}
+ TopMBB->setAlignment(Align);
+ Changed = true;
+ ++NumLoopsAligned;
+
return Changed;
}
@@ -326,7 +288,7 @@ bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
TLI = MF.getTarget().getTargetLowering();
TII = MF.getTarget().getInstrInfo();
- // Analyze the BBs first and keep track of loop headers and BBs that
+ // Analyze the BBs first and keep track of BBs that
// end with an unconditional jmp to another block in the same loop.
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = I;
@@ -335,8 +297,6 @@ bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
MachineLoop *L = MLI->getLoopFor(MBB);
if (!L)
continue;
- if (MLI->isLoopHeader(MBB))
- LoopHeaders.push_back(MBB);
MachineBasicBlock *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 4> Cond;
@@ -352,7 +312,6 @@ bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) {
ChangedMBBs.clear();
UncondJmpMBBs.clear();
- LoopHeaders.clear();
return Changed;
}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 139e0291ea7a..96c655c1a9b8 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -323,10 +323,21 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
// The last partial def kills the register.
LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
true/*IsImp*/, true/*IsKill*/));
- else
+ else {
+ MachineOperand *MO =
+ LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
+ bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
// If the last reference is the last def, then it's not used at all.
// That is, unless we are currently processing the last reference itself.
LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+ if (NeedEC) {
+ // If we are adding a subreg def and the superreg def is marked early
+ // clobber, add an early clobber marker to the subreg def.
+ MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
+ if (MO)
+ MO->setIsEarlyClobber();
+ }
+ }
} else if (!PhysRegUse[Reg]) {
// Partial uses. Mark register def dead and add implicit def of
// sub-registers which are used.
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index cbe5c7cb51e3..b9d3ba78794e 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -212,17 +212,17 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
isEarlyClobber()) {
OS << '<';
bool NeedComma = false;
- if (isImplicit()) {
- if (NeedComma) OS << ',';
- OS << (isDef() ? "imp-def" : "imp-use");
- NeedComma = true;
- } else if (isDef()) {
+ if (isDef()) {
if (NeedComma) OS << ',';
if (isEarlyClobber())
OS << "earlyclobber,";
+ if (isImplicit())
+ OS << "imp-";
OS << "def";
NeedComma = true;
- }
+ } else if (isImplicit())
+ OS << "imp-use";
+
if (isKill() || isDead() || isUndef()) {
if (NeedComma) OS << ',';
if (isKill()) OS << "kill";
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 8793df7705fa..7af0bba19735 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -767,7 +767,7 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
unsigned CurrentScratchReg = 0;
bool havePrevValue = false;
unsigned PrevScratchReg = 0;
- int PrevValue;
+ int PrevValue = 0;
MachineInstr *PrevLastUseMI = NULL;
unsigned PrevLastUseOp = 0;
bool trackingCurrentValue = false;
@@ -778,9 +778,7 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// directly.
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
MachineInstr *MI = I;
- // Likewise, call getNumOperands() each iteration, as the MI may change
- // inside the loop (with 'i' updated accordingly).
- for (unsigned i = 0; i != MI->getNumOperands(); ++i)
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
if (MI->getOperand(i).isReg()) {
MachineOperand &MO = MI->getOperand(i);
unsigned Reg = MO.getReg();
@@ -853,6 +851,7 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// just calculating the value we already have.
BB->erase(I, LastUseMI);
MI = I = LastUseMI;
+ e = MI->getNumOperands();
CurrentScratchReg = PrevScratchReg;
// Extend the live range of the register
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 309e3ba2ac25..b0bd04bade40 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -74,16 +74,28 @@ namespace {
/// CPUser - One user of a constant pool, keeping the machine instruction
/// pointer, the constant pool being referenced, and the max displacement
- /// allowed from the instruction to the CP.
+ /// allowed from the instruction to the CP. The HighWaterMark records the
+ /// highest basic block where a new CPEntry can be placed. To ensure this
+ /// pass terminates, the CP entries are initially placed at the end of the
+ /// function and then move monotonically to lower addresses. The
+ /// exception to this rule is when the current CP entry for a particular
+ /// CPUser is out of range, but there is another CP entry for the same
+ /// constant value in range. We want to use the existing in-range CP
+ /// entry, but if it later moves out of range, the search for new water
+ /// should resume where it left off. The HighWaterMark is used to record
+ /// that point.
struct CPUser {
MachineInstr *MI;
MachineInstr *CPEMI;
+ MachineBasicBlock *HighWaterMark;
unsigned MaxDisp;
bool NegOk;
bool IsSoImm;
CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
bool neg, bool soimm)
- : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {}
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {
+ HighWaterMark = CPEMI->getParent();
+ }
};
/// CPUsers - Keep track of all of the machine instructions that use various
@@ -962,8 +974,8 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
B = WaterList.begin();; --IP) {
MachineBasicBlock* WaterBB = *IP;
// Check if water is in range and at a lower address than the current one.
- if (WaterIsInRange(UserOffset, WaterBB, U) &&
- WaterBB->getNumber() < U.CPEMI->getParent()->getNumber()) {
+ if (WaterBB->getNumber() < U.HighWaterMark->getNumber() &&
+ WaterIsInRange(UserOffset, WaterBB, U)) {
unsigned WBBId = WaterBB->getNumber();
if (isThumb &&
(BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) {
@@ -1006,14 +1018,12 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
BBSizes[UserMBB->getNumber()];
assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]);
- // If the use is at the end of the block, or the end of the block
- // is within range, make new water there. (The addition below is
- // for the unconditional branch we will be adding: 4 bytes on ARM + Thumb2,
- // 2 on Thumb1. Possible Thumb1 alignment padding is allowed for
+ // If the block does not end in an unconditional branch already, and if the
+ // end of the block is within range, make new water there. (The addition
+ // below is for the unconditional branch we will be adding: 4 bytes on ARM +
+ // Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is allowed for
// inside OffsetIsInRange.
- // If the block ends in an unconditional branch already, it is water,
- // and is known to be out of range, so we'll always be adding a branch.)
- if (&UserMBB->back() == UserMI ||
+ if (BBHasFallthrough(UserMBB) &&
OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4),
U.MaxDisp, U.NegOk, U.IsSoImm)) {
DEBUG(errs() << "Split at end of block\n");
@@ -1131,6 +1141,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
// Now that we have an island to add the CPE to, clone the original CPE and
// add it to the island.
+ U.HighWaterMark = NewIsland;
U.CPEMI = BuildMI(NewIsland, DebugLoc::getUnknownLoc(),
TII->get(ARM::CONSTPOOL_ENTRY))
.addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index bebf4e839994..c39de0a12b8f 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -133,6 +133,13 @@ private:
SDNode *SelectVLD(SDValue Op, unsigned NumVecs, unsigned *DOpcodes,
unsigned *QOpcodes0, unsigned *QOpcodes1);
+ /// SelectVST - Select NEON store intrinsics. NumVecs should
+ /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// stores of D registers and even subregs and odd subregs of Q registers.
+ /// For NumVecs == 2, QOpcodes1 is not used.
+ SDNode *SelectVST(SDValue Op, unsigned NumVecs, unsigned *DOpcodes,
+ unsigned *QOpcodes0, unsigned *QOpcodes1);
+
/// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
/// load/store of D registers and even subregs and odd subregs of Q registers.
@@ -1063,13 +1070,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
ResTys.push_back(MemAddr.getValueType());
ResTys.push_back(MVT::Other);
- // Load the even subreg.
+ // Load the even subregs.
unsigned Opc = QOpcodes0[OpcodeIndex];
const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Chain };
SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 4);
Chain = SDValue(VLdA, NumVecs+1);
- // Load the odd subreg.
+ // Load the odd subregs.
Opc = QOpcodes1[OpcodeIndex];
const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, Chain };
SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 4);
@@ -1085,6 +1092,95 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
return NULL;
}
+SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
+ unsigned *DOpcodes, unsigned *QOpcodes0,
+ unsigned *QOpcodes1) {
+ assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range");
+ SDNode *N = Op.getNode();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue MemAddr, MemUpdate, MemOpc;
+ if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+ return NULL;
+
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getOperand(3).getValueType();
+ bool is64BitVector = VT.is64BitVector();
+
+ unsigned OpcodeIndex;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("unhandled vst type");
+ // Double-register operations:
+ case MVT::v8i8: OpcodeIndex = 0; break;
+ case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v2f32:
+ case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v1i64: OpcodeIndex = 3; break;
+ // Quad-register operations:
+ case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v8i16: OpcodeIndex = 1; break;
+ case MVT::v4f32:
+ case MVT::v4i32: OpcodeIndex = 2; break;
+ }
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(MemUpdate);
+ Ops.push_back(MemOpc);
+
+ if (is64BitVector) {
+ unsigned Opc = DOpcodes[OpcodeIndex];
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(N->getOperand(Vec+3));
+ Ops.push_back(Chain);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+4);
+ }
+
+ EVT RegVT = GetNEONSubregVT(VT);
+ if (NumVecs == 2) {
+ // Quad registers are directly supported for VST2,
+ // storing 2 pairs of D regs.
+ unsigned Opc = QOpcodes0[OpcodeIndex];
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
+ N->getOperand(Vec+3)));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+ N->getOperand(Vec+3)));
+ }
+ Ops.push_back(Chain);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 8);
+ }
+
+ // Otherwise, quad registers are stored with two separate instructions,
+ // where one stores the even registers and the other stores the odd registers.
+
+ // Enable writeback to the address register.
+ MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
+
+ // Store the even subregs.
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
+ N->getOperand(Vec+3)));
+ Ops.push_back(Chain);
+ unsigned Opc = QOpcodes0[OpcodeIndex];
+ SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, Ops.data(), NumVecs+4);
+ Chain = SDValue(VStA, 1);
+
+ // Store the odd subregs.
+ Ops[0] = SDValue(VStA, 0); // MemAddr
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+ N->getOperand(Vec+3));
+ Ops[NumVecs+3] = Chain;
+ Opc = QOpcodes1[OpcodeIndex];
+ SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, Ops.data(), NumVecs+4);
+ Chain = SDValue(VStB, 1);
+ ReplaceUses(SDValue(N, 0), Chain);
+ return NULL;
+}
+
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
unsigned NumVecs, unsigned *DOpcodes,
unsigned *QOpcodes0,
@@ -1612,9 +1708,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
- EVT VT = N->getValueType(0);
- unsigned Opc = 0;
-
switch (IntNo) {
default:
break;
@@ -1664,178 +1757,26 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
}
case Intrinsic::arm_neon_vst2: {
- SDValue MemAddr, MemUpdate, MemOpc;
- if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
- return NULL;
- SDValue Chain = N->getOperand(0);
- VT = N->getOperand(3).getValueType();
- if (VT.is64BitVector()) {
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("unhandled vst2 type");
- case MVT::v8i8: Opc = ARM::VST2d8; break;
- case MVT::v4i16: Opc = ARM::VST2d16; break;
- case MVT::v2f32:
- case MVT::v2i32: Opc = ARM::VST2d32; break;
- case MVT::v1i64: Opc = ARM::VST2d64; break;
- }
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
- N->getOperand(3), N->getOperand(4), Chain };
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6);
- }
- // Quad registers are stored as pairs of double registers.
- EVT RegVT;
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("unhandled vst2 type");
- case MVT::v16i8: Opc = ARM::VST2q8; RegVT = MVT::v8i8; break;
- case MVT::v8i16: Opc = ARM::VST2q16; RegVT = MVT::v4i16; break;
- case MVT::v4f32: Opc = ARM::VST2q32; RegVT = MVT::v2f32; break;
- case MVT::v4i32: Opc = ARM::VST2q32; RegVT = MVT::v2i32; break;
- }
- SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
- N->getOperand(3));
- SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
- N->getOperand(3));
- SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
- N->getOperand(4));
- SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
- N->getOperand(4));
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
- D0, D1, D2, D3, Chain };
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 8);
+ unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
+ ARM::VST2d32, ARM::VST2d64 };
+ unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 };
+ return SelectVST(Op, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst3: {
- SDValue MemAddr, MemUpdate, MemOpc;
- if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
- return NULL;
- SDValue Chain = N->getOperand(0);
- VT = N->getOperand(3).getValueType();
- if (VT.is64BitVector()) {
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("unhandled vst3 type");
- case MVT::v8i8: Opc = ARM::VST3d8; break;
- case MVT::v4i16: Opc = ARM::VST3d16; break;
- case MVT::v2f32:
- case MVT::v2i32: Opc = ARM::VST3d32; break;
- case MVT::v1i64: Opc = ARM::VST3d64; break;
- }
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
- N->getOperand(3), N->getOperand(4),
- N->getOperand(5), Chain };
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7);
- }
- // Quad registers are stored with two separate instructions, where one
- // stores the even registers and the other stores the odd registers.
- EVT RegVT;
- unsigned Opc2 = 0;
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("unhandled vst3 type");
- case MVT::v16i8:
- Opc = ARM::VST3q8a; Opc2 = ARM::VST3q8b; RegVT = MVT::v8i8; break;
- case MVT::v8i16:
- Opc = ARM::VST3q16a; Opc2 = ARM::VST3q16b; RegVT = MVT::v4i16; break;
- case MVT::v4f32:
- Opc = ARM::VST3q32a; Opc2 = ARM::VST3q32b; RegVT = MVT::v2f32; break;
- case MVT::v4i32:
- Opc = ARM::VST3q32a; Opc2 = ARM::VST3q32b; RegVT = MVT::v2i32; break;
- }
- // Enable writeback to the address register.
- MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
-
- SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
- N->getOperand(3));
- SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
- N->getOperand(4));
- SDValue D4 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
- N->getOperand(5));
- const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, D0, D2, D4, Chain };
- SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, OpsA, 7);
- Chain = SDValue(VStA, 1);
-
- SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
- N->getOperand(3));
- SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
- N->getOperand(4));
- SDValue D5 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
- N->getOperand(5));
- MemAddr = SDValue(VStA, 0);
- const SDValue OpsB[] = { MemAddr, MemUpdate, MemOpc, D1, D3, D5, Chain };
- SDNode *VStB = CurDAG->getMachineNode(Opc2, dl, MemAddr.getValueType(),
- MVT::Other, OpsB, 7);
- Chain = SDValue(VStB, 1);
- ReplaceUses(SDValue(N, 0), Chain);
- return NULL;
+ unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16,
+ ARM::VST3d32, ARM::VST3d64 };
+ unsigned QOpcodes0[] = { ARM::VST3q8a, ARM::VST3q16a, ARM::VST3q32a };
+ unsigned QOpcodes1[] = { ARM::VST3q8b, ARM::VST3q16b, ARM::VST3q32b };
+ return SelectVST(Op, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst4: {
- SDValue MemAddr, MemUpdate, MemOpc;
- if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
- return NULL;
- SDValue Chain = N->getOperand(0);
- VT = N->getOperand(3).getValueType();
- if (VT.is64BitVector()) {
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("unhandled vst4 type");
- case MVT::v8i8: Opc = ARM::VST4d8; break;
- case MVT::v4i16: Opc = ARM::VST4d16; break;
- case MVT::v2f32:
- case MVT::v2i32: Opc = ARM::VST4d32; break;
- case MVT::v1i64: Opc = ARM::VST4d64; break;
- }
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
- N->getOperand(3), N->getOperand(4),
- N->getOperand(5), N->getOperand(6), Chain };
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 8);
- }
- // Quad registers are stored with two separate instructions, where one
- // stores the even registers and the other stores the odd registers.
- EVT RegVT;
- unsigned Opc2 = 0;
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("unhandled vst4 type");
- case MVT::v16i8:
- Opc = ARM::VST4q8a; Opc2 = ARM::VST4q8b; RegVT = MVT::v8i8; break;
- case MVT::v8i16:
- Opc = ARM::VST4q16a; Opc2 = ARM::VST4q16b; RegVT = MVT::v4i16; break;
- case MVT::v4f32:
- Opc = ARM::VST4q32a; Opc2 = ARM::VST4q32b; RegVT = MVT::v2f32; break;
- case MVT::v4i32:
- Opc = ARM::VST4q32a; Opc2 = ARM::VST4q32b; RegVT = MVT::v2i32; break;
- }
- // Enable writeback to the address register.
- MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32);
-
- SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
- N->getOperand(3));
- SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
- N->getOperand(4));
- SDValue D4 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
- N->getOperand(5));
- SDValue D6 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
- N->getOperand(6));
- const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc,
- D0, D2, D4, D6, Chain };
- SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, OpsA, 8);
- Chain = SDValue(VStA, 1);
-
- SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
- N->getOperand(3));
- SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
- N->getOperand(4));
- SDValue D5 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
- N->getOperand(5));
- SDValue D7 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
- N->getOperand(6));
- MemAddr = SDValue(VStA, 0);
- const SDValue OpsB[] = { MemAddr, MemUpdate, MemOpc,
- D1, D3, D5, D7, Chain };
- SDNode *VStB = CurDAG->getMachineNode(Opc2, dl, MemAddr.getValueType(),
- MVT::Other, OpsB, 8);
- Chain = SDValue(VStB, 1);
- ReplaceUses(SDValue(N, 0), Chain);
- return NULL;
+ unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16,
+ ARM::VST4d32, ARM::VST4d64 };
+ unsigned QOpcodes0[] = { ARM::VST4q8a, ARM::VST4q16a, ARM::VST4q32a };
+ unsigned QOpcodes1[] = { ARM::VST4q8b, ARM::VST4q16b, ARM::VST4q32b };
+ return SelectVST(Op, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst2lane: {
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 8adfac3fb4c5..6ec78bc72ee7 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -377,12 +377,15 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
IIC_iALUr, opc, " $dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
+ let Inst{4} = 0;
let Inst{25} = 0;
let isCommutable = Commutable;
}
def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
IIC_iALUsr, opc, " $dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
+ let Inst{4} = 1;
+ let Inst{7} = 0;
let Inst{25} = 0;
}
}
@@ -401,11 +404,14 @@ multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
IIC_iALUr, opc, "s $dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
let isCommutable = Commutable;
+ let Inst{4} = 0;
let Inst{25} = 0;
}
def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
IIC_iALUsr, opc, "s $dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
+ let Inst{4} = 1;
+ let Inst{7} = 0;
let Inst{25} = 0;
}
}
@@ -426,6 +432,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm, IIC_iCMPr,
opc, " $a, $b",
[(opnode GPR:$a, GPR:$b)]> {
+ let Inst{4} = 0;
let Inst{20} = 1;
let Inst{25} = 0;
let isCommutable = Commutable;
@@ -433,6 +440,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iCMPsr,
opc, " $a, $b",
[(opnode GPR:$a, so_reg:$b)]> {
+ let Inst{4} = 1;
+ let Inst{7} = 0;
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -486,12 +495,15 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
Requires<[IsARM, CarryDefIsUnused]> {
let isCommutable = Commutable;
+ let Inst{4} = 0;
let Inst{25} = 0;
}
def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, opc, " $dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
Requires<[IsARM, CarryDefIsUnused]> {
+ let Inst{4} = 1;
+ let Inst{7} = 0;
let Inst{25} = 0;
}
// Carry setting variants
@@ -507,6 +519,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
Requires<[IsARM, CarryDefIsUsed]> {
let Defs = [CPSR];
+ let Inst{4} = 0;
let Inst{25} = 0;
}
def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
@@ -514,6 +527,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
Requires<[IsARM, CarryDefIsUsed]> {
let Defs = [CPSR];
+ let Inst{4} = 1;
+ let Inst{7} = 0;
let Inst{25} = 0;
}
}
@@ -924,10 +939,18 @@ def STM : AXI4st<(outs),
let neverHasSideEffects = 1 in
def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
- "mov", " $dst, $src", []>, UnaryDP;
+ "mov", " $dst, $src", []>, UnaryDP {
+ let Inst{4} = 0;
+ let Inst{25} = 0;
+}
+
def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src),
DPSoRegFrm, IIC_iMOVsr,
- "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP;
+ "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP {
+ let Inst{4} = 1;
+ let Inst{7} = 0;
+ let Inst{25} = 0;
+}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, IIC_iMOVi,
@@ -1146,10 +1169,15 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
"mvn", " $dst, $src",
- [(set GPR:$dst, (not GPR:$src))]>, UnaryDP;
+ [(set GPR:$dst, (not GPR:$src))]>, UnaryDP {
+ let Inst{4} = 0;
+}
def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
IIC_iMOVsr, "mvn", " $dst, $src",
- [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP;
+ [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP {
+ let Inst{4} = 1;
+ let Inst{7} = 0;
+}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm,
IIC_iMOVi, "mvn", " $dst, $imm",
@@ -1461,20 +1489,27 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
IIC_iCMOVr, "mov", " $dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $dst">, UnaryDP;
+ RegConstraint<"$false = $dst">, UnaryDP {
+ let Inst{4} = 0;
+ let Inst{25} = 0;
+}
def MOVCCs : AI1<0b1101, (outs GPR:$dst),
(ins GPR:$false, so_reg:$true), DPSoRegFrm, IIC_iCMOVsr,
"mov", " $dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $dst">, UnaryDP;
+ RegConstraint<"$false = $dst">, UnaryDP {
+ let Inst{4} = 1;
+ let Inst{7} = 0;
+ let Inst{25} = 0;
+}
def MOVCCi : AI1<0b1101, (outs GPR:$dst),
(ins GPR:$false, so_imm:$true), DPFrm, IIC_iCMOVi,
"mov", " $dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst">, UnaryDP {
- let Inst{25} = 1;
+ let Inst{25} = 1;
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index cd370aa97adb..b34aff7dcb3e 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1951,9 +1951,9 @@ defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl.u",
int_arm_neon_vpaddlu>;
// VPADAL : Vector Pairwise Add and Accumulate Long
-defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpadal.s",
+defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal.s",
int_arm_neon_vpadals>;
-defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpadal.u",
+defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal.u",
int_arm_neon_vpadalu>;
// VPMAX : Vector Pairwise Maximum
@@ -2038,7 +2038,7 @@ defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ,
defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ,
IIC_VSHLiQ, "vshl.u", int_arm_neon_vshiftu, 0>;
// VSHL : Vector Shift Left (Immediate)
-defm VSHLi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLiD, "vshl.i", NEONvshl>;
+defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl.i", NEONvshl>;
// VSHR : Vector Shift Right (Immediate)
defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr.s", NEONvshrs>;
defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr.u", NEONvshru>;
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 7c96c49a34b9..f635af3974d8 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -56,6 +56,7 @@
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/TargetFolder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
@@ -186,7 +187,7 @@ namespace {
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
- typedef IRBuilder<true, ConstantFolder, InstCombineIRInserter> BuilderTy;
+ typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy;
BuilderTy *Builder;
static char ID; // Pass identification, replacement for typeid
@@ -12675,16 +12676,19 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
/// many instructions are dead or constant). Additionally, if we find a branch
/// whose condition is a known constant, we only visit the reachable successors.
///
-static void AddReachableCodeToWorklist(BasicBlock *BB,
+static bool AddReachableCodeToWorklist(BasicBlock *BB,
SmallPtrSet<BasicBlock*, 64> &Visited,
InstCombiner &IC,
const TargetData *TD) {
+ bool MadeIRChange = false;
SmallVector<BasicBlock*, 256> Worklist;
Worklist.push_back(BB);
std::vector<Instruction*> InstrsForInstCombineWorklist;
InstrsForInstCombineWorklist.reserve(128);
+ SmallPtrSet<ConstantExpr*, 64> FoldedConstants;
+
while (!Worklist.empty()) {
BB = Worklist.back();
Worklist.pop_back();
@@ -12704,14 +12708,38 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,
}
// ConstantProp instruction if trivially constant.
- if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {
- DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
- << *Inst << '\n');
- Inst->replaceAllUsesWith(C);
- ++NumConstProp;
- Inst->eraseFromParent();
- continue;
+ if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
+ if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {
+ DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
+ << *Inst << '\n');
+ Inst->replaceAllUsesWith(C);
+ ++NumConstProp;
+ Inst->eraseFromParent();
+ continue;
+ }
+
+
+
+ if (TD) {
+ // See if we can constant fold its operands.
+ for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
+ i != e; ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
+ if (CE == 0) continue;
+
+ // If we already folded this constant, don't try again.
+ if (!FoldedConstants.insert(CE))
+ continue;
+
+ Constant *NewC =
+ ConstantFoldConstantExpression(CE, BB->getContext(), TD);
+ if (NewC && NewC != CE) {
+ *i = NewC;
+ MadeIRChange = true;
+ }
+ }
}
+
InstrsForInstCombineWorklist.push_back(Inst);
}
@@ -12753,11 +12781,12 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,
// some N^2 behavior in pathological cases.
IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
InstrsForInstCombineWorklist.size());
+
+ return MadeIRChange;
}
bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
MadeIRChange = false;
- TD = getAnalysisIfAvailable<TargetData>();
DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
<< F.getNameStr() << "\n");
@@ -12767,7 +12796,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// the reachable instructions. Ignore blocks that are not reachable. Keep
// track of which blocks we visit.
SmallPtrSet<BasicBlock*, 64> Visited;
- AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
+ MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
// Do a quick scan over the function. If we find any blocks that are
// unreachable, remove any instructions inside of them. This prevents
@@ -12786,7 +12815,6 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
MadeIRChange = true;
}
-
// If I is not void type then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust itself.
if (!I->getType()->isVoidTy())
@@ -12810,28 +12838,17 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
}
// Instruction isn't dead, see if we can constant propagate it.
- if (Constant *C = ConstantFoldInstruction(I, F.getContext(), TD)) {
- DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
+ if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
+ if (Constant *C = ConstantFoldInstruction(I, F.getContext(), TD)) {
+ DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
- // Add operands to the worklist.
- ReplaceInstUsesWith(*I, C);
- ++NumConstProp;
- EraseInstFromFunction(*I);
- MadeIRChange = true;
- continue;
- }
-
- if (TD) {
- // See if we can constant fold its operands.
- for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(i))
- if (Constant *NewC = ConstantFoldConstantExpression(CE,
- F.getContext(), TD))
- if (NewC != CE) {
- *i = NewC;
- MadeIRChange = true;
- }
- }
+ // Add operands to the worklist.
+ ReplaceInstUsesWith(*I, C);
+ ++NumConstProp;
+ EraseInstFromFunction(*I);
+ MadeIRChange = true;
+ continue;
+ }
// See if we can trivially sink this instruction to a successor basic block.
if (I->hasOneUse()) {
@@ -12927,12 +12944,13 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
bool InstCombiner::runOnFunction(Function &F) {
MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
Context = &F.getContext();
-
+ TD = getAnalysisIfAvailable<TargetData>();
+
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
- IRBuilder<true, ConstantFolder, InstCombineIRInserter>
- TheBuilder(F.getContext(), ConstantFolder(F.getContext()),
+ IRBuilder<true, TargetFolder, InstCombineIRInserter>
+ TheBuilder(F.getContext(), TargetFolder(TD, F.getContext()),
InstCombineIRInserter(Worklist));
Builder = &TheBuilder;
diff --git a/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll b/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll
index 3ccbc2f04f37..5ea26e76a6af 100644
--- a/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll
+++ b/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll
@@ -1,16 +1,16 @@
-; RUN: opt -gvn -S < %s | FileCheck %s
+; RUN: opt -gvn -instcombine -S < %s | FileCheck %s
declare i8 @llvm.atomic.load.add.i8.p0i8(i8*, i8)
-define void @foo(i8* %ptr) {
+define i8 @foo(i8* %ptr) {
%P = getelementptr i8* %ptr, i32 0
%Q = getelementptr i8* %ptr, i32 1
; CHECK: getelementptr
%X = load i8* %P
-; CHECK: = load
%Y = call i8 @llvm.atomic.load.add.i8.p0i8(i8* %Q, i8 1)
%Z = load i8* %P
; CHECK-NOT: = load
- ret void
-; CHECK: ret void
+ %A = sub i8 %X, %Z
+ ret i8 %A
+; CHECK: ret i8 0
}
diff --git a/test/CodeGen/X86/avoid-loop-align-2.ll b/test/CodeGen/X86/avoid-loop-align-2.ll
index 03e69e7a1a49..fc9d1f0428fb 100644
--- a/test/CodeGen/X86/avoid-loop-align-2.ll
+++ b/test/CodeGen/X86/avoid-loop-align-2.ll
@@ -1,4 +1,8 @@
-; RUN: llc < %s -march=x86 | grep align | count 3
+; RUN: llc < %s -march=x86 | grep align | count 4
+
+; TODO: Is it a good idea to align inner loops? It's hard to know without
+; knowing what their trip counts are, or other dynamic information. For
+; now, CodeGen aligns all loops.
@x = external global i32* ; <i32**> [#uses=1]
diff --git a/test/CodeGen/X86/avoid-loop-align.ll b/test/CodeGen/X86/avoid-loop-align.ll
index 3e68f9486cfa..d4c5c6723243 100644
--- a/test/CodeGen/X86/avoid-loop-align.ll
+++ b/test/CodeGen/X86/avoid-loop-align.ll
@@ -1,4 +1,11 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | grep align | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+
+; CodeGen should align the top of the loop, which differs from the loop
+; header in this case.
+
+; CHECK: jmp LBB1_2
+; CHECK: .align
+; CHECK: LBB1_1:
@A = common global [100 x i32] zeroinitializer, align 32 ; <[100 x i32]*> [#uses=1]
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index fe0e03649ddc..5bf39e5b2946 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -66,8 +66,7 @@ NoOutput("disable-output",
cl::desc("Do not write result bitcode file"), cl::Hidden);
static cl::opt<bool>
-OutputAssembly("S",
- cl::desc("Write output as LLVM assembly"), cl::Hidden);
+OutputAssembly("S", cl::desc("Write output as LLVM assembly"));
static cl::opt<bool>
NoVerify("disable-verify", cl::desc("Do not verify result module"), cl::Hidden);
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index 55d37493ea5a..d7aaea7134fc 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -166,6 +166,7 @@ TEST_F(JITTest, FarCallToKnownFunction) {
EXPECT_EQ(8, TestFunctionPtr());
}
+#if !defined(__arm__) && !defined(__ppc__)
// Test a function C which calls A and B which call each other.
TEST_F(JITTest, NonLazyCompilationStillNeedsStubs) {
TheJIT->DisableLazyCompilation();
@@ -220,6 +221,7 @@ TEST_F(JITTest, NonLazyCompilationStillNeedsStubs) {
F1Ptr();
}
+#endif
// Regression test for PR5162. This used to trigger an AssertingVH inside the
// JIT's Function to stub mapping.