author | Roman Divacky <rdivacky@FreeBSD.org> | 2010-07-15 17:06:11 +0000
---|---|---
committer | Roman Divacky <rdivacky@FreeBSD.org> | 2010-07-15 17:06:11 +0000
commit | f3d15b0b3791d746d44d99b05d3bcb2e9bdf0eb3 | (patch)
tree | 5b6d391c72c9875f0065f0e772e872bc8544834b | /lib
parent | 66e41e3c6e8b8fbc48d5d3b4d2bd9ce0be4ecb75 | (diff)
Diffstat (limited to 'lib')
75 files changed, 1070 insertions, 674 deletions
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index dbefc2dedb2b4..24cd3433a2cab 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -440,27 +440,47 @@ void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To, const TargetData *TD) { assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!"); - // FromHandle - This keeps a weakvh on the from value so that we can know if - // it gets deleted out from under us in a recursive simplification. + // FromHandle/ToHandle - This keeps a WeakVH on the from/to values so that + // we can know if it gets deleted out from under us or replaced in a + // recursive simplification. WeakVH FromHandle(From); + WeakVH ToHandle(To); while (!From->use_empty()) { // Update the instruction to use the new value. - Use &U = From->use_begin().getUse(); - Instruction *User = cast<Instruction>(U.getUser()); - U = To; + Use &TheUse = From->use_begin().getUse(); + Instruction *User = cast<Instruction>(TheUse.getUser()); + TheUse = To; + + // Check to see if the instruction can be folded due to the operand + // replacement. For example changing (or X, Y) into (or X, -1) can replace + // the 'or' with -1. + Value *SimplifiedVal; + { + // Sanity check to make sure 'User' doesn't dangle across + // SimplifyInstruction. + AssertingVH<> UserHandle(User); - // See if we can simplify it. - if (Value *V = SimplifyInstruction(User, TD)) { - // Recursively simplify this. - ReplaceAndSimplifyAllUses(User, V, TD); - - // If the recursive simplification ended up revisiting and deleting 'From' - // then we're done. - if (FromHandle == 0) - return; + SimplifiedVal = SimplifyInstruction(User, TD); + if (SimplifiedVal == 0) continue; } + + // Recursively simplify this user to the new value. + ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD); + From = dyn_cast_or_null<Instruction>((Value*)FromHandle); + To = ToHandle; + + assert(ToHandle && "To value deleted by recursive simplification?"); + + // If the recursive simplification ended up revisiting and deleting + // 'From' then we're done. + if (From == 0) + return; } + + // If 'From' has value handles referring to it, do a real RAUW to update them. + From->replaceAllUsesWith(To); + From->eraseFromParent(); } diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index 38dcd2580e796..8d2712fd6e063 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -71,22 +71,24 @@ ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) { // Are there zero predecessors of this block? if (PI == PE) { - Edge e = getEdge(0,BB); + Edge e = getEdge(0, BB); Count = getEdgeWeight(e); } else { // Otherwise, if there are predecessors, the execution count of this block is // the sum of the edge frequencies from the incoming edges. std::set<const BasicBlock*> ProcessedPreds; Count = 0; - for (; PI != PE; ++PI) - if (ProcessedPreds.insert(*PI).second) { - double w = getEdgeWeight(getEdge(*PI, BB)); + for (; PI != PE; ++PI) { + const BasicBlock *P = *PI; + if (ProcessedPreds.insert(P).second) { + double w = getEdgeWeight(getEdge(P, BB)); if (w == MissingValue) { Count = MissingValue; break; } Count += w; } + } } // If the predecessors did not suffice to get block weight, try successors. 
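The ProfileInfo hunk above hinges on two details: each distinct predecessor edge is counted only once (hence the `ProcessedPreds` set), and a single missing edge weight poisons the whole block count. The sketch below is not part of the commit; it is a minimal standalone illustration of that accumulation pattern, with hypothetical stand-in types (`Block`, `Edge`, `blockCount`, and a placeholder `MissingValue`) rather than the real ProfileInfo API.

```cpp
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-ins for ProfileInfo's block and edge types.
using Block = std::string;
using Edge = std::pair<Block, Block>;   // (predecessor, successor)
constexpr double MissingValue = -1.0;   // placeholder "unknown weight" marker

// Sum the weights of the edges Pred -> BB, visiting each distinct
// predecessor only once; a single unknown edge makes the result unknown.
double blockCount(const Block &BB, const std::vector<Block> &Preds,
                  const std::map<Edge, double> &Weights) {
  std::set<Block> ProcessedPreds;
  double Count = 0;
  for (const Block &P : Preds) {
    if (!ProcessedPreds.insert(P).second)
      continue;                         // duplicate predecessor, already counted
    auto It = Weights.find({P, BB});
    if (It == Weights.end())
      return MissingValue;              // incomplete profile: give up
    Count += It->second;
  }
  return Count;
}
```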
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 67521814b0c6a..221b994db55fa 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -544,20 +544,21 @@ bool LLParser::ParseNamedMetadata() { return true; SmallVector<MDNode *, 8> Elts; - do { - // Null is a special case since it is typeless. - if (EatIfPresent(lltok::kw_null)) { - Elts.push_back(0); - continue; - } + if (Lex.getKind() != lltok::rbrace) + do { + // Null is a special case since it is typeless. + if (EatIfPresent(lltok::kw_null)) { + Elts.push_back(0); + continue; + } - if (ParseToken(lltok::exclaim, "Expected '!' here")) - return true; + if (ParseToken(lltok::exclaim, "Expected '!' here")) + return true; - MDNode *N = 0; - if (ParseMDNodeID(N)) return true; - Elts.push_back(N); - } while (EatIfPresent(lltok::comma)); + MDNode *N = 0; + if (ParseMDNodeID(N)) return true; + Elts.push_back(N); + } while (EatIfPresent(lltok::comma)); if (ParseToken(lltok::rbrace, "expected end of metadata node")) return true; @@ -2021,33 +2022,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ID.StrVal = Lex.getStrVal(); ID.Kind = ValID::t_LocalName; break; - case lltok::exclaim: // !{...} MDNode, !"foo" MDString - Lex.Lex(); - - if (EatIfPresent(lltok::lbrace)) { - SmallVector<Value*, 16> Elts; - if (ParseMDNodeVector(Elts, PFS) || - ParseToken(lltok::rbrace, "expected end of metadata node")) - return true; - - ID.MDNodeVal = MDNode::get(Context, Elts.data(), Elts.size()); - ID.Kind = ValID::t_MDNode; - return false; - } - - // Standalone metadata reference - // !{ ..., !42, ... } - if (Lex.getKind() == lltok::APSInt) { - if (ParseMDNodeID(ID.MDNodeVal)) return true; - ID.Kind = ValID::t_MDNode; - return false; - } - - // MDString: - // ::= '!' STRINGCONSTANT - if (ParseMDString(ID.MDStringVal)) return true; - ID.Kind = ValID::t_MDString; - return false; + case lltok::exclaim: // !42, !{...}, or !"foo" + return ParseMetadataValue(ID, PFS); case lltok::APSInt: ID.APSIntVal = Lex.getAPSIntVal(); ID.Kind = ValID::t_APSInt; @@ -2528,6 +2504,42 @@ bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) { return false; } +/// ParseMetadataValue +/// ::= !42 +/// ::= !{...} +/// ::= !"string" +bool LLParser::ParseMetadataValue(ValID &ID, PerFunctionState *PFS) { + assert(Lex.getKind() == lltok::exclaim); + Lex.Lex(); + + // MDNode: + // !{ ... } + if (EatIfPresent(lltok::lbrace)) { + SmallVector<Value*, 16> Elts; + if (ParseMDNodeVector(Elts, PFS) || + ParseToken(lltok::rbrace, "expected end of metadata node")) + return true; + + ID.MDNodeVal = MDNode::get(Context, Elts.data(), Elts.size()); + ID.Kind = ValID::t_MDNode; + return false; + } + + // Standalone metadata reference + // !42 + if (Lex.getKind() == lltok::APSInt) { + if (ParseMDNodeID(ID.MDNodeVal)) return true; + ID.Kind = ValID::t_MDNode; + return false; + } + + // MDString: + // ::= '!' STRINGCONSTANT + if (ParseMDString(ID.MDStringVal)) return true; + ID.Kind = ValID::t_MDString; + return false; +} + //===----------------------------------------------------------------------===// // Function Parsing. @@ -3983,6 +3995,10 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) { /// ::= 'null' | TypeAndValue bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts, PerFunctionState *PFS) { + // Check for an empty list. + if (Lex.getKind() == lltok::rbrace) + return false; + do { // Null is a special case since it is typeless. 
if (EatIfPresent(lltok::kw_null)) { diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index c8f669f641a8f..f765a2ae4e6ce 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -308,6 +308,7 @@ namespace llvm { bool ParseGlobalValue(const Type *Ty, Constant *&V); bool ParseGlobalTypeAndValue(Constant *&V); bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts); + bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS); bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS); // Function Parsing. diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 527ae49b7143d..b3f0776d29d54 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -820,7 +820,7 @@ bool BitcodeReader::ParseMetadata() { IsFunctionLocal = true; // fall-through case bitc::METADATA_NODE: { - if (Record.empty() || Record.size() % 2 == 1) + if (Record.size() % 2 == 1) return Error("Invalid METADATA_NODE record"); unsigned Size = Record.size(); @@ -834,7 +834,8 @@ bool BitcodeReader::ParseMetadata() { else Elts.push_back(NULL); } - Value *V = MDNode::getWhenValsUnresolved(Context, &Elts[0], Elts.size(), + Value *V = MDNode::getWhenValsUnresolved(Context, + Elts.data(), Elts.size(), IsFunctionLocal); IsFunctionLocal = false; MDValueList.AssignValue(V, NextMDValueNo++); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d9387a8e72c50..db1b37ab263fb 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -178,7 +178,7 @@ bool AsmPrinter::doInitialization(Module &M) { if (!M.getModuleInlineAsm().empty()) { OutStreamer.AddComment("Start of file scope inline assembly"); OutStreamer.AddBlankLine(); - EmitInlineAsm(M.getModuleInlineAsm(), 0/*no loc cookie*/); + EmitInlineAsm(M.getModuleInlineAsm()+"\n", 0/*no loc cookie*/); OutStreamer.AddComment("End of file scope inline assembly"); OutStreamer.AddBlankLine(); } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index f6f3bae42a80c..202d9b67fd157 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -53,17 +53,6 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { } SourceMgr SrcMgr; - - // Ensure the buffer is newline terminated. - char *TmpString = 0; - if (Str.back() != '\n') { - TmpString = new char[Str.size() + 2]; - memcpy(TmpString, Str.data(), Str.size()); - TmpString[Str.size()] = '\n'; - TmpString[Str.size() + 1] = 0; - isNullTerminated = true; - Str = TmpString; - } // If the current LLVMContext has an inline asm handler, set it in SourceMgr. 
LLVMContext &LLVMCtx = MMI->getModule()->getContext(); @@ -95,9 +84,6 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { /*NoFinalize*/ true); if (Res && !HasDiagHandler) report_fatal_error("Error parsing inline asm\n"); - - if (TmpString) - delete[] TmpString; } diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 21a9b7d4db6f6..ad5728458062f 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -119,6 +119,7 @@ bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const { // bool LiveInterval::overlapsFrom(const LiveInterval& other, const_iterator StartPos) const { + assert(!empty() && "empty interval"); const_iterator i = begin(); const_iterator ie = end(); const_iterator j = StartPos; @@ -161,16 +162,8 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other, /// by [Start, End). bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { assert(Start < End && "Invalid range"); - const_iterator I = begin(); - const_iterator E = end(); - const_iterator si = std::upper_bound(I, E, Start); - const_iterator ei = std::upper_bound(I, E, End); - if (si != ei) - return true; - if (si == I) - return false; - --si; - return si->contains(Start); + const_iterator I = std::lower_bound(begin(), end(), End); + return I != begin() && (--I)->end > Start; } /// extendIntervalEndTo - This method is used when we want to extend the range @@ -868,6 +861,10 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { OS << "?"; else OS << vni->def; + if (vni->hasPHIKill()) + OS << "-phikill"; + if (vni->hasRedefByEC()) + OS << "-ec"; } } } diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 956d21c0b34b0..4c054f51f3a8a 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -497,11 +497,6 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) { /// candidate for LICM. e.g. If the instruction is a call, then it's obviously /// not safe to hoist it. bool MachineLICM::IsLICMCandidate(MachineInstr &I) { - // It is not profitable to hoist implicitdefs. FIXME: Why not? what if they - // are an argument to some other otherwise-hoistable instruction? - if (I.isImplicitDef()) - return false; - // Check if it's safe to move the instruction. bool DontMoveAcrossStore = true; if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore)) @@ -717,7 +712,9 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI, bool MachineLICM::EliminateCSE(MachineInstr *MI, DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) { - if (CI == CSEMap.end()) + // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate + // the undef property onto uses. + if (CI == CSEMap.end() || MI->isImplicitDef()) return false; if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 25284d6f5fcf4..15778b46fe0a3 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -563,3 +563,26 @@ unsigned MachineModuleInfo::getPersonalityIndex() const { return 0; } +namespace { + /// VariableDebugSorter - Comparison to sort the VariableDbgInfo map + /// by source location, to avoid depending on the arbitrary order that + /// instruction selection visits variables in. 
+ struct VariableDebugSorter { + bool operator()(const MachineModuleInfo::VariableDbgInfoMapTy::value_type &A, + const MachineModuleInfo::VariableDbgInfoMapTy::value_type &B) + const { + if (A.second.second.getLine() != B.second.second.getLine()) + return A.second.second.getLine() < B.second.second.getLine(); + if (A.second.second.getCol() != B.second.second.getCol()) + return A.second.second.getCol() < B.second.second.getCol(); + return false; + } + }; +} + +MachineModuleInfo::VariableDbgInfoMapTy & +MachineModuleInfo::getVariableDbgInfo() { + std::stable_sort(VariableDbgInfo.begin(), VariableDbgInfo.end(), + VariableDebugSorter()); + return VariableDbgInfo; +} diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index ca4c477168758..2e31908f9fe2f 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -41,21 +41,51 @@ void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, - unsigned Reg, unsigned OpIdx, - const TargetInstrInfo *tii_) { +bool +ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, + unsigned Reg, unsigned OpIdx, + const TargetInstrInfo *tii_, + SmallSet<unsigned, 8> &ImpDefRegs) { unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - Reg == SrcReg && DstSubReg == 0) + Reg == SrcReg && + (DstSubReg == 0 || ImpDefRegs.count(DstReg))) return true; switch(OpIdx) { - case 1: return MI->isCopy() && MI->getOperand(0).getSubReg() == 0; - case 2: return MI->isSubregToReg() && MI->getOperand(0).getSubReg() == 0; - default: return false; + case 1: + return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 || + ImpDefRegs.count(MI->getOperand(0).getReg())); + case 2: + return MI->isSubregToReg() && (MI->getOperand(0).getSubReg() == 0 || + ImpDefRegs.count(MI->getOperand(0).getReg())); + default: return false; } } +static bool isUndefCopy(MachineInstr *MI, unsigned Reg, + const TargetInstrInfo *tii_, + SmallSet<unsigned, 8> &ImpDefRegs) { + if (MI->isCopy()) { + MachineOperand &MO0 = MI->getOperand(0); + MachineOperand &MO1 = MI->getOperand(1); + if (MO1.getReg() != Reg) + return false; + if (!MO0.getSubReg() || ImpDefRegs.count(MO0.getReg())) + return true; + return false; + } + + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) { + if (Reg != SrcReg) + return false; + if (DstSubReg == 0 || ImpDefRegs.count(DstReg)) + return true; + } + return false; +} + /// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure /// there is one implicit_def for each use. Add isUndef marker to /// implicit_def defs and their uses. @@ -104,7 +134,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { // Eliminate %reg1032:sub<def> = COPY undef. if (MI->isCopy() && MI->getOperand(0).getSubReg()) { MachineOperand &MO = MI->getOperand(1); - if (ImpDefRegs.count(MO.getReg())) { + if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) { if (MO.isKill()) { LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg()); vi.removeKill(MI); @@ -126,7 +156,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { if (!ImpDefRegs.count(Reg)) continue; // Use is a copy, just turn it into an implicit_def. 
- if (CanTurnIntoImplicitDef(MI, Reg, i, tii_)) { + if (CanTurnIntoImplicitDef(MI, Reg, i, tii_, ImpDefRegs)) { bool isKill = MO.isKill(); MI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF)); for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) @@ -223,11 +253,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { MachineInstr *RMI = RUses[i]; // Turn a copy use into an implicit_def. - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if ((RMI->isCopy() && RMI->getOperand(1).getReg() == Reg && - RMI->getOperand(0).getSubReg() == 0) || - (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - Reg == SrcReg && DstSubReg == 0)) { + if (isUndefCopy(RMI, Reg, tii_, ImpDefRegs)) { RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF)); bool isKill = false; diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 3f7e4a5fac428..decaa769e99fe 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -135,7 +135,7 @@ unsigned FastISel::getRegForValue(const Value *V) { !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V)))) return FuncInfo.InitializeRegForValue(V); - MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea(); + SavePoint SaveInsertPt = enterLocalValueArea(); // Materialize the value in a register. Emit any instructions in the // local value area. @@ -286,18 +286,22 @@ void FastISel::recomputeInsertPt() { ++FuncInfo.InsertPt; } -MachineBasicBlock::iterator FastISel::enterLocalValueArea() { +FastISel::SavePoint FastISel::enterLocalValueArea() { MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; + DebugLoc OldDL = DL; recomputeInsertPt(); - return OldInsertPt; + DL = DebugLoc(); + SavePoint SP = { OldInsertPt, OldDL }; + return SP; } -void FastISel::leaveLocalValueArea(MachineBasicBlock::iterator OldInsertPt) { +void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) { if (FuncInfo.InsertPt != FuncInfo.MBB->begin()) LastLocalValue = llvm::prior(FuncInfo.InsertPt); // Restore the previous insert position. - FuncInfo.InsertPt = OldInsertPt; + FuncInfo.InsertPt = OldInsertPt.InsertPt; + DL = OldInsertPt.DL; } /// SelectBinaryOp - Select and emit code for a binary operator instruction, @@ -779,39 +783,8 @@ FastISel::SelectFNeg(const User *I) { } bool -FastISel::SelectLoad(const User *I) { - LoadInst *LI = const_cast<LoadInst *>(cast<LoadInst>(I)); - - // For a load from an alloca, make a limited effort to find the value - // already available in a register, avoiding redundant loads. 
- if (!LI->isVolatile() && isa<AllocaInst>(LI->getPointerOperand())) { - BasicBlock::iterator ScanFrom = LI; - if (const Value *V = FindAvailableLoadedValue(LI->getPointerOperand(), - LI->getParent(), ScanFrom)) { - if (!V->use_empty() && - (!isa<Instruction>(V) || - cast<Instruction>(V)->getParent() == LI->getParent() || - (isa<AllocaInst>(V) && - FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V)))) && - (!isa<Argument>(V) || - LI->getParent() == &LI->getParent()->getParent()->getEntryBlock())) { - unsigned ResultReg = getRegForValue(V); - if (ResultReg != 0) { - UpdateValueMap(I, ResultReg); - return true; - } - } - } - } - - return false; -} - -bool FastISel::SelectOperator(const User *I, unsigned Opcode) { switch (Opcode) { - case Instruction::Load: - return SelectLoad(I); case Instruction::Add: return SelectBinaryOp(I, ISD::ADD); case Instruction::FAdd: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index d323c163c143a..458e865a6b3c8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -820,7 +820,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { unsigned InReg = It->second; RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); + return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL); } // Otherwise create a new SDValue and remember it. @@ -3955,7 +3955,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) == AliasAnalysis::NoAlias) { DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - false, I.getArgOperand(0), 0, I.getArgOperand(1), 0)); + false, I.getArgOperand(0), 0, + I.getArgOperand(1), 0)); return 0; } @@ -5522,10 +5523,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { break; } - if (OpInfo.ConstraintType == TargetLowering::C_Other) { - assert(!OpInfo.isIndirect && - "Don't know how to handle indirect other inputs yet!"); + // Treat indirect 'X' constraint as memory. + if (OpInfo.ConstraintType == TargetLowering::C_Other && + OpInfo.isIndirect) + OpInfo.ConstraintType = TargetLowering::C_Memory; + if (OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector<SDValue> Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], Ops, DAG); diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 793f3c7ab7424..e0949bd2856f7 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -712,7 +712,7 @@ bool AsmParser::ParseStatement() { return HadError; } -bool AsmParser::ParseAssignment(const StringRef &Name) { +bool AsmParser::ParseAssignment(StringRef Name) { // FIXME: Use better location, we should use proper tokens. 
SMLoc EqualLoc = Lexer.getLoc(); diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 485bf4d6c9d27..2e78557011331 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -761,7 +761,7 @@ APFloat::APFloat(const fltSemantics &ourSemantics, makeNaN(); } -APFloat::APFloat(const fltSemantics &ourSemantics, const StringRef& text) +APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) { assertArithmeticOK(ourSemantics); initialize(&ourSemantics); @@ -2185,8 +2185,7 @@ APFloat::convertFromZeroExtendedInteger(const integerPart *parts, } APFloat::opStatus -APFloat::convertFromHexadecimalString(const StringRef &s, - roundingMode rounding_mode) +APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode) { lostFraction lost_fraction = lfExactlyZero; integerPart *significand; @@ -2361,7 +2360,7 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts, } APFloat::opStatus -APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mode) +APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { decimalInfo D; opStatus fs; @@ -2471,7 +2470,7 @@ APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mo } APFloat::opStatus -APFloat::convertFromString(const StringRef &str, roundingMode rounding_mode) +APFloat::convertFromString(StringRef str, roundingMode rounding_mode) { assertArithmeticOK(*semantics); assert(!str.empty() && "Invalid string length"); diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 1341d214370f4..262fa42ab2ced 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -102,7 +102,7 @@ APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) clearUnusedBits(); } -APInt::APInt(unsigned numbits, const StringRef& Str, uint8_t radix) +APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) : BitWidth(numbits), VAL(0) { assert(BitWidth && "Bitwidth too small"); fromString(numbits, Str, radix); @@ -613,7 +613,7 @@ APInt& APInt::flip(unsigned bitPosition) { return *this; } -unsigned APInt::getBitsNeeded(const StringRef& str, uint8_t radix) { +unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { assert(!str.empty() && "Invalid string length"); assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) && "Radix should be 2, 8, 10, or 16!"); @@ -2046,7 +2046,7 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS, divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder); } -void APInt::fromString(unsigned numbits, const StringRef& str, uint8_t radix) { +void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { // Check our assumptions here assert(!str.empty() && "Invalid string length"); assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) && diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp index a7631de9d8c69..309ffb02dec68 100644 --- a/lib/Support/Regex.cpp +++ b/lib/Support/Regex.cpp @@ -19,7 +19,7 @@ #include <string> using namespace llvm; -Regex::Regex(const StringRef ®ex, unsigned Flags) { +Regex::Regex(StringRef regex, unsigned Flags) { unsigned flags = 0; preg = new llvm_regex(); preg->re_endp = regex.end(); @@ -52,7 +52,7 @@ unsigned Regex::getNumMatches() const { return preg->re_nsub; } -bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){ +bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){ unsigned nmatch = Matches ? preg->re_nsub+1 : 0; // pmatch needs to have at least one element. 
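A recurring change in the AsmParser, APFloat, APInt, and Regex hunks above (and in StringPool just below) is turning `const StringRef &` parameters into by-value `StringRef`. This is not part of the patch itself, but a minimal sketch of why the change is harmless: a StringRef-like type is only a pointer plus a length, so copying it costs about the same as passing a reference and removes one level of indirection. The `StringRefLite` and `countNonEmpty` names here are hypothetical stand-ins, not LLVM APIs.

```cpp
#include <cstddef>
#include <cstring>
#include <iostream>

// Simplified stand-in for llvm::StringRef: a non-owning pointer + length.
struct StringRefLite {
  const char *Data = nullptr;
  size_t Length = 0;
  StringRefLite(const char *S) : Data(S), Length(std::strlen(S)) {}
  bool empty() const { return Length == 0; }
};

// A two-word value type: passing it by value typically travels in registers,
// so it is as cheap as a reference and avoids dereferencing on every use.
size_t countNonEmpty(StringRefLite A, StringRefLite B) {
  return (!A.empty()) + (!B.empty());
}

int main() {
  std::cout << countNonEmpty("lib", "") << "\n";  // prints 1
}
```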
diff --git a/lib/Support/StringPool.cpp b/lib/Support/StringPool.cpp index 1ee917f119f7e..ff607cf8c4add 100644 --- a/lib/Support/StringPool.cpp +++ b/lib/Support/StringPool.cpp @@ -22,7 +22,7 @@ StringPool::~StringPool() { assert(InternTable.empty() && "PooledStringPtr leaked!"); } -PooledStringPtr StringPool::intern(const StringRef &Key) { +PooledStringPtr StringPool::intern(StringRef Key) { table_t::iterator I = InternTable.find(Key); if (I != InternTable.end()) return PooledStringPtr(&*I); diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc index 67018de812ed0..0209f5aaf832d 100644 --- a/lib/System/Unix/Program.inc +++ b/lib/System/Unix/Program.inc @@ -310,12 +310,9 @@ Program::Wait(unsigned secondsToWait, // fact of having a handler at all causes the wait below to return with EINTR, // unlike if we used SIG_IGN. if (secondsToWait) { -#if !defined(__HAIKU__) && !defined(__minix) - Act.sa_sigaction = 0; -#endif + memset(&Act, 0, sizeof(Act)); Act.sa_handler = TimeOutHandler; sigemptyset(&Act.sa_mask); - Act.sa_flags = 0; sigaction(SIGALRM, &Act, &Old); alarm(secondsToWait); } diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index f1e6a9f083e2e..fa64d6c2a4b4d 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -48,6 +48,8 @@ def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", "Enable divide instructions">; def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", "Enable Thumb2 extract and pack instructions">; +def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", + "FP compare + branch is slow">; // Some processors have multiply-accumulate instructions that don't // play nicely with other VFP instructions, and it's generally better @@ -129,7 +131,7 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, // V7 Processors. def : Processor<"cortex-a8", CortexA8Itineraries, [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx, - FeatureNEONForFP, FeatureT2ExtractPack]>; + FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2ExtractPack]>; def : Processor<"cortex-a9", CortexA9Itineraries, [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>; def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv]>; diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index d316b13e04884..92a13f1d751ca 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -519,9 +519,8 @@ namespace ARM_AM { // // This is stored in two operands [regaddr, align]. The first is the // address register. The second operand is the value of the alignment - // specifier to use or zero if no explicit alignment. - // Valid alignments are: 0, 8, 16, and 32 bytes, depending on the specific - // instruction. + // specifier in bytes or zero if no explicit alignment. + // Valid alignments depend on the specific instruction. 
//===--------------------------------------------------------------------===// // NEON Modified Immediates diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 98d8b85854283..0091df753eb78 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -565,6 +565,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::CMPZ: return "ARMISD::CMPZ"; case ARMISD::CMPFP: return "ARMISD::CMPFP"; case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; + case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; case ARMISD::CMOV: return "ARMISD::CMOV"; case ARMISD::CNEG: return "ARMISD::CNEG"; @@ -623,6 +624,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; + case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; + case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; case ARMISD::VDUP: return "ARMISD::VDUP"; case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; case ARMISD::VEXT: return "ARMISD::VEXT"; @@ -2216,7 +2219,7 @@ static bool isFloatingPointZero(SDValue Op) { /// the given operands. SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMCC, SelectionDAG &DAG, + SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const { if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { unsigned C = RHSC->getZExtValue(); @@ -2268,48 +2271,14 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, CompareType = ARMISD::CMPZ; break; } - ARMCC = DAG.getConstant(CondCode, MVT::i32); + ARMcc = DAG.getConstant(CondCode, MVT::i32); return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS); } -static bool canBitcastToInt(SDNode *Op) { - return Op->hasOneUse() && - ISD::isNormalLoad(Op) && - Op->getValueType(0) == MVT::f32; -} - -static SDValue bitcastToInt(SDValue Op, SelectionDAG &DAG) { - if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) - return DAG.getLoad(MVT::i32, Op.getDebugLoc(), - Ld->getChain(), Ld->getBasePtr(), - Ld->getSrcValue(), Ld->getSrcValueOffset(), - Ld->isVolatile(), Ld->isNonTemporal(), - Ld->getAlignment()); - - llvm_unreachable("Unknown VFP cmp argument!"); -} - /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. SDValue -ARMTargetLowering::getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, - SDValue &ARMCC, SelectionDAG &DAG, +ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, DebugLoc dl) const { - if (UnsafeFPMath && FiniteOnlyFPMath() && - (CC == ISD::SETEQ || CC == ISD::SETOEQ || - CC == ISD::SETNE || CC == ISD::SETUNE) && - canBitcastToInt(LHS.getNode()) && canBitcastToInt(RHS.getNode())) { - // If unsafe fp math optimization is enabled and there are no othter uses of - // the CMP operands, and the condition code is EQ oe NE, we can optimize it - // to an integer comparison. 
- if (CC == ISD::SETOEQ) - CC = ISD::SETEQ; - else if (CC == ISD::SETUNE) - CC = ISD::SETNE; - LHS = bitcastToInt(LHS, DAG); - RHS = bitcastToInt(RHS, DAG); - return getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); - } - SDValue Cmp; if (!isFloatingPointZero(RHS)) Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS); @@ -2328,59 +2297,184 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); if (LHS.getValueType() == MVT::i32) { - SDValue ARMCC; + SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); - return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp); + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); } ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); - SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); + SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl); SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, - ARMCC, CCR, Cmp); + ARMcc, CCR, Cmp); if (CondCode2 != ARMCC::AL) { - SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32); + SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. - SDValue Cmp2 = getVFPCmp(LHS, RHS, CC, ARMCC2, DAG, dl); + SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); Result = DAG.getNode(ARMISD::CMOV, dl, VT, - Result, TrueVal, ARMCC2, CCR, Cmp2); + Result, TrueVal, ARMcc2, CCR, Cmp2); } return Result; } +/// canChangeToInt - Given the fp compare operand, return true if it is suitable +/// to morph to an integer compare sequence. +static bool canChangeToInt(SDValue Op, bool &SeenZero, + const ARMSubtarget *Subtarget) { + SDNode *N = Op.getNode(); + if (!N->hasOneUse()) + // Otherwise it requires moving the value from fp to integer registers. + return false; + if (!N->getNumValues()) + return false; + EVT VT = Op.getValueType(); + if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) + // f32 case is generally profitable. f64 case only makes sense when vcmpe + + // vmrs are very slow, e.g. cortex-a8. 
+ return false; + + if (isFloatingPointZero(Op)) { + SeenZero = true; + return true; + } + return ISD::isNormalLoad(N); +} + +static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { + if (isFloatingPointZero(Op)) + return DAG.getConstant(0, MVT::i32); + + if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) + return DAG.getLoad(MVT::i32, Op.getDebugLoc(), + Ld->getChain(), Ld->getBasePtr(), + Ld->getSrcValue(), Ld->getSrcValueOffset(), + Ld->isVolatile(), Ld->isNonTemporal(), + Ld->getAlignment()); + + llvm_unreachable("Unknown VFP cmp argument!"); +} + +static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, + SDValue &RetVal1, SDValue &RetVal2) { + if (isFloatingPointZero(Op)) { + RetVal1 = DAG.getConstant(0, MVT::i32); + RetVal2 = DAG.getConstant(0, MVT::i32); + return; + } + + if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { + SDValue Ptr = Ld->getBasePtr(); + RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), + Ld->getChain(), Ptr, + Ld->getSrcValue(), Ld->getSrcValueOffset(), + Ld->isVolatile(), Ld->isNonTemporal(), + Ld->getAlignment()); + + EVT PtrType = Ptr.getValueType(); + unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); + SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(), + PtrType, Ptr, DAG.getConstant(4, PtrType)); + RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), + Ld->getChain(), NewPtr, + Ld->getSrcValue(), Ld->getSrcValueOffset() + 4, + Ld->isVolatile(), Ld->isNonTemporal(), + NewAlign); + return; + } + + llvm_unreachable("Unknown VFP cmp argument!"); +} + +/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some +/// f32 and even f64 comparisons to integer ones. +SDValue +ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); + SDValue LHS = Op.getOperand(2); + SDValue RHS = Op.getOperand(3); + SDValue Dest = Op.getOperand(4); + DebugLoc dl = Op.getDebugLoc(); + + bool SeenZero = false; + if (canChangeToInt(LHS, SeenZero, Subtarget) && + canChangeToInt(RHS, SeenZero, Subtarget) && + // If one of the operand is zero, it's safe to ignore the NaN case. + (FiniteOnlyFPMath() || SeenZero)) { + // If unsafe fp math optimization is enabled and there are no othter uses of + // the CMP operands, and the condition code is EQ oe NE, we can optimize it + // to an integer comparison. 
+ if (CC == ISD::SETOEQ) + CC = ISD::SETEQ; + else if (CC == ISD::SETUNE) + CC = ISD::SETNE; + + SDValue ARMcc; + if (LHS.getValueType() == MVT::f32) { + LHS = bitcastf32Toi32(LHS, DAG); + RHS = bitcastf32Toi32(RHS, DAG); + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, + Chain, Dest, ARMcc, CCR, Cmp); + } + + SDValue LHS1, LHS2; + SDValue RHS1, RHS2; + expandf64Toi32(LHS, DAG, LHS1, LHS2); + expandf64Toi32(RHS, DAG, RHS1, RHS2); + ARMCC::CondCodes CondCode = IntCCToARMCC(CC); + ARMcc = DAG.getConstant(CondCode, MVT::i32); + SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); + SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; + return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); + } + + return SDValue(); +} + SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { - SDValue Chain = Op.getOperand(0); + SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); - SDValue LHS = Op.getOperand(2); - SDValue RHS = Op.getOperand(3); - SDValue Dest = Op.getOperand(4); + SDValue LHS = Op.getOperand(2); + SDValue RHS = Op.getOperand(3); + SDValue Dest = Op.getOperand(4); DebugLoc dl = Op.getDebugLoc(); if (LHS.getValueType() == MVT::i32) { - SDValue ARMCC; + SDValue ARMcc; + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, - Chain, Dest, ARMCC, CCR,Cmp); + Chain, Dest, ARMcc, CCR, Cmp); } assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); + + if (UnsafeFPMath && + (CC == ISD::SETEQ || CC == ISD::SETOEQ || + CC == ISD::SETNE || CC == ISD::SETUNE)) { + SDValue Result = OptimizeVFPBrcond(Op, DAG); + if (Result.getNode()) + return Result; + } + ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); - SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); - SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl); + SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp }; + SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); if (CondCode2 != ARMCC::AL) { - ARMCC = DAG.getConstant(CondCode2, MVT::i32); - SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) }; + ARMcc = DAG.getConstant(CondCode2, MVT::i32); + SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); } return Res; @@ -2469,12 +2563,11 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT SrcVT = Tmp1.getValueType(); SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); - SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32); + SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32); SDValue FP0 = DAG.getConstantFP(0.0, SrcVT); - SDValue Cmp = getVFPCmp(Tmp1, FP0, - ISD::SETLT, ARMCC, DAG, dl); + SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); + return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp); } SDValue 
ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ @@ -2553,51 +2646,18 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { } /// getZeroVector - Returns a vector of specified type with all zero elements. -/// +/// Zero vectors are used to represent vector negation and in those cases +/// will be implemented with the NEON VNEG instruction. However, VNEG does +/// not support i64 elements, so sometimes the zero vectors will need to be +/// explicitly constructed. Regardless, use a canonical VMOV to create the +/// zero vector. static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); - - // Zero vectors are used to represent vector negation and in those cases - // will be implemented with the NEON VNEG instruction. However, VNEG does - // not support i64 elements, so sometimes the zero vectors will need to be - // explicitly constructed. For those cases, and potentially other uses in - // the future, always build zero vectors as <16 x i8> or <8 x i8> bitcasted - // to their dest type. This ensures they get CSE'd. - SDValue Vec; - SDValue Cst = DAG.getTargetConstant(0, MVT::i8); - SmallVector<SDValue, 8> Ops; - MVT TVT; - - if (VT.getSizeInBits() == 64) { - Ops.assign(8, Cst); TVT = MVT::v8i8; - } else { - Ops.assign(16, Cst); TVT = MVT::v16i8; - } - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size()); - - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); -} - -/// getOnesVector - Returns a vector of specified type with all bits set. -/// -static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { - assert(VT.isVector() && "Expected a vector type"); - - // Always build ones vectors as <16 x i8> or <8 x i8> bitcasted to their - // dest type. This ensures they get CSE'd. - SDValue Vec; - SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8); - SmallVector<SDValue, 8> Ops; - MVT TVT; - - if (VT.getSizeInBits() == 64) { - Ops.assign(8, Cst); TVT = MVT::v8i8; - } else { - Ops.assign(16, Cst); TVT = MVT::v16i8; - } - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size()); - - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); + // The canonical modified immediate encoding of a zero vector is....0! + SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); + EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; + SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); } /// LowerShiftRightParts - Lower SRA_PARTS, which returns two @@ -2611,7 +2671,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); - SDValue ARMCC; + SDValue ARMcc; unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? 
ISD::SRA : ISD::SRL; assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); @@ -2627,9 +2687,9 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, - ARMCC, DAG, dl); + ARMcc, DAG, dl); SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); - SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, + SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp); SDValue Ops[2] = { Lo, Hi }; @@ -2647,7 +2707,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); - SDValue ARMCC; + SDValue ARMcc; assert(Op.getOpcode() == ISD::SHL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, @@ -2661,9 +2721,9 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, - ARMCC, DAG, dl); + ARMcc, DAG, dl); SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); - SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC, + SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc, CCR, Cmp); SDValue Ops[2] = { Lo, Hi }; @@ -2850,13 +2910,11 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { /// isNEONModifiedImm - Check if the specified splat value corresponds to a /// valid vector constant for a NEON instruction with a "modified immediate" -/// operand (e.g., VMOV). If so, return either the constant being -/// splatted or the encoded value, depending on the DoEncode parameter. +/// operand (e.g., VMOV). If so, return the encoded value. static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, - bool isVMOV, bool DoEncode) { + EVT &VT, bool is128Bits, bool isVMOV) { unsigned OpCmode, Imm; - EVT VT; // SplatBitSize is set to the smallest size that splats the vector, so a // zero vector will always have SplatBitSize == 8. However, NEON modified @@ -2868,16 +2926,18 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, switch (SplatBitSize) { case 8: + if (!isVMOV) + return SDValue(); // Any 1-byte value is OK. Op=0, Cmode=1110. assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); OpCmode = 0xe; Imm = SplatBits; - VT = MVT::i8; + VT = is128Bits ? MVT::v16i8 : MVT::v8i8; break; case 16: // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. - VT = MVT::i16; + VT = is128Bits ? MVT::v8i16 : MVT::v4i16; if ((SplatBits & ~0xff) == 0) { // Value = 0x00nn: Op=x, Cmode=100x. OpCmode = 0x8; @@ -2897,7 +2957,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, // * only one byte is nonzero, or // * the least significant byte is 0xff and the second byte is nonzero, or // * the least significant 2 bytes are 0xff and the third is nonzero. - VT = MVT::i32; + VT = is128Bits ? MVT::v4i32 : MVT::v2i32; if ((SplatBits & ~0xff) == 0) { // Value = 0x000000nn: Op=x, Cmode=000x. OpCmode = 0; @@ -2949,9 +3009,9 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, return SDValue(); case 64: { - // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 
if (!isVMOV) return SDValue(); + // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. uint64_t BitMask = 0xff; uint64_t Val = 0; unsigned ImmMask = 1; @@ -2969,7 +3029,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, // Op=1, Cmode=1110. OpCmode = 0x1e; SplatBits = Val; - VT = MVT::i64; + VT = is128Bits ? MVT::v2i64 : MVT::v1i64; break; } @@ -2978,32 +3038,8 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, return SDValue(); } - if (DoEncode) { - unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); - return DAG.getTargetConstant(EncodedVal, MVT::i32); - } - return DAG.getTargetConstant(SplatBits, VT); -} - -/// getNEONModImm - If this is a valid vector constant for a NEON instruction -/// with a "modified immediate" operand (e.g., VMOV) of the specified element -/// size, return the encoded value for that immediate. The ByteSize field -/// indicates the number of bytes of each element [1248]. -SDValue ARM::getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV, - SelectionDAG &DAG) { - BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N); - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, - HasAnyUndefs, ByteSize * 8)) - return SDValue(); - - if (SplatBitSize > ByteSize * 8) - return SDValue(); - - return isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), - SplatBitSize, DAG, isVMOV, true); + unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); + return DAG.getTargetConstant(EncodedVal, MVT::i32); } static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, @@ -3194,43 +3230,6 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, return true; } - -static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) { - // Canonicalize all-zeros and all-ones vectors. - ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode()); - if (ConstVal->isNullValue()) - return getZeroVector(VT, DAG, dl); - if (ConstVal->isAllOnesValue()) - return getOnesVector(VT, DAG, dl); - - EVT CanonicalVT; - if (VT.is64BitVector()) { - switch (Val.getValueType().getSizeInBits()) { - case 8: CanonicalVT = MVT::v8i8; break; - case 16: CanonicalVT = MVT::v4i16; break; - case 32: CanonicalVT = MVT::v2i32; break; - case 64: CanonicalVT = MVT::v1i64; break; - default: llvm_unreachable("unexpected splat element type"); break; - } - } else { - assert(VT.is128BitVector() && "unknown splat vector size"); - switch (Val.getValueType().getSizeInBits()) { - case 8: CanonicalVT = MVT::v16i8; break; - case 16: CanonicalVT = MVT::v8i16; break; - case 32: CanonicalVT = MVT::v4i32; break; - case 64: CanonicalVT = MVT::v2i64; break; - default: llvm_unreachable("unexpected splat element type"); break; - } - } - - // Build a canonical splat for this value. - SmallVector<SDValue, 8> Ops; - Ops.assign(CanonicalVT.getVectorNumElements(), Val); - SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, &Ops[0], - Ops.size()); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Res); -} - // If this is a case we can't handle, return null and let the default // expansion code take care of it. static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { @@ -3244,11 +3243,25 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { // Check if an immediate VMOV works. 
+ EVT VmovVT; SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), - SplatUndef.getZExtValue(), - SplatBitSize, DAG, true, false); - if (Val.getNode()) - return BuildSplat(Val, VT, DAG, dl); + SplatUndef.getZExtValue(), SplatBitSize, + DAG, VmovVT, VT.is128BitVector(), true); + if (Val.getNode()) { + SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); + } + + // Try an immediate VMVN. + uint64_t NegatedImm = (SplatBits.getZExtValue() ^ + ((1LL << SplatBitSize) - 1)); + Val = isNEONModifiedImm(NegatedImm, + SplatUndef.getZExtValue(), SplatBitSize, + DAG, VmovVT, VT.is128BitVector(), false); + if (Val.getNode()) { + SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); + } } } @@ -3825,6 +3838,15 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, return BB; } +static +MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { + for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) + if (*I != Succ) + return *I; + llvm_unreachable("Expecting a BB with two successors!"); +} + MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -3941,6 +3963,46 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return BB; } + case ARM::BCCi64: + case ARM::BCCZi64: { + // Compare both parts that make up the double comparison separately for + // equality. + bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; + + unsigned LHS1 = MI->getOperand(1).getReg(); + unsigned LHS2 = MI->getOperand(2).getReg(); + if (RHSisZero) { + AddDefaultPred(BuildMI(BB, dl, + TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(LHS1).addImm(0)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(LHS2).addImm(0) + .addImm(ARMCC::EQ).addReg(ARM::CPSR); + } else { + unsigned RHS1 = MI->getOperand(3).getReg(); + unsigned RHS2 = MI->getOperand(4).getReg(); + AddDefaultPred(BuildMI(BB, dl, + TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) + .addReg(LHS1).addReg(RHS1)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) + .addReg(LHS2).addReg(RHS2) + .addImm(ARMCC::EQ).addReg(ARM::CPSR); + } + + MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB(); + MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); + if (MI->getOperand(0).getImm() == ARMCC::NE) + std::swap(destMBB, exitMBB); + + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B)) + .addMBB(exitMBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + } + case ARM::tANDsp: case ARM::tADDspr_: case ARM::tSUBspi_: @@ -4180,6 +4242,35 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, return SDValue(); } +/// PerformVDUPLANECombine - Target-specific dag combine xforms for +/// ARMISD::VDUPLANE. +static SDValue PerformVDUPLANECombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is + // redundant. + SDValue Op = N->getOperand(0); + EVT VT = N->getValueType(0); + + // Ignore bit_converts. 
+ while (Op.getOpcode() == ISD::BIT_CONVERT) + Op = Op.getOperand(0); + if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM) + return SDValue(); + + // Make sure the VMOV element size is not bigger than the VDUPLANE elements. + unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits(); + // The canonical VMOV for a zero vector uses a 32-bit element size. + unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned EltBits; + if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) + EltSize = 8; + if (EltSize > VT.getVectorElementType().getSizeInBits()) + return SDValue(); + + SDValue Res = DCI.DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op); + return DCI.CombineTo(N, Res, false); +} + /// getVShiftImm - Check if this is a valid build_vector for the immediate /// operand of a vector shift operation, where all the elements of the /// build_vector must have the same constant integer value. @@ -4558,6 +4649,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); + case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: case ISD::SRA: diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 3a3866928a0e9..128b72e1e743e 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -53,6 +53,8 @@ namespace llvm { CMOV, // ARM conditional move instructions. CNEG, // ARM conditional negate instructions. + BCC_i64, + RBIT, // ARM bitreverse instruction FTOSI, // FP to sint within a FP register. @@ -122,6 +124,10 @@ namespace llvm { VGETLANEu, // zero-extend vector extract element VGETLANEs, // sign-extend vector extract element + // Vector move immediate and move negated immediate: + VMOVIMM, + VMVNIMM, + // Vector duplicate: VDUP, VDUPLANE, @@ -150,13 +156,6 @@ namespace llvm { /// Define some predicates that are used for node matching. namespace ARM { - /// getNEONModImm - If this is a valid vector constant for a NEON - /// instruction with a "modified immediate" operand (e.g., VMOV) of the - /// specified element size, return the encoded value for that immediate. - /// The ByteSize field indicates the number of bytes of each element [1248]. - SDValue getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV, - SelectionDAG &DAG); - /// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be /// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd) /// instruction, returns its 8-bit integer representation. 
Otherwise, @@ -363,9 +362,11 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const; - SDValue getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, - SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const; + SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const; + SDValue getVFPCmp(SDValue LHS, SDValue RHS, + SelectionDAG &DAG, DebugLoc dl) const; + + SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const; MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index c73e204a26b3f..51fc1522485fa 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -38,6 +38,12 @@ def SDT_ARMBr2JT : SDTypeProfile<0, 4, [SDTCisPtrTy<0>, SDTCisVT<1, i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; +def SDT_ARMBCC_i64 : SDTypeProfile<0, 6, + [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, SDTCisVT<2, i32>, + SDTCisVT<3, i32>, SDTCisVT<4, i32>, + SDTCisVT<5, OtherVT>]>; + def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, @@ -90,6 +96,9 @@ def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT, def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT, [SDNPHasChain]>; +def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64, + [SDNPHasChain]>; + def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, [SDNPOutFlag]>; @@ -1685,13 +1694,19 @@ def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), } // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. +// The assume-no-carry-in form uses the negation of the input since add/sub +// assume opposite meanings of the carry flag (i.e., carry == !borrow). +// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory +// details. def : ARMPat<(add GPR:$src, so_imm_neg:$imm), (SUBri GPR:$src, so_imm_neg:$imm)>; - -//def : ARMPat<(addc GPR:$src, so_imm_neg:$imm), -// (SUBSri GPR:$src, so_imm_neg:$imm)>; -//def : ARMPat<(adde GPR:$src, so_imm_neg:$imm), -// (SBCri GPR:$src, so_imm_neg:$imm)>; +def : ARMPat<(addc GPR:$src, so_imm_neg:$imm), + (SUBSri GPR:$src, so_imm_neg:$imm)>; +// The with-carry-in form matches bitwise not instead of the negation. +// Effectively, the inverse interpretation of the carry flag already accounts +// for part of the negation. +def : ARMPat<(adde GPR:$src, so_imm_not:$imm), + (SBCri GPR:$src, so_imm_not:$imm)>; // Note: These are implemented in C++ code, because they have to generate // ADD/SUBrs instructions, which use a complex pattern that a xform function @@ -2279,6 +2294,22 @@ defm CMNz : AI1_cmp_irs<0b1011, "cmn", def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), (CMNzri GPR:$src, so_imm_neg:$imm)>; +// Pseudo i64 compares for some floating point compares. 
+let usesCustomInserter = 1, isBranch = 1, isTerminator = 1, + Defs = [CPSR] in { +def BCCi64 : PseudoInst<(outs), + (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst), + IIC_Br, + "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc", + [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>; + +def BCCZi64 : PseudoInst<(outs), + (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), + IIC_Br, + "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, 0, 0, imm:$cc", + [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>; +} // usesCustomInserter + // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index a84315f73038c..7f7eb980abe83 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -65,6 +65,10 @@ def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>, def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>; def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; +def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; +def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>; +def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; + def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; // VDUPLANE can produce a quad-register result from a double-register source, @@ -94,6 +98,20 @@ def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>, def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>; def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>; +def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{ + ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); + unsigned EltBits; + uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); + return (EltBits == 32 && EltVal == 0); +}]>; + +def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{ + ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); + unsigned EltBits; + uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); + return (EltBits == 8 && EltVal == 0xff); +}]>; + //===----------------------------------------------------------------------===// // NEON operand definitions //===----------------------------------------------------------------------===// @@ -2318,10 +2336,10 @@ defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, // Vector Bitwise Operations. 
-def vnot8 : PatFrag<(ops node:$in), - (xor node:$in, (bitconvert (v8i8 immAllOnesV)))>; -def vnot16 : PatFrag<(ops node:$in), - (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>; +def vnotd : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>; +def vnotq : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>; // VAND : Vector Bitwise AND @@ -2347,36 +2365,58 @@ def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD, "vbic", "$dst, $src1, $src2", "", [(set DPR:$dst, (v2i32 (and DPR:$src1, - (vnot8 DPR:$src2))))]>; + (vnotd DPR:$src2))))]>; def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ, "vbic", "$dst, $src1, $src2", "", [(set QPR:$dst, (v4i32 (and QPR:$src1, - (vnot16 QPR:$src2))))]>; + (vnotq QPR:$src2))))]>; // VORN : Vector Bitwise OR NOT def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD, "vorn", "$dst, $src1, $src2", "", [(set DPR:$dst, (v2i32 (or DPR:$src1, - (vnot8 DPR:$src2))))]>; + (vnotd DPR:$src2))))]>; def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ, "vorn", "$dst, $src1, $src2", "", [(set QPR:$dst, (v4i32 (or QPR:$src1, - (vnot16 QPR:$src2))))]>; + (vnotq QPR:$src2))))]>; + +// VMVN : Vector Bitwise NOT (Immediate) + +let isReMaterializable = 1 in { +def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmvn", "i16", "$dst, $SIMM", "", + [(set DPR:$dst, (v4i16 (NEONvmvnImm timm:$SIMM)))]>; +def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmvn", "i16", "$dst, $SIMM", "", + [(set QPR:$dst, (v8i16 (NEONvmvnImm timm:$SIMM)))]>; + +def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmvn", "i32", "$dst, $SIMM", "", + [(set DPR:$dst, (v2i32 (NEONvmvnImm timm:$SIMM)))]>; +def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmvn", "i32", "$dst, $SIMM", "", + [(set QPR:$dst, (v4i32 (NEONvmvnImm timm:$SIMM)))]>; +} // VMVN : Vector Bitwise NOT def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD, "vmvn", "$dst, $src", "", - [(set DPR:$dst, (v2i32 (vnot8 DPR:$src)))]>; + [(set DPR:$dst, (v2i32 (vnotd DPR:$src)))]>; def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, (outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD, "vmvn", "$dst, $src", "", - [(set QPR:$dst, (v4i32 (vnot16 QPR:$src)))]>; -def : Pat<(v2i32 (vnot8 DPR:$src)), (VMVNd DPR:$src)>; -def : Pat<(v4i32 (vnot16 QPR:$src)), (VMVNq QPR:$src)>; + [(set QPR:$dst, (v4i32 (vnotq QPR:$src)))]>; +def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>; +def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>; // VBSL : Vector Bitwise Select def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), @@ -2385,14 +2425,14 @@ def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), "vbsl", "$dst, $src2, $src3", "$src1 = $dst", [(set DPR:$dst, (v2i32 (or (and DPR:$src2, DPR:$src1), - (and DPR:$src3, (vnot8 DPR:$src1)))))]>; + (and DPR:$src3, (vnotd DPR:$src1)))))]>; def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, IIC_VCNTiQ, "vbsl", "$dst, $src2, $src3", "$src1 = $dst", [(set QPR:$dst, (v4i32 (or (and 
QPR:$src2, QPR:$src1), - (and QPR:$src3, (vnot16 QPR:$src1)))))]>; + (and QPR:$src3, (vnotq QPR:$src1)))))]>; // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", @@ -2726,20 +2766,19 @@ defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, // Vector Negate. -def vneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>; -def vneg8 : PatFrag<(ops node:$in), - (sub (bitconvert (v8i8 immAllZerosV)), node:$in)>; -def vneg16 : PatFrag<(ops node:$in), - (sub (bitconvert (v16i8 immAllZerosV)), node:$in)>; +def vnegd : PatFrag<(ops node:$in), + (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>; +def vnegq : PatFrag<(ops node:$in), + (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>; class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", - [(set DPR:$dst, (Ty (vneg8 DPR:$src)))]>; + [(set DPR:$dst, (Ty (vnegd DPR:$src)))]>; class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", - [(set QPR:$dst, (Ty (vneg16 QPR:$src)))]>; + [(set QPR:$dst, (Ty (vnegq QPR:$src)))]>; // VNEG : Vector Negate (integer) def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>; @@ -2759,12 +2798,12 @@ def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0, "vneg", "f32", "$dst, $src", "", [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>; -def : Pat<(v8i8 (vneg8 DPR:$src)), (VNEGs8d DPR:$src)>; -def : Pat<(v4i16 (vneg8 DPR:$src)), (VNEGs16d DPR:$src)>; -def : Pat<(v2i32 (vneg8 DPR:$src)), (VNEGs32d DPR:$src)>; -def : Pat<(v16i8 (vneg16 QPR:$src)), (VNEGs8q QPR:$src)>; -def : Pat<(v8i16 (vneg16 QPR:$src)), (VNEGs16q QPR:$src)>; -def : Pat<(v4i32 (vneg16 QPR:$src)), (VNEGs32q QPR:$src)>; +def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>; +def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>; +def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>; +def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>; +def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>; +def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>; // VQNEG : Vector Saturating Negate defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, @@ -2818,74 +2857,42 @@ def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src), // VMOV : Vector Move (Immediate) -// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm. -def VMOV_get_imm8 : SDNodeXForm<build_vector, [{ - return ARM::getNEONModImm(N, 1, true, *CurDAG); -}]>; -def vmovImm8 : PatLeaf<(build_vector), [{ - return ARM::getNEONModImm(N, 1, true, *CurDAG).getNode() != 0; -}], VMOV_get_imm8>; - -// VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm. -def VMOV_get_imm16 : SDNodeXForm<build_vector, [{ - return ARM::getNEONModImm(N, 2, true, *CurDAG); -}]>; -def vmovImm16 : PatLeaf<(build_vector), [{ - return ARM::getNEONModImm(N, 2, true, *CurDAG).getNode() != 0; -}], VMOV_get_imm16>; - -// VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm. -def VMOV_get_imm32 : SDNodeXForm<build_vector, [{ - return ARM::getNEONModImm(N, 4, true, *CurDAG); -}]>; -def vmovImm32 : PatLeaf<(build_vector), [{ - return ARM::getNEONModImm(N, 4, true, *CurDAG).getNode() != 0; -}], VMOV_get_imm32>; - -// VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm. 
-def VMOV_get_imm64 : SDNodeXForm<build_vector, [{ - return ARM::getNEONModImm(N, 8, true, *CurDAG); -}]>; -def vmovImm64 : PatLeaf<(build_vector), [{ - return ARM::getNEONModImm(N, 8, true, *CurDAG).getNode() != 0; -}], VMOV_get_imm64>; - let isReMaterializable = 1 in { def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i8", "$dst, $SIMM", "", - [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>; + [(set DPR:$dst, (v8i8 (NEONvmovImm timm:$SIMM)))]>; def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i8", "$dst, $SIMM", "", - [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>; + [(set QPR:$dst, (v16i8 (NEONvmovImm timm:$SIMM)))]>; def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i16", "$dst, $SIMM", "", - [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>; + [(set DPR:$dst, (v4i16 (NEONvmovImm timm:$SIMM)))]>; def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i16", "$dst, $SIMM", "", - [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>; + [(set QPR:$dst, (v8i16 (NEONvmovImm timm:$SIMM)))]>; -def VMOVv2i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 0, 0, 1, (outs DPR:$dst), +def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i32", "$dst, $SIMM", "", - [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>; -def VMOVv4i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 1, 0, 1, (outs QPR:$dst), + [(set DPR:$dst, (v2i32 (NEONvmovImm timm:$SIMM)))]>; +def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i32", "$dst, $SIMM", "", - [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>; + [(set QPR:$dst, (v4i32 (NEONvmovImm timm:$SIMM)))]>; def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i64", "$dst, $SIMM", "", - [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>; + [(set DPR:$dst, (v1i64 (NEONvmovImm timm:$SIMM)))]>; def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i64", "$dst, $SIMM", "", - [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>; + [(set QPR:$dst, (v2i64 (NEONvmovImm timm:$SIMM)))]>; } // isReMaterializable // VMOV : Vector Get Lane (move scalar to ARM core register) diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 4692f2a421339..bbe675e81ab1d 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -122,6 +122,10 @@ def imm0_255_neg : PatLeaf<(i32 imm), [{ return (uint32_t)(-N->getZExtValue()) < 255; }], imm_neg_XFORM>; +def imm0_255_not : PatLeaf<(i32 imm), [{ + return (uint32_t)(~N->getZExtValue()) < 255; +}], imm_comp_XFORM>; + // Define Thumb2 specific addressing modes. // t2addrmode_imm12 := reg + imm12 @@ -1391,13 +1395,32 @@ defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb", BinOpFrag<(subc node:$LHS, node:$RHS)>>; // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. +// The assume-no-carry-in form uses the negation of the input since add/sub +// assume opposite meanings of the carry flag (i.e., carry == !borrow). +// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory +// details. +// The AddedComplexity preferences the first variant over the others since +// it can be shrunk to a 16-bit wide encoding, while the others cannot. 
+let AddedComplexity = 1 in +def : T2Pat<(add GPR:$src, imm0_255_neg:$imm), + (t2SUBri GPR:$src, imm0_255_neg:$imm)>; +def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), + (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>; +def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), + (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>; +let AddedComplexity = 1 in +def : T2Pat<(addc GPR:$src, imm0_255_neg:$imm), + (t2SUBSri GPR:$src, imm0_255_neg:$imm)>; +def : T2Pat<(addc GPR:$src, t2_so_imm_neg:$imm), + (t2SUBSri GPR:$src, t2_so_imm_neg:$imm)>; +// The with-carry-in form matches bitwise not instead of the negation. +// Effectively, the inverse interpretation of the carry flag already accounts +// for part of the negation. let AddedComplexity = 1 in -def : T2Pat<(add GPR:$src, imm0_255_neg:$imm), - (t2SUBri GPR:$src, imm0_255_neg:$imm)>; -def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), - (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>; -def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), - (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>; +def : T2Pat<(adde GPR:$src, imm0_255_not:$imm), + (t2SBCSri GPR:$src, imm0_255_not:$imm)>; +def : T2Pat<(adde GPR:$src, t2_so_imm_not:$imm), + (t2SBCSri GPR:$src, t2_so_imm_not:$imm)>; // Select Bytes -- for disassembly only @@ -2435,7 +2458,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1 in def t2LDM_RET : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_Br, - "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", + "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts", "$addr.addr = $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 8332bba22db9e..e7d92ede9b984 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -54,6 +54,9 @@ protected: /// the VML[AS] instructions are slow (if so, don't use them). bool SlowVMLx; + /// SlowFPBrcc - True if floating point compare + branch is slow. + bool SlowFPBrcc; + /// IsThumb - True if we are in thumb mode, false if in ARM mode. bool IsThumb; @@ -133,6 +136,7 @@ protected: bool hasDivide() const { return HasHardwareDivide; } bool hasT2ExtractPack() const { return HasT2ExtractPack; } bool useVMLx() const {return hasVFP2() && !SlowVMLx; } + bool isFPBrccSlow() const { return SlowFPBrcc; } bool hasFP16() const { return HasFP16; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 8415d1ad8827c..4b083244b2413 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -88,7 +88,7 @@ private: /// its register number, or -1 if there is no match. To allow return values /// to be used directly in register lists, arm registers have values between /// 0 and 15. 
- int MatchRegisterName(const StringRef &Name); + int MatchRegisterName(StringRef Name); /// } @@ -97,7 +97,7 @@ public: ARMAsmParser(const Target &T, MCAsmParser &_Parser) : TargetAsmParser(T), Parser(_Parser) {} - virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc, + virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); virtual bool ParseDirective(AsmToken DirectiveID); @@ -517,7 +517,7 @@ bool ARMAsmParser::ParseShift(ShiftType &St, const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) return true; - const StringRef &ShiftName = Tok.getString(); + StringRef ShiftName = Tok.getString(); if (ShiftName == "lsl" || ShiftName == "LSL") St = Lsl; else if (ShiftName == "lsr" || ShiftName == "LSR") @@ -549,7 +549,7 @@ bool ARMAsmParser::ParseShift(ShiftType &St, } /// A hack to allow some testing, to be replaced by a real table gen version. -int ARMAsmParser::MatchRegisterName(const StringRef &Name) { +int ARMAsmParser::MatchRegisterName(StringRef Name) { if (Name == "r0" || Name == "R0") return 0; else if (Name == "r1" || Name == "R1") @@ -593,7 +593,7 @@ MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst) { ARMOperand &Op0 = *(ARMOperand*)Operands[0]; assert(Op0.Kind == ARMOperand::Token && "First operand not a Token"); - const StringRef &Mnemonic = Op0.getToken(); + StringRef Mnemonic = Op0.getToken(); if (Mnemonic == "add" || Mnemonic == "stmfd" || Mnemonic == "str" || @@ -658,7 +658,7 @@ bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) { } /// Parse an arm instruction mnemonic followed by its operands. -bool ARMAsmParser::ParseInstruction(const StringRef &Name, SMLoc NameLoc, +bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { OwningPtr<ARMOperand> Op; ARMOperand::CreateToken(Op, Name, NameLoc); @@ -761,7 +761,7 @@ bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) return Error(L, "unexpected token in .syntax directive"); - const StringRef &Mode = Tok.getString(); + StringRef Mode = Tok.getString(); if (Mode == "unified" || Mode == "UNIFIED") Parser.Lex(); else if (Mode == "divided" || Mode == "DIVIDED") diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 6a40cf3602e97..946f4744f5bbc 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -602,12 +602,8 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op, O << "[" << getRegisterName(MO1.getReg()); if (MO2.getImm()) { - unsigned Align = MO2.getImm(); - assert((Align == 8 || Align == 16 || Align == 32) && - "unexpected NEON load/store alignment"); - Align <<= 3; // FIXME: Both darwin as and GNU as violate ARM docs here. - O << ", :" << Align; + O << ", :" << (MO2.getImm() << 3); } O << "]"; } diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index 170819ad4f06a..edc934549b288 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -442,7 +442,7 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, O << "[" << getRegisterName(MO1.getReg()); if (MO2.getImm()) { // FIXME: Both darwin as and GNU as violate ARM docs here. 
- O << ", :" << MO2.getImm(); + O << ", :" << (MO2.getImm() << 3); } O << "]"; } diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 85d5ca05913cb..0cb8ff01181d7 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -590,3 +590,70 @@ than the Z bit, we'll need additional logic to reverse the conditionals associated with the comparison. Perhaps a pseudo-instruction for the comparison, with a post-codegen pass to clean up and handle the condition codes? See PR5694 for testcase. + +//===---------------------------------------------------------------------===// + +Given the following on armv5: +int test1(int A, int B) { + return (A&-8388481)|(B&8388480); +} + +We currently generate: + ldr r2, .LCPI0_0 + and r0, r0, r2 + ldr r2, .LCPI0_1 + and r1, r1, r2 + orr r0, r1, r0 + bx lr + +We should be able to replace the second ldr+and with a bic (i.e. reuse the +constant which was already loaded). Not sure what's necessary to do that. + +//===---------------------------------------------------------------------===// + +Given the following on ARMv7: +int test1(int A, int B) { + return (A&-8388481)|(B&8388480); +} + +We currently generate: + bfc r0, #7, #16 + movw r2, #:lower16:8388480 + movt r2, #:upper16:8388480 + and r1, r1, r2 + orr r0, r1, r0 + bx lr + +The following is much shorter: + lsr r1, r1, #7 + bfi r0, r1, #7, #16 + bx lr + + +//===---------------------------------------------------------------------===// + +The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal: + +int a(int x) { return __builtin_bswap32(x); } + +a: + mov r1, #255, 24 + mov r2, #255, 16 + and r1, r1, r0, lsr #8 + and r2, r2, r0, lsl #8 + orr r1, r1, r0, lsr #24 + orr r0, r2, r0, lsl #24 + orr r0, r0, r1 + bx lr + +Something like the following would be better (fewer instructions/registers): + eor r1, r0, r0, ror #16 + bic r1, r1, #0xff0000 + mov r1, r1, lsr #8 + eor r0, r1, r0, ror #8 + bx lr + +A custom Thumb version would also be a slight improvement over the generic +version. 
+ +//===---------------------------------------------------------------------===// diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.cpp b/lib/Target/Alpha/AlphaMCAsmInfo.cpp index c67c6a235b11a..a35e8846e0724 100644 --- a/lib/Target/Alpha/AlphaMCAsmInfo.cpp +++ b/lib/Target/Alpha/AlphaMCAsmInfo.cpp @@ -14,7 +14,7 @@ #include "AlphaMCAsmInfo.h" using namespace llvm; -AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, const StringRef &TT) { +AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, StringRef TT) { AlignmentIsInBytes = false; PrivateGlobalPrefix = "$"; GPRel32Directive = ".gprel32"; diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.h b/lib/Target/Alpha/AlphaMCAsmInfo.h index c27065d28427b..837844bd29a93 100644 --- a/lib/Target/Alpha/AlphaMCAsmInfo.h +++ b/lib/Target/Alpha/AlphaMCAsmInfo.h @@ -14,14 +14,14 @@ #ifndef ALPHATARGETASMINFO_H #define ALPHATARGETASMINFO_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { class Target; - class StringRef; struct AlphaMCAsmInfo : public MCAsmInfo { - explicit AlphaMCAsmInfo(const Target &T, const StringRef &TT); + explicit AlphaMCAsmInfo(const Target &T, StringRef TT); }; } // namespace llvm diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp b/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp index 31470fb35b968..5b9d4a29794e4 100644 --- a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp +++ b/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp @@ -15,7 +15,7 @@ using namespace llvm; -BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, const StringRef &TT) { +BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, StringRef TT) { GlobalPrefix = "_"; CommentString = "//"; HasSetDirective = false; diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.h b/lib/Target/Blackfin/BlackfinMCAsmInfo.h index 0efc29523067b..c372aa247e04d 100644 --- a/lib/Target/Blackfin/BlackfinMCAsmInfo.h +++ b/lib/Target/Blackfin/BlackfinMCAsmInfo.h @@ -14,14 +14,14 @@ #ifndef BLACKFINTARGETASMINFO_H #define BLACKFINTARGETASMINFO_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { class Target; - class StringRef; struct BlackfinMCAsmInfo : public MCAsmInfo { - explicit BlackfinMCAsmInfo(const Target &T, const StringRef &TT); + explicit BlackfinMCAsmInfo(const Target &T, StringRef TT); }; } // namespace llvm diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp index 68445cf6bf9d7..25ba88acc8ba6 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp +++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp @@ -14,7 +14,7 @@ #include "SPUMCAsmInfo.h" using namespace llvm; -SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) { +SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) { ZeroDirective = "\t.space\t"; Data64bitsDirective = "\t.quad\t"; AlignmentIsInBytes = false; diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.h b/lib/Target/CellSPU/SPUMCAsmInfo.h index 8d75ea84116a5..7f850d347f56c 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.h +++ b/lib/Target/CellSPU/SPUMCAsmInfo.h @@ -14,14 +14,14 @@ #ifndef SPUTARGETASMINFO_H #define SPUTARGETASMINFO_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { class Target; - class StringRef; struct SPULinuxMCAsmInfo : public MCAsmInfo { - explicit SPULinuxMCAsmInfo(const Target &T, const StringRef &TT); + explicit SPULinuxMCAsmInfo(const Target &T, StringRef TT); }; } // namespace llvm diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp index 7ae465dbc55d6..4abeb2ed5d6b8 100644 --- 
a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp +++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp @@ -14,7 +14,7 @@ #include "MBlazeMCAsmInfo.h" using namespace llvm; -MBlazeMCAsmInfo::MBlazeMCAsmInfo(const Target &T, const StringRef &TT) { +MBlazeMCAsmInfo::MBlazeMCAsmInfo(const Target &T, StringRef TT) { AlignmentIsInBytes = false; Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MBlazeMCAsmInfo.h index bccb418673ff8..9d6ff3a11e782 100644 --- a/lib/Target/MBlaze/MBlazeMCAsmInfo.h +++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.h @@ -14,15 +14,15 @@ #ifndef MBLAZETARGETASMINFO_H #define MBLAZETARGETASMINFO_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { class Target; - class StringRef; class MBlazeMCAsmInfo : public MCAsmInfo { public: - explicit MBlazeMCAsmInfo(const Target &T, const StringRef &TT); + explicit MBlazeMCAsmInfo(const Target &T, StringRef TT); }; } // namespace llvm diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp index 8f97d255077fa..cc350e8a4f892 100644 --- a/lib/Target/MSIL/MSILWriter.cpp +++ b/lib/Target/MSIL/MSILWriter.cpp @@ -1621,8 +1621,7 @@ const char* MSILWriter::getLibraryName(const GlobalVariable* GV) { } -const char* MSILWriter::getLibraryForSymbol(const StringRef &Name, - bool isFunction, +const char* MSILWriter::getLibraryForSymbol(StringRef Name, bool isFunction, CallingConv::ID CallingConv) { // TODO: Read *.def file with function and libraries definitions. return "MSVCRT.DLL"; diff --git a/lib/Target/MSIL/MSILWriter.h b/lib/Target/MSIL/MSILWriter.h index a95ae2327c2c5..92a3abe5c0a74 100644 --- a/lib/Target/MSIL/MSILWriter.h +++ b/lib/Target/MSIL/MSILWriter.h @@ -246,7 +246,7 @@ namespace llvm { const char* getLibraryName(const GlobalVariable* GV); - const char* getLibraryForSymbol(const StringRef &Name, bool isFunction, + const char* getLibraryForSymbol(StringRef Name, bool isFunction, CallingConv::ID CallingConv); void printExternals(); diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MSP430MCAsmInfo.cpp index cfb499d132353..3f44944605544 100644 --- a/lib/Target/MSP430/MSP430MCAsmInfo.cpp +++ b/lib/Target/MSP430/MSP430MCAsmInfo.cpp @@ -14,7 +14,7 @@ #include "MSP430MCAsmInfo.h" using namespace llvm; -MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, const StringRef &TT) { +MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) { PrivateGlobalPrefix = ".L"; WeakRefDirective ="\t.weak\t"; PCSymbol="."; diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.h b/lib/Target/MSP430/MSP430MCAsmInfo.h index 8318029ae78d4..f3138a22022da 100644 --- a/lib/Target/MSP430/MSP430MCAsmInfo.h +++ b/lib/Target/MSP430/MSP430MCAsmInfo.h @@ -14,13 +14,14 @@ #ifndef MSP430TARGETASMINFO_H #define MSP430TARGETASMINFO_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { class Target; - class StringRef; + struct MSP430MCAsmInfo : public MCAsmInfo { - explicit MSP430MCAsmInfo(const Target &T, const StringRef &TT); + explicit MSP430MCAsmInfo(const Target &T, StringRef TT); }; } // namespace llvm diff --git a/lib/Target/Mips/MipsMCAsmInfo.cpp b/lib/Target/Mips/MipsMCAsmInfo.cpp index 89e3e11b0a7c2..fe48ab770e68b 100644 --- a/lib/Target/Mips/MipsMCAsmInfo.cpp +++ b/lib/Target/Mips/MipsMCAsmInfo.cpp @@ -14,7 +14,7 @@ #include "MipsMCAsmInfo.h" using namespace llvm; -MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, const StringRef &TT) { +MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) { 
AlignmentIsInBytes = false; Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; diff --git a/lib/Target/Mips/MipsMCAsmInfo.h b/lib/Target/Mips/MipsMCAsmInfo.h index 33a4b5edb258f..15a867ead53e7 100644 --- a/lib/Target/Mips/MipsMCAsmInfo.h +++ b/lib/Target/Mips/MipsMCAsmInfo.h @@ -14,15 +14,15 @@ #ifndef MIPSTARGETASMINFO_H #define MIPSTARGETASMINFO_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { class Target; - class StringRef; class MipsMCAsmInfo : public MCAsmInfo { public: - explicit MipsMCAsmInfo(const Target &T, const StringRef &TT); + explicit MipsMCAsmInfo(const Target &T, StringRef TT); }; } // namespace llvm diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp index 6a4d0d6271212..7a948def3cfe4 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -416,7 +416,7 @@ void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int Num, if (!TagName.empty()) Tmp += ", " + TagName; for (int i = 0; i<Num; i++) - Tmp += "," + utostr(Aux[i] && 0xff); + Tmp += "," + utostr(Aux[i] & 0xff); OS.EmitRawText("\n\t.dim " + Twine(VarName) + ", 1" + Tmp); } diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.cpp b/lib/Target/PIC16/PIC16MCAsmInfo.cpp index b08054270ceed..1bcc4971ebb74 100644 --- a/lib/Target/PIC16/PIC16MCAsmInfo.cpp +++ b/lib/Target/PIC16/PIC16MCAsmInfo.cpp @@ -20,7 +20,7 @@ #include "PIC16ISelLowering.h" using namespace llvm; -PIC16MCAsmInfo::PIC16MCAsmInfo(const Target &T, const StringRef &TT) { +PIC16MCAsmInfo::PIC16MCAsmInfo(const Target &T, StringRef TT) { CommentString = ";"; GlobalPrefix = PAN::getTagName(PAN::PREFIX_SYMBOL); GlobalDirective = "\tglobal\t"; diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.h b/lib/Target/PIC16/PIC16MCAsmInfo.h index e84db8532a151..6e1c111078ce3 100644 --- a/lib/Target/PIC16/PIC16MCAsmInfo.h +++ b/lib/Target/PIC16/PIC16MCAsmInfo.h @@ -25,7 +25,7 @@ namespace llvm { const char *RomData16bitsDirective; const char *RomData32bitsDirective; public: - PIC16MCAsmInfo(const Target &T, const StringRef &TT); + PIC16MCAsmInfo(const Target &T, StringRef TT); virtual const char *getDataASDirective(unsigned size, unsigned AS) const; }; diff --git a/lib/Target/Sparc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/SparcMCAsmInfo.cpp index 535c6f7c8a27e..d37d6d2313059 100644 --- a/lib/Target/Sparc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/SparcMCAsmInfo.cpp @@ -12,10 +12,9 @@ //===----------------------------------------------------------------------===// #include "SparcMCAsmInfo.h" -#include "llvm/ADT/SmallVector.h" using namespace llvm; -SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, const StringRef &TT) { +SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) { Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; Data64bitsDirective = 0; // .xword is only supported by V9. 
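The recurring change in these asm-info files, from taking the target triple as const StringRef & to plain StringRef, deserves a brief illustration. A minimal sketch follows; the helper name and the "-darwin" check are hypothetical, chosen only to show the calling convention:

  #include "llvm/ADT/StringRef.h"
  using llvm::StringRef;

  // StringRef is only a pointer plus a length, so passing it by value is as
  // cheap as passing a reference, avoids one level of indirection, and still
  // accepts string literals and std::string arguments.
  static bool looksLikeDarwinTriple(StringRef TT) {   // by value, not const&
    return TT.find("-darwin") != StringRef::npos;
  }

Taking the parameter by value also requires the complete type, which is why the headers touched by this patch switch from a forward declaration of StringRef to including llvm/ADT/StringRef.h.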
diff --git a/lib/Target/Sparc/SparcMCAsmInfo.h b/lib/Target/Sparc/SparcMCAsmInfo.h index 12d6ef4a6f187..0cb6827d27719 100644 --- a/lib/Target/Sparc/SparcMCAsmInfo.h +++ b/lib/Target/Sparc/SparcMCAsmInfo.h @@ -14,13 +14,14 @@ #ifndef SPARCTARGETASMINFO_H #define SPARCTARGETASMINFO_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { class Target; - class StringRef; + struct SparcELFMCAsmInfo : public MCAsmInfo { - explicit SparcELFMCAsmInfo(const Target &T, const StringRef &TT); + explicit SparcELFMCAsmInfo(const Target &T, StringRef TT); }; } // namespace llvm diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp index f9ccc47b0b944..4f7f70bd85f06 100644 --- a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp +++ b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp @@ -16,7 +16,7 @@ #include "llvm/MC/MCSectionELF.h" using namespace llvm; -SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, const StringRef &TT) { +SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) { PrivateGlobalPrefix = ".L"; WeakRefDirective = "\t.weak\t"; PCSymbol = "."; diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.h b/lib/Target/SystemZ/SystemZMCAsmInfo.h index 87908f21f722a..a6a27e2f4b6d4 100644 --- a/lib/Target/SystemZ/SystemZMCAsmInfo.h +++ b/lib/Target/SystemZ/SystemZMCAsmInfo.h @@ -21,7 +21,7 @@ namespace llvm { class StringRef; struct SystemZMCAsmInfo : public MCAsmInfo { - explicit SystemZMCAsmInfo(const Target &T, const StringRef &TT); + explicit SystemZMCAsmInfo(const Target &T, StringRef TT); virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const; }; diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index df523688c583b..47c91df1400e6 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -294,7 +294,7 @@ namespace llvm { /// option is specified on the command line. If this returns false (default), /// the code generator is not allowed to assume that FP arithmetic arguments /// and results are never NaNs or +-Infs. - bool FiniteOnlyFPMath() { return UnsafeFPMath || FiniteOnlyFPMathOption; } + bool FiniteOnlyFPMath() { return FiniteOnlyFPMathOption; } /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume /// that the rounding mode of the FPU can change from its default. diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index a856e9cc7a4e0..f1e66ab9d2c3f 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -65,7 +65,7 @@ public: X86ATTAsmParser(const Target &T, MCAsmParser &_Parser) : TargetAsmParser(T), Parser(_Parser) {} - virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc, + virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); virtual bool ParseDirective(AsmToken DirectiveID); @@ -602,7 +602,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { } bool X86ATTAsmParser:: -ParseInstruction(const StringRef &Name, SMLoc NameLoc, +ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // The various flavors of pushf and popf use Requires<In32BitMode> and // Requires<In64BitMode>, but the assembler doesn't yet implement that. 
@@ -612,6 +612,8 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, return Error(NameLoc, "popfl cannot be encoded in 64-bit mode"); else if (Name == "pushfl") return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode"); + else if (Name == "pusha") + return Error(NameLoc, "pusha cannot be encoded in 64-bit mode"); } else { if (Name == "popfq") return Error(NameLoc, "popfq cannot be encoded in 32-bit mode"); diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index 73bc603f18f13..08e6486d5b7a3 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -17,7 +17,6 @@ #include "X86IntelInstPrinter.h" #include "X86MCInstLower.h" #include "X86.h" -#include "X86COFF.h" #include "X86COFFMachineModuleInfo.h" #include "X86MachineFunctionInfo.h" #include "X86TargetMachine.h" @@ -35,6 +34,7 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetOptions.h" @@ -60,8 +60,10 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { if (Subtarget->isTargetCOFF()) { bool Intrn = MF.getFunction()->hasInternalLinkage(); OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); - OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::C_STAT : COFF::C_EXT); - OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT); + OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC + : COFF::IMAGE_SYM_CLASS_EXTERNAL); + OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + << COFF::SCT_COMPLEX_TYPE_SHIFT); OutStreamer.EndCOFFSymbolDef(); } @@ -582,8 +584,9 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { E = COFFMMI.externals_end(); I != E; ++I) { OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); - OutStreamer.EmitCOFFSymbolStorageClass(COFF::C_EXT); - OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT); + OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL); + OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + << COFF::SCT_COMPLEX_TYPE_SHIFT); OutStreamer.EndCOFFSymbolDef(); } diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index 09f150bb79466..e67fc06a6cd75 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -154,15 +154,13 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break; case X86II::MO_TLVP_PIC_BASE: - Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); - // Subtract the pic base. - Expr - = MCBinaryExpr::CreateSub(Expr, - MCSymbolRefExpr::Create(GetPICBaseSymbol(), - Ctx), - Ctx); - - break; + Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); + // Subtract the pic base. 
+ Expr = MCBinaryExpr::CreateSub(Expr, + MCSymbolRefExpr::Create(GetPICBaseSymbol(), + Ctx), + Ctx); + break; case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break; case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break; case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break; diff --git a/lib/Target/X86/X86COFF.h b/lib/Target/X86/X86COFF.h deleted file mode 100644 index 0a8e4e6ac6db8..0000000000000 --- a/lib/Target/X86/X86COFF.h +++ /dev/null @@ -1,95 +0,0 @@ -//===--- X86COFF.h - Some definitions from COFF documentations ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file just defines some symbols found in COFF documentation. They are -// used to emit function type information for COFF targets (Cygwin/Mingw32). -// -//===----------------------------------------------------------------------===// - -#ifndef X86COFF_H -#define X86COFF_H - -namespace COFF -{ -/// Storage class tells where and what the symbol represents -enum StorageClass { - C_EFCN = -1, ///< Physical end of function - C_NULL = 0, ///< No symbol - C_AUTO = 1, ///< External definition - C_EXT = 2, ///< External symbol - C_STAT = 3, ///< Static - C_REG = 4, ///< Register variable - C_EXTDEF = 5, ///< External definition - C_LABEL = 6, ///< Label - C_ULABEL = 7, ///< Undefined label - C_MOS = 8, ///< Member of structure - C_ARG = 9, ///< Function argument - C_STRTAG = 10, ///< Structure tag - C_MOU = 11, ///< Member of union - C_UNTAG = 12, ///< Union tag - C_TPDEF = 13, ///< Type definition - C_USTATIC = 14, ///< Undefined static - C_ENTAG = 15, ///< Enumeration tag - C_MOE = 16, ///< Member of enumeration - C_REGPARM = 17, ///< Register parameter - C_FIELD = 18, ///< Bit field - - C_BLOCK = 100, ///< ".bb" or ".eb" - beginning or end of block - C_FCN = 101, ///< ".bf" or ".ef" - beginning or end of function - C_EOS = 102, ///< End of structure - C_FILE = 103, ///< File name - C_LINE = 104, ///< Line number, reformatted as symbol - C_ALIAS = 105, ///< Duplicate tag - C_HIDDEN = 106 ///< External symbol in dmert public lib -}; - -/// The type of the symbol. This is made up of a base type and a derived type. -/// For example, pointer to int is "pointer to T" and "int" -enum SymbolType { - T_NULL = 0, ///< No type info - T_ARG = 1, ///< Void function argument (only used by compiler) - T_VOID = 1, ///< The same as above. Just named differently in some specs. 
- T_CHAR = 2, ///< Character - T_SHORT = 3, ///< Short integer - T_INT = 4, ///< Integer - T_LONG = 5, ///< Long integer - T_FLOAT = 6, ///< Floating point - T_DOUBLE = 7, ///< Double word - T_STRUCT = 8, ///< Structure - T_UNION = 9, ///< Union - T_ENUM = 10, ///< Enumeration - T_MOE = 11, ///< Member of enumeration - T_UCHAR = 12, ///< Unsigned character - T_USHORT = 13, ///< Unsigned short - T_UINT = 14, ///< Unsigned integer - T_ULONG = 15 ///< Unsigned long -}; - -/// Derived type of symbol -enum SymbolDerivedType { - DT_NON = 0, ///< No derived type - DT_PTR = 1, ///< Pointer to T - DT_FCN = 2, ///< Function returning T - DT_ARY = 3 ///< Array of T -}; - -/// Masks for extracting parts of type -enum SymbolTypeMasks { - N_BTMASK = 017, ///< Mask for base type - N_TMASK = 060 ///< Mask for derived type -}; - -/// Offsets of parts of type -enum Shifts { - N_BTSHFT = 4 ///< Type is formed as (base + derived << N_BTSHIFT) -}; - -} - -#endif // X86COFF_H diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index cdde24a156d0d..ce1370763b77f 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -540,7 +540,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { StubAM.GVOpFlags = GVFlags; // Prepare for inserting code in the local-value area. - MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea(); + SavePoint SaveInsertPt = enterLocalValueArea(); if (TLI.getPointerTy() == MVT::i64) { Opc = X86::MOV64rm; @@ -1279,12 +1279,11 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) { return false; // First issue a copy to GR16_ABCD or GR32_ABCD. - unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr; const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass; unsigned CopyReg = createResultReg(CopyRC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CopyOpc), CopyReg) - .addReg(InputReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + CopyReg).addReg(InputReg); // Then issue an extract_subreg. unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1a634744806e5..b3c48862898ff 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2458,17 +2458,23 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // If the tailcall address may be in a register, then make sure it's // possible to register allocate for it. In 32-bit, the call address can // only target EAX, EDX, or ECX since the tail call must be scheduled after - // callee-saved registers are restored. In 64-bit, it's RAX, RCX, RDX, RSI, - // RDI, R8, R9, R11. - if (!isa<GlobalAddressSDNode>(Callee) && + // callee-saved registers are restored. These happen to be the same + // registers used to pass 'inreg' arguments so watch out for those. + if (!Subtarget->is64Bit() && + !isa<GlobalAddressSDNode>(Callee) && !isa<ExternalSymbolSDNode>(Callee)) { - unsigned Limit = Subtarget->is64Bit() ? 
8 : 3; unsigned NumInRegs = 0; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - if (VA.isRegLoc()) { - if (++NumInRegs == Limit) + if (!VA.isRegLoc()) + continue; + unsigned Reg = VA.getLocReg(); + switch (Reg) { + default: break; + case X86::EAX: case X86::EDX: case X86::ECX: + if (++NumInRegs == 3) return false; + break; } } } @@ -7993,7 +7999,6 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, unsigned immOpc, unsigned LoadOpc, unsigned CXchgOpc, - unsigned copyOpc, unsigned notOpc, unsigned EAXreg, TargetRegisterClass *RC, @@ -8070,7 +8075,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, MIB.addReg(tt); (*MIB).addOperand(*argOpers[valArgIndx]); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), EAXreg); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg); MIB.addReg(t1); MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc)); @@ -8081,7 +8086,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, (*MIB).setMemRefs(bInstr->memoperands_begin(), bInstr->memoperands_end()); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), destOper.getReg()); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg()); MIB.addReg(EAXreg); // insert branch @@ -8117,7 +8122,6 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, const TargetRegisterClass *RC = X86::GR32RegisterClass; const unsigned LoadOpc = X86::MOV32rm; - const unsigned copyOpc = X86::MOV32rr; const unsigned NotOpc = X86::NOT32r; const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); const BasicBlock *LLVM_BB = MBB->getBasicBlock(); @@ -8227,14 +8231,14 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, MIB.addReg(t2); (*MIB).addOperand(*argOpers[valArgIndx + 1]); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX); MIB.addReg(t1); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EDX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX); MIB.addReg(t2); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EBX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX); MIB.addReg(t5); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::ECX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX); MIB.addReg(t6); MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B)); @@ -8245,9 +8249,9 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, (*MIB).setMemRefs(bInstr->memoperands_begin(), bInstr->memoperands_end()); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t3); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3); MIB.addReg(X86::EAX); - MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t4); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4); MIB.addReg(X86::EDX); // insert branch @@ -8326,12 +8330,12 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); if (argOpers[valArgIndx]->isReg()) - MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2); else MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2); (*MIB).addOperand(*argOpers[valArgIndx]); - MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), X86::EAX); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX); MIB.addReg(t1); MIB = BuildMI(newMBB, dl, 
TII->get(X86::CMP32rr)); @@ -8353,7 +8357,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, (*MIB).setMemRefs(mInstr->memoperands_begin(), mInstr->memoperands_end()); - MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), destOper.getReg()); + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg()); MIB.addReg(X86::EAX); // insert branch @@ -8735,25 +8739,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMAND32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, X86::AND32ri, X86::MOV32rm, - X86::LCMPXCHG32, X86::MOV32rr, + X86::LCMPXCHG32, X86::NOT32r, X86::EAX, X86::GR32RegisterClass); case X86::ATOMOR32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr, X86::OR32ri, X86::MOV32rm, - X86::LCMPXCHG32, X86::MOV32rr, + X86::LCMPXCHG32, X86::NOT32r, X86::EAX, X86::GR32RegisterClass); case X86::ATOMXOR32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr, X86::XOR32ri, X86::MOV32rm, - X86::LCMPXCHG32, X86::MOV32rr, + X86::LCMPXCHG32, X86::NOT32r, X86::EAX, X86::GR32RegisterClass); case X86::ATOMNAND32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, X86::AND32ri, X86::MOV32rm, - X86::LCMPXCHG32, X86::MOV32rr, + X86::LCMPXCHG32, X86::NOT32r, X86::EAX, X86::GR32RegisterClass, true); case X86::ATOMMIN32: @@ -8768,25 +8772,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMAND16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, X86::AND16ri, X86::MOV16rm, - X86::LCMPXCHG16, X86::MOV16rr, + X86::LCMPXCHG16, X86::NOT16r, X86::AX, X86::GR16RegisterClass); case X86::ATOMOR16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr, X86::OR16ri, X86::MOV16rm, - X86::LCMPXCHG16, X86::MOV16rr, + X86::LCMPXCHG16, X86::NOT16r, X86::AX, X86::GR16RegisterClass); case X86::ATOMXOR16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr, X86::XOR16ri, X86::MOV16rm, - X86::LCMPXCHG16, X86::MOV16rr, + X86::LCMPXCHG16, X86::NOT16r, X86::AX, X86::GR16RegisterClass); case X86::ATOMNAND16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, X86::AND16ri, X86::MOV16rm, - X86::LCMPXCHG16, X86::MOV16rr, + X86::LCMPXCHG16, X86::NOT16r, X86::AX, X86::GR16RegisterClass, true); case X86::ATOMMIN16: @@ -8801,25 +8805,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMAND8: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, X86::AND8ri, X86::MOV8rm, - X86::LCMPXCHG8, X86::MOV8rr, + X86::LCMPXCHG8, X86::NOT8r, X86::AL, X86::GR8RegisterClass); case X86::ATOMOR8: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr, X86::OR8ri, X86::MOV8rm, - X86::LCMPXCHG8, X86::MOV8rr, + X86::LCMPXCHG8, X86::NOT8r, X86::AL, X86::GR8RegisterClass); case X86::ATOMXOR8: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr, X86::XOR8ri, X86::MOV8rm, - X86::LCMPXCHG8, X86::MOV8rr, + X86::LCMPXCHG8, X86::NOT8r, X86::AL, X86::GR8RegisterClass); case X86::ATOMNAND8: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, X86::AND8ri, X86::MOV8rm, - X86::LCMPXCHG8, X86::MOV8rr, + X86::LCMPXCHG8, X86::NOT8r, X86::AL, X86::GR8RegisterClass, true); // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way. 
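A quick sketch of the pattern these hunks apply: the width-specific copyOpc parameter (MOV8rr/MOV16rr/MOV32rr/MOV64rr) is dropped and every register-to-register move is emitted with the target-independent COPY opcode instead. The helper below is hypothetical and only illustrates the BuildMI call shape used throughout; it is not part of the patch:

  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/Target/TargetInstrInfo.h"
  #include "llvm/Target/TargetOpcodes.h"
  using namespace llvm;

  // Emit DstReg = COPY SrcReg at the end of MBB; later passes pick the right
  // move instruction for the register class, so no per-width opcode is needed.
  static void emitRegCopy(MachineBasicBlock *MBB, DebugLoc dl,
                          const TargetInstrInfo *TII,
                          unsigned DstReg, unsigned SrcReg) {
    BuildMI(MBB, dl, TII->get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
  }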
@@ -8827,25 +8831,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::ATOMAND64: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, X86::AND64ri32, X86::MOV64rm, - X86::LCMPXCHG64, X86::MOV64rr, + X86::LCMPXCHG64, X86::NOT64r, X86::RAX, X86::GR64RegisterClass); case X86::ATOMOR64: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr, X86::OR64ri32, X86::MOV64rm, - X86::LCMPXCHG64, X86::MOV64rr, + X86::LCMPXCHG64, X86::NOT64r, X86::RAX, X86::GR64RegisterClass); case X86::ATOMXOR64: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr, X86::XOR64ri32, X86::MOV64rm, - X86::LCMPXCHG64, X86::MOV64rr, + X86::LCMPXCHG64, X86::NOT64r, X86::RAX, X86::GR64RegisterClass); case X86::ATOMNAND64: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, X86::AND64ri32, X86::MOV64rm, - X86::LCMPXCHG64, X86::MOV64rr, + X86::LCMPXCHG64, X86::NOT64r, X86::RAX, X86::GR64RegisterClass, true); case X86::ATOMMIN64: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 2d28e5cc2ea7a..4e4daa4bc5ca9 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -764,7 +764,6 @@ namespace llvm { unsigned immOpc, unsigned loadOpc, unsigned cxchgOpc, - unsigned copyOpc, unsigned notOpc, unsigned EAXreg, TargetRegisterClass *RC, diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 97578af499bef..cc3fdf1efd7b6 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -106,6 +106,7 @@ class VEX { bit hasVEXPrefix = 1; } class VEX_W { bit hasVEX_WPrefix = 1; } class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; } class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; } +class VEX_L { bit hasVEX_L = 1; } class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, string AsmStr, Domain d = GenericDomain> @@ -138,6 +139,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, bit hasVEX_4VPrefix = 0; // Does this inst requires the VEX.VVVV field? bit hasVEX_i8ImmReg = 0; // Does this inst requires the last source register // to be encoded in a immediate field? + bit hasVEX_L = 0; // Does this inst uses large (256-bit) registers? // TSFlags layout should be kept in sync with X86InstrInfo.h. let TSFlags{5-0} = FormBits; @@ -155,6 +157,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{33} = hasVEX_WPrefix; let TSFlags{34} = hasVEX_4VPrefix; let TSFlags{35} = hasVEX_i8ImmReg; + let TSFlags{36} = hasVEX_L; } class I<bits<8> o, Format f, dag outs, dag ins, string asm, diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index f762b5827075d..ad0217adb4758 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -453,7 +453,13 @@ namespace X86II { // VEX_I8IMM - Specifies that the last register used in a AVX instruction, // must be encoded in the i8 immediate field. This usually happens in // instructions with 4 operands. - VEX_I8IMM = 1ULL << 35 + VEX_I8IMM = 1ULL << 35, + + // VEX_L - Stands for a bit in the VEX opcode prefix meaning the current + // instruction uses 256-bit wide registers. This is usually auto detected if + // a VR256 register is used, but some AVX instructions also have this field + // marked when using a f256 memory references. 
+ VEX_L = 1ULL << 36 }; // getBaseOpcodeFor - This function returns the "base" X86 opcode for the diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ab0005b31bb83..ebe161b46bdcb 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -666,6 +666,9 @@ defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load, defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load, "cvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle>, TB, VEX; +defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, f256mem, load, + "cvtdq2ps\t{$src, $dst|$dst, $src}", + SSEPackedSingle>, TB, VEX; } let Pattern = []<dag> in { defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/, @@ -806,9 +809,13 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), // Convert packed single/double fp to doubleword let isAsmParserOnly = 1 in { def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; + "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; + "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; } def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", []>; @@ -862,6 +869,10 @@ def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; } def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>; @@ -912,14 +923,39 @@ def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memop addr:$src)))]>; +let isAsmParserOnly = 1 in { +// The assembler can recognize rr 256-bit instructions by seeing a ymm +// register, but the same isn't true when using memory operands instead. +// Provide other assembly rr and rm forms to address this explicitly. 
+def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; + +// XMM only +def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; + +// YMM only +def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), + "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; +} + // Convert packed single to packed double -let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + // SSE2 instructions without OpSize prefix def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX, - Requires<[HasAVX]>; + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX, - Requires<[HasAVX]>; + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; } def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB; @@ -949,10 +985,25 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), // Convert packed double to packed single let isAsmParserOnly = 1 in { +// The assembler can recognize rr 256-bit instructions by seeing a ymm +// register, but the same isn't true when using memory operands instead. +// Provide other assembly rr and rm forms to address this explicitly. 
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX; -// FIXME: the memory form of this instruction should described using -// use extra asm syntax + "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX; + +// XMM only +def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX; + +// YMM only +def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), + "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; } def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", []>; @@ -1142,6 +1193,16 @@ let isAsmParserOnly = 1 in { "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", SSEPackedDouble>, OpSize, VEX_4V; + let Pattern = []<dag> in { + defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_sse_cmp_ps, + "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", + SSEPackedSingle>, VEX_4V; + defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_sse2_cmp_pd, + "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; + } } let Constraints = "$src1 = $dst" in { defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, @@ -2935,19 +2996,46 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // SSE3 - Conversion Instructions //===---------------------------------------------------------------------===// +// Convert Packed Double FP to Packed DW Integers let isAsmParserOnly = 1, Predicates = [HasAVX] in { +// The assembler can recognize rr 256-bit instructions by seeing a ymm +// register, but the same isn't true when using memory operands instead. +// Provide other assembly rr and rm forms to address this explicitly. 
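As above, the ...Yrm memory forms in these conversion sections carry the new VEX_L TSFlags bit (1ULL << 36) added earlier in this commit, and the X86MCCodeEmitter.cpp hunk further down copies it into the VEX prefix. A simplified sketch of where that bit lands in a two-byte VEX prefix (field packing per the Intel encoding; the constant and helper are ours, and VEX_R/VEX_4V are assumed to already be in their complemented encoding form):

#include <stdint.h>

// Mirrors the X86II::VEX_L TSFlags bit introduced by this commit.
static const uint64_t VEX_L_BIT = 1ULL << 36;

// Second byte of a 2-byte VEX prefix: bit 7 = ~R, bits 6-3 = ~vvvv,
// bit 2 = L (0 = 128-bit, 1 = 256-bit), bits 1-0 = pp.
static uint8_t vex2LastByte(uint64_t TSFlags, uint8_t VEX_R, uint8_t VEX_4V,
                            uint8_t VEX_PP) {
  uint8_t VEX_L = (TSFlags & VEX_L_BIT) ? 1 : 0;
  return (uint8_t)((VEX_R << 7) | (VEX_4V << 3) | (VEX_L << 2) | VEX_PP);
}

The emitter change below performs the same test (if (TSFlags & X86II::VEX_L) VEX_L = 1;) before assembling the prefix, and the SSE3 packed-double conversions that follow here get the same x/y-suffixed treatment as the SSE2 ones above.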
def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2DQXrYr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; + +// XMM only +def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; + +// YMM only +def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), + "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; } def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; + +// Convert Packed DW Integers to Packed Double FP +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDYrm : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +} + def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 633ddd49d74d0..23b0666f5f30f 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -432,6 +432,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, if (TSFlags & X86II::VEX_W) VEX_W = 1; + if (TSFlags & X86II::VEX_L) + VEX_L = 1; + switch (TSFlags & X86II::Op0Mask) { default: assert(0 && "Invalid prefix!"); case X86II::T8: // 0F 38 diff --git a/lib/Target/XCore/XCoreMCAsmInfo.cpp b/lib/Target/XCore/XCoreMCAsmInfo.cpp index 5f6feae372352..42ab1b31d57a7 100644 --- a/lib/Target/XCore/XCoreMCAsmInfo.cpp +++ b/lib/Target/XCore/XCoreMCAsmInfo.cpp @@ -10,7 +10,7 @@ #include "XCoreMCAsmInfo.h" using namespace llvm; -XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, const StringRef &TT) { +XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, StringRef TT) { SupportsDebugInformation = true; Data16bitsDirective = "\t.short\t"; Data32bitsDirective = "\t.long\t"; diff --git a/lib/Target/XCore/XCoreMCAsmInfo.h b/lib/Target/XCore/XCoreMCAsmInfo.h index 01f8e481a949a..840392263881b 100644 --- a/lib/Target/XCore/XCoreMCAsmInfo.h +++ b/lib/Target/XCore/XCoreMCAsmInfo.h @@ -14,14 +14,15 @@ #ifndef XCORETARGETASMINFO_H #define XCORETARGETASMINFO_H +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { class Target; - class 
StringRef; + class XCoreMCAsmInfo : public MCAsmInfo { public: - explicit XCoreMCAsmInfo(const Target &T, const StringRef &TT); + explicit XCoreMCAsmInfo(const Target &T, StringRef TT); }; } // namespace llvm diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 027a220bc7f98..9bb01f5699fe3 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -399,7 +399,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // We can only inline direct calls to non-declarations. if (Callee == 0 || Callee->isDeclaration()) continue; - // If this call sites was obtained by inlining another function, verify + // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee // itself. If so, we'd be recursively inlinling the same function, // which would provide the same callsites, which would cause us to diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 55d5e2ac4ab8e..aeeafe7fd19dc 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -603,6 +603,10 @@ static void ThunkGToF(Function *F, Function *G) { } static void AliasGToF(Function *F, Function *G) { + // Darwin will trigger llvm_unreachable if asked to codegen an alias. + return ThunkGToF(F, G); + +#if 0 if (!G->hasExternalLinkage() && !G->hasLocalLinkage() && !G->hasWeakLinkage()) return ThunkGToF(F, G); @@ -614,6 +618,7 @@ static void AliasGToF(Function *F, Function *G) { GA->setVisibility(G->getVisibility()); G->replaceAllUsesWith(GA); G->eraseFromParent(); +#endif } static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) { diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 3f4a857c41a5a..5876f408343b6 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -472,6 +472,25 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } + + // (icmp ne (A & C1), 0) & (icmp ne (A & C2), 0) --> + // (icmp eq (A & (C1|C2)), (C1|C2)) + if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { + Instruction *I1 = dyn_cast<Instruction>(Val); + Instruction *I2 = dyn_cast<Instruction>(Val2); + if (I1 && I1->getOpcode() == Instruction::And && + I2 && I2->getOpcode() == Instruction::And && + I1->getOperand(0) == I1->getOperand(0)) { + ConstantInt *CI1 = dyn_cast<ConstantInt>(I1->getOperand(1)); + ConstantInt *CI2 = dyn_cast<ConstantInt>(I2->getOperand(1)); + if (CI1 && !CI1->isZero() && CI2 && !CI2->isZero() && + CI1->getValue().operator&(CI2->getValue()) == 0) { + Constant *ConstOr = ConstantExpr::getOr(CI1, CI2); + Value *NewAnd = Builder->CreateAnd(I1->getOperand(0), ConstOr); + return Builder->CreateICmp(ICmpInst::ICMP_EQ, NewAnd, ConstOr); + } + } + } } // From here on, we only handle: diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index c44fe9db6e3a7..f9ffdb10f2660 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -699,6 +699,34 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { SI.setOperand(2, TrueVal); return &SI; } + + // select (A == 0 | B == 0), T, F--> select (A != 0 & B != 0), F, T + // Note: This is a canonicalization rather than an 
optimization, and is used + // to expose opportunities to other instcombine transforms. + Instruction* CondInst = dyn_cast<Instruction>(CondVal); + if (CondInst && CondInst->hasOneUse() && + CondInst->getOpcode() == Instruction::Or) { + ICmpInst *LHSCmp = dyn_cast<ICmpInst>(CondInst->getOperand(0)); + ICmpInst *RHSCmp = dyn_cast<ICmpInst>(CondInst->getOperand(1)); + if (LHSCmp && LHSCmp->hasOneUse() && + LHSCmp->getPredicate() == ICmpInst::ICMP_EQ && + RHSCmp && RHSCmp->hasOneUse() && + RHSCmp->getPredicate() == ICmpInst::ICMP_EQ) { + ConstantInt* C1 = dyn_cast<ConstantInt>(LHSCmp->getOperand(1)); + ConstantInt* C2 = dyn_cast<ConstantInt>(RHSCmp->getOperand(1)); + if (C1 && C1->isZero() && C2 && C2->isZero()) { + LHSCmp->setPredicate(ICmpInst::ICMP_NE); + RHSCmp->setPredicate(ICmpInst::ICMP_NE); + Value *And = + InsertNewInstBefore(BinaryOperator::CreateAnd(LHSCmp, RHSCmp, + "and."+CondVal->getName()), SI); + SI.setOperand(0, And); + SI.setOperand(1, FalseVal); + SI.setOperand(2, TrueVal); + return &SI; + } + } + } return 0; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index a250a88c99473..1f9b4156b9cd6 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -2362,7 +2362,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base) { // TODO: For now, just add the min and max offset, because it usually isn't // worthwhile looking at everything inbetween. - SmallVector<int64_t, 4> Worklist; + SmallVector<int64_t, 2> Worklist; Worklist.push_back(LU.MinOffset); if (LU.MaxOffset != LU.MinOffset) Worklist.push_back(LU.MaxOffset); @@ -2376,7 +2376,14 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I; if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, LU.AccessTy, TLI)) { - F.BaseRegs[i] = SE.getAddExpr(G, SE.getConstant(G->getType(), *I)); + // Add the offset to the base register. + const SCEV *NewG = SE.getAddExpr(G, SE.getConstant(G->getType(), *I)); + // If it cancelled out, drop the base register, otherwise update it. + if (NewG->isZero()) { + std::swap(F.BaseRegs[i], F.BaseRegs.back()); + F.BaseRegs.pop_back(); + } else + F.BaseRegs[i] = NewG; (void)InsertFormula(LU, LUIdx, F); } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 0b48a8fce78b7..8e9113871f47b 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -306,7 +306,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { WeakVH BIHandle(BI); ReplaceAndSimplifyAllUses(Inst, V, TD); MadeChange = true; - if (BIHandle == 0) + if (BIHandle != BI) BI = BB->begin(); continue; } @@ -354,12 +354,13 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, // value into all of its uses. assert(PNV != PN && "hasConstantValue broken"); + Value *OldPhiIt = PhiIt; ReplaceAndSimplifyAllUses(PN, PNV, TD); // If recursive simplification ended up deleting the next PHI node we would // iterate to, then our iterator is invalid, restart scanning from the top // of the block. 
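For the InstCombineSelect.cpp hunk above, the underlying identity is De Morgan's law applied to the select condition; a plain C++ analogue (hypothetical functions, purely for illustration):

// Before: pick T when either value is zero.
static int selectBefore(int A, int B, int T, int F) {
  return (A == 0 || B == 0) ? T : F;
}

// After the canonicalization: complement the condition and swap the arms.
// This returns the same value as selectBefore for every input.
static int selectAfter(int A, int B, int T, int F) {
  return (A != 0 && B != 0) ? F : T;
}

Swapping the arms instead of keeping them in place is what exposes the and-of-compares form to other instcombine transforms, which is the stated purpose of the canonicalization.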
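The Local.cpp changes here and just below stop asking "did the handle go null" and instead compare the handle against the original pointer, so that a recursive simplification that replaces the tracked value, not just one that deletes it, also resets the iterator. A minimal sketch of the idiom (helper name and include paths are assumptions for this era of the tree):

#include "llvm/Instruction.h"
#include "llvm/Support/ValueHandle.h"
using namespace llvm;

// 'Simplify' stands in for any call, such as ReplaceAndSimplifyAllUses, that
// may recursively delete the instruction or RAUW it to another value.
static void runAndCheck(Instruction *I, void (*Simplify)(Instruction *)) {
  WeakVH Handle(I);   // nulls out on deletion and follows replaceAllUsesWith
  Simplify(I);
  if (Handle != I) {
    // I was deleted or replaced; any iterator positioned on it is stale and
    // must be re-seated, e.g. by restarting from the front of the block.
  }
}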
- if (PhiIt == 0) PhiIt = &BB->front(); + if (PhiIt != OldPhiIt) PhiIt = &BB->front(); } } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index fd3ed3ea2d654..27b07d9731a58 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1377,8 +1377,9 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) { bool llvm::FoldBranchToCommonDest(BranchInst *BI) { BasicBlock *BB = BI->getParent(); Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); - if (Cond == 0) return false; - + if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || + Cond->getParent() != BB || !Cond->hasOneUse()) + return false; // Only allow this if the condition is a simple instruction that can be // executed unconditionally. It must be in the same block as the branch, and @@ -1387,11 +1388,23 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Ignore dbg intrinsics. while(isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt; - if ((!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || - Cond->getParent() != BB || &*FrontIt != Cond || !Cond->hasOneUse()) { - return false; + + // Allow a single instruction to be hoisted in addition to the compare + // that feeds the branch. We later ensure that any values that _it_ uses + // were also live in the predecessor, so that we don't unnecessarily create + // register pressure or inhibit out-of-order execution. + Instruction *BonusInst = 0; + if (&*FrontIt != Cond && + FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond && + FrontIt->isSafeToSpeculativelyExecute()) { + BonusInst = &*FrontIt; + ++FrontIt; } + // Only a single bonus inst is allowed. + if (&*FrontIt != Cond) + return false; + // Make sure the instruction after the condition is the cond branch. BasicBlock::iterator CondIt = Cond; ++CondIt; // Ingore dbg intrinsics. @@ -1429,6 +1442,44 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { !SafeToMergeTerminators(BI, PBI)) continue; + // Ensure that any values used in the bonus instruction are also used + // by the terminator of the predecessor. This means that those values + // must already have been resolved, so we won't be inhibiting the + // out-of-order core by speculating them earlier. + if (BonusInst) { + // Collect the values used by the bonus inst + SmallPtrSet<Value*, 4> UsedValues; + for (Instruction::op_iterator OI = BonusInst->op_begin(), + OE = BonusInst->op_end(); OI != OE; ++OI) { + Value* V = *OI; + if (!isa<Constant>(V)) + UsedValues.insert(V); + } + + SmallVector<std::pair<Value*, unsigned>, 4> Worklist; + Worklist.push_back(std::make_pair(PBI->getOperand(0), 0)); + + // Walk up to four levels back up the use-def chain of the predecessor's + // terminator to see if all those values were used. The choice of four + // levels is arbitrary, to provide a compile-time-cost bound. 
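Before the worklist walk below, it helps to restate when an extra instruction qualifies for hoisting at all, as checked earlier in this function: it must feed only the branch condition and be safe to execute unconditionally. Condensed into a hypothetical helper (FoldBranchToCommonDest performs these checks inline; the include path is assumed for this tree):

#include "llvm/Instruction.h"

static bool isCandidateBonusInst(llvm::Instruction *I, llvm::Instruction *Cond) {
  return I->hasOneUse() &&                   // nothing else depends on it
         *I->use_begin() == Cond &&          // its single user is the condition
         I->isSafeToSpeculativelyExecute();  // no traps, no side effects
}

The bounded walk that follows then checks that such an instruction's operands were already in use at the predecessor's terminator.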
+ while (!Worklist.empty()) { + std::pair<Value*, unsigned> Pair = Worklist.back(); + Worklist.pop_back(); + + if (Pair.second >= 4) continue; + UsedValues.erase(Pair.first); + if (UsedValues.empty()) break; + + if (Instruction* I = dyn_cast<Instruction>(Pair.first)) { + for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); + OI != OE; ++OI) + Worklist.push_back(std::make_pair(OI->get(), Pair.second+1)); + } + } + + if (!UsedValues.empty()) return false; + } + Instruction::BinaryOps Opc; bool InvertPredCond = false; @@ -1457,9 +1508,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { PBI->setSuccessor(1, OldTrue); } + // If we have a bonus inst, clone it into the predecessor block. + Instruction *NewBonus = 0; + if (BonusInst) { + NewBonus = BonusInst->clone(); + PredBlock->getInstList().insert(PBI, NewBonus); + NewBonus->takeName(BonusInst); + BonusInst->setName(BonusInst->getName()+".old"); + } + // Clone Cond into the predecessor basic block, and or/and the // two conditions together. Instruction *New = Cond->clone(); + if (BonusInst) New->replaceUsesOfWith(BonusInst, NewBonus); PredBlock->getInstList().insert(PBI, New); New->takeName(Cond); Cond->setName(New->getName()+".old"); diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index 7a471ef98e1da..09b8aa507d833 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -90,8 +90,7 @@ enum PrefixType { /// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either /// prefixed with % (if the string only contains simple characters) or is /// surrounded with ""'s (if it has special chars in it). Print it out. -static void PrintLLVMName(raw_ostream &OS, const StringRef &Name, - PrefixType Prefix) { +static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) { assert(Name.data() && "Cannot get empty name!"); switch (Prefix) { default: llvm_unreachable("Bad prefix!"); @@ -855,8 +854,9 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) { } } -static void WriteConstantInt(raw_ostream &Out, const Constant *CV, - TypePrinting &TypePrinter, SlotTracker *Machine) { +static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, + TypePrinting &TypePrinter, + SlotTracker *Machine) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { if (CI->getType()->isIntegerTy(1)) { Out << (CI->getZExtValue() ? "true" : "false"); @@ -1147,7 +1147,7 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V, const Constant *CV = dyn_cast<Constant>(V); if (CV && !isa<GlobalValue>(CV)) { assert(TypePrinter && "Constants require TypePrinting!"); - WriteConstantInt(Out, CV, *TypePrinter, Machine); + WriteConstantInternal(Out, CV, *TypePrinter, Machine); return; } @@ -2128,7 +2128,7 @@ void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const { } else if (const MDNode *N = dyn_cast<MDNode>(this)) { const Function *F = N->getFunction(); SlotTracker SlotTable(F); - AssemblyWriter W(OS, SlotTable, F ? getModuleFromVal(F) : 0, AAW); + AssemblyWriter W(OS, SlotTable, F ? 
F->getParent() : 0, AAW);
     W.printMDNodeBody(N);
   } else if (const NamedMDNode *N = dyn_cast<NamedMDNode>(this)) {
     SlotTracker SlotTable(N->getParent());
@@ -2138,7 +2138,7 @@ void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
     TypePrinting TypePrinter;
     TypePrinter.print(C->getType(), OS);
     OS << ' ';
-    WriteConstantInt(OS, C, TypePrinter, 0);
+    WriteConstantInternal(OS, C, TypePrinter, 0);
   } else if (isa<InlineAsm>(this) || isa<MDString>(this) ||
              isa<Argument>(this)) {
     WriteAsOperand(OS, this, true, 0);
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index 1d3a058693035..3100d4ac7c9c3 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -78,7 +78,8 @@ void MDNodeOperand::allUsesReplacedWith(Value *NV) {
 /// getOperandPtr - Helper function to get the MDNodeOperand's coallocated on
 /// the end of the MDNode.
 static MDNodeOperand *getOperandPtr(MDNode *N, unsigned Op) {
-  assert(Op < N->getNumOperands() && "Invalid operand number");
+  // Use <= instead of < to permit a one-past-the-end address.
+  assert(Op <= N->getNumOperands() && "Invalid operand number");
   return reinterpret_cast<MDNodeOperand*>(N+1)+Op;
 }
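On the final Metadata.cpp hunk: the operands of an MDNode are co-allocated directly after the node, so forming an end pointer for a half-open [begin, end) range over them requires a one-past-the-end address from getOperandPtr. A stripped-down illustration (types invented, not LLVM's own):

#include <cstdio>

// Operand records co-allocated directly after the node, MDNode-style.
struct Node {
  unsigned NumOps;
  int *op_begin() { return reinterpret_cast<int *>(this + 1); }
  int *op_end() { return op_begin() + NumOps; }  // one past the end: formed,
                                                 // never dereferenced
};

static void printOperands(Node *N) {
  for (int *I = N->op_begin(), *E = N->op_end(); I != E; ++I)
    std::printf("%d\n", *I);
}

With the old strict '<' assertion, merely forming that end pointer would have fired the assert even though it is never dereferenced, hence the relaxation to '<='.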