diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Analysis/ScalarEvolution.cpp | 30 | ||||
-rw-r--r-- | lib/Analysis/ValueTracking.cpp | 4 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 16 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 11 | ||||
-rw-r--r-- | lib/CodeGen/VirtRegMap.cpp | 30 | ||||
-rw-r--r-- | lib/DebugInfo/DWARF/DWARFContext.cpp | 4 | ||||
-rw-r--r-- | lib/DebugInfo/DWARF/DWARFVerifier.cpp | 8 | ||||
-rw-r--r-- | lib/Object/COFFImportFile.cpp | 6 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 19 | ||||
-rw-r--r-- | lib/Target/ARM/ARMExpandPseudoInsts.cpp | 38 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrInfo.td | 10 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 44 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrAVX512.td | 55 | ||||
-rw-r--r-- | lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp | 23 | ||||
-rw-r--r-- | lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 10 | ||||
-rw-r--r-- | lib/Transforms/Scalar/BDCE.cpp | 44 |
16 files changed, 275 insertions, 77 deletions
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index b973203a89b66..9539fd7c75596 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -162,6 +162,11 @@ static cl::opt<unsigned> cl::desc("Maximum depth of recursive SExt/ZExt"), cl::init(8)); +static cl::opt<unsigned> + MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden, + cl::desc("Max coefficients in AddRec during evolving"), + cl::init(16)); + //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// @@ -2878,6 +2883,12 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop) continue; + // Limit max number of arguments to avoid creation of unreasonably big + // SCEVAddRecs with very complex operands. + if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 > + MaxAddRecSize) + continue; + bool Overflow = false; Type *Ty = AddRec->getType(); bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64; @@ -7582,6 +7593,25 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI); if (const SCEVConstant *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) { + + // This trivial case can show up in some degenerate cases where + // the incoming IR has not yet been fully simplified. + if (BTCC->getValue()->isZero()) { + Value *InitValue = nullptr; + bool MultipleInitValues = false; + for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { + if (!LI->contains(PN->getIncomingBlock(i))) { + if (!InitValue) + InitValue = PN->getIncomingValue(i); + else if (InitValue != PN->getIncomingValue(i)) { + MultipleInitValues = true; + break; + } + } + if (!MultipleInitValues && InitValue) + return getSCEV(InitValue); + } + } // Okay, we know how many times the containing loop executes. If // this is a constant evolving PHI node, get the final value at // the specified iteration number. diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 439b21a812587..cdfe74d158c95 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -4458,6 +4458,10 @@ Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { + // Bail out when we hit the limit. + if (Depth == MaxDepth) + return None; + // A mismatch occurs when we compare a scalar cmp to a vector cmp, for example. if (LHS->getType() != RHS->getType()) return None; diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 0cad20db09648..ecb54e1e4b41c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -302,7 +302,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { } SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { - SDValue Cond = GetScalarizedVector(N->getOperand(0)); + SDValue Cond = N->getOperand(0); + EVT OpVT = Cond.getValueType(); + SDLoc DL(N); + // The vselect result and true/value operands needs scalarizing, but it's + // not a given that the Cond does. For instance, in AVX512 v1i1 is legal. + // See the similar logic in ScalarizeVecRes_VSETCC + if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { + Cond = GetScalarizedVector(Cond); + } else { + EVT VT = OpVT.getVectorElementType(); + Cond = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + SDValue LHS = GetScalarizedVector(N->getOperand(1)); TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false, false); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 823e77850c4ba..0ff154784f685 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7262,22 +7262,23 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { AddDbgValue(I, ToNode, false); } -void SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, - SDValue NewMemOp) { +SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, + SDValue NewMemOp) { assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); - if (!OldLoad->hasAnyUseOfValue(1)) - return; - // The new memory operation must have the same position as the old load in // terms of memory dependency. Create a TokenFactor for the old load and new // memory operation and update uses of the old load's output chain to use that // TokenFactor. SDValue OldChain = SDValue(OldLoad, 1); SDValue NewChain = SDValue(NewMemOp.getNode(), 1); + if (!OldLoad->hasAnyUseOfValue(1)) + return NewChain; + SDValue TokenFactor = getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain); ReplaceAllUsesOfValueWith(OldChain, TokenFactor); UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain); + return TokenFactor; } //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 124c2790f68c4..f8aacdb8649d4 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -180,6 +180,7 @@ class VirtRegRewriter : public MachineFunctionPass { void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const; void handleIdentityCopy(MachineInstr &MI) const; void expandCopyBundle(MachineInstr &MI) const; + bool subRegLiveThrough(const MachineInstr &MI, unsigned SuperPhysReg) const; public: static char ID; @@ -415,6 +416,32 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const { } } +/// Check whether (part of) \p SuperPhysReg is live through \p MI. +/// \pre \p MI defines a subregister of a virtual register that +/// has been assigned to \p SuperPhysReg. +bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI, + unsigned SuperPhysReg) const { + SlotIndex MIIndex = LIS->getInstructionIndex(MI); + SlotIndex BeforeMIUses = MIIndex.getBaseIndex(); + SlotIndex AfterMIDefs = MIIndex.getBoundaryIndex(); + for (MCRegUnitIterator Unit(SuperPhysReg, TRI); Unit.isValid(); ++Unit) { + const LiveRange &UnitRange = LIS->getRegUnit(*Unit); + // If the regunit is live both before and after MI, + // we assume it is live through. + // Generally speaking, this is not true, because something like + // "RU = op RU" would match that description. + // However, we know that we are trying to assess whether + // a def of a virtual reg, vreg, is live at the same time of RU. + // If we are in the "RU = op RU" situation, that means that vreg + // is defined at the same time as RU (i.e., "vreg, RU = op RU"). + // Thus, vreg and RU interferes and vreg cannot be assigned to + // SuperPhysReg. Therefore, this situation cannot happen. + if (UnitRange.liveAt(AfterMIDefs) && UnitRange.liveAt(BeforeMIUses)) + return true; + } + return false; +} + void VirtRegRewriter::rewrite() { bool NoSubRegLiveness = !MRI->subRegLivenessEnabled(); SmallVector<unsigned, 8> SuperDeads; @@ -452,7 +479,8 @@ void VirtRegRewriter::rewrite() { // A virtual register kill refers to the whole register, so we may // have to add <imp-use,kill> operands for the super-register. A // partial redef always kills and redefines the super-register. - if (MO.readsReg() && (MO.isDef() || MO.isKill())) + if ((MO.readsReg() && (MO.isDef() || MO.isKill())) || + (MO.isDef() && subRegLiveThrough(*MI, PhysReg))) SuperKills.push_back(PhysReg); if (MO.isDef()) { diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index 495e09fbae355..dd3235244e243 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -134,13 +134,13 @@ dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName, uint64_t StringOffset = StrOffsetExt.getRelocatedValue(EntrySize, &Offset); if (Format == DWARF32) { - OS << format("%8.8x ", StringOffset); uint32_t StringOffset32 = (uint32_t)StringOffset; + OS << format("%8.8x ", StringOffset32); const char *S = StrData.getCStr(&StringOffset32); if (S) OS << format("\"%s\"", S); } else - OS << format("%16.16x ", StringOffset); + OS << format("%16.16" PRIx64 " ", StringOffset); OS << "\n"; } } diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp index 6cf44ffa3796a..4de46bea301e9 100644 --- a/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -196,7 +196,7 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, ++NumErrors; OS << "error: DW_AT_stmt_list offset is beyond .debug_line " "bounds: " - << format("0x%08" PRIx32, *SectionOffset) << "\n"; + << format("0x%08" PRIx64, *SectionOffset) << "\n"; Die.dump(OS, 0); OS << "\n"; } @@ -234,7 +234,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, if (CUOffset >= CUSize) { ++NumErrors; OS << "error: " << FormEncodingString(Form) << " CU offset " - << format("0x%08" PRIx32, CUOffset) + << format("0x%08" PRIx64, CUOffset) << " is invalid (must be less than CU size of " << format("0x%08" PRIx32, CUSize) << "):\n"; Die.dump(OS, 0); @@ -366,7 +366,7 @@ void DWARFVerifier::verifyDebugLineRows() { if (Row.Address < PrevAddress) { ++NumDebugLineErrors; OS << "error: .debug_line[" - << format("0x%08" PRIx32, + << format("0x%08" PRIx64, *toSectionOffset(Die.find(DW_AT_stmt_list))) << "] row[" << RowIndex << "] decreases in address from previous row:\n"; @@ -381,7 +381,7 @@ void DWARFVerifier::verifyDebugLineRows() { if (Row.File > MaxFileIndex) { ++NumDebugLineErrors; OS << "error: .debug_line[" - << format("0x%08" PRIx32, + << format("0x%08" PRIx64, *toSectionOffset(Die.find(DW_AT_stmt_list))) << "][" << RowIndex << "] has invalid file index " << Row.File << " (valid values are [1," << MaxFileIndex << "]):\n"; diff --git a/lib/Object/COFFImportFile.cpp b/lib/Object/COFFImportFile.cpp index a515bc8ad16de..ff039463d08c8 100644 --- a/lib/Object/COFFImportFile.cpp +++ b/lib/Object/COFFImportFile.cpp @@ -557,7 +557,7 @@ NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym, std::error_code writeImportLibrary(StringRef ImportName, StringRef Path, ArrayRef<COFFShortExport> Exports, - MachineTypes Machine) { + MachineTypes Machine, bool MakeWeakAliases) { std::vector<NewArchiveMember> Members; ObjectFactory OF(llvm::sys::path::filename(ImportName), Machine); @@ -575,7 +575,7 @@ std::error_code writeImportLibrary(StringRef ImportName, StringRef Path, if (E.Private) continue; - if (E.isWeak()) { + if (E.isWeak() && MakeWeakAliases) { Members.push_back(OF.createWeakExternal(E.Name, E.ExtName, false)); Members.push_back(OF.createWeakExternal(E.Name, E.ExtName, true)); continue; @@ -587,7 +587,7 @@ std::error_code writeImportLibrary(StringRef ImportName, StringRef Path, if (E.Constant) ImportType = IMPORT_CONST; - StringRef SymbolName = E.isWeak() ? E.ExtName : E.Name; + StringRef SymbolName = E.SymbolName.empty() ? E.Name : E.SymbolName; ImportNameType NameType = getNameType(SymbolName, E.Name, Machine); Expected<std::string> Name = E.ExtName.empty() ? SymbolName diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 005f2d51e4036..9a7f45bde6c99 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -388,6 +388,10 @@ static unsigned isMatchingStore(MachineInstr &LoadInst, } static unsigned getPreIndexedOpcode(unsigned Opc) { + // FIXME: We don't currently support creating pre-indexed loads/stores when + // the load or store is the unscaled version. If we decide to perform such an + // optimization in the future the cases for the unscaled loads/stores will + // need to be added here. switch (Opc) { default: llvm_unreachable("Opcode has no pre-indexed equivalent!"); @@ -451,32 +455,42 @@ static unsigned getPostIndexedOpcode(unsigned Opc) { default: llvm_unreachable("Opcode has no post-indexed wise equivalent!"); case AArch64::STRSui: + case AArch64::STURSi: return AArch64::STRSpost; case AArch64::STRDui: + case AArch64::STURDi: return AArch64::STRDpost; case AArch64::STRQui: + case AArch64::STURQi: return AArch64::STRQpost; case AArch64::STRBBui: return AArch64::STRBBpost; case AArch64::STRHHui: return AArch64::STRHHpost; case AArch64::STRWui: + case AArch64::STURWi: return AArch64::STRWpost; case AArch64::STRXui: + case AArch64::STURXi: return AArch64::STRXpost; case AArch64::LDRSui: + case AArch64::LDURSi: return AArch64::LDRSpost; case AArch64::LDRDui: + case AArch64::LDURDi: return AArch64::LDRDpost; case AArch64::LDRQui: + case AArch64::LDURQi: return AArch64::LDRQpost; case AArch64::LDRBBui: return AArch64::LDRBBpost; case AArch64::LDRHHui: return AArch64::LDRHHpost; case AArch64::LDRWui: + case AArch64::LDURWi: return AArch64::LDRWpost; case AArch64::LDRXui: + case AArch64::LDURXi: return AArch64::LDRXpost; case AArch64::LDRSWui: return AArch64::LDRSWpost; @@ -1694,8 +1708,9 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, ++NumPostFolded; break; } - // Don't know how to handle pre/post-index versions, so move to the next - // instruction. + + // Don't know how to handle unscaled pre/post-index versions below, so + // move to the next instruction. if (TII->isUnscaledLdSt(Opc)) { ++MBBI; break; diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index ec49f0d37af44..46d8f0dba6914 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -769,8 +769,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); const MachineOperand &Dest = MI.getOperand(0); - unsigned StatusReg = MI.getOperand(1).getReg(); - bool StatusDead = MI.getOperand(1).isDead(); + unsigned TempReg = MI.getOperand(1).getReg(); // Duplicating undef operands into 2 instructions does not guarantee the same // value on both; However undef should be replaced by xzr anyway. assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); @@ -797,23 +796,9 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, } // .Lloadcmp: - // mov wStatus, #0 // ldrex rDest, [rAddr] // cmp rDest, rDesired // bne .Ldone - if (!StatusDead) { - if (IsThumb) { - BuildMI(LoadCmpBB, DL, TII->get(ARM::tMOVi8), StatusReg) - .addDef(ARM::CPSR, RegState::Dead) - .addImm(0) - .add(predOps(ARMCC::AL)); - } else { - BuildMI(LoadCmpBB, DL, TII->get(ARM::MOVi), StatusReg) - .addImm(0) - .add(predOps(ARMCC::AL)) - .add(condCodeOp()); - } - } MachineInstrBuilder MIB; MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg()); @@ -836,10 +821,10 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, LoadCmpBB->addSuccessor(StoreBB); // .Lstore: - // strex rStatus, rNew, [rAddr] - // cmp rStatus, #0 + // strex rTempReg, rNew, [rAddr] + // cmp rTempReg, #0 // bne .Lloadcmp - MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg) + MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), TempReg) .addReg(NewReg) .addReg(AddrReg); if (StrexOp == ARM::t2STREX) @@ -848,7 +833,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri; BuildMI(StoreBB, DL, TII->get(CMPri)) - .addReg(StatusReg, getKillRegState(StatusDead)) + .addReg(TempReg, RegState::Kill) .addImm(0) .add(predOps(ARMCC::AL)); BuildMI(StoreBB, DL, TII->get(Bcc)) @@ -904,8 +889,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); MachineOperand &Dest = MI.getOperand(0); - unsigned StatusReg = MI.getOperand(1).getReg(); - bool StatusDead = MI.getOperand(1).isDead(); + unsigned TempReg = MI.getOperand(1).getReg(); // Duplicating undef operands into 2 instructions does not guarantee the same // value on both; However undef should be replaced by xzr anyway. assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); @@ -931,7 +915,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, // .Lloadcmp: // ldrexd rDestLo, rDestHi, [rAddr] // cmp rDestLo, rDesiredLo - // sbcs rStatus<dead>, rDestHi, rDesiredHi + // sbcs rTempReg<dead>, rDestHi, rDesiredHi // bne .Ldone unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD; MachineInstrBuilder MIB; @@ -959,17 +943,17 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, LoadCmpBB->addSuccessor(StoreBB); // .Lstore: - // strexd rStatus, rNewLo, rNewHi, [rAddr] - // cmp rStatus, #0 + // strexd rTempReg, rNewLo, rNewHi, [rAddr] + // cmp rTempReg, #0 // bne .Lloadcmp unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD; - MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg); + MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg); addExclusiveRegPair(MIB, New, 0, IsThumb, TRI); MIB.addReg(AddrReg).add(predOps(ARMCC::AL)); unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri; BuildMI(StoreBB, DL, TII->get(CMPri)) - .addReg(StatusReg, getKillRegState(StatusDead)) + .addReg(TempReg, RegState::Kill) .addImm(0) .add(predOps(ARMCC::AL)); BuildMI(StoreBB, DL, TII->get(Bcc)) diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index d06b7d0896f16..7206083a70791 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -6053,21 +6053,21 @@ def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn), // significantly more naive than the standard expansion: we conservatively // assume seq_cst, strong cmpxchg and omit clrex on failure. -let Constraints = "@earlyclobber $Rd,@earlyclobber $status", +let Constraints = "@earlyclobber $Rd,@earlyclobber $temp", mayLoad = 1, mayStore = 1 in { -def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$status), +def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$temp), (ins GPR:$addr, GPR:$desired, GPR:$new), NoItinerary, []>, Sched<[]>; -def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$status), +def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$temp), (ins GPR:$addr, GPR:$desired, GPR:$new), NoItinerary, []>, Sched<[]>; -def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$status), +def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$temp), (ins GPR:$addr, GPR:$desired, GPR:$new), NoItinerary, []>, Sched<[]>; -def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$status), +def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$temp), (ins GPR:$addr, GPRPair:$desired, GPRPair:$new), NoItinerary, []>, Sched<[]>; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7563bffd8f873..1e73122cdc388 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -419,6 +419,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); } + + // Custom action for SELECT MMX and expand action for SELECT_CC MMX + setOperationAction(ISD::SELECT, MVT::x86mmx, Custom); + setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand); + setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support // SjLj exception handling but a light-weight setjmp/longjmp replacement to @@ -1383,7 +1388,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // (result) is 256-bit but the source is 512-bit wide. // 128-bit was made Custom under AVX1. for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, - MVT::v8f32, MVT::v4f64 }) + MVT::v8f32, MVT::v4f64, MVT::v1i1 }) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1, MVT::v32i1, MVT::v64i1 }) @@ -14570,6 +14575,21 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); MVT ResVT = Op.getSimpleValueType(); + // When v1i1 is legal a scalarization of a vselect with a vXi1 Cond + // would result with: v1i1 = extract_subvector(vXi1, idx). + // Lower these into extract_vector_elt which is already selectable. + if (ResVT == MVT::v1i1) { + assert(Subtarget.hasAVX512() && + "Boolean EXTRACT_SUBVECTOR requires AVX512"); + + MVT EltVT = ResVT.getVectorElementType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT LegalVT = + (TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)).getSimpleVT(); + SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalVT, In, Idx); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ResVT, Res); + } + assert((In.getSimpleValueType().is256BitVector() || In.getSimpleValueType().is512BitVector()) && "Can only extract from 256-bit or 512-bit vectors"); @@ -20651,8 +20671,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, } // ADC/ADCX/SBB case ADX: { - SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::Other); - SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::Other); + SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32); + SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::i32); SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2), DAG.getConstant(-1, dl, MVT::i8)); SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3), @@ -30663,6 +30683,14 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, return SDValue(N, 0); } + // Custom action for SELECT MMX + if (VT == MVT::x86mmx) { + LHS = DAG.getBitcast(MVT::i64, LHS); + RHS = DAG.getBitcast(MVT::i64, RHS); + SDValue newSelect = DAG.getNode(ISD::SELECT, DL, MVT::i64, Cond, LHS, RHS); + return DAG.getBitcast(VT, newSelect); + } + return SDValue(); } @@ -33358,7 +33386,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->getAlignment(), Ld->getMemOperand()->getFlags()); - SDValue NewChain = NewLd.getValue(1); + // Make sure new load is placed in same chain order. + SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, NewLd); if (TokenFactorIndex >= 0) { Ops.push_back(NewChain); NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops); @@ -33379,11 +33408,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, Ld->getPointerInfo().getWithOffset(4), MinAlign(Ld->getAlignment(), 4), Ld->getMemOperand()->getFlags()); + // Make sure new loads are placed in same chain order. + SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, LoLd); + NewChain = DAG.makeEquivalentMemoryOrdering(Ld, HiLd); - SDValue NewChain = LoLd.getValue(1); if (TokenFactorIndex >= 0) { - Ops.push_back(LoLd); - Ops.push_back(HiLd); + Ops.push_back(NewChain); NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops); } diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 705d0f7a5cf7d..0e654a380e7c5 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -978,6 +978,44 @@ multiclass avx512_int_broadcast_reg<bits<8> opc, X86VectorVTInfo _, (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX; } +multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, + X86VectorVTInfo _, SDPatternOperator OpNode, + RegisterClass SrcRC, SubRegIndex Subreg> { + let ExeDomain = _.ExeDomain in + defm r : AVX512_maskable_custom<opc, MRMSrcReg, + (outs _.RC:$dst), (ins GR32:$src), + !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)), + !con((ins _.KRCWM:$mask), (ins GR32:$src)), + "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [], + "$src0 = $dst">, T8PD, EVEX; + + def : Pat <(_.VT (OpNode SrcRC:$src)), + (!cast<Instruction>(Name#r) + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; + + def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0), + (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask, + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; + + def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV), + (!cast<Instruction>(Name#rkz) _.KRCWM:$mask, + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>; +} + +multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name, + AVX512VLVectorVTInfo _, SDPatternOperator OpNode, + RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> { + let Predicates = [prd] in + defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, _.info512, OpNode, SrcRC, + Subreg>, EVEX_V512; + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, _.info256, OpNode, + SrcRC, Subreg>, EVEX_V256; + defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, _.info128, OpNode, + SrcRC, Subreg>, EVEX_V128; + } +} + multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _, SDPatternOperator OpNode, RegisterClass SrcRC, Predicate prd> { @@ -989,18 +1027,11 @@ multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _, } } -let isCodeGenOnly = 1 in { -defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info, - X86VBroadcast, GR8, HasBWI>; -defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info, - X86VBroadcast, GR16, HasBWI>; -} -let isAsmParserOnly = 1 in { - defm VPBROADCASTBr_Alt : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info, - null_frag, GR32, HasBWI>; - defm VPBROADCASTWr_Alt : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info, - null_frag, GR32, HasBWI>; -} +defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr", + avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>; +defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr", + avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit, + HasBWI>; defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, X86VBroadcast, GR32, HasAVX512>; defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, diff --git a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp index a7de793060746..fc15dc1e6032c 100644 --- a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp +++ b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp @@ -60,11 +60,13 @@ std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs; // Opens a file. Path has to be resolved already. // Newly created memory buffers are owned by this driver. -MemoryBufferRef openFile(StringRef Path) { +Optional<MemoryBufferRef> openFile(StringRef Path) { ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MB = MemoryBuffer::getFile(Path); - if (std::error_code EC = MB.getError()) + if (std::error_code EC = MB.getError()) { llvm::errs() << "fail openFile: " << EC.message() << "\n"; + return None; + } MemoryBufferRef MBRef = MB.get()->getMemBufferRef(); OwningMBs.push_back(std::move(MB.get())); // take ownership @@ -114,11 +116,16 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) { for (auto *Arg : Args.filtered(OPT_UNKNOWN)) llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n"; - MemoryBufferRef MB; - if (auto *Arg = Args.getLastArg(OPT_d)) - MB = openFile(Arg->getValue()); + if (!Args.hasArg(OPT_d)) { + llvm::errs() << "no definition file specified\n"; + return 1; + } + + Optional<MemoryBufferRef> MB = openFile(Args.getLastArg(OPT_d)->getValue()); + if (!MB) + return 1; - if (!MB.getBufferSize()) { + if (!MB->getBufferSize()) { llvm::errs() << "definition file empty\n"; return 1; } @@ -133,7 +140,7 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) { } Expected<COFFModuleDefinition> Def = - parseCOFFModuleDefinition(MB, Machine, true); + parseCOFFModuleDefinition(*MB, Machine, true); if (!Def) { llvm::errs() << "error parsing definition\n" @@ -154,7 +161,7 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) { if (Path.empty()) Path = getImplibPath(Def->OutputFile); - if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine)) + if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine, true)) return 1; return 0; } diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index a33490f6e4acf..ddc975cbed1a7 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -1470,6 +1470,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { } i = CS.arg_begin(); + const unsigned ShadowArgStart = Args.size(); for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) Args.push_back(DFSF.getShadow(*i)); @@ -1505,6 +1506,15 @@ void DFSanVisitor::visitCallSite(CallSite CS) { CustomCI->setCallingConv(CI->getCallingConv()); CustomCI->setAttributes(CI->getAttributes()); + // Update the parameter attributes of the custom call instruction to + // zero extend the shadow parameters. This is required for targets + // which consider ShadowTy an illegal type. + for (unsigned n = 0; n < FT->getNumParams(); n++) { + const unsigned ArgNo = ShadowArgStart + n; + if (CustomCI->getArgOperand(ArgNo)->getType() == DFSF.DFS.ShadowTy) + CustomCI->addParamAttr(ArgNo, Attribute::ZExt); + } + if (!FT->getReturnType()->isVoidTy()) { LoadInst *LabelLoad = IRB.CreateLoad(DFSF.LabelReturnAlloca); DFSF.setShadow(CustomCI, LabelLoad); diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp index 61e8700f1cd67..2e5618686ec21 100644 --- a/lib/Transforms/Scalar/BDCE.cpp +++ b/lib/Transforms/Scalar/BDCE.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/BDCE.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/DemandedBits.h" @@ -35,6 +36,46 @@ using namespace llvm; STATISTIC(NumRemoved, "Number of instructions removed (unused)"); STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)"); +/// If an instruction is trivialized (dead), then the chain of users of that +/// instruction may need to be cleared of assumptions that can no longer be +/// guaranteed correct. +static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) { + assert(I->getType()->isIntegerTy() && "Trivializing a non-integer value?"); + + // Initialize the worklist with eligible direct users. + SmallVector<Instruction *, 16> WorkList; + for (User *JU : I->users()) { + // If all bits of a user are demanded, then we know that nothing below that + // in the def-use chain needs to be changed. + auto *J = dyn_cast<Instruction>(JU); + if (J && !DB.getDemandedBits(J).isAllOnesValue()) + WorkList.push_back(J); + } + + // DFS through subsequent users while tracking visits to avoid cycles. + SmallPtrSet<Instruction *, 16> Visited; + while (!WorkList.empty()) { + Instruction *J = WorkList.pop_back_val(); + + // NSW, NUW, and exact are based on operands that might have changed. + J->dropPoisonGeneratingFlags(); + + // We do not have to worry about llvm.assume or range metadata: + // 1. llvm.assume demands its operand, so trivializing can't change it. + // 2. range metadata only applies to memory accesses which demand all bits. + + Visited.insert(J); + + for (User *KU : J->users()) { + // If all bits of a user are demanded, then we know that nothing below + // that in the def-use chain needs to be changed. + auto *K = dyn_cast<Instruction>(KU); + if (K && !Visited.count(K) && !DB.getDemandedBits(K).isAllOnesValue()) + WorkList.push_back(K); + } + } +} + static bool bitTrackingDCE(Function &F, DemandedBits &DB) { SmallVector<Instruction*, 128> Worklist; bool Changed = false; @@ -51,6 +92,9 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) { // replacing all uses with something else. Then, if they don't need to // remain live (because they have side effects, etc.) we can remove them. DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n"); + + clearAssumptionsOfUsers(&I, DB); + // FIXME: In theory we could substitute undef here instead of zero. // This should be reconsidered once we settle on the semantics of // undef, poison, etc. |