diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-05-08 17:12:57 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-05-08 17:12:57 +0000 |
commit | c46e6a5940c50058e00c0c5f9123fd82e338d29a (patch) | |
tree | 89a719d723035c54a190b1f81d329834f1f93336 /lib/CodeGen | |
parent | 148779df305667b6942fee7e758fdf81a6498f38 (diff) |
Notes
Diffstat (limited to 'lib/CodeGen')
25 files changed, 561 insertions, 352 deletions
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index b11e30c359b3..7ddb86d80bf0 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2761,37 +2761,63 @@ void AsmPrinter::emitXRayTable() { auto PrevSection = OutStreamer->getCurrentSectionOnly(); auto Fn = MF->getFunction(); - MCSection *Section = nullptr; + MCSection *InstMap = nullptr; + MCSection *FnSledIndex = nullptr; if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) { if (Fn->hasComdat()) { - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, Fn->getComdat()->getName()); + FnSledIndex = OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, + Fn->getComdat()->getName()); } else { - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + FnSledIndex = OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC); } } else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) { - Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, + InstMap = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, SectionKind::getReadOnlyWithRel()); + FnSledIndex = OutContext.getMachOSection("__DATA", "xray_fn_idx", 0, + SectionKind::getReadOnlyWithRel()); } else { llvm_unreachable("Unsupported target"); } // Before we switch over, we force a reference to a label inside the - // xray_instr_map section. Since this function is always called just - // before the function's end, we assume that this is happening after - // the last return instruction. - + // xray_instr_map and xray_fn_idx sections. Since this function is always + // called just before the function's end, we assume that this is happening + // after the last return instruction. We also use the synthetic label in the + // xray_inster_map as a delimeter for the range of sleds for this function in + // the index. auto WordSizeBytes = MAI->getCodePointerSize(); - MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true); + MCSymbol *SledsStart = OutContext.createTempSymbol("xray_synthetic_", true); + MCSymbol *IdxRef = OutContext.createTempSymbol("xray_fn_idx_synth_", true); OutStreamer->EmitCodeAlignment(16); - OutStreamer->EmitSymbolValue(Tmp, WordSizeBytes, false); - OutStreamer->SwitchSection(Section); - OutStreamer->EmitLabel(Tmp); + OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes, false); + OutStreamer->EmitSymbolValue(IdxRef, WordSizeBytes, false); + + // Now we switch to the instrumentation map section. Because this is done + // per-function, we are able to create an index entry that will represent the + // range of sleds associated with a function. + OutStreamer->SwitchSection(InstMap); + OutStreamer->EmitLabel(SledsStart); for (const auto &Sled : Sleds) Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym); - + MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_synthetic_end", true); + OutStreamer->EmitLabel(SledsEnd); + + // We then emit a single entry in the index per function. We use the symbols + // that bound the instrumentation map as the range for a specific function. + // Each entry here will be 2 * word size aligned, as we're writing down two + // pointers. This should work for both 32-bit and 64-bit platforms. + OutStreamer->SwitchSection(FnSledIndex); + OutStreamer->EmitCodeAlignment(2 * WordSizeBytes); + OutStreamer->EmitLabel(IdxRef); + OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes); + OutStreamer->EmitSymbolValue(SledsEnd, WordSizeBytes); OutStreamer->SwitchSection(PrevSection); Sleds.clear(); } diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 786b11618d75..87b45c001de4 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -469,7 +469,7 @@ void CodeViewDebug::emitTypeInformation() { CommentPrefix += ' '; } - TypeDatabase TypeDB; + TypeDatabase TypeDB(TypeTable.records().size()); CVTypeDumper CVTD(TypeDB); TypeTable.ForEachRecord([&](TypeIndex Index, ArrayRef<uint8_t> Record) { if (OS.isVerboseAsm()) { @@ -1705,10 +1705,12 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) { SizeInBytes, FullName, Ty->getIdentifier()); TypeIndex ClassTI = TypeTable.writeKnownType(CR); - StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(Ty->getFile())); - TypeIndex SIDI = TypeTable.writeKnownType(SIDR); - UdtSourceLineRecord USLR(ClassTI, SIDI, Ty->getLine()); - TypeTable.writeKnownType(USLR); + if (const auto *File = Ty->getFile()) { + StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(File)); + TypeIndex SIDI = TypeTable.writeKnownType(SIDR); + UdtSourceLineRecord USLR(ClassTI, SIDI, Ty->getLine()); + TypeTable.writeKnownType(USLR); + } addToUDTs(Ty, ClassTI); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 2d01301402f0..b63d9f4a4351 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -1850,8 +1850,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { return false; bool HasDups = false; - SmallVector<unsigned, 4> LocalDefs; - SmallSet<unsigned, 4> LocalDefsSet; + SmallVector<unsigned, 4> LocalDefs, LocalKills; + SmallSet<unsigned, 4> ActiveDefsSet, AllDefsSet; MachineBasicBlock::iterator TIB = TBB->begin(); MachineBasicBlock::iterator FIB = FBB->begin(); MachineBasicBlock::iterator TIE = TBB->end(); @@ -1905,7 +1905,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { IsSafe = false; break; } - } else if (!LocalDefsSet.count(Reg)) { + } else if (!ActiveDefsSet.count(Reg)) { if (Defs.count(Reg)) { // Use is defined by the instruction at the point of insertion. IsSafe = false; @@ -1925,18 +1925,22 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!TIB->isSafeToMove(nullptr, DontMoveAcrossStore)) break; - // Remove kills from LocalDefsSet, these registers had short live ranges. + // Remove kills from ActiveDefsSet, these registers had short live ranges. for (const MachineOperand &MO : TIB->operands()) { if (!MO.isReg() || !MO.isUse() || !MO.isKill()) continue; unsigned Reg = MO.getReg(); - if (!Reg || !LocalDefsSet.count(Reg)) + if (!Reg) + continue; + if (!AllDefsSet.count(Reg)) { + LocalKills.push_back(Reg); continue; + } if (TargetRegisterInfo::isPhysicalRegister(Reg)) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - LocalDefsSet.erase(*AI); + ActiveDefsSet.erase(*AI); } else { - LocalDefsSet.erase(Reg); + ActiveDefsSet.erase(Reg); } } @@ -1948,7 +1952,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg)) continue; LocalDefs.push_back(Reg); - addRegAndItsAliases(Reg, TRI, LocalDefsSet); + addRegAndItsAliases(Reg, TRI, ActiveDefsSet); + addRegAndItsAliases(Reg, TRI, AllDefsSet); } HasDups = true; @@ -1963,17 +1968,22 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { FBB->erase(FBB->begin(), FIB); // Update livein's. - bool AddedLiveIns = false; + bool ChangedLiveIns = false; for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { unsigned Def = LocalDefs[i]; - if (LocalDefsSet.count(Def)) { + if (ActiveDefsSet.count(Def)) { TBB->addLiveIn(Def); FBB->addLiveIn(Def); - AddedLiveIns = true; + ChangedLiveIns = true; } } + for (unsigned K : LocalKills) { + TBB->removeLiveIn(K); + FBB->removeLiveIn(K); + ChangedLiveIns = true; + } - if (AddedLiveIns) { + if (ChangedLiveIns) { TBB->sortUniqueLiveIns(); FBB->sortUniqueLiveIns(); } diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 75be7a55bd2a..811858f136eb 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1108,6 +1108,14 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) { default: return false; } + } else if (auto CV = dyn_cast<ConstantVector>(&C)) { + if (CV->getNumOperands() == 1) + return translate(*CV->getOperand(0), Reg); + SmallVector<unsigned, 4> Ops; + for (unsigned i = 0; i < CV->getNumOperands(); ++i) { + Ops.push_back(getOrCreateVReg(*CV->getOperand(i))); + } + EntryBuilder.buildMerge(Reg, Ops); } else return false; @@ -1199,9 +1207,6 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { finishPendingPhis(); - auto &TLI = *MF->getSubtarget().getTargetLowering(); - TLI.finalizeLowering(*MF); - // Merge the argument lowering and constants block with its single // successor, the LLVM-IR entry block. We want the basic block to // be maximal. diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp index cf97c635e79a..a16e14fe2db6 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #define DEBUG_TYPE "instruction-select" @@ -70,8 +71,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { // An optimization remark emitter. Used to report failures. MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); - // FIXME: freezeReservedRegs is now done in IRTranslator, but there are many - // other MF/MFI fields we need to initialize. + // FIXME: There are many other MF/MFI fields we need to initialize. #ifndef NDEBUG // Check that our input is fully legal: we require the function to have the @@ -184,6 +184,9 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { return false; } + auto &TLI = *MF.getSubtarget().getTargetLowering(); + TLI.finalizeLowering(MF); + // FIXME: Should we accurately track changes? return true; } diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp index 74ed58e8d049..aec379197dfb 100644 --- a/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -176,8 +176,13 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { unsigned NumNewInsns = 0; SmallVector<MachineInstr *, 4> WorkList; Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) { - ++NumNewInsns; - WorkList.push_back(MI); + // Only legalize pre-isel generic instructions. + // Legalization process could generate Target specific pseudo + // instructions with generic types. Don't record them + if (isPreISelGenericOpcode(MI->getOpcode())) { + ++NumNewInsns; + WorkList.push_back(MI); + } }); WorkList.push_back(&*MI); diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp index f935390a8d1b..7248f50945d0 100644 --- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -213,21 +213,23 @@ uint64_t RegBankSelect::getRepairCost( return UINT_MAX; } -RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping( +const RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping( MachineInstr &MI, RegisterBankInfo::InstructionMappings &PossibleMappings, SmallVectorImpl<RepairingPlacement> &RepairPts) { assert(!PossibleMappings.empty() && "Do not know how to map this instruction"); - RegisterBankInfo::InstructionMapping *BestMapping = nullptr; + const RegisterBankInfo::InstructionMapping *BestMapping = nullptr; MappingCost Cost = MappingCost::ImpossibleCost(); SmallVector<RepairingPlacement, 4> LocalRepairPts; - for (RegisterBankInfo::InstructionMapping &CurMapping : PossibleMappings) { - MappingCost CurCost = computeMapping(MI, CurMapping, LocalRepairPts, &Cost); + for (const RegisterBankInfo::InstructionMapping *CurMapping : + PossibleMappings) { + MappingCost CurCost = + computeMapping(MI, *CurMapping, LocalRepairPts, &Cost); if (CurCost < Cost) { DEBUG(dbgs() << "New best: " << CurCost << '\n'); Cost = CurCost; - BestMapping = &CurMapping; + BestMapping = CurMapping; RepairPts.clear(); for (RepairingPlacement &RepairPt : LocalRepairPts) RepairPts.emplace_back(std::move(RepairPt)); @@ -237,7 +239,7 @@ RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping( // If none of the mapping worked that means they are all impossible. // Thus, pick the first one and set an impossible repairing point. // It will trigger the failed isel mode. - BestMapping = &(*PossibleMappings.begin()); + BestMapping = *PossibleMappings.begin(); RepairPts.emplace_back( RepairingPlacement(MI, 0, *TRI, *this, RepairingPlacement::Impossible)); } else @@ -543,10 +545,10 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) { // Remember the repairing placement for all the operands. SmallVector<RepairingPlacement, 4> RepairPts; - RegisterBankInfo::InstructionMapping BestMapping; + const RegisterBankInfo::InstructionMapping *BestMapping; if (OptMode == RegBankSelect::Mode::Fast) { - BestMapping = RBI->getInstrMapping(MI); - MappingCost DefaultCost = computeMapping(MI, BestMapping, RepairPts); + BestMapping = &RBI->getInstrMapping(MI); + MappingCost DefaultCost = computeMapping(MI, *BestMapping, RepairPts); (void)DefaultCost; if (DefaultCost == MappingCost::ImpossibleCost()) return false; @@ -555,16 +557,16 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) { RBI->getInstrPossibleMappings(MI); if (PossibleMappings.empty()) return false; - BestMapping = std::move(findBestMapping(MI, PossibleMappings, RepairPts)); + BestMapping = &findBestMapping(MI, PossibleMappings, RepairPts); } // Make sure the mapping is valid for MI. - assert(BestMapping.verify(MI) && "Invalid instruction mapping"); + assert(BestMapping->verify(MI) && "Invalid instruction mapping"); - DEBUG(dbgs() << "Best Mapping: " << BestMapping << '\n'); + DEBUG(dbgs() << "Best Mapping: " << *BestMapping << '\n'); // After this call, MI may not be valid anymore. // Do not use it. - return applyMapping(MI, BestMapping, RepairPts); + return applyMapping(MI, *BestMapping, RepairPts); } bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index d5ae9a6776a4..a841902feed1 100644 --- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -45,6 +45,10 @@ STATISTIC(NumOperandsMappingsCreated, "Number of operands mappings dynamically created"); STATISTIC(NumOperandsMappingsAccessed, "Number of operands mappings dynamically accessed"); +STATISTIC(NumInstructionMappingsCreated, + "Number of instruction mappings dynamically created"); +STATISTIC(NumInstructionMappingsAccessed, + "Number of instruction mappings dynamically accessed"); const unsigned RegisterBankInfo::DefaultMappingID = UINT_MAX; const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1; @@ -137,7 +141,7 @@ static bool isCopyLike(const MachineInstr &MI) { MI.getOpcode() == TargetOpcode::REG_SEQUENCE; } -RegisterBankInfo::InstructionMapping +const RegisterBankInfo::InstructionMapping & RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { // For copies we want to walk over the operands and try to find one // that has a register bank since the instruction itself will not get @@ -147,9 +151,6 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { // is important. The rest is not constrained. unsigned NumOperandsForMapping = IsCopyLike ? 1 : MI.getNumOperands(); - RegisterBankInfo::InstructionMapping Mapping(DefaultMappingID, /*Cost*/ 1, - /*OperandsMapping*/ nullptr, - NumOperandsForMapping); const MachineFunction &MF = *MI.getParent()->getParent(); const TargetSubtargetInfo &STI = MF.getSubtarget(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); @@ -190,7 +191,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { if (!IsCopyLike) // MI does not carry enough information to guess the mapping. - return InstructionMapping(); + return getInvalidInstructionMapping(); continue; } } @@ -206,11 +207,13 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { if (IsCopyLike && !CompleteMapping) // No way to deduce the type from what we have. - return InstructionMapping(); + return getInvalidInstructionMapping(); assert(CompleteMapping && "Setting an uncomplete mapping"); - Mapping.setOperandsMapping(getOperandsMapping(OperandsMapping)); - return Mapping; + return getInstructionMapping( + DefaultMappingID, /*Cost*/ 1, + /*OperandsMapping*/ getOperandsMapping(OperandsMapping), + NumOperandsForMapping); } /// Hashing function for PartialMapping. @@ -320,9 +323,44 @@ const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping( return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end()); } -RegisterBankInfo::InstructionMapping +static hash_code +hashInstructionMapping(unsigned ID, unsigned Cost, + const RegisterBankInfo::ValueMapping *OperandsMapping, + unsigned NumOperands) { + return hash_combine(ID, Cost, OperandsMapping, NumOperands); +} + +const RegisterBankInfo::InstructionMapping & +RegisterBankInfo::getInstructionMappingImpl( + bool IsInvalid, unsigned ID, unsigned Cost, + const RegisterBankInfo::ValueMapping *OperandsMapping, + unsigned NumOperands) const { + assert(((IsInvalid && ID == InvalidMappingID && Cost == 0 && + OperandsMapping == nullptr && NumOperands == 0) || + !IsInvalid) && + "Mismatch argument for invalid input"); + ++NumInstructionMappingsAccessed; + + hash_code Hash = + hashInstructionMapping(ID, Cost, OperandsMapping, NumOperands); + const auto &It = MapOfInstructionMappings.find(Hash); + if (It != MapOfInstructionMappings.end()) + return *It->second; + + ++NumInstructionMappingsCreated; + + auto &InstrMapping = MapOfInstructionMappings[Hash]; + if (IsInvalid) + InstrMapping = llvm::make_unique<InstructionMapping>(); + else + InstrMapping = llvm::make_unique<InstructionMapping>( + ID, Cost, OperandsMapping, NumOperands); + return *InstrMapping; +} + +const RegisterBankInfo::InstructionMapping & RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { - RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI); + const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI); if (Mapping.isValid()) return Mapping; llvm_unreachable("The target must implement this"); @@ -332,14 +370,14 @@ RegisterBankInfo::InstructionMappings RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const { InstructionMappings PossibleMappings; // Put the default mapping first. - PossibleMappings.push_back(getInstrMapping(MI)); + PossibleMappings.push_back(&getInstrMapping(MI)); // Then the alternative mapping, if any. InstructionMappings AltMappings = getInstrAlternativeMappings(MI); - for (InstructionMapping &AltMapping : AltMappings) - PossibleMappings.emplace_back(std::move(AltMapping)); + for (const InstructionMapping *AltMapping : AltMappings) + PossibleMappings.push_back(AltMapping); #ifndef NDEBUG - for (const InstructionMapping &Mapping : PossibleMappings) - assert(Mapping.verify(MI) && "Mapping is invalid"); + for (const InstructionMapping *Mapping : PossibleMappings) + assert(Mapping->verify(MI) && "Mapping is invalid"); #endif return PossibleMappings; } diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index cac22af32956..1d36ff4e1458 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -12,11 +12,13 @@ //===----------------------------------------------------------------------===// #include "MIParser.h" + #include "MILexer.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" +#include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -134,7 +136,8 @@ public: bool parseBasicBlockDefinition(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots); - bool parseBasicBlock(MachineBasicBlock &MBB); + bool parseBasicBlock(MachineBasicBlock &MBB, + MachineBasicBlock *&AddFalthroughFrom); bool parseBasicBlockLiveins(MachineBasicBlock &MBB); bool parseBasicBlockSuccessors(MachineBasicBlock &MBB); @@ -518,7 +521,8 @@ bool MIParser::parseBasicBlockSuccessors(MachineBasicBlock &MBB) { return false; } -bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) { +bool MIParser::parseBasicBlock(MachineBasicBlock &MBB, + MachineBasicBlock *&AddFalthroughFrom) { // Skip the definition. assert(Token.is(MIToken::MachineBasicBlockLabel)); lex(); @@ -538,10 +542,12 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) { // // is equivalent to // liveins: %edi, %esi + bool ExplicitSuccesors = false; while (true) { if (Token.is(MIToken::kw_successors)) { if (parseBasicBlockSuccessors(MBB)) return true; + ExplicitSuccesors = true; } else if (Token.is(MIToken::kw_liveins)) { if (parseBasicBlockLiveins(MBB)) return true; @@ -557,10 +563,9 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) { // Parse the instructions. bool IsInBundle = false; MachineInstr *PrevMI = nullptr; - while (true) { - if (Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof)) - return false; - else if (consumeIfPresent(MIToken::Newline)) + while (!Token.is(MIToken::MachineBasicBlockLabel) && + !Token.is(MIToken::Eof)) { + if (consumeIfPresent(MIToken::Newline)) continue; if (consumeIfPresent(MIToken::rbrace)) { // The first parsing pass should verify that all closing '}' have an @@ -592,6 +597,22 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) { assert(Token.isNewlineOrEOF() && "MI is not fully parsed"); lex(); } + + // Construct successor list by searching for basic block machine operands. + if (!ExplicitSuccesors) { + SmallVector<MachineBasicBlock*,4> Successors; + bool IsFallthrough; + guessSuccessors(MBB, Successors, IsFallthrough); + for (MachineBasicBlock *Succ : Successors) + MBB.addSuccessor(Succ); + + if (IsFallthrough) { + AddFalthroughFrom = &MBB; + } else { + MBB.normalizeSuccProbs(); + } + } + return false; } @@ -605,11 +626,18 @@ bool MIParser::parseBasicBlocks() { // The first parsing pass should have verified that this token is a MBB label // in the 'parseBasicBlockDefinitions' method. assert(Token.is(MIToken::MachineBasicBlockLabel)); + MachineBasicBlock *AddFalthroughFrom = nullptr; do { MachineBasicBlock *MBB = nullptr; if (parseMBBReference(MBB)) return true; - if (parseBasicBlock(*MBB)) + if (AddFalthroughFrom) { + if (!AddFalthroughFrom->isSuccessor(MBB)) + AddFalthroughFrom->addSuccessor(MBB); + AddFalthroughFrom->normalizeSuccProbs(); + AddFalthroughFrom = nullptr; + } + if (parseBasicBlock(*MBB, AddFalthroughFrom)) return true; // The method 'parseBasicBlock' should parse the whole block until the next // block or the end of file. diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index d017b21f0a59..6f6a67d81b0f 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -12,7 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "MIRPrinter.h" +#include "llvm/CodeGen/MIRPrinter.h" + #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" @@ -34,6 +35,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Format.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Options.h" #include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -42,6 +44,9 @@ using namespace llvm; +static cl::opt<bool> SimplifyMIR("simplify-mir", + cl::desc("Leave out unnecessary information when printing MIR")); + namespace { /// This structure describes how to print out stack object references. @@ -105,6 +110,9 @@ class MIPrinter { const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds; const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping; + bool canPredictBranchProbabilities(const MachineBasicBlock &MBB) const; + bool canPredictSuccessors(const MachineBasicBlock &MBB) const; + public: MIPrinter(raw_ostream &OS, ModuleSlotTracker &MST, const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds, @@ -454,6 +462,63 @@ void MIRPrinter::initRegisterMaskIds(const MachineFunction &MF) { RegisterMaskIds.insert(std::make_pair(Mask, I++)); } +void llvm::guessSuccessors(const MachineBasicBlock &MBB, + SmallVectorImpl<MachineBasicBlock*> &Result, + bool &IsFallthrough) { + SmallPtrSet<MachineBasicBlock*,8> Seen; + + for (const MachineInstr &MI : MBB) { + if (MI.isPHI()) + continue; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isMBB()) + continue; + MachineBasicBlock *Succ = MO.getMBB(); + auto RP = Seen.insert(Succ); + if (RP.second) + Result.push_back(Succ); + } + } + MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr(); + IsFallthrough = I == MBB.end() || !I->isBarrier(); +} + +bool +MIPrinter::canPredictBranchProbabilities(const MachineBasicBlock &MBB) const { + if (MBB.succ_size() <= 1) + return true; + if (!MBB.hasSuccessorProbabilities()) + return true; + + SmallVector<BranchProbability,8> Normalized(MBB.Probs.begin(), + MBB.Probs.end()); + BranchProbability::normalizeProbabilities(Normalized.begin(), + Normalized.end()); + SmallVector<BranchProbability,8> Equal(Normalized.size()); + BranchProbability::normalizeProbabilities(Equal.begin(), Equal.end()); + + return std::equal(Normalized.begin(), Normalized.end(), Equal.begin()); +} + +bool MIPrinter::canPredictSuccessors(const MachineBasicBlock &MBB) const { + SmallVector<MachineBasicBlock*,8> GuessedSuccs; + bool GuessedFallthrough; + guessSuccessors(MBB, GuessedSuccs, GuessedFallthrough); + if (GuessedFallthrough) { + const MachineFunction &MF = *MBB.getParent(); + MachineFunction::const_iterator NextI = std::next(MBB.getIterator()); + if (NextI != MF.end()) { + MachineBasicBlock *Next = const_cast<MachineBasicBlock*>(&*NextI); + if (!is_contained(GuessedSuccs, Next)) + GuessedSuccs.push_back(Next); + } + } + if (GuessedSuccs.size() != MBB.succ_size()) + return false; + return std::equal(MBB.succ_begin(), MBB.succ_end(), GuessedSuccs.begin()); +} + + void MIPrinter::print(const MachineBasicBlock &MBB) { assert(MBB.getNumber() >= 0 && "Invalid MBB number"); OS << "bb." << MBB.getNumber(); @@ -492,13 +557,15 @@ void MIPrinter::print(const MachineBasicBlock &MBB) { bool HasLineAttributes = false; // Print the successors - if (!MBB.succ_empty()) { + bool canPredictProbs = canPredictBranchProbabilities(MBB); + if (!MBB.succ_empty() && (!SimplifyMIR || !canPredictProbs || + !canPredictSuccessors(MBB))) { OS.indent(2) << "successors: "; for (auto I = MBB.succ_begin(), E = MBB.succ_end(); I != E; ++I) { if (I != MBB.succ_begin()) OS << ", "; printMBBReference(**I); - if (MBB.hasSuccessorProbabilities()) + if (!SimplifyMIR || !canPredictProbs) OS << '(' << format("0x%08" PRIx32, MBB.getSuccProbability(I).getNumerator()) << ')'; diff --git a/lib/CodeGen/MIRPrinter.h b/lib/CodeGen/MIRPrinter.h deleted file mode 100644 index 16aa9038b6b2..000000000000 --- a/lib/CodeGen/MIRPrinter.h +++ /dev/null @@ -1,33 +0,0 @@ -//===- MIRPrinter.h - MIR serialization format printer --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the functions that print out the LLVM IR and the machine -// functions using the MIR serialization format. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_MIRPRINTER_H -#define LLVM_LIB_CODEGEN_MIRPRINTER_H - -namespace llvm { - -class MachineFunction; -class Module; -class raw_ostream; - -/// Print LLVM IR using the MIR serialization format to the given output stream. -void printMIR(raw_ostream &OS, const Module &M); - -/// Print a machine function using the MIR serialization format to the given -/// output stream. -void printMIR(raw_ostream &OS, const MachineFunction &MF); - -} // end namespace llvm - -#endif diff --git a/lib/CodeGen/MIRPrintingPass.cpp b/lib/CodeGen/MIRPrintingPass.cpp index c690bcfad567..671cf1eddc2d 100644 --- a/lib/CodeGen/MIRPrintingPass.cpp +++ b/lib/CodeGen/MIRPrintingPass.cpp @@ -12,7 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "MIRPrinter.h" +#include "llvm/CodeGen/MIRPrinter.h" + #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MIRYamlMapping.h" diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp index 7de8434df806..73d778ff3023 100644 --- a/lib/CodeGen/MachineFrameInfo.cpp +++ b/lib/CodeGen/MachineFrameInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cassert> @@ -175,6 +176,31 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { return (unsigned)Offset; } +void MachineFrameInfo::computeMaxCallFrameSize(const MachineFunction &MF) { + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); + assert(FrameSetupOpcode != ~0u && FrameDestroyOpcode != ~0u && + "Can only compute MaxCallFrameSize if Setup/Destroy opcode are known"); + + MaxCallFrameSize = 0; + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + unsigned Opcode = MI.getOpcode(); + if (Opcode == FrameSetupOpcode || Opcode == FrameDestroyOpcode) { + unsigned Size = TII.getFrameSize(MI); + MaxCallFrameSize = std::max(MaxCallFrameSize, Size); + AdjustsStack = true; + } else if (MI.isInlineAsm()) { + // Some inline asm's need a stack frame, as indicated by operand 1. + unsigned ExtraInfo = MI.getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) + AdjustsStack = true; + } + } + } +} + void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ if (Objects.empty()) return; diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 84bd670105e1..bfb2cde030dc 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -188,8 +188,9 @@ namespace { return Reg < regsReserved.size() && regsReserved.test(Reg); } - bool isAllocatable(unsigned Reg) { - return Reg < TRI->getNumRegs() && MRI->isAllocatable(Reg); + bool isAllocatable(unsigned Reg) const { + return Reg < TRI->getNumRegs() && TRI->isInAllocatableClass(Reg) && + !regsReserved.test(Reg); } // Analysis information if available @@ -526,7 +527,8 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { void MachineVerifier::visitMachineFunctionBefore() { lastIndex = SlotIndex(); - regsReserved = MRI->getReservedRegs(); + regsReserved = MRI->reservedRegsFrozen() ? MRI->getReservedRegs() + : TRI->getReservedRegs(*MF); if (!MF->empty()) markReachable(&MF->front()); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 549f07ecd9ce..d2afeae9e70b 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -277,6 +277,9 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) { AdjustsStack = true; } + assert(!MFI.isMaxCallFrameSizeComputed() || + (MFI.getMaxCallFrameSize() == MaxCallFrameSize && + MFI.adjustsStack() == AdjustsStack)); MFI.setAdjustsStack(AdjustsStack); MFI.setMaxCallFrameSize(MaxCallFrameSize); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 03698ac862af..c77046fdfaf5 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6688,6 +6688,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (isAbs) { EVT VT = LHS.getValueType(); + if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) + return DAG.getNode(ISD::ABS, DL, VT, LHS); + SDValue Shift = DAG.getNode( ISD::SRA, DL, VT, LHS, DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT)); @@ -9469,6 +9472,14 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { return SDValue(); } +static bool isFMulNegTwo(SDValue &N) { + if (N.getOpcode() != ISD::FMUL) + return false; + if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1))) + return CFP->isExactlyValue(-2.0); + return false; +} + SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -9507,6 +9518,16 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return DAG.getNode(ISD::FSUB, DL, VT, N1, GetNegatedExpression(N0, DAG, LegalOperations), Flags); + // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B)) + // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B)) + if ((isFMulNegTwo(N0) && N0.hasOneUse()) || + (isFMulNegTwo(N1) && N1.hasOneUse())) { + bool N1IsFMul = isFMulNegTwo(N1); + SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0); + SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags); + return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags); + } + // FIXME: Auto-upgrade the target/function-level option. if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) { // fold (fadd A, 0) -> A diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 6fb26fc3b73d..8c98e3740f6d 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -861,6 +861,25 @@ bool FastISel::selectPatchpoint(const CallInst *I) { return true; } +bool FastISel::selectXRayCustomEvent(const CallInst *I) { + const auto &Triple = TM.getTargetTriple(); + if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) + return true; // don't do anything to this instruction. + SmallVector<MachineOperand, 8> Ops; + Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)), + /*IsDef=*/false)); + Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)), + /*IsDef=*/false)); + MachineInstrBuilder MIB = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::PATCHABLE_EVENT_CALL)); + for (auto &MO : Ops) + MIB.add(MO); + // Insert the Patchable Event Call instruction, that gets lowered properly. + return true; +} + + /// Returns an AttributeList representing the attributes applied to the return /// value of the given call. static AttributeList getReturnAttrs(FastISel::CallLoweringInfo &CLI) { @@ -1252,6 +1271,9 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: return selectPatchpoint(II); + + case Intrinsic::xray_customevent: + return selectXRayCustomEvent(II); } return fastLowerIntrinsicCall(II); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index a0135dc40b87..cdf4d3a8b4e5 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -402,8 +402,7 @@ FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) { if (BitWidth > LOI->Known.getBitWidth()) { LOI->NumSignBits = 1; - LOI->Known.Zero = LOI->Known.Zero.zextOrTrunc(BitWidth); - LOI->Known.One = LOI->Known.One.zextOrTrunc(BitWidth); + LOI->Known = LOI->Known.zextOrTrunc(BitWidth); } return LOI; diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index a1d70ab6f036..a21b4c733254 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -67,12 +67,11 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) unsigned ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { unsigned NumberDeps = 0; - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) + for (SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; - SUnit *PredSU = I->getSUnit(); + SUnit *PredSU = Pred.getSUnit(); const SDNode *ScegN = PredSU->getNode(); if (!ScegN) @@ -105,12 +104,11 @@ ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, unsigned RCId) { unsigned NumberDeps = 0; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isCtrl()) + for (const SDep &Succ : SU->Succs) { + if (Succ.isCtrl()) continue; - SUnit *SuccSU = I->getSUnit(); + SUnit *SuccSU = Succ.getSUnit(); const SDNode *ScegN = SuccSU->getNode(); if (!ScegN) continue; @@ -142,9 +140,8 @@ unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, static unsigned numberCtrlDepsInSU(SUnit *SU) { unsigned NumberDeps = 0; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) - if (I->isCtrl()) + for (const SDep &Succ : SU->Succs) + if (Succ.isCtrl()) NumberDeps++; return NumberDeps; @@ -152,9 +149,8 @@ static unsigned numberCtrlDepsInSU(SUnit *SU) { static unsigned numberCtrlPredInSU(SUnit *SU) { unsigned NumberDeps = 0; - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) - if (I->isCtrl()) + for (SDep &Pred : SU->Preds) + if (Pred.isCtrl()) NumberDeps++; return NumberDeps; @@ -212,15 +208,14 @@ bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { /// of SU, return it, otherwise return null. SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { SUnit *OnlyAvailablePred = nullptr; - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - SUnit &Pred = *I->getSUnit(); - if (!Pred.isScheduled) { + for (const SDep &Pred : SU->Preds) { + SUnit &PredSU = *Pred.getSUnit(); + if (!PredSU.isScheduled) { // We found an available, but not scheduled, predecessor. If it's the // only one we have found, keep track of it... otherwise give up. - if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + if (OnlyAvailablePred && OnlyAvailablePred != &PredSU) return nullptr; - OnlyAvailablePred = &Pred; + OnlyAvailablePred = &PredSU; } } return OnlyAvailablePred; @@ -230,9 +225,8 @@ void ResourcePriorityQueue::push(SUnit *SU) { // Look at all of the successors of this node. Count the number of nodes that // this node is the sole unscheduled node for. unsigned NumNodesBlocking = 0; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) - if (getSingleUnscheduledPred(I->getSUnit()) == SU) + for (const SDep &Succ : SU->Succs) + if (getSingleUnscheduledPred(Succ.getSUnit()) == SU) ++NumNodesBlocking; NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; @@ -269,14 +263,13 @@ bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { // Now see if there are no other dependencies // to instructions already in the packet. for (unsigned i = 0, e = Packet.size(); i != e; ++i) - for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), - E = Packet[i]->Succs.end(); I != E; ++I) { + for (const SDep &Succ : Packet[i]->Succs) { // Since we do not add pseudos to packets, might as well // ignore order deps. - if (I->isCtrl()) + if (Succ.isCtrl()) continue; - if (I->getSUnit() == SU) + if (Succ.getSUnit() == SU) return false; } @@ -499,11 +492,10 @@ void ResourcePriorityQueue::scheduledNode(SUnit *SU) { } } } - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0)) + for (SDep &Pred : SU->Preds) { + if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0)) continue; - --I->getSUnit()->NumRegDefsLeft; + --Pred.getSUnit()->NumRegDefsLeft; } } @@ -515,10 +507,9 @@ void ResourcePriorityQueue::scheduledNode(SUnit *SU) { // number of live ranges. All others, increase it. unsigned NumberNonControlDeps = 0; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - adjustPriorityOfUnscheduledPreds(I->getSUnit()); - if (!I->isCtrl()) + for (const SDep &Succ : SU->Succs) { + adjustPriorityOfUnscheduledPreds(Succ.getSUnit()); + if (!Succ.isCtrl()) NumberNonControlDeps++; } @@ -595,8 +586,7 @@ SUnit *ResourcePriorityQueue::pop() { std::vector<SUnit *>::iterator Best = Queue.begin(); if (!DisableDFASched) { int BestCost = SUSchedulingCost(*Best); - for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()), - E = Queue.end(); I != E; ++I) { + for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) { if (SUSchedulingCost(*I) > BestCost) { BestCost = SUSchedulingCost(*I); @@ -606,8 +596,7 @@ SUnit *ResourcePriorityQueue::pop() { } // Use default TD scheduling mechanism. else { - for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()), - E = Queue.end(); I != E; ++I) + for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 62e7733ecd2b..d80a281279b6 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -160,18 +160,17 @@ void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { // Bottom up: release predecessors - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - ReleasePred(SU, &*I); - if (I->isAssignedRegDep()) { + for (SDep &Pred : SU->Preds) { + ReleasePred(SU, &Pred); + if (Pred.isAssignedRegDep()) { // This is a physical register dependency and it's impossible or // expensive to copy the register. Make sure nothing that can // clobber the register is scheduled between the predecessor and // this node. - if (!LiveRegDefs[I->getReg()]) { + if (!LiveRegDefs[Pred.getReg()]) { ++NumLiveRegs; - LiveRegDefs[I->getReg()] = I->getSUnit(); - LiveRegCycles[I->getReg()] = CurCycle; + LiveRegDefs[Pred.getReg()] = Pred.getSUnit(); + LiveRegCycles[Pred.getReg()] = CurCycle; } } } @@ -191,16 +190,15 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { ReleasePredecessors(SU, CurCycle); // Release all the implicit physical register defs that are live. - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isAssignedRegDep()) { - if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { + for (SDep &Succ : SU->Succs) { + if (Succ.isAssignedRegDep()) { + if (LiveRegCycles[Succ.getReg()] == Succ.getSUnit()->getHeight()) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); - assert(LiveRegDefs[I->getReg()] == SU && + assert(LiveRegDefs[Succ.getReg()] == SU && "Physical register dependency violated?"); --NumLiveRegs; - LiveRegDefs[I->getReg()] = nullptr; - LiveRegCycles[I->getReg()] = 0; + LiveRegDefs[Succ.getReg()] = nullptr; + LiveRegCycles[Succ.getReg()] = 0; } } } @@ -282,22 +280,20 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { SmallVector<SDep, 4> LoadPreds; SmallVector<SDep, 4> NodePreds; SmallVector<SDep, 4> NodeSuccs; - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) - ChainPred = *I; - else if (I->getSUnit()->getNode() && - I->getSUnit()->getNode()->isOperandOf(LoadNode)) - LoadPreds.push_back(*I); + for (SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) + ChainPred = Pred; + else if (Pred.getSUnit()->getNode() && + Pred.getSUnit()->getNode()->isOperandOf(LoadNode)) + LoadPreds.push_back(Pred); else - NodePreds.push_back(*I); + NodePreds.push_back(Pred); } - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isCtrl()) - ChainSuccs.push_back(*I); + for (SDep &Succ : SU->Succs) { + if (Succ.isCtrl()) + ChainSuccs.push_back(Succ); else - NodeSuccs.push_back(*I); + NodeSuccs.push_back(Succ); } if (ChainPred.getSUnit()) { @@ -354,21 +350,19 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { NewSU = Clone(SU); // New SUnit has the exact same predecessors. - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) - if (!I->isArtificial()) - AddPred(NewSU, *I); + for (SDep &Pred : SU->Preds) + if (!Pred.isArtificial()) + AddPred(NewSU, Pred); // Only copy scheduled successors. Cut them from old node's successor // list and move them over. SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isArtificial()) + for (SDep &Succ : SU->Succs) { + if (Succ.isArtificial()) continue; - SUnit *SuccSU = I->getSUnit(); + SUnit *SuccSU = Succ.getSUnit(); if (SuccSU->isScheduled) { - SDep D = *I; + SDep D = Succ; D.setSUnit(NewSU); AddPred(SuccSU, D); D.setSUnit(SU); @@ -399,16 +393,15 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, // Only copy scheduled successors. Cut them from old node's successor // list and move them over. SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isArtificial()) + for (SDep &Succ : SU->Succs) { + if (Succ.isArtificial()) continue; - SUnit *SuccSU = I->getSUnit(); + SUnit *SuccSU = Succ.getSUnit(); if (SuccSU->isScheduled) { - SDep D = *I; + SDep D = Succ; D.setSUnit(CopyToSU); AddPred(SuccSU, D); - DelDeps.push_back(std::make_pair(SuccSU, *I)); + DelDeps.push_back(std::make_pair(SuccSU, Succ)); } } for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { @@ -479,10 +472,9 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, SmallSet<unsigned, 4> RegAdded; // If this node would clobber any "live" register, then it's not ready. - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isAssignedRegDep()) { - CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + for (SDep &Pred : SU->Preds) { + if (Pred.isAssignedRegDep()) { + CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs, RegAdded, LRegs, TRI); } } @@ -755,9 +747,8 @@ void ScheduleDAGLinearize::Schedule() { // Glue user must be scheduled together with the glue operand. So other // users of the glue operand must be treated as its users. SDNode *ImmGUser = Glue->getGluedUser(); - for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end(); - ui != ue; ++ui) - if (*ui == ImmGUser) + for (const SDNode *U : Glue->uses()) + if (U == ImmGUser) --Degree; GUser->setNodeId(UDegree + Degree); Glue->setNodeId(1); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 69b76fbe57d2..4f4025d8ae6a 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -520,21 +520,20 @@ FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, /// interference on flags. void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { // Bottom up: release predecessors - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - ReleasePred(SU, &*I); - if (I->isAssignedRegDep()) { + for (SDep &Pred : SU->Preds) { + ReleasePred(SU, &Pred); + if (Pred.isAssignedRegDep()) { // This is a physical register dependency and it's impossible or // expensive to copy the register. Make sure nothing that can // clobber the register is scheduled between the predecessor and // this node. - SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef; - assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) && + SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef; + assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) && "interference on register dependence"); - LiveRegDefs[I->getReg()] = I->getSUnit(); - if (!LiveRegGens[I->getReg()]) { + LiveRegDefs[Pred.getReg()] = Pred.getSUnit(); + if (!LiveRegGens[Pred.getReg()]) { ++NumLiveRegs; - LiveRegGens[I->getReg()] = SU; + LiveRegGens[Pred.getReg()] = SU; } } } @@ -733,15 +732,14 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { ReleasePredecessors(SU); // Release all the implicit physical register defs that are live. - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - // LiveRegDegs[I->getReg()] != SU when SU is a two-address node. - if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) { + for (SDep &Succ : SU->Succs) { + // LiveRegDegs[Succ.getReg()] != SU when SU is a two-address node. + if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); --NumLiveRegs; - LiveRegDefs[I->getReg()] = nullptr; - LiveRegGens[I->getReg()] = nullptr; - releaseInterferences(I->getReg()); + LiveRegDefs[Succ.getReg()] = nullptr; + LiveRegGens[Succ.getReg()] = nullptr; + releaseInterferences(Succ.getReg()); } } // Release the special call resource dependence, if this is the beginning @@ -802,17 +800,16 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); DEBUG(SU->dump(this)); - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - CapturePred(&*I); - if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){ + for (SDep &Pred : SU->Preds) { + CapturePred(&Pred); + if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); - assert(LiveRegDefs[I->getReg()] == I->getSUnit() && + assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() && "Physical register dependency violated?"); --NumLiveRegs; - LiveRegDefs[I->getReg()] = nullptr; - LiveRegGens[I->getReg()] = nullptr; - releaseInterferences(I->getReg()); + LiveRegDefs[Pred.getReg()] = nullptr; + LiveRegGens[Pred.getReg()] = nullptr; + releaseInterferences(Pred.getReg()); } } @@ -895,7 +892,7 @@ void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead); unsigned HazardCycle = (*I)->getHeight(); - for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) { + for (auto E = Sequence.end(); I != E; ++I) { SUnit *SU = *I; for (; SU->getHeight() > HazardCycle; ++HazardCycle) { HazardRec->RecedeCycle(); @@ -1261,10 +1258,9 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { // // If SU is the currently live definition of the same register that it uses, // then we are free to schedule it. - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU) - CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs.get(), + for (SDep &Pred : SU->Preds) { + if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU) + CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(), RegAdded, LRegs, TRI); } @@ -1743,8 +1739,7 @@ protected: template<class SF> static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { std::vector<SUnit *>::iterator Best = Q.begin(); - for (std::vector<SUnit *>::iterator I = std::next(Q.begin()), - E = Q.end(); I != E; ++I) + for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; SUnit *V = *Best; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 9d949a2bbfa6..d605a1dc1c20 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2017,8 +2017,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, if (SrcOp.getValueSizeInBits() != BitWidth) { assert(SrcOp.getValueSizeInBits() > BitWidth && "Expected BUILD_VECTOR implicit truncation"); - Known2.One = Known2.One.trunc(BitWidth); - Known2.Zero = Known2.Zero.trunc(BitWidth); + Known2 = Known2.trunc(BitWidth); } // Known bits are the values that are shared by every demanded element. @@ -2045,8 +2044,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, if (M < 0) { // For UNDEF elements, we don't know anything about the common state of // the shuffle result. - Known.One.clearAllBits(); - Known.Zero.clearAllBits(); + Known.resetAll(); DemandedLHS.clearAllBits(); DemandedRHS.clearAllBits(); break; @@ -2219,14 +2217,13 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, // Also compute a conservative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. - Known.One.clearAllBits(); unsigned TrailZ = Known.Zero.countTrailingOnes() + Known2.Zero.countTrailingOnes(); unsigned LeadZ = std::max(Known.Zero.countLeadingOnes() + Known2.Zero.countLeadingOnes(), BitWidth) - BitWidth; - Known.Zero.clearAllBits(); + Known.resetAll(); Known.Zero.setLowBits(std::min(TrailZ, BitWidth)); Known.Zero.setHighBits(std::min(LeadZ, BitWidth)); break; @@ -2377,7 +2374,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; } case ISD::CTPOP: { - Known.Zero.setBitsFrom(Log2_32(BitWidth)+1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + // If we know some of the bits are zero, they can't be one. + unsigned PossibleOnes = BitWidth - Known2.Zero.countPopulation(); + Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1); break; } case ISD::LOAD: { @@ -2396,24 +2396,20 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, case ISD::ZERO_EXTEND_VECTOR_INREG: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - Known.Zero = Known.Zero.trunc(InBits); - Known.One = Known.One.trunc(InBits); + Known = Known.trunc(InBits); computeKnownBits(Op.getOperand(0), Known, DemandedElts.zext(InVT.getVectorNumElements()), Depth + 1); - Known.Zero = Known.Zero.zext(BitWidth); - Known.One = Known.One.zext(BitWidth); + Known = Known.zext(BitWidth); Known.Zero.setBitsFrom(InBits); break; } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - Known.Zero = Known.Zero.trunc(InBits); - Known.One = Known.One.trunc(InBits); + Known = Known.trunc(InBits); computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); - Known.Zero = Known.Zero.zext(BitWidth); - Known.One = Known.One.zext(BitWidth); + Known = Known.zext(BitWidth); Known.Zero.setBitsFrom(InBits); break; } @@ -2422,34 +2418,28 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - Known.Zero = Known.Zero.trunc(InBits); - Known.One = Known.One.trunc(InBits); + Known = Known.trunc(InBits); computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); // If the sign bit is known to be zero or one, then sext will extend // it to the top bits, else it will just zext. - Known.Zero = Known.Zero.sext(BitWidth); - Known.One = Known.One.sext(BitWidth); + Known = Known.sext(BitWidth); break; } case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - Known.Zero = Known.Zero.trunc(InBits); - Known.One = Known.One.trunc(InBits); + Known = Known.trunc(InBits); computeKnownBits(Op.getOperand(0), Known, Depth+1); - Known.Zero = Known.Zero.zext(BitWidth); - Known.One = Known.One.zext(BitWidth); + Known = Known.zext(BitWidth); break; } case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - Known.Zero = Known.Zero.zext(InBits); - Known.One = Known.One.zext(InBits); + Known = Known.zext(InBits); computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); - Known.Zero = Known.Zero.trunc(BitWidth); - Known.One = Known.One.trunc(BitWidth); + Known = Known.trunc(BitWidth); break; } case ISD::AssertZext: { @@ -2606,8 +2596,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, uint32_t Leaders = std::max(Known.Zero.countLeadingOnes(), Known2.Zero.countLeadingOnes()); - Known.One.clearAllBits(); - Known.Zero.clearAllBits(); + Known.resetAll(); Known.Zero.setHighBits(Leaders); break; } @@ -2621,8 +2610,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, Known.One = Known.One.getHiBits(Known.One.getBitWidth() - Index * BitWidth); // Remove high part of known bit mask - Known.Zero = Known.Zero.trunc(BitWidth); - Known.One = Known.One.trunc(BitWidth); + Known = Known.trunc(BitWidth); break; } case ISD::EXTRACT_VECTOR_ELT: { @@ -2634,10 +2622,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, const unsigned NumSrcElts = VecVT.getVectorNumElements(); // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know // anything about the extended bits. - if (BitWidth > EltBitWidth) { - Known.Zero = Known.Zero.trunc(EltBitWidth); - Known.One = Known.One.trunc(EltBitWidth); - } + if (BitWidth > EltBitWidth) + Known = Known.trunc(EltBitWidth); ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) { // If we know the element index, just demand that vector element. @@ -2648,10 +2634,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, // Unknown element index, so ignore DemandedElts and demand them all. computeKnownBits(InVec, Known, Depth + 1); } - if (BitWidth > EltBitWidth) { - Known.Zero = Known.Zero.zext(BitWidth); - Known.One = Known.One.zext(BitWidth); - } + if (BitWidth > EltBitWidth) + Known = Known.zext(BitWidth); break; } case ISD::INSERT_VECTOR_ELT: { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index ba9e11798f15..50313e2da884 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4992,45 +4992,33 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, nullptr, false); - } else { - // Do not use getValue() in here; we don't want to generate code at - // this point if it hasn't been done yet. - SDValue N = NodeMap[V]; - if (!N.getNode() && isa<Argument>(V)) - // Check unused arguments map. - N = UnusedArgNodeMap[V]; - if (N.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, - false, N)) { - SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, N.getNode(), false); - } - } else if (!V->use_empty() ) { - // Do not call getValue(V) yet, as we don't want to generate code. - // Remember it for later. - DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); - DanglingDebugInfoMap[V] = DDI; - } else { - // We may expand this to cover more cases. One case where we have no - // data available is an unreferenced parameter. - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); - } + return nullptr; } - // Build a debug info table entry. - if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V)) - V = BCI->getOperand(0); - const AllocaInst *AI = dyn_cast<AllocaInst>(V); - // Don't handle byval struct arguments or VLAs, for example. - if (!AI) { - DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); - DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); + // Do not use getValue() in here; we don't want to generate code at + // this point if it hasn't been done yet. + SDValue N = NodeMap[V]; + if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map. + N = UnusedArgNodeMap[V]; + if (N.getNode()) { + if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, false, + N)) + return nullptr; + SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, N.getNode(), false); return nullptr; } - DenseMap<const AllocaInst*, int>::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - if (SI == FuncInfo.StaticAllocaMap.end()) - return nullptr; // VLAs. + + if (!V->use_empty() ) { + // Do not call getValue(V) yet, as we don't want to generate code. + // Remember it for later. + DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); + DanglingDebugInfoMap[V] = DDI; + return nullptr; + } + + DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); + DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); return nullptr; } @@ -5715,7 +5703,37 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, N); return nullptr; } + case Intrinsic::xray_customevent: { + // Here we want to make sure that the intrinsic behaves as if it has a + // specific calling convention, and only for x86_64. + // FIXME: Support other platforms later. + const auto &Triple = DAG.getTarget().getTargetTriple(); + if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) + return nullptr; + SDLoc DL = getCurSDLoc(); + SmallVector<SDValue, 8> Ops; + + // We want to say that we always want the arguments in registers. + SDValue LogEntryVal = getValue(I.getArgOperand(0)); + SDValue StrSizeVal = getValue(I.getArgOperand(1)); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Chain = getRoot(); + Ops.push_back(LogEntryVal); + Ops.push_back(StrSizeVal); + Ops.push_back(Chain); + + // We need to enforce the calling convention for the callsite, so that + // argument ordering is enforced correctly, and that register allocation can + // see that some registers may be assumed clobbered and have to preserve + // them across calls to the intrinsic. + MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL, + DL, NodeTys, Ops); + SDValue patchableNode = SDValue(MN, 0); + DAG.setRoot(patchableNode); + setValue(&I, patchableNode); + return nullptr; + } case Intrinsic::experimental_deoptimize: LowerDeoptimizeCall(&I); return nullptr; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 2d39ecd9779b..23f597db140c 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -561,8 +561,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Known2.One.getBitWidth() != BitWidth) { assert(Known2.getBitWidth() > BitWidth && "Expected BUILD_VECTOR implicit truncation"); - Known2.One = Known2.One.trunc(BitWidth); - Known2.Zero = Known2.Zero.trunc(BitWidth); + Known2 = Known2.trunc(BitWidth); } // Known bits are the values that are shared by every element. @@ -659,7 +658,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // Output known-1 are known to be set if set in either the LHS | RHS. Known.One |= Known2.One; break; - case ISD::XOR: + case ISD::XOR: { if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) return true; assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); @@ -704,28 +703,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - // If the RHS is a constant, see if we can simplify it. - // for XOR, we prefer to force bits to 1 if they will make a -1. - // If we can't force bits, try to shrink the constant. - if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) { - APInt Expanded = C->getAPIntValue() | (~NewMask); - // If we can expand it to have all bits set, do it. - if (Expanded.isAllOnesValue()) { - if (Expanded != C->getAPIntValue()) { - EVT VT = Op.getValueType(); - SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0), - TLO.DAG.getConstant(Expanded, dl, VT)); - return TLO.CombineTo(Op, New); - } - // If it already has all the bits set, nothing to change - // but don't shrink either! - } else if (ShrinkDemandedConstant(Op, NewMask, TLO)) { - return true; + // If the RHS is a constant, see if we can change it. Don't alter a -1 + // constant because that's a 'not' op, and that is better for combining and + // codegen. + ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1)); + if (C && !C->isAllOnesValue()) { + if (NewMask.isSubsetOf(C->getAPIntValue())) { + // We're flipping all demanded bits. Flip the undemanded bits too. + SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), Op.getValueType()); + return TLO.CombineTo(Op, New); } + // If we can't turn this into a 'not', try to shrink the constant. + if (ShrinkDemandedConstant(Op, NewMask, TLO)) + return true; } Known = std::move(KnownOut); break; + } case ISD::SELECT: if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known, TLO, Depth+1)) return true; @@ -1091,8 +1086,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1)) return true; assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); - Known.Zero = Known.Zero.zext(BitWidth); - Known.One = Known.One.zext(BitWidth); + Known = Known.zext(BitWidth); Known.Zero |= NewBits; break; } @@ -1118,8 +1112,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, Known, TLO, Depth+1)) return true; - Known.Zero = Known.Zero.zext(BitWidth); - Known.One = Known.One.zext(BitWidth); + Known = Known.zext(BitWidth); // If the sign bit is known zero, convert this to a zero extend. if (Known.Zero.intersects(InSignBit)) @@ -1143,8 +1136,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1)) return true; assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); - Known.Zero = Known.Zero.zext(BitWidth); - Known.One = Known.One.zext(BitWidth); + Known = Known.zext(BitWidth); break; } case ISD::TRUNCATE: { @@ -1154,8 +1146,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt TruncMask = NewMask.zext(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, Known, TLO, Depth+1)) return true; - Known.Zero = Known.Zero.trunc(BitWidth); - Known.One = Known.One.trunc(BitWidth); + Known = Known.trunc(BitWidth); // If the input is only used by this truncate, see if we can shrink it based // on the known demanded bits. @@ -1312,7 +1303,7 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Op.getOpcode() == ISD::INTRINSIC_VOID) && "Should use MaskedValueIsZero if you don't know whether Op" " is a target node!"); - Known.Zero.clearAllBits(); Known.One.clearAllBits(); + Known.resetAll(); } /// This method can be implemented by targets that want to expose additional diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp index 7d2848bdc13b..2df3602733f3 100644 --- a/lib/CodeGen/XRayInstrumentation.cpp +++ b/lib/CodeGen/XRayInstrumentation.cpp @@ -18,6 +18,8 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetInstrInfo.h" @@ -33,6 +35,14 @@ struct XRayInstrumentation : public MachineFunctionPass { initializeXRayInstrumentationPass(*PassRegistry::getPassRegistry()); } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; private: @@ -43,7 +53,7 @@ private: // This is the approach to go on CPUs which have a single RET instruction, // like x86/x86_64. void replaceRetWithPatchableRet(MachineFunction &MF, - const TargetInstrInfo *TII); + const TargetInstrInfo *TII); // Prepend the original return instruction with the exit sled code ("patchable // function exit" pseudo-instruction), preserving the original return @@ -54,13 +64,12 @@ private: // have to call the trampoline and return from it to the original return // instruction of the function being instrumented. void prependRetWithPatchableExit(MachineFunction &MF, - const TargetInstrInfo *TII); + const TargetInstrInfo *TII); }; } // anonymous namespace -void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF, - const TargetInstrInfo *TII) -{ +void XRayInstrumentation::replaceRetWithPatchableRet( + MachineFunction &MF, const TargetInstrInfo *TII) { // We look for *all* terminators and returns, then replace those with // PATCHABLE_RET instructions. SmallVector<MachineInstr *, 4> Terminators; @@ -91,9 +100,8 @@ void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF, I->eraseFromParent(); } -void XRayInstrumentation::prependRetWithPatchableExit(MachineFunction &MF, - const TargetInstrInfo *TII) -{ +void XRayInstrumentation::prependRetWithPatchableExit( + MachineFunction &MF, const TargetInstrInfo *TII) { for (auto &MBB : MF) { for (auto &T : MBB.terminators()) { unsigned Opc = 0; @@ -106,7 +114,7 @@ void XRayInstrumentation::prependRetWithPatchableExit(MachineFunction &MF, if (Opc != 0) { // Prepend the return instruction with PATCHABLE_FUNCTION_EXIT or // PATCHABLE_TAIL_CALL . - BuildMI(MBB, T, T.getDebugLoc(),TII->get(Opc)); + BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc)); } } } @@ -125,8 +133,13 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { return false; // XRay threshold attribute not found. if (Attr.getValueAsString().getAsInteger(10, XRayThreshold)) return false; // Invalid value for threshold. - if (F.size() < XRayThreshold) - return false; // Function is too small. + + // Check if we have a loop. + // FIXME: Maybe make this smarter, and see whether the loops are dependent + // on inputs or side-effects? + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + if (MLI.empty() && F.size() < XRayThreshold) + return false; // Function is too small and has no loops. } // We look for the first non-empty MachineBasicBlock, so that we can insert @@ -142,12 +155,10 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { if (!MF.getSubtarget().isXRaySupported()) { FirstMI.emitError("An attempt to perform XRay instrumentation for an" - " unsupported target."); + " unsupported target."); return false; } - // FIXME: Do the loop triviality analysis here or in an earlier pass. - // First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the // MachineFunction. BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(), @@ -176,5 +187,8 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { char XRayInstrumentation::ID = 0; char &llvm::XRayInstrumentationID = XRayInstrumentation::ID; -INITIALIZE_PASS(XRayInstrumentation, "xray-instrumentation", "Insert XRay ops", - false, false) +INITIALIZE_PASS_BEGIN(XRayInstrumentation, "xray-instrumentation", + "Insert XRay ops", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(XRayInstrumentation, "xray-instrumentation", + "Insert XRay ops", false, false) |