diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Analysis/LoopAccessAnalysis.cpp | 60
-rw-r--r-- | llvm/lib/CodeGen/AtomicExpandPass.cpp | 3
-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp | 18
-rw-r--r-- | llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 14
-rw-r--r-- | llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp | 28
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp | 16
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 9
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.h | 14
8 files changed, 110 insertions, 52 deletions
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index ae282a7a10952..f409cd322146e 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -393,7 +393,10 @@ void RuntimePointerChecking::groupChecks( // equivalence class, the iteration order is deterministic. for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end(); MI != ME; ++MI) { - unsigned Pointer = PositionMap[MI->getPointer()]; + auto PointerI = PositionMap.find(MI->getPointer()); + assert(PointerI != PositionMap.end() && + "pointer in equivalence class not found in PositionMap"); + unsigned Pointer = PointerI->second; bool Merged = false; // Mark this pointer as seen. Seen.insert(Pointer); @@ -726,52 +729,55 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, SmallVector<MemAccessInfo, 4> Retries; + // First, count how many write and read accesses are in the alias set. Also + // collect MemAccessInfos for later. + SmallVector<MemAccessInfo, 4> AccessInfos; for (auto A : AS) { Value *Ptr = A.getValue(); bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); - MemAccessInfo Access(Ptr, IsWrite); if (IsWrite) ++NumWritePtrChecks; else ++NumReadPtrChecks; + AccessInfos.emplace_back(Ptr, IsWrite); + } + // We do not need runtime checks for this alias set, if there are no writes + // or a single write and no reads. 
+ if (NumWritePtrChecks == 0 || + (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) { + assert((AS.size() <= 1 || + all_of(AS, + [this](auto AC) { + MemAccessInfo AccessWrite(AC.getValue(), true); + return DepCands.findValue(AccessWrite) == DepCands.end(); + })) && + "Can only skip updating CanDoRT below, if all entries in AS " + "are reads or there is at most 1 entry"); + continue; + } + + for (auto &Access : AccessInfos) { if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop, RunningDepId, ASId, ShouldCheckWrap, false)) { - LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n'); + LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" + << *Access.getPointer() << '\n'); Retries.push_back(Access); CanDoAliasSetRT = false; } } - // If we have at least two writes or one write and a read then we need to - // check them. But there is no need to checks if there is only one - // dependence set for this alias set. - // // Note that this function computes CanDoRT and MayNeedRTCheck // independently. For example CanDoRT=false, MayNeedRTCheck=false means that // we have a pointer for which we couldn't find the bounds but we don't // actually need to emit any checks so it does not matter. - bool NeedsAliasSetRTCheck = false; - if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2)) { - NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 || - (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1)); - // For alias sets without at least 2 writes or 1 write and 1 read, there - // is no need to generate RT checks and CanDoAliasSetRT for this alias set - // does not impact whether runtime checks can be generated. 
- if (!NeedsAliasSetRTCheck) { - assert((AS.size() <= 1 || - all_of(AS, - [this](auto AC) { - MemAccessInfo AccessWrite(AC.getValue(), true); - return DepCands.findValue(AccessWrite) == - DepCands.end(); - })) && - "Can only skip updating CanDoRT below, if all entries in AS " - "are reads or there is at most 1 entry"); - continue; - } - } + // + // We need runtime checks for this alias set, if there are at least 2 + // dependence sets (in which case RunningDepId > 2) or if we need to re-try + // any bound checks (because in that case the number of dependence sets is + // incomplete). + bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty(); // We need to perform run-time alias checks, but some pointers had bounds // that couldn't be checked. diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index a5030305435c1..c61531c5141a9 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -1239,7 +1239,8 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Value *NewValueInsert = insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV); Value *StoreSuccess = - TLI->emitStoreConditional(Builder, NewValueInsert, Addr, MemOpOrder); + TLI->emitStoreConditional(Builder, NewValueInsert, PMV.AlignedAddr, + MemOpOrder); StoreSuccess = Builder.CreateICmpEQ( StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); BasicBlock *RetryBB = HasReleasedLoadBB ? 
ReleasedLoadBB : StartBB; diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp index 2ce1d414e7550..1e2a82615da8c 100644 --- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp @@ -455,19 +455,23 @@ bool InlineAsmLowering::lowerInlineAsm( unsigned DefRegIdx = InstFlagIdx + 1; Register Def = Inst->getOperand(DefRegIdx).getReg(); - // Copy input to new vreg with same reg class as Def - const TargetRegisterClass *RC = MRI->getRegClass(Def); ArrayRef<Register> SrcRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal); assert(SrcRegs.size() == 1 && "Single register is expected here"); - Register Tmp = MRI->createVirtualRegister(RC); - if (!buildAnyextOrCopy(Tmp, SrcRegs[0], MIRBuilder)) - return false; - // Add Flag and input register operand (Tmp) to Inst. Tie Tmp to Def. + // When Def is physreg: use given input. + Register In = SrcRegs[0]; + // When Def is vreg: copy input to new vreg with same reg class as Def. + if (Def.isVirtual()) { + In = MRI->createVirtualRegister(MRI->getRegClass(Def)); + if (!buildAnyextOrCopy(In, SrcRegs[0], MIRBuilder)) + return false; + } + + // Add Flag and input register operand (In) to Inst. Tie In to Def. 
unsigned UseFlag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1); unsigned Flag = InlineAsm::getFlagWordForMatchingOp(UseFlag, DefIdx); Inst.addImm(Flag); - Inst.addReg(Tmp); + Inst.addReg(In); Inst->tieOperands(DefRegIdx, Inst->getNumOperands() - 1); break; } diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 7c39ddc8b1da0..7ed8a718ed3c1 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -269,7 +269,7 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, uint64_t SymOffset) { switch (Type) { default: - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; case ELF::R_X86_64_NONE: break; @@ -359,7 +359,7 @@ void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section, default: // There are other relocation types, but it appears these are the // only ones currently used by the LLVM ELF object writer - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; } } @@ -382,7 +382,7 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section, switch (Type) { default: - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; case ELF::R_AARCH64_ABS16: { uint64_t Result = Value + Addend; @@ -721,7 +721,7 @@ void RuntimeDyldELF::resolvePPC32Relocation(const SectionEntry &Section, uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); switch (Type) { default: - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; case ELF::R_PPC_ADDR16_LO: writeInt16BE(LocalAddress, applyPPClo(Value + Addend)); @@ -741,7 +741,7 @@ void RuntimeDyldELF::resolvePPC64Relocation(const 
SectionEntry &Section, uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); switch (Type) { default: - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; case ELF::R_PPC64_ADDR16: writeInt16BE(LocalAddress, applyPPClo(Value + Addend)); @@ -835,7 +835,7 @@ void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section, uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); switch (Type) { default: - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; case ELF::R_390_PC16DBL: case ELF::R_390_PLT16DBL: { @@ -890,7 +890,7 @@ void RuntimeDyldELF::resolveBPFRelocation(const SectionEntry &Section, switch (Type) { default: - llvm_unreachable("Relocation type not implemented yet!"); + report_fatal_error("Relocation type not implemented yet!"); break; case ELF::R_BPF_NONE: break; diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp index aa50bd05cb71b..aaadc8dc1b600 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" @@ -48,10 +49,33 @@ unsigned AArch64WinCOFFObjectWriter::getRelocType( : Target.getSymA()->getKind(); const MCExpr *Expr = Fixup.getValue(); + if (const AArch64MCExpr *A64E = dyn_cast<AArch64MCExpr>(Expr)) { + AArch64MCExpr::VariantKind RefKind = A64E->getKind(); + switch (AArch64MCExpr::getSymbolLoc(RefKind)) { + case AArch64MCExpr::VK_ABS: + case AArch64MCExpr::VK_SECREL: + // Supported + break; + default: + Ctx.reportError(Fixup.getLoc(), "relocation 
variant " + + A64E->getVariantKindName() + + " unsupported on COFF targets"); + return COFF::IMAGE_REL_ARM64_ABSOLUTE; // Dummy return value + } + } + switch (static_cast<unsigned>(Fixup.getKind())) { default: { - const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind()); - report_fatal_error(Twine("unsupported relocation type: ") + Info.Name); + if (const AArch64MCExpr *A64E = dyn_cast<AArch64MCExpr>(Expr)) { + Ctx.reportError(Fixup.getLoc(), "relocation type " + + A64E->getVariantKindName() + + " unsupported on COFF targets"); + } else { + const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind()); + Ctx.reportError(Fixup.getLoc(), Twine("relocation type ") + Info.Name + + " unsupported on COFF targets"); + } + return COFF::IMAGE_REL_ARM64_ABSOLUTE; // Dummy return value } case FK_Data_4: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp index b09e92c07f9ba..45f515c5115ed 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -131,10 +131,20 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { // We're tracking up to the Function boundaries, and cannot go beyond because // of FunctionPass restrictions. We can ensure that is memory not clobbered // for memory operations that are live in to entry points only. 
- bool NotClobbered = isEntryFunc && !isClobberedInFunction(&I); Instruction *PtrI = dyn_cast<Instruction>(Ptr); - if (!PtrI && NotClobbered && isGlobalLoad(I)) { - if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) { + + if (!isEntryFunc) { + if (PtrI) + setUniformMetadata(PtrI); + return; + } + + bool NotClobbered = false; + if (PtrI) + NotClobbered = !isClobberedInFunction(&I); + else if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) { + if (isGlobalLoad(I) && !isClobberedInFunction(&I)) { + NotClobbered = true; // Lookup for the existing GEP if (noClobberClones.count(Ptr)) { PtrI = noClobberClones[Ptr]; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 11c97210ead9b..9a4c57fedac2a 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2655,6 +2655,15 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const { void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI, unsigned RegNo) const { + // Conservatively clear kill flag for the register if the instructions are in + // different basic blocks and in SSA form, because the kill flag may no longer + // be right. There is no need to bother with dead flags since defs with no + // uses will be handled by DCE. + MachineRegisterInfo &MRI = StartMI.getParent()->getParent()->getRegInfo(); + if (MRI.isSSA() && (StartMI.getParent() != EndMI.getParent())) { + MRI.clearKillFlags(RegNo); + return; + } // Instructions between [StartMI, EndMI] should be in same basic block. 
assert((StartMI.getParent() == EndMI.getParent()) && diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index d98597f483406..43973c627fcf1 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -565,14 +565,18 @@ public: int64_t OffsetImm) const; /// Fixup killed/dead flag for register \p RegNo between instructions [\p - /// StartMI, \p EndMI]. Some PostRA transformations may violate register - /// killed/dead flags semantics, this function can be called to fix up. Before - /// calling this function, + /// StartMI, \p EndMI]. Some pre-RA or post-RA transformations may violate + /// register killed/dead flags semantics, this function can be called to fix + /// up. Before calling this function, /// 1. Ensure that \p RegNo liveness is killed after instruction \p EndMI. /// 2. Ensure that there is no new definition between (\p StartMI, \p EndMI) /// and possible definition for \p RegNo is \p StartMI or \p EndMI. - /// 3. Ensure that all instructions between [\p StartMI, \p EndMI] are in same - /// basic block. + /// 3. We can do accurate fixup for the case when all instructions between + /// [\p StartMI, \p EndMI] are in same basic block. + /// 4. For the case when \p StartMI and \p EndMI are not in same basic block, + /// we conservatively clear kill flag for all uses of \p RegNo for pre-RA + /// and for post-RA, we give an assertion as without reaching definition + /// analysis post-RA, \p StartMI and \p EndMI are hard to keep right. void fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI, unsigned RegNo) const; void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const; |