Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/LoopAccessAnalysis.cpp | 60
-rw-r--r--  llvm/lib/CodeGen/AtomicExpandPass.cpp | 3
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp | 18
-rw-r--r--  llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 14
-rw-r--r--  llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp | 28
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp | 16
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 9
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.h | 14
8 files changed, 110 insertions(+), 52 deletions(-)
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index ae282a7a10952..f409cd322146e 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -393,7 +393,10 @@ void RuntimePointerChecking::groupChecks(
// equivalence class, the iteration order is deterministic.
for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
MI != ME; ++MI) {
- unsigned Pointer = PositionMap[MI->getPointer()];
+ auto PointerI = PositionMap.find(MI->getPointer());
+ assert(PointerI != PositionMap.end() &&
+ "pointer in equivalence class not found in PositionMap");
+ unsigned Pointer = PointerI->second;
bool Merged = false;
// Mark this pointer as seen.
Seen.insert(Pointer);
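
The hunk above replaces an operator[] lookup with find() plus an assert. A minimal standalone sketch of the same pattern, using std::unordered_map rather than LLVM's DenseMap and a hypothetical lookupPosition helper:

    #include <cassert>
    #include <string>
    #include <unordered_map>

    // Looking a key up with find() + assert makes the "key must already exist"
    // assumption explicit; operator[] on a non-const map would silently insert
    // a default-constructed value on a miss and hide the bug.
    unsigned lookupPosition(const std::unordered_map<std::string, unsigned> &PositionMap,
                            const std::string &Key) {
      auto It = PositionMap.find(Key);
      assert(It != PositionMap.end() && "key not found in PositionMap");
      return It->second;
    }
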
@@ -726,52 +729,55 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
SmallVector<MemAccessInfo, 4> Retries;
+ // First, count how many write and read accesses are in the alias set. Also
+ // collect MemAccessInfos for later.
+ SmallVector<MemAccessInfo, 4> AccessInfos;
for (auto A : AS) {
Value *Ptr = A.getValue();
bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
- MemAccessInfo Access(Ptr, IsWrite);
if (IsWrite)
++NumWritePtrChecks;
else
++NumReadPtrChecks;
+ AccessInfos.emplace_back(Ptr, IsWrite);
+ }
+ // We do not need runtime checks for this alias set if there are no writes,
+ // or only a single write and no reads.
+ if (NumWritePtrChecks == 0 ||
+ (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
+ assert((AS.size() <= 1 ||
+ all_of(AS,
+ [this](auto AC) {
+ MemAccessInfo AccessWrite(AC.getValue(), true);
+ return DepCands.findValue(AccessWrite) == DepCands.end();
+ })) &&
+ "Can only skip updating CanDoRT below, if all entries in AS "
+ "are reads or there is at most 1 entry");
+ continue;
+ }
+
+ for (auto &Access : AccessInfos) {
if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
RunningDepId, ASId, ShouldCheckWrap, false)) {
- LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
+ LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
+ << *Access.getPointer() << '\n');
Retries.push_back(Access);
CanDoAliasSetRT = false;
}
}
- // If we have at least two writes or one write and a read then we need to
- // check them. But there is no need to checks if there is only one
- // dependence set for this alias set.
- //
// Note that this function computes CanDoRT and MayNeedRTCheck
// independently. For example CanDoRT=false, MayNeedRTCheck=false means that
// we have a pointer for which we couldn't find the bounds but we don't
// actually need to emit any checks so it does not matter.
- bool NeedsAliasSetRTCheck = false;
- if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2)) {
- NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 ||
- (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1));
- // For alias sets without at least 2 writes or 1 write and 1 read, there
- // is no need to generate RT checks and CanDoAliasSetRT for this alias set
- // does not impact whether runtime checks can be generated.
- if (!NeedsAliasSetRTCheck) {
- assert((AS.size() <= 1 ||
- all_of(AS,
- [this](auto AC) {
- MemAccessInfo AccessWrite(AC.getValue(), true);
- return DepCands.findValue(AccessWrite) ==
- DepCands.end();
- })) &&
- "Can only skip updating CanDoRT below, if all entries in AS "
- "are reads or there is at most 1 entry");
- continue;
- }
- }
+ //
+ // We need runtime checks for this alias set if there are at least 2
+ // dependence sets (in which case RunningDepId > 2) or if we need to retry
+ // any bound checks (because in that case the number of dependence sets is
+ // incomplete).
+ bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();
// We need to perform run-time alias checks, but some pointers had bounds
// that couldn't be checked.
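
For context, the restructured loop above follows a two-pass shape: classify and count the accesses first, bail out of the alias set early when no runtime check can be needed, and only then do the per-access bound computation. A rough sketch of that shape under invented names (processAliasSet and its callbacks are illustrative, not LAA's API):

    #include <functional>
    #include <vector>

    struct AccessInfo {
      const void *Ptr;
      bool IsWrite;
    };

    // Returns true when a bound check could be created for every access.
    bool processAliasSet(const std::vector<const void *> &Ptrs,
                         const std::function<bool(const void *)> &IsWrite,
                         const std::function<bool(const AccessInfo &)> &CreateCheck) {
      unsigned NumWrites = 0, NumReads = 0;
      std::vector<AccessInfo> Infos;
      for (const void *P : Ptrs) {
        bool Write = IsWrite(P);
        if (Write)
          ++NumWrites;
        else
          ++NumReads;
        Infos.push_back({P, Write});
      }
      // No writes, or a single write and no reads: nothing in this set can
      // conflict, so skip it entirely.
      if (NumWrites == 0 || (NumWrites == 1 && NumReads == 0))
        return true;
      bool CanDoRT = true;
      for (const AccessInfo &A : Infos)
        if (!CreateCheck(A)) // bound computation may fail for some pointers
          CanDoRT = false;
      return CanDoRT;
    }
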
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index a5030305435c1..c61531c5141a9 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1239,7 +1239,8 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Value *NewValueInsert =
insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
Value *StoreSuccess =
- TLI->emitStoreConditional(Builder, NewValueInsert, Addr, MemOpOrder);
+ TLI->emitStoreConditional(Builder, NewValueInsert, PMV.AlignedAddr,
+ MemOpOrder);
StoreSuccess = Builder.CreateICmpEQ(
StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
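
The fix above passes the aligned address from the partword-mask values (PMV) to the store-conditional instead of the original pointer. As a loose illustration of why that matters, a sub-word atomic on an LL/SC target has to operate on the containing naturally aligned word; the helper below uses hypothetical names, not the pass's own code:

    #include <cstdint>

    // Round a byte address down to the 4-byte word that contains it; the masked
    // compare-and-swap then loads/stores this word and edits only the target
    // bytes inside it.
    uint32_t *containingAlignedWord(uint8_t *ByteAddr) {
      auto Raw = reinterpret_cast<std::uintptr_t>(ByteAddr);
      return reinterpret_cast<uint32_t *>(Raw & ~std::uintptr_t{3});
    }
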
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index 2ce1d414e7550..1e2a82615da8c 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -455,19 +455,23 @@ bool InlineAsmLowering::lowerInlineAsm(
unsigned DefRegIdx = InstFlagIdx + 1;
Register Def = Inst->getOperand(DefRegIdx).getReg();
- // Copy input to new vreg with same reg class as Def
- const TargetRegisterClass *RC = MRI->getRegClass(Def);
ArrayRef<Register> SrcRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal);
assert(SrcRegs.size() == 1 && "Single register is expected here");
- Register Tmp = MRI->createVirtualRegister(RC);
- if (!buildAnyextOrCopy(Tmp, SrcRegs[0], MIRBuilder))
- return false;
- // Add Flag and input register operand (Tmp) to Inst. Tie Tmp to Def.
+ // When Def is physreg: use given input.
+ Register In = SrcRegs[0];
+ // When Def is vreg: copy input to new vreg with same reg class as Def.
+ if (Def.isVirtual()) {
+ In = MRI->createVirtualRegister(MRI->getRegClass(Def));
+ if (!buildAnyextOrCopy(In, SrcRegs[0], MIRBuilder))
+ return false;
+ }
+
+ // Add Flag and input register operand (In) to Inst. Tie In to Def.
unsigned UseFlag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1);
unsigned Flag = InlineAsm::getFlagWordForMatchingOp(UseFlag, DefIdx);
Inst.addImm(Flag);
- Inst.addReg(Tmp);
+ Inst.addReg(In);
Inst->tieOperands(DefRegIdx, Inst->getNumOperands() - 1);
break;
}
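
The change above only introduces a temporary virtual register when the tied def is itself virtual; a physical def uses the incoming register directly. A toy model of that decision (Register, pickTiedInput, and the copy list are invented for the sketch, not GlobalISel types):

    #include <utility>
    #include <vector>

    struct Register {
      unsigned Id = 0;
      static constexpr unsigned FirstVirtual = 1u << 16;
      bool isVirtual() const { return Id >= FirstVirtual; }
    };

    // Returns the register to use as the tied input, recording any needed copy.
    Register pickTiedInput(Register Def, Register Src, unsigned &NextVRegId,
                           std::vector<std::pair<Register, Register>> &Copies) {
      if (!Def.isVirtual())
        return Src; // physical def: use the given input as-is
      // Virtual def: copy (or any-extend) the input into a fresh vreg so it can
      // share the def's register class.
      Register Tmp{Register::FirstVirtual + NextVRegId++};
      Copies.push_back({Tmp, Src});
      return Tmp;
    }
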
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 7c39ddc8b1da0..7ed8a718ed3c1 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -269,7 +269,7 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section,
uint64_t SymOffset) {
switch (Type) {
default:
- llvm_unreachable("Relocation type not implemented yet!");
+ report_fatal_error("Relocation type not implemented yet!");
break;
case ELF::R_X86_64_NONE:
break;
@@ -359,7 +359,7 @@ void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section,
default:
// There are other relocation types, but it appears these are the
// only ones currently used by the LLVM ELF object writer
- llvm_unreachable("Relocation type not implemented yet!");
+ report_fatal_error("Relocation type not implemented yet!");
break;
}
}
@@ -382,7 +382,7 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
switch (Type) {
default:
- llvm_unreachable("Relocation type not implemented yet!");
+ report_fatal_error("Relocation type not implemented yet!");
break;
case ELF::R_AARCH64_ABS16: {
uint64_t Result = Value + Addend;
@@ -721,7 +721,7 @@ void RuntimeDyldELF::resolvePPC32Relocation(const SectionEntry &Section,
uint8_t *LocalAddress = Section.getAddressWithOffset(Offset);
switch (Type) {
default:
- llvm_unreachable("Relocation type not implemented yet!");
+ report_fatal_error("Relocation type not implemented yet!");
break;
case ELF::R_PPC_ADDR16_LO:
writeInt16BE(LocalAddress, applyPPClo(Value + Addend));
@@ -741,7 +741,7 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
uint8_t *LocalAddress = Section.getAddressWithOffset(Offset);
switch (Type) {
default:
- llvm_unreachable("Relocation type not implemented yet!");
+ report_fatal_error("Relocation type not implemented yet!");
break;
case ELF::R_PPC64_ADDR16:
writeInt16BE(LocalAddress, applyPPClo(Value + Addend));
@@ -835,7 +835,7 @@ void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section,
uint8_t *LocalAddress = Section.getAddressWithOffset(Offset);
switch (Type) {
default:
- llvm_unreachable("Relocation type not implemented yet!");
+ report_fatal_error("Relocation type not implemented yet!");
break;
case ELF::R_390_PC16DBL:
case ELF::R_390_PLT16DBL: {
@@ -890,7 +890,7 @@ void RuntimeDyldELF::resolveBPFRelocation(const SectionEntry &Section,
switch (Type) {
default:
- llvm_unreachable("Relocation type not implemented yet!");
+ report_fatal_error("Relocation type not implemented yet!");
break;
case ELF::R_BPF_NONE:
break;
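
The repeated change in this file swaps llvm_unreachable for report_fatal_error in the default cases. Roughly, llvm_unreachable documents an internal invariant and may compile to nothing in release builds (undefined behavior if reached), while report_fatal_error always aborts with a message, which is what you want for conditions that untrusted object files can actually trigger. A hedged sketch with stand-in helpers, not LLVM's real ones:

    #include <cstdio>
    #include <cstdlib>

    // Stand-in for report_fatal_error: always diagnoses and aborts, even in
    // release builds, so malformed input cannot fall into undefined behavior.
    [[noreturn]] void reportFatalError(const char *Msg) {
      std::fprintf(stderr, "LLVM ERROR: %s\n", Msg);
      std::abort();
    }

    void resolveRelocation(unsigned Type) {
      switch (Type) {
      case 0: // e.g. R_*_NONE: nothing to do
        break;
      default: // reachable for any relocation kind the JIT does not implement
        reportFatalError("Relocation type not implemented yet!");
      }
    }
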
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
index aa50bd05cb71b..aaadc8dc1b600 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
@@ -48,10 +49,33 @@ unsigned AArch64WinCOFFObjectWriter::getRelocType(
: Target.getSymA()->getKind();
const MCExpr *Expr = Fixup.getValue();
+ if (const AArch64MCExpr *A64E = dyn_cast<AArch64MCExpr>(Expr)) {
+ AArch64MCExpr::VariantKind RefKind = A64E->getKind();
+ switch (AArch64MCExpr::getSymbolLoc(RefKind)) {
+ case AArch64MCExpr::VK_ABS:
+ case AArch64MCExpr::VK_SECREL:
+ // Supported
+ break;
+ default:
+ Ctx.reportError(Fixup.getLoc(), "relocation variant " +
+ A64E->getVariantKindName() +
+ " unsupported on COFF targets");
+ return COFF::IMAGE_REL_ARM64_ABSOLUTE; // Dummy return value
+ }
+ }
+
switch (static_cast<unsigned>(Fixup.getKind())) {
default: {
- const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind());
- report_fatal_error(Twine("unsupported relocation type: ") + Info.Name);
+ if (const AArch64MCExpr *A64E = dyn_cast<AArch64MCExpr>(Expr)) {
+ Ctx.reportError(Fixup.getLoc(), "relocation type " +
+ A64E->getVariantKindName() +
+ " unsupported on COFF targets");
+ } else {
+ const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind());
+ Ctx.reportError(Fixup.getLoc(), Twine("relocation type ") + Info.Name +
+ " unsupported on COFF targets");
+ }
+ return COFF::IMAGE_REL_ARM64_ABSOLUTE; // Dummy return value
}
case FK_Data_4:
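
Rather than calling report_fatal_error, the rewritten default case records the problem on the MCContext and returns a placeholder relocation, so the assembler can point at the offending source location and keep collecting further errors. A small sketch of that report-and-return-dummy pattern, with DiagContext and getRelocType as hypothetical names:

    #include <string>
    #include <utility>
    #include <vector>

    struct DiagContext {
      std::vector<std::string> Errors;
      void reportError(std::string Msg) { Errors.push_back(std::move(Msg)); }
    };

    // Returns a dummy value after reporting, instead of aborting; the caller is
    // expected to check the context for errors before emitting anything.
    unsigned getRelocType(DiagContext &Ctx, bool SupportedVariant, unsigned Kind) {
      if (!SupportedVariant) {
        Ctx.reportError("relocation variant unsupported on COFF targets");
        return 0; // dummy relocation, never emitted
      }
      return Kind;
    }
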
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index b09e92c07f9ba..45f515c5115ed 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -131,10 +131,20 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
// We're tracking up to the Function boundaries, and cannot go beyond because
// of FunctionPass restrictions. We can only ensure that memory is not
// clobbered for memory operations that are live into entry points.
- bool NotClobbered = isEntryFunc && !isClobberedInFunction(&I);
Instruction *PtrI = dyn_cast<Instruction>(Ptr);
- if (!PtrI && NotClobbered && isGlobalLoad(I)) {
- if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
+
+ if (!isEntryFunc) {
+ if (PtrI)
+ setUniformMetadata(PtrI);
+ return;
+ }
+
+ bool NotClobbered = false;
+ if (PtrI)
+ NotClobbered = !isClobberedInFunction(&I);
+ else if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
+ if (isGlobalLoad(I) && !isClobberedInFunction(&I)) {
+ NotClobbered = true;
// Lookup for the existing GEP
if (noClobberClones.count(Ptr)) {
PtrI = noClobberClones[Ptr];
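
The restructuring above (the hunk continues beyond what is shown) handles the non-entry-function case first and returns early, so the clobber analysis only runs where it is valid, i.e. for loads live into entry points. A rough sketch of that control flow with hypothetical stand-ins for the pass's metadata setters:

    #include <cstdio>

    struct Inst { const char *Name; };

    // Hypothetical stand-ins for the pass's uniform / no-clobber annotations.
    void setUniformMetadata(Inst *I) { std::printf("uniform: %s\n", I->Name); }
    void setNoClobberMetadata(Inst *I) { std::printf("noclobber: %s\n", I->Name); }

    void annotateLoad(bool IsEntryFunc, Inst *PtrI, bool ClobberedInFunction) {
      if (!IsEntryFunc) {
        // Outside an entry point we cannot reason about clobbers across the
        // call boundary, so only the uniform annotation is applied.
        if (PtrI)
          setUniformMetadata(PtrI);
        return;
      }
      if (PtrI) {
        setUniformMetadata(PtrI);
        if (!ClobberedInFunction)
          setNoClobberMetadata(PtrI);
      }
    }
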
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 11c97210ead9b..9a4c57fedac2a 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2655,6 +2655,15 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const {
void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
unsigned RegNo) const {
+ // Conservatively clear kill flag for the register if the instructions are in
+ // different basic blocks and in SSA form, because the kill flag may no longer
+ // be right. There is no need to bother with dead flags since defs with no
+ // uses will be handled by DCE.
+ MachineRegisterInfo &MRI = StartMI.getParent()->getParent()->getRegInfo();
+ if (MRI.isSSA() && (StartMI.getParent() != EndMI.getParent())) {
+ MRI.clearKillFlags(RegNo);
+ return;
+ }
// Instructions between [StartMI, EndMI] should be in same basic block.
assert((StartMI.getParent() == EndMI.getParent()) &&
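
The added early-out makes the fixup safe when StartMI and EndMI live in different blocks and the function is still in SSA form: dropping every kill flag for the register is always legal, because a kill flag is only an optimization hint and a missing flag is never wrong. A toy illustration of that conservative fallback (the Instr model below is invented for the sketch):

    #include <algorithm>
    #include <vector>

    struct Instr {
      unsigned Block = 0;
      std::vector<unsigned> KilledRegs;
    };

    void clearKillFlags(std::vector<Instr> &Func, unsigned Reg) {
      for (Instr &I : Func)
        I.KilledRegs.erase(
            std::remove(I.KilledRegs.begin(), I.KilledRegs.end(), Reg),
            I.KilledRegs.end());
    }

    void fixupIsDeadOrKill(std::vector<Instr> &Func, const Instr &Start,
                           const Instr &End, unsigned Reg) {
      if (Start.Block != End.Block) {
        clearKillFlags(Func, Reg); // conservative, always-correct fallback
        return;
      }
      // ... the precise same-block fixup would follow here ...
    }
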
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index d98597f483406..43973c627fcf1 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -565,14 +565,18 @@ public:
int64_t OffsetImm) const;
/// Fixup killed/dead flag for register \p RegNo between instructions [\p
- /// StartMI, \p EndMI]. Some PostRA transformations may violate register
- /// killed/dead flags semantics, this function can be called to fix up. Before
- /// calling this function,
+ /// StartMI, \p EndMI]. Some pre-RA or post-RA transformations may violate
+ /// register killed/dead flag semantics; this function can be called to fix
+ /// them up. Before calling this function,
/// 1. Ensure that \p RegNo liveness is killed after instruction \p EndMI.
/// 2. Ensure that there is no new definition between (\p StartMI, \p EndMI)
/// and possible definition for \p RegNo is \p StartMI or \p EndMI.
- /// 3. Ensure that all instructions between [\p StartMI, \p EndMI] are in same
- /// basic block.
+ /// 3. An accurate fixup can be done when all instructions between
+ /// [\p StartMI, \p EndMI] are in the same basic block.
+ /// 4. When \p StartMI and \p EndMI are not in the same basic block, the kill
+ /// flag is conservatively cleared for all uses of \p RegNo before RA; after
+ /// RA this case asserts, because without reaching-definition analysis
+ /// \p StartMI and \p EndMI are hard to keep right.
void fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
unsigned RegNo) const;
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;