author     Dimitry Andric <dim@FreeBSD.org>    2020-07-26 19:36:28 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2020-07-26 19:36:28 +0000
commit     cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree       209fb2a2d68f8f277793fc8df46c753d31bc853b /lld/ELF/Arch
parent     706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
Diffstat (limited to 'lld/ELF/Arch')
-rw-r--r--  lld/ELF/Arch/AArch64.cpp      | 145
-rw-r--r--  lld/ELF/Arch/AMDGPU.cpp       |  17
-rw-r--r--  lld/ELF/Arch/ARM.cpp          | 326
-rw-r--r--  lld/ELF/Arch/AVR.cpp          | 142
-rw-r--r--  lld/ELF/Arch/Hexagon.cpp      |  66
-rw-r--r--  lld/ELF/Arch/MSP430.cpp       |  28
-rw-r--r--  lld/ELF/Arch/Mips.cpp         |  84
-rw-r--r--  lld/ELF/Arch/MipsArchTree.cpp |  26
-rw-r--r--  lld/ELF/Arch/PPC.cpp          | 104
-rw-r--r--  lld/ELF/Arch/PPC64.cpp        | 266
-rw-r--r--  lld/ELF/Arch/RISCV.cpp        |  54
-rw-r--r--  lld/ELF/Arch/SPARCV9.cpp      |  83
-rw-r--r--  lld/ELF/Arch/X86.cpp          |  55
-rw-r--r--  lld/ELF/Arch/X86_64.cpp       | 386
14 files changed, 1353 insertions, 429 deletions
diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index df41a12f7454..637046e90bbd 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -17,14 +17,13 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; // Page(Expr) is the page address of the expression Expr, defined // as (Expr & ~0xFFF). (This applies even if the machine page size // supported by the platform has a different value.) -uint64_t getAArch64Page(uint64_t expr) { +uint64_t elf::getAArch64Page(uint64_t expr) { return expr & ~static_cast<uint64_t>(0xFFF); } @@ -45,12 +44,16 @@ public: uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; bool usesOnlyLowPageBits(RelType type) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; - void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; + void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -123,6 +126,7 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s, case R_AARCH64_CONDBR19: case R_AARCH64_JUMP26: case R_AARCH64_TSTBR14: + case R_AARCH64_PLT32: return R_PLT_PC; case R_AARCH64_PREL16: case R_AARCH64_PREL32: @@ -208,10 +212,10 @@ void AArch64::writePltHeader(uint8_t *buf) const { uint64_t got = in.gotPlt->getVA(); uint64_t plt = in.plt->getVA(); - relocateOne(buf + 4, R_AARCH64_ADR_PREL_PG_HI21, - getAArch64Page(got + 16) - getAArch64Page(plt + 4)); - relocateOne(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16); - relocateOne(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16); + relocateNoSym(buf + 4, R_AARCH64_ADR_PREL_PG_HI21, + getAArch64Page(got + 16) - getAArch64Page(plt + 4)); + relocateNoSym(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16); + relocateNoSym(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16); } void AArch64::writePlt(uint8_t *buf, const Symbol &sym, @@ -225,10 +229,10 @@ void AArch64::writePlt(uint8_t *buf, const Symbol &sym, memcpy(buf, inst, sizeof(inst)); uint64_t gotPltEntryAddr = sym.getGotPltVA(); - relocateOne(buf, R_AARCH64_ADR_PREL_PG_HI21, - getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr)); - relocateOne(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr); - relocateOne(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr); + relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21, + getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr)); + relocateNoSym(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr); + relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr); } bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file, @@ -241,7 +245,8 @@ bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file, // ELF for the ARM 64-bit architecture, section Call and Jump relocations // only permits range extension thunks for 
R_AARCH64_CALL26 and // R_AARCH64_JUMP26 relocation types. - if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26) + if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 && + type != R_AARCH64_PLT32) return false; uint64_t dst = expr == R_PLT_PC ? s.getPltVA() : s.getVA(a); return !inBranchRange(type, branchAddr, dst); @@ -255,11 +260,13 @@ uint32_t AArch64::getThunkSectionSpacing() const { } bool AArch64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { - if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26) + if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 && + type != R_AARCH64_PLT32) return true; // The AArch64 call and unconditional branch instructions have a range of - // +/- 128 MiB. - uint64_t range = 128 * 1024 * 1024; + // +/- 128 MiB. The PLT32 relocation supports a range up to +/- 2 GiB. + uint64_t range = + type == R_AARCH64_PLT32 ? (UINT64_C(1) << 31) : (128 * 1024 * 1024); if (dst > src) { // Immediate of branch is signed. range -= 4; @@ -309,16 +316,21 @@ static void writeSMovWImm(uint8_t *loc, uint32_t imm) { write32le(loc, inst | ((imm & 0xFFFF) << 5)); } -void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void AArch64::relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_AARCH64_ABS16: case R_AARCH64_PREL16: - checkIntUInt(loc, val, 16, type); + checkIntUInt(loc, val, 16, rel); write16le(loc, val); break; case R_AARCH64_ABS32: case R_AARCH64_PREL32: - checkIntUInt(loc, val, 32, type); + checkIntUInt(loc, val, 32, rel); + write32le(loc, val); + break; + case R_AARCH64_PLT32: + checkInt(loc, val, 32, rel); write32le(loc, val); break; case R_AARCH64_ABS64: @@ -332,13 +344,13 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_AARCH64_ADR_PREL_PG_HI21: case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: case R_AARCH64_TLSDESC_ADR_PAGE21: - checkInt(loc, val, 33, type); + checkInt(loc, val, 33, rel); LLVM_FALLTHROUGH; case R_AARCH64_ADR_PREL_PG_HI21_NC: write32AArch64Addr(loc, val >> 12); break; case R_AARCH64_ADR_PREL_LO21: - checkInt(loc, val, 21, type); + checkInt(loc, val, 21, rel); write32AArch64Addr(loc, val); break; case R_AARCH64_JUMP26: @@ -352,13 +364,13 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { write32le(loc, 0x14000000); LLVM_FALLTHROUGH; case R_AARCH64_CALL26: - checkInt(loc, val, 28, type); + checkInt(loc, val, 28, rel); or32le(loc, (val & 0x0FFFFFFC) >> 2); break; case R_AARCH64_CONDBR19: case R_AARCH64_LD_PREL_LO19: - checkAlignment(loc, val, 4, type); - checkInt(loc, val, 21, type); + checkAlignment(loc, val, 4, rel); + checkInt(loc, val, 21, rel); or32le(loc, (val & 0x1FFFFC) << 3); break; case R_AARCH64_LDST8_ABS_LO12_NC: @@ -367,12 +379,12 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_AARCH64_LDST16_ABS_LO12_NC: case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC: - checkAlignment(loc, val, 2, type); + checkAlignment(loc, val, 2, rel); or32AArch64Imm(loc, getBits(val, 1, 11)); break; case R_AARCH64_LDST32_ABS_LO12_NC: case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC: - checkAlignment(loc, val, 4, type); + checkAlignment(loc, val, 4, rel); or32AArch64Imm(loc, getBits(val, 2, 11)); break; case R_AARCH64_LDST64_ABS_LO12_NC: @@ -380,28 +392,28 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC: case R_AARCH64_TLSDESC_LD64_LO12: - 
checkAlignment(loc, val, 8, type); + checkAlignment(loc, val, 8, rel); or32AArch64Imm(loc, getBits(val, 3, 11)); break; case R_AARCH64_LDST128_ABS_LO12_NC: case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC: - checkAlignment(loc, val, 16, type); + checkAlignment(loc, val, 16, rel); or32AArch64Imm(loc, getBits(val, 4, 11)); break; case R_AARCH64_MOVW_UABS_G0: - checkUInt(loc, val, 16, type); + checkUInt(loc, val, 16, rel); LLVM_FALLTHROUGH; case R_AARCH64_MOVW_UABS_G0_NC: or32le(loc, (val & 0xFFFF) << 5); break; case R_AARCH64_MOVW_UABS_G1: - checkUInt(loc, val, 32, type); + checkUInt(loc, val, 32, rel); LLVM_FALLTHROUGH; case R_AARCH64_MOVW_UABS_G1_NC: or32le(loc, (val & 0xFFFF0000) >> 11); break; case R_AARCH64_MOVW_UABS_G2: - checkUInt(loc, val, 48, type); + checkUInt(loc, val, 48, rel); LLVM_FALLTHROUGH; case R_AARCH64_MOVW_UABS_G2_NC: or32le(loc, (val & 0xFFFF00000000) >> 27); @@ -412,7 +424,7 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_AARCH64_MOVW_PREL_G0: case R_AARCH64_MOVW_SABS_G0: case R_AARCH64_TLSLE_MOVW_TPREL_G0: - checkInt(loc, val, 17, type); + checkInt(loc, val, 17, rel); LLVM_FALLTHROUGH; case R_AARCH64_MOVW_PREL_G0_NC: case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: @@ -421,7 +433,7 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_AARCH64_MOVW_PREL_G1: case R_AARCH64_MOVW_SABS_G1: case R_AARCH64_TLSLE_MOVW_TPREL_G1: - checkInt(loc, val, 33, type); + checkInt(loc, val, 33, rel); LLVM_FALLTHROUGH; case R_AARCH64_MOVW_PREL_G1_NC: case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC: @@ -430,7 +442,7 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_AARCH64_MOVW_PREL_G2: case R_AARCH64_MOVW_SABS_G2: case R_AARCH64_TLSLE_MOVW_TPREL_G2: - checkInt(loc, val, 49, type); + checkInt(loc, val, 49, rel); LLVM_FALLTHROUGH; case R_AARCH64_MOVW_PREL_G2_NC: writeSMovWImm(loc, val >> 32); @@ -439,11 +451,11 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { writeSMovWImm(loc, val >> 48); break; case R_AARCH64_TSTBR14: - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); or32le(loc, (val & 0xFFFC) << 3); break; case R_AARCH64_TLSLE_ADD_TPREL_HI12: - checkUInt(loc, val, 24, type); + checkUInt(loc, val, 24, rel); or32AArch64Imm(loc, val >> 12); break; case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: @@ -455,7 +467,8 @@ void AArch64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { } } -void AArch64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { +void AArch64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { // TLSDESC Global-Dynamic relocation are in the form: // adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21] // ldr x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12] @@ -467,9 +480,9 @@ void AArch64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { // movk x0, #0x10 // nop // nop - checkUInt(loc, val, 32, type); + checkUInt(loc, val, 32, rel); - switch (type) { + switch (rel.type) { case R_AARCH64_TLSDESC_ADD_LO12: case R_AARCH64_TLSDESC_CALL: write32le(loc, 0xd503201f); // nop @@ -485,7 +498,8 @@ void AArch64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { } } -void AArch64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { +void AArch64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { // TLSDESC Global-Dynamic relocation are in the form: // adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21] // ldr x1, [x0, #:tlsdesc_lo12:v 
[R_AARCH64_TLSDESC_LD64_LO12] @@ -498,34 +512,35 @@ void AArch64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { // nop // nop - switch (type) { + switch (rel.type) { case R_AARCH64_TLSDESC_ADD_LO12: case R_AARCH64_TLSDESC_CALL: write32le(loc, 0xd503201f); // nop break; case R_AARCH64_TLSDESC_ADR_PAGE21: write32le(loc, 0x90000000); // adrp - relocateOne(loc, R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, val); + relocateNoSym(loc, R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, val); break; case R_AARCH64_TLSDESC_LD64_LO12: write32le(loc, 0xf9400000); // ldr - relocateOne(loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val); + relocateNoSym(loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val); break; default: llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); } } -void AArch64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { - checkUInt(loc, val, 32, type); +void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + checkUInt(loc, val, 32, rel); - if (type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) { + if (rel.type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) { // Generate MOVZ. uint32_t regNo = read32le(loc) & 0x1f; write32le(loc, (0xd2a00000 | regNo) | (((val >> 16) & 0xffff) << 5)); return; } - if (type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) { + if (rel.type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) { // Generate MOVK. uint32_t regNo = read32le(loc) & 0x1f; write32le(loc, (0xf2800000 | regNo) | ((val & 0xffff) << 5)); @@ -593,8 +608,10 @@ AArch64BtiPac::AArch64BtiPac() { // the function in an executable being taken by a shared library. // FIXME: There is a potential optimization to omit the BTI if we detect // that the address of the PLT entry isn't taken. + // The PAC PLT entries require dynamic loader support and this isn't known + // from properties in the objects, so we use the command line flag. btiEntry = btiHeader && !config->shared; - pacEntry = (config->andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_PAC); + pacEntry = config->zPacPlt; if (btiEntry || pacEntry) { pltEntrySize = 24; @@ -627,10 +644,10 @@ void AArch64BtiPac::writePltHeader(uint8_t *buf) const { } memcpy(buf, pltData, sizeof(pltData)); - relocateOne(buf + 4, R_AARCH64_ADR_PREL_PG_HI21, - getAArch64Page(got + 16) - getAArch64Page(plt + 8)); - relocateOne(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16); - relocateOne(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16); + relocateNoSym(buf + 4, R_AARCH64_ADR_PREL_PG_HI21, + getAArch64Page(got + 16) - getAArch64Page(plt + 8)); + relocateNoSym(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16); + relocateNoSym(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16); if (!btiHeader) // We didn't add the BTI c instruction so round out size with NOP. 
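As an aside (a sketch, not part of the commit): the IE-to-LE relaxation above rewrites two fixed instruction templates, and the encodings are easier to follow in isolation. Helper names here are hypothetical.

  #include <cstdint>

  // Rewrite "adrp xN, :gottprel:v" as "movz xN, #hi16(tpoff), lsl #16":
  // keep the destination register of the old instruction and splice the
  // high 16 bits of the TP offset into the MOVZ immediate field.
  uint32_t relaxAdrpToMovz(uint32_t oldInsn, uint64_t tpOff) {
    uint32_t regNo = oldInsn & 0x1f;
    return (0xd2a00000 | regNo) | (((tpOff >> 16) & 0xffff) << 5);
  }

  // Rewrite "ldr xN, [xN, :gottprel_lo12:v]" as "movk xN, #lo16(tpoff)".
  uint32_t relaxLdrToMovk(uint32_t oldInsn, uint64_t tpOff) {
    uint32_t regNo = oldInsn & 0x1f;
    return (0xf2800000 | regNo) | ((tpOff & 0xffff) << 5);
  }

The checkUInt(loc, val, 32, rel) guard above exists because a MOVZ/MOVK pair like this can only materialize a 32-bit offset.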
memcpy(buf + sizeof(pltData), nopData, sizeof(nopData)); @@ -664,11 +681,10 @@ void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym, uint64_t gotPltEntryAddr = sym.getGotPltVA(); memcpy(buf, addrInst, sizeof(addrInst)); - relocateOne(buf, R_AARCH64_ADR_PREL_PG_HI21, - getAArch64Page(gotPltEntryAddr) - - getAArch64Page(pltEntryAddr)); - relocateOne(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr); - relocateOne(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr); + relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21, + getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr)); + relocateNoSym(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr); + relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr); if (pacEntry) memcpy(buf + sizeof(addrInst), pacBr, sizeof(pacBr)); @@ -689,7 +705,4 @@ static TargetInfo *getTargetInfo() { return &t; } -TargetInfo *getAArch64TargetInfo() { return getTargetInfo(); } - -} // namespace elf -} // namespace lld +TargetInfo *elf::getAArch64TargetInfo() { return getTargetInfo(); } diff --git a/lld/ELF/Arch/AMDGPU.cpp b/lld/ELF/Arch/AMDGPU.cpp index b42ca7746742..3610a38692d6 100644 --- a/lld/ELF/Arch/AMDGPU.cpp +++ b/lld/ELF/Arch/AMDGPU.cpp @@ -17,16 +17,16 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class AMDGPU final : public TargetInfo { public: AMDGPU(); uint32_t calcEFlags() const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; RelType getDynRel(RelType type) const override; @@ -58,8 +58,8 @@ uint32_t AMDGPU::calcEFlags() const { return ret; } -void AMDGPU::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void AMDGPU::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { case R_AMDGPU_ABS32: case R_AMDGPU_GOTPCREL: case R_AMDGPU_GOTPCREL32_LO: @@ -108,10 +108,7 @@ RelType AMDGPU::getDynRel(RelType type) const { return R_AMDGPU_NONE; } -TargetInfo *getAMDGPUTargetInfo() { +TargetInfo *elf::getAMDGPUTargetInfo() { static AMDGPU target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index de1023346aa5..fd90557cc4f6 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -18,9 +18,8 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class ARM final : public TargetInfo { @@ -43,7 +42,8 @@ public: int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -64,6 +64,7 @@ ARM::ARM() { ipltEntrySize = 16; trapInstr = {0xd4, 0xd4, 0xd4, 0xd4}; needsThunks = true; + defaultMaxPageSize = 65536; } uint32_t ARM::calcEFlags() const { @@ -120,6 +121,8 @@ RelExpr ARM::getRelExpr(RelType type, const Symbol &s, return R_TLSGD_PC; case R_ARM_TLS_LDM32: return R_TLSLD_PC; + case R_ARM_TLS_LDO32: + return R_DTPREL; case 
R_ARM_BASE_PREL: // B(S) + A - P // FIXME: currently B(S) assumed to be .got, this may not hold for all @@ -131,6 +134,19 @@ RelExpr ARM::getRelExpr(RelType type, const Symbol &s, case R_ARM_THM_MOVW_PREL_NC: case R_ARM_THM_MOVT_PREL: return R_PC; + case R_ARM_ALU_PC_G0: + case R_ARM_LDR_PC_G0: + case R_ARM_THM_ALU_PREL_11_0: + case R_ARM_THM_PC8: + case R_ARM_THM_PC12: + return R_ARM_PCA; + case R_ARM_MOVW_BREL_NC: + case R_ARM_MOVW_BREL: + case R_ARM_MOVT_BREL: + case R_ARM_THM_MOVW_BREL_NC: + case R_ARM_THM_MOVW_BREL: + case R_ARM_THM_MOVT_BREL: + return R_ARM_SBREL; case R_ARM_NONE: return R_NONE; case R_ARM_TLS_LE32: @@ -262,7 +278,8 @@ void ARM::addPltSymbols(InputSection &isec, uint64_t off) const { } bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s, int64_t /*a*/) const { + uint64_t branchAddr, const Symbol &s, + int64_t /*a*/) const { // If S is an undefined weak symbol and does not have a PLT entry then it // will be resolved as a branch to the next instruction. if (s.isUndefWeak() && !s.isInPlt()) @@ -275,8 +292,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, case R_ARM_PLT32: case R_ARM_JUMP24: // Source is ARM, all PLT entries are ARM so no interworking required. - // Otherwise we need to interwork if Symbol has bit 0 set (Thumb). - if (expr == R_PC && ((s.getVA() & 1) == 1)) + // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb). + if (s.isFunc() && expr == R_PC && (s.getVA() & 1)) return true; LLVM_FALLTHROUGH; case R_ARM_CALL: { @@ -286,8 +303,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: // Source is Thumb, all PLT entries are ARM so interworking is required. - // Otherwise we need to interwork if Symbol has bit 0 clear (ARM). - if (expr == R_PLT_PC || ((s.getVA() & 1) == 0)) + // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM). + if (expr == R_PLT_PC || (s.isFunc() && (s.getVA() & 1) == 0)) return true; LLVM_FALLTHROUGH; case R_ARM_THM_CALL: { @@ -375,8 +392,82 @@ bool ARM::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { return distance <= range; } -void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +// Helper to produce message text when LLD detects that a CALL relocation to +// a non STT_FUNC symbol that may result in incorrect interworking between ARM +// or Thumb. +static void stateChangeWarning(uint8_t *loc, RelType relt, const Symbol &s) { + assert(!s.isFunc()); + if (s.isSection()) { + // Section symbols must be defined and in a section. Users cannot change + // the type. Use the section name as getName() returns an empty string. + warn(getErrorLocation(loc) + "branch and link relocation: " + + toString(relt) + " to STT_SECTION symbol " + + cast<Defined>(s).section->name + " ; interworking not performed"); + } else { + // Warn with hint on how to alter the symbol type. + warn(getErrorLocation(loc) + "branch and link relocation: " + + toString(relt) + " to non STT_FUNC symbol: " + s.getName() + + " interworking not performed; consider using directive '.type " + + s.getName() + + ", %function' to give symbol type STT_FUNC if" + " interworking between ARM and Thumb is required"); + } +} + +// Utility functions taken from ARMAddressingModes.h, only changes are LLD +// coding style. + +// Rotate a 32-bit unsigned value right by a specified amt of bits. 
+static uint32_t rotr32(uint32_t val, uint32_t amt) { + assert(amt < 32 && "Invalid rotate amount"); + return (val >> amt) | (val << ((32 - amt) & 31)); +} + +// Rotate a 32-bit unsigned value left by a specified amt of bits. +static uint32_t rotl32(uint32_t val, uint32_t amt) { + assert(amt < 32 && "Invalid rotate amount"); + return (val << amt) | (val >> ((32 - amt) & 31)); +} + +// Try to encode a 32-bit unsigned immediate imm with an immediate shifter +// operand, this form is an 8-bit immediate rotated right by an even number of +// bits. We compute the rotate amount to use. If this immediate value cannot be +// handled with a single shifter-op, determine a good rotate amount that will +// take a maximal chunk of bits out of the immediate. +static uint32_t getSOImmValRotate(uint32_t imm) { + // 8-bit (or less) immediates are trivially shifter_operands with a rotate + // of zero. + if ((imm & ~255U) == 0) + return 0; + + // Use CTZ to compute the rotate amount. + unsigned tz = llvm::countTrailingZeros(imm); + + // Rotate amount must be even. Something like 0x200 must be rotated 8 bits, + // not 9. + unsigned rotAmt = tz & ~1; + + // If we can handle this spread, return it. + if ((rotr32(imm, rotAmt) & ~255U) == 0) + return (32 - rotAmt) & 31; // HW rotates right, not left. + + // For values like 0xF000000F, we should ignore the low 6 bits, then + // retry the hunt. + if (imm & 63U) { + unsigned tz2 = countTrailingZeros(imm & ~63U); + unsigned rotAmt2 = tz2 & ~1; + if ((rotr32(imm, rotAmt2) & ~255U) == 0) + return (32 - rotAmt2) & 31; // HW rotates right, not left. + } + + // Otherwise, we have no way to cover this span of bits with a single + // shifter_op immediate. Return a chunk of bits that will be useful to + // handle. + return (32 - rotAmt) & 31; // HW rotates right, not left. +} + +void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { case R_ARM_ABS32: case R_ARM_BASE_PREL: case R_ARM_GOTOFF32: @@ -397,40 +488,49 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { write32le(loc, val); break; case R_ARM_PREL31: - checkInt(loc, val, 31, type); + checkInt(loc, val, 31, rel); write32le(loc, (read32le(loc) & 0x80000000) | (val & ~0x80000000)); break; - case R_ARM_CALL: - // R_ARM_CALL is used for BL and BLX instructions, depending on the - // value of bit 0 of Val, we must select a BL or BLX instruction - if (val & 1) { - // If bit 0 of Val is 1 the target is Thumb, we must select a BLX. + case R_ARM_CALL: { + // R_ARM_CALL is used for BL and BLX instructions, for symbols of type + // STT_FUNC we choose whether to write a BL or BLX depending on the + // value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is + // not of type STT_FUNC then we must preserve the original instruction. + // PLT entries are always ARM state so we know we don't need to interwork. + assert(rel.sym); // R_ARM_CALL is always reached via relocate(). + bool bit0Thumb = val & 1; + bool isBlx = (read32le(loc) & 0xfe000000) == 0xfa000000; + // lld 10.0 and before always used bit0Thumb when deciding to write a BLX + // even when type not STT_FUNC. + if (!rel.sym->isFunc() && isBlx != bit0Thumb) + stateChangeWarning(loc, rel.type, *rel.sym); + if (rel.sym->isFunc() ? 
bit0Thumb : isBlx) { // The BLX encoding is 0xfa:H:imm24 where Val = imm24:H:'1' - checkInt(loc, val, 26, type); + checkInt(loc, val, 26, rel); write32le(loc, 0xfa000000 | // opcode ((val & 2) << 23) | // H ((val >> 2) & 0x00ffffff)); // imm24 break; } - if ((read32le(loc) & 0xfe000000) == 0xfa000000) - // BLX (always unconditional) instruction to an ARM Target, select an - // unconditional BL. - write32le(loc, 0xeb000000 | (read32le(loc) & 0x00ffffff)); + // BLX (always unconditional) instruction to an ARM Target, select an + // unconditional BL. + write32le(loc, 0xeb000000 | (read32le(loc) & 0x00ffffff)); // fall through as BL encoding is shared with B + } LLVM_FALLTHROUGH; case R_ARM_JUMP24: case R_ARM_PC24: case R_ARM_PLT32: - checkInt(loc, val, 26, type); + checkInt(loc, val, 26, rel); write32le(loc, (read32le(loc) & ~0x00ffffff) | ((val >> 2) & 0x00ffffff)); break; case R_ARM_THM_JUMP11: - checkInt(loc, val, 12, type); + checkInt(loc, val, 12, rel); write16le(loc, (read32le(loc) & 0xf800) | ((val >> 1) & 0x07ff)); break; case R_ARM_THM_JUMP19: // Encoding T3: Val = S:J2:J1:imm6:imm11:0 - checkInt(loc, val, 21, type); + checkInt(loc, val, 21, rel); write16le(loc, (read16le(loc) & 0xfbc0) | // opcode cond ((val >> 10) & 0x0400) | // S @@ -441,20 +541,32 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { ((val >> 5) & 0x2000) | // J1 ((val >> 1) & 0x07ff)); // imm11 break; - case R_ARM_THM_CALL: - // R_ARM_THM_CALL is used for BL and BLX instructions, depending on the - // value of bit 0 of Val, we must select a BL or BLX instruction - if ((val & 1) == 0) { - // Ensure BLX destination is 4-byte aligned. As BLX instruction may - // only be two byte aligned. This must be done before overflow check + case R_ARM_THM_CALL: { + // R_ARM_THM_CALL is used for BL and BLX instructions, for symbols of type + // STT_FUNC we choose whether to write a BL or BLX depending on the + // value of bit 0 of Val. With bit 0 == 0 denoting ARM, if the symbol is + // not of type STT_FUNC then we must preserve the original instruction. + // PLT entries are always ARM state so we know we need to interwork. + assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate(). + bool bit0Thumb = val & 1; + bool isBlx = (read16le(loc + 2) & 0x1000) == 0; + // lld 10.0 and before always used bit0Thumb when deciding to write a BLX + // even when type not STT_FUNC. PLT entries generated by LLD are always ARM. + if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == bit0Thumb) + stateChangeWarning(loc, rel.type, *rel.sym); + if (rel.sym->isFunc() || rel.sym->isInPlt() ? !bit0Thumb : isBlx) { + // We are writing a BLX. Ensure BLX destination is 4-byte aligned. As + // the BLX instruction may only be two byte aligned. This must be done + // before overflow check. val = alignTo(val, 4); + write16le(loc + 2, read16le(loc + 2) & ~0x1000); + } else { + write16le(loc + 2, (read16le(loc + 2) & ~0x1000) | 1 << 12); } - // Bit 12 is 0 for BLX, 1 for BL - write16le(loc + 2, (read16le(loc + 2) & ~0x1000) | (val & 1) << 12); if (!config->armJ1J2BranchEncoding) { // Older Arm architectures do not support R_ARM_THM_JUMP24 and have // different encoding rules and range due to J1 and J2 always being 1. 
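As an aside (a sketch, not part of the commit): the BL/BLX selection that R_ARM_CALL and R_ARM_THM_CALL now share reduces to one predicate. Names are illustrative; the commit inlines this logic.

  #include <cstdint>

  // An A32 BLX (immediate) is unconditional; the hunk above recognizes it
  // by (insn & 0xfe000000) == 0xfa000000, which covers both H-bit values.
  bool isA32Blx(uint32_t insn) { return (insn & 0xfe000000) == 0xfa000000; }

  // For STT_FUNC symbols, bit 0 of the destination selects the target
  // state (1 = Thumb), so an ARM-state caller needs BLX. For non-function
  // symbols the original instruction is preserved, and a mismatch between
  // it and bit 0 only produces the interworking warning.
  bool shouldWriteBlx(uint32_t insn, uint64_t dest, bool symIsFunc) {
    bool bit0Thumb = dest & 1;
    return symIsFunc ? bit0Thumb : isA32Blx(insn);
  }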
- checkInt(loc, val, 23, type); + checkInt(loc, val, 23, rel); write16le(loc, 0xf000 | // opcode ((val >> 12) & 0x07ff)); // imm11 @@ -464,11 +576,12 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { ((val >> 1) & 0x07ff)); // imm11 break; } + } // Fall through as rest of encoding is the same as B.W LLVM_FALLTHROUGH; case R_ARM_THM_JUMP24: // Encoding B T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0 - checkInt(loc, val, 25, type); + checkInt(loc, val, 25, rel); write16le(loc, 0xf000 | // opcode ((val >> 14) & 0x0400) | // S @@ -481,16 +594,19 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_ARM_MOVW_ABS_NC: case R_ARM_MOVW_PREL_NC: + case R_ARM_MOVW_BREL_NC: write32le(loc, (read32le(loc) & ~0x000f0fff) | ((val & 0xf000) << 4) | (val & 0x0fff)); break; case R_ARM_MOVT_ABS: case R_ARM_MOVT_PREL: + case R_ARM_MOVT_BREL: write32le(loc, (read32le(loc) & ~0x000f0fff) | (((val >> 16) & 0xf000) << 4) | ((val >> 16) & 0xfff)); break; case R_ARM_THM_MOVT_ABS: case R_ARM_THM_MOVT_PREL: + case R_ARM_THM_MOVT_BREL: // Encoding T1: A = imm4:i:imm3:imm8 write16le(loc, 0xf2c0 | // opcode @@ -503,6 +619,7 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_ARM_THM_MOVW_ABS_NC: case R_ARM_THM_MOVW_PREL_NC: + case R_ARM_THM_MOVW_BREL_NC: // Encoding T3: A = imm4:i:imm3:imm8 write16le(loc, 0xf240 | // opcode @@ -513,8 +630,92 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { ((val << 4) & 0x7000) | // imm3 (val & 0x00ff)); // imm8 break; + case R_ARM_ALU_PC_G0: { + // ADR (literal) add = bit23, sub = bit22 + // literal is a 12-bit modified immediate, made up of a 4-bit even rotate + // right and an 8-bit immediate. The code-sequence here is derived from + // ARMAddressingModes.h in llvm/Target/ARM/MCTargetDesc. In our case we + // want to give an error if we cannot encode the constant. + uint32_t opcode = 0x00800000; + if (val >> 63) { + opcode = 0x00400000; + val = ~val + 1; + } + if ((val & ~255U) != 0) { + uint32_t rotAmt = getSOImmValRotate(val); + // Error if we cannot encode this with a single shift + if (rotr32(~255U, rotAmt) & val) + error(getErrorLocation(loc) + "unencodeable immediate " + + Twine(val).str() + " for relocation " + toString(rel.type)); + val = rotl32(val, rotAmt) | ((rotAmt >> 1) << 8); + } + write32le(loc, (read32le(loc) & 0xff0ff000) | opcode | val); + break; + } + case R_ARM_LDR_PC_G0: { + // R_ARM_LDR_PC_G0 is S + A - P, we have ((S + A) | T) - P, if S is a + // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear + // bottom bit to recover S + A - P. 
+ if (rel.sym->isFunc()) + val &= ~0x1; + // LDR (literal) u = bit23 + int64_t imm = val; + uint32_t u = 0x00800000; + if (imm < 0) { + imm = -imm; + u = 0; + } + checkUInt(loc, imm, 12, rel); + write32le(loc, (read32le(loc) & 0xff7ff000) | u | imm); + break; + } + case R_ARM_THM_ALU_PREL_11_0: { + // ADR encoding T2 (sub), T3 (add) i:imm3:imm8 + int64_t imm = val; + uint16_t sub = 0; + if (imm < 0) { + imm = -imm; + sub = 0x00a0; + } + checkUInt(loc, imm, 12, rel); + write16le(loc, (read16le(loc) & 0xfb0f) | sub | (imm & 0x800) >> 1); + write16le(loc + 2, + (read16le(loc + 2) & 0x8f00) | (imm & 0x700) << 4 | (imm & 0xff)); + break; + } + case R_ARM_THM_PC8: + // ADR and LDR literal encoding T1 positive offset only imm8:00 + // R_ARM_THM_PC8 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a + // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear + // bottom bit to recover S + A - Pa. + if (rel.sym->isFunc()) + val &= ~0x1; + checkUInt(loc, val, 10, rel); + checkAlignment(loc, val, 4, rel); + write16le(loc, (read16le(loc) & 0xff00) | (val & 0x3fc) >> 2); + break; + case R_ARM_THM_PC12: { + // LDR (literal) encoding T2, add = (U == '1') imm12 + // imm12 is unsigned + // R_ARM_THM_PC12 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a + // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear + // bottom bit to recover S + A - Pa. + if (rel.sym->isFunc()) + val &= ~0x1; + int64_t imm12 = val; + uint16_t u = 0x0080; + if (imm12 < 0) { + imm12 = -imm12; + u = 0; + } + checkUInt(loc, imm12, 12, rel); + write16le(loc, read16le(loc) | u); + write16le(loc + 2, (read16le(loc + 2) & 0xf000) | imm12); + break; + } default: - error(getErrorLocation(loc) + "unrecognized relocation " + toString(type)); + error(getErrorLocation(loc) + "unrecognized relocation " + + toString(rel.type)); } } @@ -582,14 +783,18 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const { case R_ARM_MOVW_ABS_NC: case R_ARM_MOVT_ABS: case R_ARM_MOVW_PREL_NC: - case R_ARM_MOVT_PREL: { + case R_ARM_MOVT_PREL: + case R_ARM_MOVW_BREL_NC: + case R_ARM_MOVT_BREL: { uint64_t val = read32le(buf) & 0x000f0fff; return SignExtend64<16>(((val & 0x000f0000) >> 4) | (val & 0x00fff)); } case R_ARM_THM_MOVW_ABS_NC: case R_ARM_THM_MOVT_ABS: case R_ARM_THM_MOVW_PREL_NC: - case R_ARM_THM_MOVT_PREL: { + case R_ARM_THM_MOVT_PREL: + case R_ARM_THM_MOVW_BREL_NC: + case R_ARM_THM_MOVT_BREL: { // Encoding T3: A = imm4:i:imm3:imm8 uint16_t hi = read16le(buf); uint16_t lo = read16le(buf + 2); @@ -598,13 +803,50 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const { ((lo & 0x7000) >> 4) | // imm3 (lo & 0x00ff)); // imm8 } + case R_ARM_ALU_PC_G0: { + // 12-bit immediate is a modified immediate made up of a 4-bit even + // right rotation and 8-bit constant. After the rotation the value + // is zero-extended. When bit 23 is set the instruction is an add, when + // bit 22 is set it is a sub. + uint32_t instr = read32le(buf); + uint32_t val = rotr32(instr & 0xff, ((instr & 0xf00) >> 8) * 2); + return (instr & 0x00400000) ? -val : val; + } + case R_ARM_LDR_PC_G0: { + // ADR (literal) add = bit23, sub = bit22 + // LDR (literal) u = bit23 unsigned imm12 + bool u = read32le(buf) & 0x00800000; + uint32_t imm12 = read32le(buf) & 0xfff; + return u ? imm12 : -imm12; + } + case R_ARM_THM_ALU_PREL_11_0: { + // Thumb2 ADR, which is an alias for a sub or add instruction with an + // unsigned immediate. 
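As an aside (a sketch, not part of the commit): the R_ARM_ALU_PC_G0 addend decode above is the inverse of getSOImmValRotate() from earlier in this file, and the round trip is worth checking once by hand.

  #include <cassert>
  #include <cstdint>

  static uint32_t rotr32(uint32_t val, uint32_t amt) {
    return (val >> amt) | (val << ((32 - amt) & 31));
  }

  // An A32 data-processing immediate is rot4:imm8, decoded as imm8
  // rotated right by 2 * rot4 and zero-extended, as in the hunk above.
  uint32_t decodeModImm(uint32_t imm12) {
    return rotr32(imm12 & 0xff, ((imm12 & 0xf00) >> 8) * 2);
  }

  int main() {
    assert(decodeModImm(0x0ff) == 0xffu);       // rot4 = 0: plain imm8
    assert(decodeModImm(0x4ff) == 0xff000000u); // rot4 = 4: rotate by 8
    return 0;
  }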
+ // ADR encoding T2 (sub), T3 (add) i:imm3:imm8 + uint16_t hi = read16le(buf); + uint16_t lo = read16le(buf + 2); + uint64_t imm = (hi & 0x0400) << 1 | // i + (lo & 0x7000) >> 4 | // imm3 + (lo & 0x00ff); // imm8 + // For sub, addend is negative, add is positive. + return (hi & 0x00f0) ? -imm : imm; + } + case R_ARM_THM_PC8: + // ADR and LDR (literal) encoding T1 + // From ELF for the ARM Architecture the initial signed addend is formed + // from an unsigned field using expression (((imm8:00 + 4) & 0x3ff) – 4) + // this trick permits the PC bias of -4 to be encoded using imm8 = 0xff + return ((((read16le(buf) & 0xff) << 2) + 4) & 0x3ff) - 4; + case R_ARM_THM_PC12: { + // LDR (literal) encoding T2, add = (U == '1') imm12 + bool u = read16le(buf) & 0x0080; + uint64_t imm12 = read16le(buf + 2) & 0x0fff; + return u ? imm12 : -imm12; + } } } -TargetInfo *getARMTargetInfo() { +TargetInfo *elf::getARMTargetInfo() { static ARM target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/AVR.cpp b/lld/ELF/Arch/AVR.cpp index cb33ff448ba4..4513a970b32d 100644 --- a/lld/ELF/Arch/AVR.cpp +++ b/lld/ELF/Arch/AVR.cpp @@ -36,9 +36,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class AVR final : public TargetInfo { @@ -46,7 +45,8 @@ public: AVR(); RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -54,11 +54,131 @@ AVR::AVR() { noneRel = R_AVR_NONE; } RelExpr AVR::getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const { - return R_ABS; + switch (type) { + case R_AVR_7_PCREL: + case R_AVR_13_PCREL: + return R_PC; + default: + return R_ABS; + } } -void AVR::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +static void writeLDI(uint8_t *loc, uint64_t val) { + write16le(loc, (read16le(loc) & 0xf0f0) | (val & 0xf0) << 4 | (val & 0x0f)); +} + +void AVR::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { + case R_AVR_8: + checkUInt(loc, val, 8, rel); + *loc = val; + break; + case R_AVR_16: + // Note: this relocation is often used between code and data space, which + // are 0x800000 apart in the output ELF file. The bitmask cuts off the high + // bit. 
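As an aside (a sketch, not part of the commit): writeLDI() above scatters an 8-bit constant into the two nibble fields of AVR's LDI encoding (1110 KKKK dddd KKKK), which every LDI-family relocation in this switch reuses.

  #include <cstdint>

  // Patch the constant K of an "ldi Rd, K" instruction, preserving the
  // opcode nibble and the destination-register nibble.
  uint16_t patchLdi(uint16_t insn, uint8_t k) {
    return (insn & 0xf0f0) | ((k & 0xf0) << 4) | (k & 0x0f);
  }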
+ write16le(loc, val & 0xffff); + break; + case R_AVR_16_PM: + checkAlignment(loc, val, 2, rel); + checkUInt(loc, val >> 1, 16, rel); + write16le(loc, val >> 1); + break; + case R_AVR_32: + checkUInt(loc, val, 32, rel); + write32le(loc, val); + break; + + case R_AVR_LDI: + checkUInt(loc, val, 8, rel); + writeLDI(loc, val & 0xff); + break; + + case R_AVR_LO8_LDI_NEG: + writeLDI(loc, -val & 0xff); + break; + case R_AVR_LO8_LDI: + writeLDI(loc, val & 0xff); + break; + case R_AVR_HI8_LDI_NEG: + writeLDI(loc, (-val >> 8) & 0xff); + break; + case R_AVR_HI8_LDI: + writeLDI(loc, (val >> 8) & 0xff); + break; + case R_AVR_HH8_LDI_NEG: + writeLDI(loc, (-val >> 16) & 0xff); + break; + case R_AVR_HH8_LDI: + writeLDI(loc, (val >> 16) & 0xff); + break; + case R_AVR_MS8_LDI_NEG: + writeLDI(loc, (-val >> 24) & 0xff); + break; + case R_AVR_MS8_LDI: + writeLDI(loc, (val >> 24) & 0xff); + break; + + case R_AVR_LO8_LDI_PM: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (val >> 1) & 0xff); + break; + case R_AVR_HI8_LDI_PM: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (val >> 9) & 0xff); + break; + case R_AVR_HH8_LDI_PM: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (val >> 17) & 0xff); + break; + + case R_AVR_LO8_LDI_PM_NEG: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (-val >> 1) & 0xff); + break; + case R_AVR_HI8_LDI_PM_NEG: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (-val >> 9) & 0xff); + break; + case R_AVR_HH8_LDI_PM_NEG: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (-val >> 17) & 0xff); + break; + + case R_AVR_PORT5: + checkUInt(loc, val, 5, rel); + write16le(loc, (read16le(loc) & 0xff07) | (val << 3)); + break; + case R_AVR_PORT6: + checkUInt(loc, val, 6, rel); + write16le(loc, (read16le(loc) & 0xf9f0) | (val & 0x30) << 5 | (val & 0x0f)); + break; + + // Since every jump destination is word aligned we gain an extra bit + case R_AVR_7_PCREL: { + checkInt(loc, val, 7, rel); + checkAlignment(loc, val, 2, rel); + const uint16_t target = (val - 2) >> 1; + write16le(loc, (read16le(loc) & 0xfc07) | ((target & 0x7f) << 3)); + break; + } + case R_AVR_13_PCREL: { + checkAlignment(loc, val, 2, rel); + const uint16_t target = (val - 2) >> 1; + write16le(loc, (read16le(loc) & 0xf000) | (target & 0xfff)); + break; + } + + case R_AVR_6: + checkInt(loc, val, 6, rel); + write16le(loc, (read16le(loc) & 0xd3f8) | (val & 0x20) << 8 | + (val & 0x18) << 7 | (val & 0x07)); + break; + case R_AVR_6_ADIW: + checkInt(loc, val, 6, rel); + write16le(loc, (read16le(loc) & 0xff30) | (val & 0x30) << 2 | (val & 0x0F)); + break; + case R_AVR_CALL: { uint16_t hi = val >> 17; uint16_t lo = val >> 1; @@ -67,14 +187,12 @@ void AVR::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; } default: - error(getErrorLocation(loc) + "unrecognized relocation " + toString(type)); + error(getErrorLocation(loc) + "unrecognized relocation " + + toString(rel.type)); } } -TargetInfo *getAVRTargetInfo() { +TargetInfo *elf::getAVRTargetInfo() { static AVR target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp index 106bc9bab5bd..7740ce9a71e0 100644 --- a/lld/ELF/Arch/Hexagon.cpp +++ b/lld/ELF/Arch/Hexagon.cpp @@ -19,9 +19,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class Hexagon final : public TargetInfo { @@ -31,7 +30,8 @@ public: RelExpr getRelExpr(RelType 
type, const Symbol &s, const uint8_t *loc) const override; RelType getDynRel(RelType type) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; void writePltHeader(uint8_t *buf) const override; void writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; @@ -55,6 +55,8 @@ Hexagon::Hexagon() { defaultMaxPageSize = 0x10000; noneRel = R_HEX_NONE; tlsGotRel = R_HEX_TPREL_32; + tlsModuleIndexRel = R_HEX_DTPMOD_32; + tlsOffsetRel = R_HEX_DTPREL_32; } uint32_t Hexagon::calcEFlags() const { @@ -102,6 +104,7 @@ RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s, case R_HEX_32_6_X: case R_HEX_HI16: case R_HEX_LO16: + case R_HEX_DTPREL_32: return R_ABS; case R_HEX_B9_PCREL: case R_HEX_B13_PCREL: @@ -115,12 +118,19 @@ RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s, case R_HEX_PLT_B22_PCREL: case R_HEX_B22_PCREL_X: case R_HEX_B32_PCREL_X: + case R_HEX_GD_PLT_B22_PCREL: + case R_HEX_GD_PLT_B22_PCREL_X: + case R_HEX_GD_PLT_B32_PCREL_X: return R_PLT_PC; case R_HEX_IE_32_6_X: case R_HEX_IE_16_X: case R_HEX_IE_HI16: case R_HEX_IE_LO16: return R_GOT; + case R_HEX_GD_GOT_11_X: + case R_HEX_GD_GOT_16_X: + case R_HEX_GD_GOT_32_6_X: + return R_TLSGD_GOTPLT; case R_HEX_GOTREL_11_X: case R_HEX_GOTREL_16_X: case R_HEX_GOTREL_32_6_X: @@ -152,6 +162,13 @@ RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s, } } +static bool isDuplex(uint32_t insn) { + // Duplex forms have a fixed mask and parse bits 15:14 are always + // zero. Non-duplex insns will always have at least one bit set in the + // parse field. + return (0xC000 & insn) == 0; +} + static uint32_t findMaskR6(uint32_t insn) { // There are (arguably too) many relocation masks for the DSP's // R_HEX_6_X type. The table below is used to select the correct mask @@ -176,10 +193,7 @@ static uint32_t findMaskR6(uint32_t insn) { {0xd7000000, 0x006020e0}, {0xd8000000, 0x006020e0}, {0xdb000000, 0x006020e0}, {0xdf000000, 0x006020e0}}; - // Duplex forms have a fixed mask and parse bits 15:14 are always - // zero. Non-duplex insns will always have at least one bit set in the - // parse field. - if ((0xC000 & insn) == 0x0) + if (isDuplex(insn)) return 0x03f00000; for (InstructionMask i : r6) @@ -215,6 +229,9 @@ static uint32_t findMaskR16(uint32_t insn) { if ((0xff000000 & insn) == 0xb0000000) return 0x0fe03fe0; + if (isDuplex(insn)) + return 0x03f00000; + error("unrecognized instruction for R_HEX_16_X relocation: 0x" + utohexstr(insn)); return 0; @@ -222,8 +239,9 @@ static uint32_t findMaskR16(uint32_t insn) { static void or32le(uint8_t *p, int32_t v) { write32le(p, read32le(p) | v); } -void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void Hexagon::relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_HEX_NONE: break; case R_HEX_6_PCREL_X: @@ -240,6 +258,7 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { or32le(loc, applyMask(0x00203fe0, val & 0x3f)); break; case R_HEX_11_X: + case R_HEX_GD_GOT_11_X: case R_HEX_IE_GOT_11_X: case R_HEX_GOT_11_X: case R_HEX_GOTREL_11_X: @@ -252,6 +271,7 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_HEX_16_X: // These relocs only have 6 effective bits. 
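As an aside (a sketch, not part of the commit): most cases in this switch funnel through applyMask(mask, value), which deposits immediate bits, in order, into the instruction-bit positions the mask selects; the findMaskR6/findMaskR16 helpers above only pick which mask applies. Assumed shape of the deposit loop:

  #include <cstddef>
  #include <cstdint>

  uint32_t applyMask(uint32_t mask, uint32_t val) {
    uint32_t result = 0;
    for (size_t i = 0; i != 32; ++i) {
      if ((mask >> i) & 1) {      // this instruction bit holds an imm bit
        result |= (val & 1) << i; // deposit the next immediate bit
        val >>= 1;
      }
    }
    return result;
  }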
case R_HEX_IE_16_X: case R_HEX_IE_GOT_16_X: + case R_HEX_GD_GOT_16_X: case R_HEX_GOT_16_X: case R_HEX_GOTREL_16_X: case R_HEX_TPREL_16_X: @@ -262,9 +282,11 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_HEX_32: case R_HEX_32_PCREL: + case R_HEX_DTPREL_32: or32le(loc, val); break; case R_HEX_32_6_X: + case R_HEX_GD_GOT_32_6_X: case R_HEX_GOT_32_6_X: case R_HEX_GOTREL_32_6_X: case R_HEX_IE_GOT_32_6_X: @@ -273,32 +295,35 @@ void Hexagon::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { or32le(loc, applyMask(0x0fff3fff, val >> 6)); break; case R_HEX_B9_PCREL: - checkInt(loc, val, 11, type); + checkInt(loc, val, 11, rel); or32le(loc, applyMask(0x003000fe, val >> 2)); break; case R_HEX_B9_PCREL_X: or32le(loc, applyMask(0x003000fe, val & 0x3f)); break; case R_HEX_B13_PCREL: - checkInt(loc, val, 15, type); + checkInt(loc, val, 15, rel); or32le(loc, applyMask(0x00202ffe, val >> 2)); break; case R_HEX_B15_PCREL: - checkInt(loc, val, 17, type); + checkInt(loc, val, 17, rel); or32le(loc, applyMask(0x00df20fe, val >> 2)); break; case R_HEX_B15_PCREL_X: or32le(loc, applyMask(0x00df20fe, val & 0x3f)); break; case R_HEX_B22_PCREL: + case R_HEX_GD_PLT_B22_PCREL: case R_HEX_PLT_B22_PCREL: - checkInt(loc, val, 22, type); + checkInt(loc, val, 22, rel); or32le(loc, applyMask(0x1ff3ffe, val >> 2)); break; case R_HEX_B22_PCREL_X: + case R_HEX_GD_PLT_B22_PCREL_X: or32le(loc, applyMask(0x1ff3ffe, val & 0x3f)); break; case R_HEX_B32_PCREL_X: + case R_HEX_GD_PLT_B32_PCREL_X: or32le(loc, applyMask(0x0fff3fff, val >> 6)); break; case R_HEX_GOTREL_HI16: @@ -335,8 +360,8 @@ void Hexagon::writePltHeader(uint8_t *buf) const { // Offset from PLT0 to the GOT. uint64_t off = in.gotPlt->getVA() - in.plt->getVA(); - relocateOne(buf, R_HEX_B32_PCREL_X, off); - relocateOne(buf + 4, R_HEX_6_PCREL_X, off); + relocateNoSym(buf, R_HEX_B32_PCREL_X, off); + relocateNoSym(buf + 4, R_HEX_6_PCREL_X, off); } void Hexagon::writePlt(uint8_t *buf, const Symbol &sym, @@ -350,8 +375,8 @@ void Hexagon::writePlt(uint8_t *buf, const Symbol &sym, memcpy(buf, inst, sizeof(inst)); uint64_t gotPltEntryAddr = sym.getGotPltVA(); - relocateOne(buf, R_HEX_B32_PCREL_X, gotPltEntryAddr - pltEntryAddr); - relocateOne(buf + 4, R_HEX_6_PCREL_X, gotPltEntryAddr - pltEntryAddr); + relocateNoSym(buf, R_HEX_B32_PCREL_X, gotPltEntryAddr - pltEntryAddr); + relocateNoSym(buf + 4, R_HEX_6_PCREL_X, gotPltEntryAddr - pltEntryAddr); } RelType Hexagon::getDynRel(RelType type) const { @@ -360,10 +385,7 @@ RelType Hexagon::getDynRel(RelType type) const { return R_HEX_NONE; } -TargetInfo *getHexagonTargetInfo() { +TargetInfo *elf::getHexagonTargetInfo() { static Hexagon target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/MSP430.cpp b/lld/ELF/Arch/MSP430.cpp index f03e8181923b..4af90b40a346 100644 --- a/lld/ELF/Arch/MSP430.cpp +++ b/lld/ELF/Arch/MSP430.cpp @@ -26,9 +26,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class MSP430 final : public TargetInfo { @@ -36,7 +35,8 @@ public: MSP430(); RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -60,38 +60,36 @@ RelExpr MSP430::getRelExpr(RelType type, 
const Symbol &s, } } -void MSP430::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void MSP430::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { case R_MSP430_8: - checkIntUInt(loc, val, 8, type); + checkIntUInt(loc, val, 8, rel); *loc = val; break; case R_MSP430_16: case R_MSP430_16_PCREL: case R_MSP430_16_BYTE: case R_MSP430_16_PCREL_BYTE: - checkIntUInt(loc, val, 16, type); + checkIntUInt(loc, val, 16, rel); write16le(loc, val); break; case R_MSP430_32: - checkIntUInt(loc, val, 32, type); + checkIntUInt(loc, val, 32, rel); write32le(loc, val); break; case R_MSP430_10_PCREL: { int16_t offset = ((int16_t)val >> 1) - 1; - checkInt(loc, offset, 10, type); + checkInt(loc, offset, 10, rel); write16le(loc, (read16le(loc) & 0xFC00) | (offset & 0x3FF)); break; } default: - error(getErrorLocation(loc) + "unrecognized relocation " + toString(type)); + error(getErrorLocation(loc) + "unrecognized relocation " + + toString(rel.type)); } } -TargetInfo *getMSP430TargetInfo() { +TargetInfo *elf::getMSP430TargetInfo() { static MSP430 target; return ⌖ } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp index ed6f4ca24130..fd1c5f507734 100644 --- a/lld/ELF/Arch/Mips.cpp +++ b/lld/ELF/Arch/Mips.cpp @@ -18,9 +18,9 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; +using namespace lld; +using namespace lld::elf; -namespace lld { -namespace elf { namespace { template <class ELFT> class MIPS final : public TargetInfo { public: @@ -37,7 +37,8 @@ public: bool needsThunk(RelExpr expr, RelType type, const InputFile *file, uint64_t branchAddr, const Symbol &s, int64_t a) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; bool usesOnlyLowPageBits(RelType type) const override; }; } // namespace @@ -274,12 +275,12 @@ template <class ELFT> void MIPS<ELFT>::writePltHeader(uint8_t *buf) const { write16(buf + 18, 0x0f83); // move $28, $3 write16(buf + 20, 0x472b); // jalrc $25 write16(buf + 22, 0x0c00); // nop - relocateOne(buf, R_MICROMIPS_PC19_S2, gotPlt - plt); + relocateNoSym(buf, R_MICROMIPS_PC19_S2, gotPlt - plt); } else { write16(buf + 18, 0x45f9); // jalrc $25 write16(buf + 20, 0x0f83); // move $28, $3 write16(buf + 22, 0x0c00); // nop - relocateOne(buf, R_MICROMIPS_PC23_S2, gotPlt - plt); + relocateNoSym(buf, R_MICROMIPS_PC23_S2, gotPlt - plt); } return; } @@ -330,13 +331,13 @@ void MIPS<ELFT>::writePlt(uint8_t *buf, const Symbol &sym, write16(buf + 4, 0xff22); // lw $25, 0($2) write16(buf + 8, 0x0f02); // move $24, $2 write16(buf + 10, 0x4723); // jrc $25 / jr16 $25 - relocateOne(buf, R_MICROMIPS_PC19_S2, gotPltEntryAddr - pltEntryAddr); + relocateNoSym(buf, R_MICROMIPS_PC19_S2, gotPltEntryAddr - pltEntryAddr); } else { write16(buf, 0x7900); // addiupc $2, (GOTPLT) - . 
write16(buf + 4, 0xff22); // lw $25, 0($2) write16(buf + 8, 0x4599); // jrc $25 / jr16 $25 write16(buf + 10, 0x0f02); // move $24, $2 - relocateOne(buf, R_MICROMIPS_PC23_S2, gotPltEntryAddr - pltEntryAddr); + relocateNoSym(buf, R_MICROMIPS_PC23_S2, gotPltEntryAddr - pltEntryAddr); } return; } @@ -537,8 +538,10 @@ static uint64_t fixupCrossModeJump(uint8_t *loc, RelType type, uint64_t val) { } template <class ELFT> -void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { +void MIPS<ELFT>::relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const { const endianness e = ELFT::TargetEndianness; + RelType type = rel.type; if (ELFT::Is64Bits || config->mipsN32Abi) std::tie(type, val) = calculateMipsRelChain(loc, type, val); @@ -577,7 +580,7 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { if (config->relocatable) { writeValue(loc, val + 0x8000, 16, 16); } else { - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); writeValue(loc, val, 16, 0); } break; @@ -585,7 +588,7 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { if (config->relocatable) { writeShuffleValue<e>(loc, val + 0x8000, 16, 16); } else { - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); writeShuffleValue<e>(loc, val, 16, 0); } break; @@ -596,7 +599,7 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_MIPS_TLS_GD: case R_MIPS_TLS_GOTTPREL: case R_MIPS_TLS_LDM: - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); LLVM_FALLTHROUGH; case R_MIPS_CALL_LO16: case R_MIPS_GOT_LO16: @@ -610,7 +613,7 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_MICROMIPS_GPREL16: case R_MICROMIPS_TLS_GD: case R_MICROMIPS_TLS_LDM: - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); writeShuffleValue<e>(loc, val, 16, 0); break; case R_MICROMIPS_CALL16: @@ -622,7 +625,7 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { writeShuffleValue<e>(loc, val, 16, 0); break; case R_MICROMIPS_GPREL7_S2: - checkInt(loc, val, 7, type); + checkInt(loc, val, 7, rel); writeShuffleValue<e>(loc, val, 7, 2); break; case R_MIPS_CALL_HI16: @@ -665,23 +668,23 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { // Ignore this optimization relocation for now break; case R_MIPS_PC16: - checkAlignment(loc, val, 4, type); - checkInt(loc, val, 18, type); + checkAlignment(loc, val, 4, rel); + checkInt(loc, val, 18, rel); writeValue(loc, val, 16, 2); break; case R_MIPS_PC19_S2: - checkAlignment(loc, val, 4, type); - checkInt(loc, val, 21, type); + checkAlignment(loc, val, 4, rel); + checkInt(loc, val, 21, rel); writeValue(loc, val, 19, 2); break; case R_MIPS_PC21_S2: - checkAlignment(loc, val, 4, type); - checkInt(loc, val, 23, type); + checkAlignment(loc, val, 4, rel); + checkInt(loc, val, 23, rel); writeValue(loc, val, 21, 2); break; case R_MIPS_PC26_S2: - checkAlignment(loc, val, 4, type); - checkInt(loc, val, 28, type); + checkAlignment(loc, val, 4, rel); + checkInt(loc, val, 28, rel); writeValue(loc, val, 26, 2); break; case R_MIPS_PC32: @@ -689,35 +692,35 @@ void MIPS<ELFT>::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_MICROMIPS_26_S1: case R_MICROMIPS_PC26_S1: - checkInt(loc, val, 27, type); + checkInt(loc, val, 27, rel); writeShuffleValue<e>(loc, val, 26, 1); break; case R_MICROMIPS_PC7_S1: - checkInt(loc, val, 8, type); + checkInt(loc, val, 8, rel); 
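As an aside (a sketch, not part of the commit): the writeValue(loc, val, bits, shift) calls paired with each checkInt/checkAlignment above all perform the same field patch; the assumed behavior is masking a bits-wide field out of val >> shift and ORing it into the instruction word.

  #include <cstdint>

  uint32_t patchField(uint32_t insn, uint64_t val, unsigned bits,
                      unsigned shift) {
    uint32_t mask = 0xffffffffu >> (32 - bits);
    return (insn & ~mask) | (uint32_t(val >> shift) & mask);
  }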
writeMicroRelocation16<e>(loc, val, 7, 1); break; case R_MICROMIPS_PC10_S1: - checkInt(loc, val, 11, type); + checkInt(loc, val, 11, rel); writeMicroRelocation16<e>(loc, val, 10, 1); break; case R_MICROMIPS_PC16_S1: - checkInt(loc, val, 17, type); + checkInt(loc, val, 17, rel); writeShuffleValue<e>(loc, val, 16, 1); break; case R_MICROMIPS_PC18_S3: - checkInt(loc, val, 21, type); + checkInt(loc, val, 21, rel); writeShuffleValue<e>(loc, val, 18, 3); break; case R_MICROMIPS_PC19_S2: - checkInt(loc, val, 21, type); + checkInt(loc, val, 21, rel); writeShuffleValue<e>(loc, val, 19, 2); break; case R_MICROMIPS_PC21_S1: - checkInt(loc, val, 22, type); + checkInt(loc, val, 22, rel); writeShuffleValue<e>(loc, val, 21, 1); break; case R_MICROMIPS_PC23_S2: - checkInt(loc, val, 25, type); + checkInt(loc, val, 25, rel); writeShuffleValue<e>(loc, val, 23, 2); break; default: @@ -731,7 +734,7 @@ template <class ELFT> bool MIPS<ELFT>::usesOnlyLowPageBits(RelType type) const { } // Return true if the symbol is a PIC function. -template <class ELFT> bool isMipsPIC(const Defined *sym) { +template <class ELFT> bool elf::isMipsPIC(const Defined *sym) { if (!sym->isFunc()) return false; @@ -749,20 +752,17 @@ template <class ELFT> bool isMipsPIC(const Defined *sym) { return file->getObj().getHeader()->e_flags & EF_MIPS_PIC; } -template <class ELFT> TargetInfo *getMipsTargetInfo() { +template <class ELFT> TargetInfo *elf::getMipsTargetInfo() { static MIPS<ELFT> target; return ⌖ } -template TargetInfo *getMipsTargetInfo<ELF32LE>(); -template TargetInfo *getMipsTargetInfo<ELF32BE>(); -template TargetInfo *getMipsTargetInfo<ELF64LE>(); -template TargetInfo *getMipsTargetInfo<ELF64BE>(); +template TargetInfo *elf::getMipsTargetInfo<ELF32LE>(); +template TargetInfo *elf::getMipsTargetInfo<ELF32BE>(); +template TargetInfo *elf::getMipsTargetInfo<ELF64LE>(); +template TargetInfo *elf::getMipsTargetInfo<ELF64BE>(); -template bool isMipsPIC<ELF32LE>(const Defined *); -template bool isMipsPIC<ELF32BE>(const Defined *); -template bool isMipsPIC<ELF64LE>(const Defined *); -template bool isMipsPIC<ELF64BE>(const Defined *); - -} // namespace elf -} // namespace lld +template bool elf::isMipsPIC<ELF32LE>(const Defined *); +template bool elf::isMipsPIC<ELF32BE>(const Defined *); +template bool elf::isMipsPIC<ELF64LE>(const Defined *); +template bool elf::isMipsPIC<ELF64BE>(const Defined *); diff --git a/lld/ELF/Arch/MipsArchTree.cpp b/lld/ELF/Arch/MipsArchTree.cpp index 923458afae0d..85329c3bef53 100644 --- a/lld/ELF/Arch/MipsArchTree.cpp +++ b/lld/ELF/Arch/MipsArchTree.cpp @@ -23,8 +23,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { struct ArchTreeEdge { @@ -294,7 +294,7 @@ static uint32_t getArchFlags(ArrayRef<FileFlags> files) { return ret; } -template <class ELFT> uint32_t calcMipsEFlags() { +template <class ELFT> uint32_t elf::calcMipsEFlags() { std::vector<FileFlags> v; for (InputFile *f : objectFiles) v.push_back({f, cast<ObjFile<ELFT>>(f)->getObj().getHeader()->e_flags}); @@ -350,7 +350,8 @@ static StringRef getMipsFpAbiName(uint8_t fpAbi) { } } -uint8_t getMipsFpAbiFlag(uint8_t oldFlag, uint8_t newFlag, StringRef fileName) { +uint8_t elf::getMipsFpAbiFlag(uint8_t oldFlag, uint8_t newFlag, + StringRef fileName) { if (compareMipsFpAbi(newFlag, oldFlag) >= 0) return newFlag; if (compareMipsFpAbi(oldFlag, newFlag) < 0) @@ -366,7 +367,7 @@ template <class ELFT> static bool isN32Abi(const 
InputFile *f) { return false; } -bool isMipsN32Abi(const InputFile *f) { +bool elf::isMipsN32Abi(const InputFile *f) { switch (config->ekind) { case ELF32LEKind: return isN32Abi<ELF32LE>(f); @@ -381,17 +382,14 @@ bool isMipsN32Abi(const InputFile *f) { } } -bool isMicroMips() { return config->eflags & EF_MIPS_MICROMIPS; } +bool elf::isMicroMips() { return config->eflags & EF_MIPS_MICROMIPS; } -bool isMipsR6() { +bool elf::isMipsR6() { uint32_t arch = config->eflags & EF_MIPS_ARCH; return arch == EF_MIPS_ARCH_32R6 || arch == EF_MIPS_ARCH_64R6; } -template uint32_t calcMipsEFlags<ELF32LE>(); -template uint32_t calcMipsEFlags<ELF32BE>(); -template uint32_t calcMipsEFlags<ELF64LE>(); -template uint32_t calcMipsEFlags<ELF64BE>(); - -} // namespace elf -} // namespace lld +template uint32_t elf::calcMipsEFlags<ELF32LE>(); +template uint32_t elf::calcMipsEFlags<ELF32BE>(); +template uint32_t elf::calcMipsEFlags<ELF64LE>(); +template uint32_t elf::calcMipsEFlags<ELF64BE>(); diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp index 3c0b0c290b58..a004cf74ddd8 100644 --- a/lld/ELF/Arch/PPC.cpp +++ b/lld/ELF/Arch/PPC.cpp @@ -17,9 +17,8 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class PPC final : public TargetInfo { @@ -44,14 +43,19 @@ public: int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; int getTlsGdRelaxSkip(RelType type) const override; - void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; + void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -66,7 +70,18 @@ static void writeFromHalf16(uint8_t *loc, uint32_t insn) { write32(config->isLE ? loc : loc - 2, insn); } -void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { +void elf::writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { + // Create canonical PLT entries for non-PIE code. Compilers don't generate + // non-GOT-non-PLT relocations referencing external functions for -fpie/-fPIE. + uint32_t glink = in.plt->getVA(); // VA of .glink + if (!config->isPic) { + for (const Symbol *sym : cast<PPC32GlinkSection>(in.plt)->canonical_plts) { + writePPC32PltCallStub(buf, sym->getGotPltVA(), nullptr, 0); + buf += 16; + glink += 16; + } + } + // On PPC Secure PLT ABI, bl foo@plt jumps to a call stub, which loads an // absolute address from a specific .plt slot (usually called .got.plt on // other targets) and jumps there. 
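As a concrete illustration of the stub shape just described (a sketch only: it reuses the write32/ha/lo helpers defined in this file, the function name is invented for the example, and the PIC variants address the .plt slot relative to the GOT pointer or the call site instead of with an absolute hi/lo pair):

static void writePltCallStubSketch(uint8_t *buf, uint32_t gotPltSlotVA) {
  write32(buf + 0, 0x3d600000 | ha(gotPltSlotVA)); // lis r11, slot@ha
  write32(buf + 4, 0x816b0000 | lo(gotPltSlotVA)); // lwz r11, slot@l(r11)
  write32(buf + 8, 0x7d6903a6);                    // mtctr r11
  write32(buf + 12, 0x4e800420);                   // bctr
}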
@@ -85,15 +100,14 @@ void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { // computes the PLT index (by computing the distance from the landing b to // itself) and calls _dl_runtime_resolve() (in glibc). uint32_t got = in.got->getVA(); - uint32_t glink = in.plt->getVA(); // VA of .glink const uint8_t *end = buf + 64; if (config->isPic) { - uint32_t afterBcl = in.plt->getSize() - target->pltHeaderSize + 12; + uint32_t afterBcl = 4 * in.plt->getNumEntries() + 12; uint32_t gotBcl = got + 4 - (glink + afterBcl); write32(buf + 0, 0x3d6b0000 | ha(afterBcl)); // addis r11,r11,1f-glink@ha write32(buf + 4, 0x7c0802a6); // mflr r0 write32(buf + 8, 0x429f0005); // bcl 20,30,.+4 - write32(buf + 12, 0x396b0000 | lo(afterBcl)); // 1: addi r11,r11,1b-.glink@l + write32(buf + 12, 0x396b0000 | lo(afterBcl)); // 1: addi r11,r11,1b-glink@l write32(buf + 16, 0x7d8802a6); // mflr r12 write32(buf + 20, 0x7c0803a6); // mtlr r0 write32(buf + 24, 0x7d6c5850); // sub r11,r11,r12 @@ -113,16 +127,16 @@ void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { buf += 56; } else { write32(buf + 0, 0x3d800000 | ha(got + 4)); // lis r12,GOT+4@ha - write32(buf + 4, 0x3d6b0000 | ha(-glink)); // addis r11,r11,-Glink@ha + write32(buf + 4, 0x3d6b0000 | ha(-glink)); // addis r11,r11,-glink@ha if (ha(got + 4) == ha(got + 8)) write32(buf + 8, 0x800c0000 | lo(got + 4)); // lwz r0,GOT+4@l(r12) else write32(buf + 8, 0x840c0000 | lo(got + 4)); // lwzu r0,GOT+4@l(r12) - write32(buf + 12, 0x396b0000 | lo(-glink)); // addi r11,r11,-Glink@l + write32(buf + 12, 0x396b0000 | lo(-glink)); // addi r11,r11,-glink@l write32(buf + 16, 0x7c0903a6); // mtctr r0 write32(buf + 20, 0x7c0b5a14); // add r0,r11,r11 if (ha(got + 4) == ha(got + 8)) - write32(buf + 24, 0x818c0000 | lo(got + 8)); // lwz r12,GOT+8@ha(r12) + write32(buf + 24, 0x818c0000 | lo(got + 8)); // lwz r12,GOT+8@l(r12) else write32(buf + 24, 0x818c0000 | 4); // lwz r12,4(r12) write32(buf + 28, 0x7d605a14); // add r11,r0,r11 @@ -136,6 +150,7 @@ void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { } PPC::PPC() { + copyRel = R_PPC_COPY; gotRel = R_PPC_GLOB_DAT; noneRel = R_PPC_NONE; pltRel = R_PPC_JMP_SLOT; @@ -145,7 +160,7 @@ PPC::PPC() { gotBaseSymInGotPlt = false; gotHeaderEntriesNum = 3; gotPltHeaderEntriesNum = 0; - pltHeaderSize = 64; // size of PLTresolve in .glink + pltHeaderSize = 0; pltEntrySize = 4; ipltEntrySize = 16; @@ -177,25 +192,25 @@ void PPC::writeGotHeader(uint8_t *buf) const { void PPC::writeGotPlt(uint8_t *buf, const Symbol &s) const { // Address of the symbol resolver stub in .glink . 
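// (Illustrative note, an assumption rather than patch content: the canonical
// PLT entries added above sit at the front of .glink, so the per-symbol
// resolver stubs no longer start at the section base; that appears to be why
// the slot value written below gains an in.plt->headerSize term.)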
- write32(buf, in.plt->getVA() + 4 * s.pltIndex); + write32(buf, in.plt->getVA() + in.plt->headerSize + 4 * s.pltIndex); } bool PPC::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s, int64_t /*a*/) const { - if (type != R_PPC_REL24 && type != R_PPC_PLTREL24) + uint64_t branchAddr, const Symbol &s, int64_t a) const { + if (type != R_PPC_LOCAL24PC && type != R_PPC_REL24 && type != R_PPC_PLTREL24) return false; if (s.isInPlt()) return true; if (s.isUndefWeak()) return false; - return !(expr == R_PC && PPC::inBranchRange(type, branchAddr, s.getVA())); + return !PPC::inBranchRange(type, branchAddr, s.getVA(a)); } uint32_t PPC::getThunkSectionSpacing() const { return 0x2000000; } bool PPC::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { uint64_t offset = dst - src; - if (type == R_PPC_REL24 || type == R_PPC_PLTREL24) + if (type == R_PPC_LOCAL24PC || type == R_PPC_REL24 || type == R_PPC_PLTREL24) return isInt<26>(offset); llvm_unreachable("unsupported relocation type used in branch"); } @@ -218,13 +233,13 @@ RelExpr PPC::getRelExpr(RelType type, const Symbol &s, return R_DTPREL; case R_PPC_REL14: case R_PPC_REL32: - case R_PPC_LOCAL24PC: case R_PPC_REL16_LO: case R_PPC_REL16_HI: case R_PPC_REL16_HA: return R_PC; case R_PPC_GOT16: return R_GOT_OFF; + case R_PPC_LOCAL24PC: case R_PPC_REL24: return R_PLT_PC; case R_PPC_PLTREL24: @@ -277,12 +292,12 @@ static std::pair<RelType, uint64_t> fromDTPREL(RelType type, uint64_t val) { } } -void PPC::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { +void PPC::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { RelType newType; - std::tie(newType, val) = fromDTPREL(type, val); + std::tie(newType, val) = fromDTPREL(rel.type, val); switch (newType) { case R_PPC_ADDR16: - checkIntUInt(loc, val, 16, type); + checkIntUInt(loc, val, 16, rel); write16(loc, val); break; case R_PPC_GOT16: @@ -290,7 +305,7 @@ void PPC::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_PPC_GOT_TLSLD16: case R_PPC_GOT_TPREL16: case R_PPC_TPREL16: - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); write16(loc, val); break; case R_PPC_ADDR16_HA: @@ -326,8 +341,8 @@ void PPC::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_PPC_REL14: { uint32_t mask = 0x0000FFFC; - checkInt(loc, val, 16, type); - checkAlignment(loc, val, 4, type); + checkInt(loc, val, 16, rel); + checkAlignment(loc, val, 4, rel); write32(loc, (read32(loc) & ~mask) | (val & mask)); break; } @@ -335,8 +350,8 @@ void PPC::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_PPC_LOCAL24PC: case R_PPC_PLTREL24: { uint32_t mask = 0x03FFFFFC; - checkInt(loc, val, 26, type); - checkAlignment(loc, val, 4, type); + checkInt(loc, val, 26, rel); + checkAlignment(loc, val, 4, rel); write32(loc, (read32(loc) & ~mask) | (val & mask)); break; } @@ -368,13 +383,14 @@ int PPC::getTlsGdRelaxSkip(RelType type) const { return 1; } -void PPC::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void PPC::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_PPC_GOT_TLSGD16: { // addi rT, rA, x@got@tlsgd --> lwz rT, x@got@tprel(rA) uint32_t insn = readFromHalf16(loc); writeFromHalf16(loc, 0x80000000 | (insn & 0x03ff0000)); - relocateOne(loc, R_PPC_GOT_TPREL16, val); + relocateNoSym(loc, R_PPC_GOT_TPREL16, val); break; } case R_PPC_TLSGD: @@ -386,8 +402,9 @@ void PPC::relaxTlsGdToIe(uint8_t *loc, 
RelType type, uint64_t val) const { } } -void PPC::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void PPC::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_PPC_GOT_TLSGD16: // addi r3, r31, x@got@tlsgd --> addis r3, r2, x@tprel@ha writeFromHalf16(loc, 0x3c620000 | ha(val)); @@ -401,8 +418,9 @@ void PPC::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { } } -void PPC::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void PPC::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_PPC_GOT_TLSLD16: // addi r3, rA, x@got@tlsgd --> addis r3, r2, 0 writeFromHalf16(loc, 0x3c620000); @@ -417,15 +435,16 @@ void PPC::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { case R_PPC_DTPREL16_HA: case R_PPC_DTPREL16_HI: case R_PPC_DTPREL16_LO: - relocateOne(loc, type, val); + relocate(loc, rel, val); break; default: llvm_unreachable("unsupported relocation for TLS LD to LE relaxation"); } } -void PPC::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void PPC::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_PPC_GOT_TPREL16: { // lwz rT, x@got@tprel(rA) --> addis rT, r2, x@tprel@ha uint32_t rt = readFromHalf16(loc) & 0x03e00000; @@ -448,10 +467,7 @@ void PPC::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { } } -TargetInfo *getPPCTargetInfo() { +TargetInfo *elf::getPPCTargetInfo() { static PPC target; return &target; } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index da77a4ddaddf..71c568088fb9 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -6,20 +6,21 @@ // //===----------------------------------------------------------------------===// +#include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "Thunks.h" #include "lld/Common/ErrorHandler.h" +#include "lld/Common/Memory.h" #include "llvm/Support/Endian.h" using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; static uint64_t ppc64TocOffset = 0x8000; static uint64_t dynamicThreadPointerOffset = 0x8000; @@ -61,7 +62,7 @@ enum DFormOpcd { ADDI = 14 }; -uint64_t getPPC64TocBase() { +uint64_t elf::getPPC64TocBase() { // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The // TOC starts where the first of these sections starts. We always create a // .got when we see a relocation that uses it, so for us the start is always @@ -75,7 +76,7 @@ return tocVA + ppc64TocOffset; } -unsigned getPPC64GlobalEntryToLocalEntryOffset(uint8_t stOther) { +unsigned elf::getPPC64GlobalEntryToLocalEntryOffset(uint8_t stOther) { // The offset is encoded into the 3 most significant bits of the st_other // field, with some special values described in section 3.4.1 of the ABI: // 0 --> Zero offset between the GEP and LEP, and the function does NOT use @@ -100,11 +101,89 @@ return 0; } -bool isPPC64SmallCodeModelTocReloc(RelType type) { +bool elf::isPPC64SmallCodeModelTocReloc(RelType type) { // The only small code model relocations that access the .toc section.
return type == R_PPC64_TOC16 || type == R_PPC64_TOC16_DS; } +static bool addOptional(StringRef name, uint64_t value, + std::vector<Defined *> &defined) { + Symbol *sym = symtab->find(name); + if (!sym || sym->isDefined()) + return false; + sym->resolve(Defined{/*file=*/nullptr, saver.save(name), STB_GLOBAL, + STV_HIDDEN, STT_FUNC, value, + /*size=*/0, /*section=*/nullptr}); + defined.push_back(cast<Defined>(sym)); + return true; +} + +// If from is 14, write ${prefix}14: firstInsn; ${prefix}15: +// firstInsn+0x200008; ...; ${prefix}31: firstInsn+(31-14)*0x200008; $tail +// The labels are defined only if they exist in the symbol table. +static void writeSequence(MutableArrayRef<uint32_t> buf, const char *prefix, + int from, uint32_t firstInsn, + ArrayRef<uint32_t> tail) { + std::vector<Defined *> defined; + char name[16]; + int first; + uint32_t *ptr = buf.data(); + for (int r = from; r < 32; ++r) { + format("%s%d", prefix, r).snprint(name, sizeof(name)); + if (addOptional(name, 4 * (r - from), defined) && defined.size() == 1) + first = r - from; + write32(ptr++, firstInsn + 0x200008 * (r - from)); + } + for (uint32_t insn : tail) + write32(ptr++, insn); + assert(ptr == &*buf.end()); + + if (defined.empty()) + return; + // The full section content has the extent of [begin, end). We drop unused + // instructions and write [first,end). + auto *sec = make<InputSection>( + nullptr, SHF_ALLOC, SHT_PROGBITS, 4, + makeArrayRef(reinterpret_cast<uint8_t *>(buf.data() + first), + 4 * (buf.size() - first)), + ".text"); + inputSections.push_back(sec); + for (Defined *sym : defined) { + sym->section = sec; + sym->value -= 4 * first; + } +} + +// Implements some save and restore functions as described by ELF V2 ABI to be +// compatible with GCC. With GCC -Os, when the number of call-saved registers +// exceeds a certain threshold, GCC generates _savegpr0_* _restgpr0_* calls and +// expects the linker to define them. See +// https://sourceware.org/pipermail/binutils/2002-February/017444.html and +// https://sourceware.org/pipermail/binutils/2004-August/036765.html . This is +// weird because libgcc.a would be the natural place. The linker generation +// approach has the advantage that the linker can generate multiple copies to +// avoid long branch thunks. However, we don't consider the advantage +// significant enough to complicate our trunk implementation, so we take the +// simple approach and synthesize .text sections providing the implementation. +void elf::addPPC64SaveRestore() { + static uint32_t savegpr0[20], restgpr0[21], savegpr1[19], restgpr1[19]; + constexpr uint32_t blr = 0x4e800020, mtlr_0 = 0x7c0803a6; + + // _restgpr0_14: ld 14, -144(1); _restgpr0_15: ld 15, -136(1); ... + // Tail: ld 0, 16(1); mtlr 0; blr + writeSequence(restgpr0, "_restgpr0_", 14, 0xe9c1ff70, + {0xe8010010, mtlr_0, blr}); + // _restgpr1_14: ld 14, -144(12); _restgpr1_15: ld 15, -136(12); ... + // Tail: blr + writeSequence(restgpr1, "_restgpr1_", 14, 0xe9ccff70, {blr}); + // _savegpr0_14: std 14, -144(1); _savegpr0_15: std 15, -136(1); ... + // Tail: std 0, 16(1); blr + writeSequence(savegpr0, "_savegpr0_", 14, 0xf9c1ff70, {0xf8010010, blr}); + // _savegpr1_14: std 14, -144(12); _savegpr1_15: std 15, -136(12); ... + // Tail: blr + writeSequence(savegpr1, "_savegpr1_", 14, 0xf9ccff70, {blr}); +} + // Find the R_PPC64_ADDR64 in .rela.toc with matching offset. 
template <typename ELFT> static std::pair<Defined *, int64_t> @@ -137,7 +216,7 @@ getRelaTocSymAndAddend(InputSectionBase *tocSec, uint64_t offset) { // When accessing a symbol defined in another translation unit, compilers // reserve a .toc entry, allocate a local label and generate toc-indirect -// instuctions: +// instructions: // // addis 3, 2, .LC0@toc@ha # R_PPC64_TOC16_HA // ld 3, .LC0@toc@l(3) # R_PPC64_TOC16_LO_DS, load the address from a .toc entry @@ -155,8 +234,7 @@ getRelaTocSymAndAddend(InputSectionBase *tocSec, uint64_t offset) { // ld/lwa 3, 0(3) # load the value from the address // // Returns true if the relaxation is performed. -bool tryRelaxPPC64TocIndirection(RelType type, const Relocation &rel, - uint8_t *bufLoc) { +bool elf::tryRelaxPPC64TocIndirection(const Relocation &rel, uint8_t *bufLoc) { assert(config->tocOptimize); if (rel.addend < 0) return false; @@ -186,8 +264,8 @@ bool tryRelaxPPC64TocIndirection(RelType type, const Relocation &rel, if (!isInt<32>(tocRelative)) return false; - // Add PPC64TocOffset that will be subtracted by relocateOne(). - target->relaxGot(bufLoc, type, tocRelative + ppc64TocOffset); + // Add PPC64TocOffset that will be subtracted by PPC64::relocate(). + target->relaxGot(bufLoc, rel, tocRelative + ppc64TocOffset); return true; } @@ -205,7 +283,8 @@ public: uint64_t pltEntryAddr) const override; void writeIplt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; void writeGotHeader(uint8_t *buf) const override; bool needsThunk(RelExpr expr, RelType type, const InputFile *file, uint64_t branchAddr, const Symbol &s, @@ -214,11 +293,16 @@ public: bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; - void relaxGot(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; + void relaxGot(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, uint8_t stOther) const override; @@ -292,7 +376,22 @@ static uint32_t readFromHalf16(const uint8_t *loc) { return read32(config->isLE ? loc : loc - 2); } +// The prefixed instruction is always a 4 byte prefix followed by a 4 byte +// instruction. Therefore, the prefix is always in lower memory than the +// instruction (regardless of endianness). +// As a result, we need to shift the pieces around on little endian machines. +static void writePrefixedInstruction(uint8_t *loc, uint64_t insn) { + insn = config->isLE ? 
insn << 32 | insn >> 32 : insn; + write64(loc, insn); +} + +static uint64_t readPrefixedInstruction(const uint8_t *loc) { + uint64_t fullInstr = read64(loc); + return config->isLE ? (fullInstr << 32 | fullInstr >> 32) : fullInstr; +} + PPC64::PPC64() { + copyRel = R_PPC64_COPY; gotRel = R_PPC64_GLOB_DAT; noneRel = R_PPC64_NONE; pltRel = R_PPC64_JMP_SLOT; @@ -364,11 +463,11 @@ uint32_t PPC64::calcEFlags() const { return 2; } -void PPC64::relaxGot(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void PPC64::relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { case R_PPC64_TOC16_HA: // Convert "addis reg, 2, .LC0@toc@h" to "addis reg, 2, var@toc@h" or "nop". - relocateOne(loc, type, val); + relocate(loc, rel, val); break; case R_PPC64_TOC16_LO_DS: { // Convert "ld reg, .LC0@toc@l(reg)" to "addi reg, reg, var@toc@l" or @@ -377,7 +476,7 @@ void PPC64::relaxGot(uint8_t *loc, RelType type, uint64_t val) const { if (getPrimaryOpCode(insn) != LD) error("expected a 'ld' for got-indirect to toc-relative relaxing"); writeFromHalf16(loc, (insn & 0x03ffffff) | 0x38000000); - relocateOne(loc, R_PPC64_TOC16_LO, val); + relocateNoSym(loc, R_PPC64_TOC16_LO, val); break; } default: @@ -385,7 +484,8 @@ void PPC64::relaxGot(uint8_t *loc, RelType type, uint64_t val) const { } } -void PPC64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { +void PPC64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { // Reference: 3.7.4.2 of the 64-bit ELF V2 abi supplement. // The general dynamic code sequence for a global `x` will look like: // Instruction Relocation Symbol @@ -401,14 +501,14 @@ void PPC64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { // bl __tls_get_addr(x@tlsgd) into nop // nop into addi r3, r3, x@tprel@l - switch (type) { + switch (rel.type) { case R_PPC64_GOT_TLSGD16_HA: writeFromHalf16(loc, 0x60000000); // nop break; case R_PPC64_GOT_TLSGD16: case R_PPC64_GOT_TLSGD16_LO: writeFromHalf16(loc, 0x3c6d0000); // addis r3, r13 - relocateOne(loc, R_PPC64_TPREL16_HA, val); + relocateNoSym(loc, R_PPC64_TPREL16_HA, val); break; case R_PPC64_TLSGD: write32(loc, 0x60000000); // nop @@ -416,15 +516,16 @@ void PPC64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { // Since we are relocating a half16 type relocation and Loc + 4 points to // the start of an instruction we need to advance the buffer by an extra // 2 bytes on BE. - relocateOne(loc + 4 + (config->ekind == ELF64BEKind ? 2 : 0), - R_PPC64_TPREL16_LO, val); + relocateNoSym(loc + 4 + (config->ekind == ELF64BEKind ? 2 : 0), + R_PPC64_TPREL16_LO, val); break; default: llvm_unreachable("unsupported relocation for TLS GD to LE relaxation"); } } -void PPC64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { +void PPC64::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { // Reference: 3.7.4.3 of the 64-bit ELF V2 abi supplement. 
// The local dynamic code sequence for a global `x` will look like: // Instruction Relocation Symbol @@ -440,7 +541,7 @@ void PPC64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { // bl __tls_get_addr(x@tlsgd) into nop // nop into addi r3, r3, 4096 - switch (type) { + switch (rel.type) { case R_PPC64_GOT_TLSLD16_HA: writeFromHalf16(loc, 0x60000000); // nop break; @@ -457,14 +558,14 @@ void PPC64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { case R_PPC64_DTPREL16_DS: case R_PPC64_DTPREL16_LO: case R_PPC64_DTPREL16_LO_DS: - relocateOne(loc, type, val); + relocate(loc, rel, val); break; default: llvm_unreachable("unsupported relocation for TLS LD to LE relaxation"); } } -unsigned getPPCDFormOp(unsigned secondaryOp) { +unsigned elf::getPPCDFormOp(unsigned secondaryOp) { switch (secondaryOp) { case LBZX: return LBZ; @@ -489,7 +590,8 @@ unsigned getPPCDFormOp(unsigned secondaryOp) { } } -void PPC64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { +void PPC64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { // The initial exec code sequence for a global `x` will look like: // Instruction Relocation Symbol // addis r9, r2, x@got@tprel@ha R_PPC64_GOT_TPREL16_HA x @@ -510,7 +612,7 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { // indexed load or store instructions. unsigned offset = (config->ekind == ELF64BEKind) ? 2 : 0; - switch (type) { + switch (rel.type) { case R_PPC64_GOT_TPREL16_HA: write32(loc - offset, 0x60000000); // nop break; @@ -518,7 +620,7 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { case R_PPC64_GOT_TPREL16_DS: { uint32_t regNo = read32(loc - offset) & 0x03E00000; // bits 6-10 write32(loc - offset, 0x3C0D0000 | regNo); // addis RegNo, r13 - relocateOne(loc, R_PPC64_TPREL16_HA, val); + relocateNoSym(loc, R_PPC64_TPREL16_HA, val); break; } case R_PPC64_TLS: { @@ -530,7 +632,7 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { if (dFormOp == 0) error("unrecognized instruction for IE to LE R_PPC64_TLS"); write32(loc, ((dFormOp << 26) | (read32(loc) & 0x03FFFFFF))); - relocateOne(loc + offset, R_PPC64_TPREL16_LO, val); + relocateNoSym(loc + offset, R_PPC64_TPREL16_LO, val); break; } default: @@ -569,6 +671,8 @@ RelExpr PPC64::getRelExpr(RelType type, const Symbol &s, case R_PPC64_TOC16_HI: case R_PPC64_TOC16_LO: return R_GOTREL; + case R_PPC64_GOT_PCREL34: + return R_GOT_PC; case R_PPC64_TOC16_HA: case R_PPC64_TOC16_LO_DS: return config->tocOptimize ? R_PPC64_RELAX_TOC : R_GOTREL; @@ -577,11 +681,14 @@ RelExpr PPC64::getRelExpr(RelType type, const Symbol &s, case R_PPC64_REL14: case R_PPC64_REL24: return R_PPC64_CALL_PLT; + case R_PPC64_REL24_NOTOC: + return R_PLT_PC; case R_PPC64_REL16_LO: case R_PPC64_REL16_HA: case R_PPC64_REL16_HI: case R_PPC64_REL32: case R_PPC64_REL64: + case R_PPC64_PCREL34: return R_PC; case R_PPC64_GOT_TLSGD16: case R_PPC64_GOT_TLSGD16_HA: @@ -769,11 +876,8 @@ static bool isTocOptType(RelType type) { } } -void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - // We need to save the original relocation type to use in diagnostics, and - // use the original type to determine if we should toc-optimize the - // instructions being relocated. 
- RelType originalType = type; +void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + RelType type = rel.type; bool shouldTocOptimize = isTocOptType(type); // For dynamic thread pointer relative, toc-relative, and got-indirect // relocations, proceed in terms of the corresponding ADDR16 relocation type. @@ -781,27 +885,27 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { switch (type) { case R_PPC64_ADDR14: { - checkAlignment(loc, val, 4, type); + checkAlignment(loc, val, 4, rel); // Preserve the AA/LK bits in the branch instruction uint8_t aalk = loc[3]; write16(loc + 2, (aalk & 3) | (val & 0xfffc)); break; } case R_PPC64_ADDR16: - checkIntUInt(loc, val, 16, originalType); + checkIntUInt(loc, val, 16, rel); write16(loc, val); break; case R_PPC64_ADDR32: - checkIntUInt(loc, val, 32, originalType); + checkIntUInt(loc, val, 32, rel); write32(loc, val); break; case R_PPC64_ADDR16_DS: case R_PPC64_TPREL16_DS: { - checkInt(loc, val, 16, originalType); + checkInt(loc, val, 16, rel); // DQ-form instructions use bits 28-31 as part of the instruction encoding // DS-form instructions only use bits 30-31. uint16_t mask = isDQFormInstruction(readFromHalf16(loc)) ? 0xf : 0x3; - checkAlignment(loc, lo(val), mask + 1, originalType); + checkAlignment(loc, lo(val), mask + 1, rel); write16(loc, (read16(loc) & mask) | lo(val)); } break; case R_PPC64_ADDR16_HA: @@ -856,7 +960,7 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { // DS-form instructions only use bits 30-31. uint32_t insn = readFromHalf16(loc); uint16_t mask = isDQFormInstruction(insn) ? 0xf : 0x3; - checkAlignment(loc, lo(val), mask + 1, originalType); + checkAlignment(loc, lo(val), mask + 1, rel); if (config->tocOptimize && shouldTocOptimize && ha(val) == 0) { // When the high-adjusted part of a toc relocation evaluates to 0, it is // changed into a nop. 
The lo part then needs to be updated to use the toc @@ -872,11 +976,11 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { } } break; case R_PPC64_TPREL16: - checkInt(loc, val, 16, originalType); + checkInt(loc, val, 16, rel); write16(loc, val); break; case R_PPC64_REL32: - checkInt(loc, val, 32, type); + checkInt(loc, val, 32, rel); write32(loc, val); break; case R_PPC64_ADDR64: @@ -886,21 +990,44 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_PPC64_REL14: { uint32_t mask = 0x0000FFFC; - checkInt(loc, val, 16, type); - checkAlignment(loc, val, 4, type); + checkInt(loc, val, 16, rel); + checkAlignment(loc, val, 4, rel); write32(loc, (read32(loc) & ~mask) | (val & mask)); break; } - case R_PPC64_REL24: { + case R_PPC64_REL24: + case R_PPC64_REL24_NOTOC: { uint32_t mask = 0x03FFFFFC; - checkInt(loc, val, 26, type); - checkAlignment(loc, val, 4, type); + checkInt(loc, val, 26, rel); + checkAlignment(loc, val, 4, rel); write32(loc, (read32(loc) & ~mask) | (val & mask)); break; } case R_PPC64_DTPREL64: write64(loc, val - dynamicThreadPointerOffset); break; + case R_PPC64_PCREL34: { + const uint64_t si0Mask = 0x00000003ffff0000; + const uint64_t si1Mask = 0x000000000000ffff; + const uint64_t fullMask = 0x0003ffff0000ffff; + checkInt(loc, val, 34, rel); + + uint64_t instr = readPrefixedInstruction(loc) & ~fullMask; + writePrefixedInstruction(loc, instr | ((val & si0Mask) << 16) | + (val & si1Mask)); + break; + } + case R_PPC64_GOT_PCREL34: { + const uint64_t si0Mask = 0x00000003ffff0000; + const uint64_t si1Mask = 0x000000000000ffff; + const uint64_t fullMask = 0x0003ffff0000ffff; + checkInt(loc, val, 34, rel); + + uint64_t instr = readPrefixedInstruction(loc) & ~fullMask; + writePrefixedInstruction(loc, instr | ((val & si0Mask) << 16) | + (val & si1Mask)); + break; + } default: llvm_unreachable("unknown relocation"); } @@ -908,13 +1035,30 @@ void PPC64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { bool PPC64::needsThunk(RelExpr expr, RelType type, const InputFile *file, uint64_t branchAddr, const Symbol &s, int64_t a) const { - if (type != R_PPC64_REL14 && type != R_PPC64_REL24) + if (type != R_PPC64_REL14 && type != R_PPC64_REL24 && + type != R_PPC64_REL24_NOTOC) return false; + // FIXME: Remove the fatal error once the call protocol is implemented. + if (type == R_PPC64_REL24_NOTOC && s.isInPlt()) + fatal("unimplemented feature: external function call with the reltype" + " R_PPC64_REL24_NOTOC"); + // If a function is in the Plt it needs to be called with a call-stub. if (s.isInPlt()) return true; + // FIXME: Remove the fatal error once the call protocol is implemented. + if (type == R_PPC64_REL24_NOTOC && (s.stOther >> 5) > 1) + fatal("unimplemented feature: local function call with the reltype" + " R_PPC64_REL24_NOTOC and the callee needs toc-pointer setup"); + + // This check looks at the st_other bits of the callee with relocation + // R_PPC64_REL14 or R_PPC64_REL24. If the value is 1, then the callee + // clobbers the TOC and we need an R2 save stub. + if (type != R_PPC64_REL24_NOTOC && (s.stOther >> 5) == 1) + return true; + // If a symbol is a weak undefined and we are compiling an executable // it doesn't need a range-extending thunk since it can't be called. 
if (s.isUndefWeak() && !config->shared) @@ -940,7 +1084,7 @@ bool PPC64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { int64_t offset = dst - src; if (type == R_PPC64_REL14) return isInt<16>(offset); - if (type == R_PPC64_REL24) + if (type == R_PPC64_REL24 || type == R_PPC64_REL24_NOTOC) return isInt<26>(offset); llvm_unreachable("unsupported relocation type used in branch"); } @@ -971,12 +1115,13 @@ RelExpr PPC64::adjustRelaxExpr(RelType type, const uint8_t *data, // thread pointer. // Since the nop must directly follow the call, the R_PPC64_TLSGD relocation is // used as the relaxation hint for both steps 2 and 3. -void PPC64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void PPC64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_PPC64_GOT_TLSGD16_HA: // This is relaxed from addis rT, r2, sym@got@tlsgd@ha to // addis rT, r2, sym@got@tprel@ha. - relocateOne(loc, R_PPC64_GOT_TPREL16_HA, val); + relocateNoSym(loc, R_PPC64_GOT_TPREL16_HA, val); return; case R_PPC64_GOT_TLSGD16: case R_PPC64_GOT_TLSGD16_LO: { @@ -984,7 +1129,7 @@ void PPC64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { // ld r3, sym@got@tprel@l(rA) uint32_t ra = (readFromHalf16(loc) & (0x1f << 16)); writeFromHalf16(loc, 0xe8600000 | ra); - relocateOne(loc, R_PPC64_GOT_TPREL16_LO_DS, val); + relocateNoSym(loc, R_PPC64_GOT_TPREL16_LO_DS, val); return; } case R_PPC64_TLSGD: @@ -1103,10 +1248,7 @@ bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, return true; } -TargetInfo *getPPC64TargetInfo() { +TargetInfo *elf::getPPC64TargetInfo() { static PPC64 target; return &target; } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index 42db8e08162d..b340fd00deee 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -15,9 +15,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { @@ -33,7 +32,8 @@ public: RelType getDynRel(RelType type) const override; RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // end anonymous namespace @@ -76,6 +76,7 @@ RISCV::RISCV() { noneRel = R_RISCV_NONE; pltRel = R_RISCV_JUMP_SLOT; relativeRel = R_RISCV_RELATIVE; + iRelativeRel = R_RISCV_IRELATIVE; if (config->is64) { symbolicRel = R_RISCV_64; tlsModuleIndexRel = R_RISCV_TLS_DTPMOD64; @@ -236,9 +237,15 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, case R_RISCV_TPREL_LO12_S: return R_TLS; case R_RISCV_RELAX: - case R_RISCV_ALIGN: case R_RISCV_TPREL_ADD: return R_NONE; + case R_RISCV_ALIGN: + // Not just a hint; always padded to the worst-case number of NOPs, so may + // not currently be aligned, and without linker relaxation support we can't + // delete NOPs to realign.
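// (Illustrative example, not patch content: a `.balign 8` in a relaxable code
// region is padded with worst-case NOPs, up to 6 bytes when the compressed
// extension is enabled, and tagged with R_RISCV_ALIGN. A relaxing linker would
// delete just enough padding to restore alignment once earlier code shrinks;
// since we cannot do that yet, the error below is reported instead.)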
+ errorOrWarn(getErrorLocation(loc) + "relocation R_RISCV_ALIGN requires " + "unimplemented linker relaxation; recompile with -mno-relax"); + return R_NONE; default: error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + ") against symbol " + toString(s)); @@ -251,11 +258,10 @@ static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) { return (v & ((1ULL << (begin + 1)) - 1)) >> end; } -void RISCV::relocateOne(uint8_t *loc, const RelType type, - const uint64_t val) const { +void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { const unsigned bits = config->wordsize * 8; - switch (type) { + switch (rel.type) { case R_RISCV_32: write32le(loc, val); return; @@ -264,8 +270,8 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, return; case R_RISCV_RVC_BRANCH: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 8, type); - checkAlignment(loc, val, 2, type); + checkInt(loc, static_cast<int64_t>(val) >> 1, 8, rel); + checkAlignment(loc, val, 2, rel); uint16_t insn = read16le(loc) & 0xE383; uint16_t imm8 = extractBits(val, 8, 8) << 12; uint16_t imm4_3 = extractBits(val, 4, 3) << 10; @@ -279,8 +285,8 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, } case R_RISCV_RVC_JUMP: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 11, type); - checkAlignment(loc, val, 2, type); + checkInt(loc, static_cast<int64_t>(val) >> 1, 11, rel); + checkAlignment(loc, val, 2, rel); uint16_t insn = read16le(loc) & 0xE003; uint16_t imm11 = extractBits(val, 11, 11) << 12; uint16_t imm4 = extractBits(val, 4, 4) << 11; @@ -298,7 +304,7 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, case R_RISCV_RVC_LUI: { int64_t imm = SignExtend64(val + 0x800, bits) >> 12; - checkInt(loc, imm, 6, type); + checkInt(loc, imm, 6, rel); if (imm == 0) { // `c.lui rd, 0` is illegal, convert to `c.li rd, 0` write16le(loc, (read16le(loc) & 0x0F83) | 0x4000); } else { @@ -310,8 +316,8 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, } case R_RISCV_JAL: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 20, type); - checkAlignment(loc, val, 2, type); + checkInt(loc, static_cast<int64_t>(val) >> 1, 20, rel); + checkAlignment(loc, val, 2, rel); uint32_t insn = read32le(loc) & 0xFFF; uint32_t imm20 = extractBits(val, 20, 20) << 31; @@ -325,8 +331,8 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, } case R_RISCV_BRANCH: { - checkInt(loc, static_cast<int64_t>(val) >> 1, 12, type); - checkAlignment(loc, val, 2, type); + checkInt(loc, static_cast<int64_t>(val) >> 1, 12, rel); + checkAlignment(loc, val, 2, rel); uint32_t insn = read32le(loc) & 0x1FFF07F; uint32_t imm12 = extractBits(val, 12, 12) << 31; @@ -343,10 +349,10 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, case R_RISCV_CALL: case R_RISCV_CALL_PLT: { int64_t hi = SignExtend64(val + 0x800, bits) >> 12; - checkInt(loc, hi, 20, type); + checkInt(loc, hi, 20, rel); if (isInt<20>(hi)) { - relocateOne(loc, R_RISCV_PCREL_HI20, val); - relocateOne(loc + 4, R_RISCV_PCREL_LO12_I, val); + relocateNoSym(loc, R_RISCV_PCREL_HI20, val); + relocateNoSym(loc + 4, R_RISCV_PCREL_LO12_I, val); } return; } @@ -358,7 +364,7 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, case R_RISCV_TPREL_HI20: case R_RISCV_HI20: { uint64_t hi = val + 0x800; - checkInt(loc, SignExtend64(hi, bits) >> 12, 20, type); + checkInt(loc, SignExtend64(hi, bits) >> 12, 20, rel); write32le(loc, (read32le(loc) & 0xFFF) | (hi & 0xFFFFF000)); return; } @@ -431,7 +437,6 @@ void RISCV::relocateOne(uint8_t *loc, const 
RelType type, write64le(loc, val - dtpOffset); break; - case R_RISCV_ALIGN: case R_RISCV_RELAX: return; // Ignored (for now) @@ -440,10 +445,7 @@ void RISCV::relocateOne(uint8_t *loc, const RelType type, } } -TargetInfo *getRISCVTargetInfo() { +TargetInfo *elf::getRISCVTargetInfo() { static RISCV target; return &target; } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/SPARCV9.cpp b/lld/ELF/Arch/SPARCV9.cpp index 08ef52099de9..f137c21fc898 100644 --- a/lld/ELF/Arch/SPARCV9.cpp +++ b/lld/ELF/Arch/SPARCV9.cpp @@ -16,9 +16,8 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class SPARCV9 final : public TargetInfo { @@ -28,7 +27,8 @@ public: const uint8_t *loc) const override; void writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -54,6 +54,14 @@ RelExpr SPARCV9::getRelExpr(RelType type, const Symbol &s, case R_SPARC_UA32: case R_SPARC_64: case R_SPARC_UA64: + case R_SPARC_H44: + case R_SPARC_M44: + case R_SPARC_L44: + case R_SPARC_HH22: + case R_SPARC_HM10: + case R_SPARC_LM22: + case R_SPARC_HI22: + case R_SPARC_LO10: return R_ABS; case R_SPARC_PC10: case R_SPARC_PC22: @@ -68,6 +76,9 @@ RelExpr SPARCV9::getRelExpr(RelType type, const Symbol &s, return R_PLT_PC; case R_SPARC_NONE: return R_NONE; + case R_SPARC_TLS_LE_HIX22: + case R_SPARC_TLS_LE_LOX10: + return R_TLS; default: error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + ") against symbol " + toString(s)); @@ -75,38 +86,45 @@ RelExpr SPARCV9::getRelExpr(RelType type, const Symbol &s, } } -void SPARCV9::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void SPARCV9::relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { case R_SPARC_32: case R_SPARC_UA32: // V-word32 - checkUInt(loc, val, 32, type); + checkUInt(loc, val, 32, rel); write32be(loc, val); break; case R_SPARC_DISP32: // V-disp32 - checkInt(loc, val, 32, type); + checkInt(loc, val, 32, rel); write32be(loc, val); break; case R_SPARC_WDISP30: case R_SPARC_WPLT30: // V-disp30 - checkInt(loc, val, 32, type); + checkInt(loc, val, 32, rel); write32be(loc, (read32be(loc) & ~0x3fffffff) | ((val >> 2) & 0x3fffffff)); break; case R_SPARC_22: // V-imm22 - checkUInt(loc, val, 22, type); + checkUInt(loc, val, 22, rel); write32be(loc, (read32be(loc) & ~0x003fffff) | (val & 0x003fffff)); break; case R_SPARC_GOT22: case R_SPARC_PC22: + case R_SPARC_LM22: // T-imm22 write32be(loc, (read32be(loc) & ~0x003fffff) | ((val >> 10) & 0x003fffff)); break; + case R_SPARC_HI22: + // V-imm22 + checkUInt(loc, val >> 10, 22, rel); + write32be(loc, (read32be(loc) & ~0x003fffff) | ((val >> 10) & 0x003fffff)); + break; case R_SPARC_WDISP19: // V-disp19 - checkInt(loc, val, 21, type); + checkInt(loc, val, 21, rel); write32be(loc, (read32be(loc) & ~0x0007ffff) | ((val >> 2) & 0x0007ffff)); break; case R_SPARC_GOT10: @@ -114,11 +132,45 @@ void SPARCV9::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { // T-simm10 write32be(loc, (read32be(loc) & ~0x000003ff) | (val & 0x000003ff)); break; + case R_SPARC_LO10: + // T-simm13 + write32be(loc, (read32be(loc) & ~0x00001fff) | (val & 0x000003ff)); + break; case R_SPARC_64: case R_SPARC_UA64: // V-xword64
write64be(loc, val); break; + case R_SPARC_HH22: + // V-imm22 + checkUInt(loc, val >> 42, 22, rel); + write32be(loc, (read32be(loc) & ~0x003fffff) | ((val >> 42) & 0x003fffff)); + break; + case R_SPARC_HM10: + // T-simm13 + write32be(loc, (read32be(loc) & ~0x00001fff) | ((val >> 32) & 0x000003ff)); + break; + case R_SPARC_H44: + // V-imm22 + checkUInt(loc, val >> 22, 22, rel); + write32be(loc, (read32be(loc) & ~0x003fffff) | ((val >> 22) & 0x003fffff)); + break; + case R_SPARC_M44: + // T-imm10 + write32be(loc, (read32be(loc) & ~0x000003ff) | ((val >> 12) & 0x000003ff)); + break; + case R_SPARC_L44: + // T-imm13 + write32be(loc, (read32be(loc) & ~0x00001fff) | (val & 0x00000fff)); + break; + case R_SPARC_TLS_LE_HIX22: + // T-imm22 + write32be(loc, (read32be(loc) & ~0x003fffff) | ((~val >> 10) & 0x003fffff)); + break; + case R_SPARC_TLS_LE_LOX10: + // T-simm13 + write32be(loc, (read32be(loc) & ~0x00001fff) | (val & 0x000003ff) | 0x1C00); + break; default: llvm_unreachable("unknown relocation"); } @@ -139,14 +191,11 @@ void SPARCV9::writePlt(uint8_t *buf, const Symbol & /*sym*/, memcpy(buf, pltData, sizeof(pltData)); uint64_t off = pltEntryAddr - in.plt->getVA(); - relocateOne(buf, R_SPARC_22, off); - relocateOne(buf + 4, R_SPARC_WDISP19, -(off + 4 - pltEntrySize)); + relocateNoSym(buf, R_SPARC_22, off); + relocateNoSym(buf + 4, R_SPARC_WDISP19, -(off + 4 - pltEntrySize)); } -TargetInfo *getSPARCV9TargetInfo() { +TargetInfo *elf::getSPARCV9TargetInfo() { static SPARCV9 target; return &target; } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/X86.cpp b/lld/ELF/Arch/X86.cpp index b4daedc0f5dc..8c8824d53cce 100644 --- a/lld/ELF/Arch/X86.cpp +++ b/lld/ELF/Arch/X86.cpp @@ -16,9 +16,8 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class X86 : public TargetInfo { @@ -35,14 +34,19 @@ public: void writePltHeader(uint8_t *buf) const override; void writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; - void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override; + void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -262,21 +266,21 @@ int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const { } } -void X86::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { case R_386_8: // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are // being used for some 16-bit programs such as boot loaders, so // we want to support them.
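// (Clarifying note, an editorial addition: checkIntUInt accepts either a
// signed or an unsigned interpretation, so for R_386_8 any val in
// [-0x80, 0xff] passes, whereas checkInt for R_386_PC8 below narrows the
// range to [-0x80, 0x7f].)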
- checkIntUInt(loc, val, 8, type); + checkIntUInt(loc, val, 8, rel); *loc = val; break; case R_386_PC8: - checkInt(loc, val, 8, type); + checkInt(loc, val, 8, rel); *loc = val; break; case R_386_16: - checkIntUInt(loc, val, 16, type); + checkIntUInt(loc, val, 16, rel); write16le(loc, val); break; case R_386_PC16: @@ -290,7 +294,7 @@ void X86::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { // current location subtracted from it. // We just check that Val fits in 17 bits. This misses some cases, but // should have no false positives. - checkInt(loc, val, 17, type); + checkInt(loc, val, 17, rel); write16le(loc, val); break; case R_386_32: @@ -312,7 +316,7 @@ void X86::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_386_TLS_LE_32: case R_386_TLS_TPOFF: case R_386_TLS_TPOFF32: - checkInt(loc, val, 32, type); + checkInt(loc, val, 32, rel); write32le(loc, val); break; default: @@ -320,7 +324,7 @@ void X86::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { } } -void X86::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { +void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &, uint64_t val) const { // Convert // leal x@tlsgd(, %ebx, 1), // call __tls_get_addr@plt @@ -335,7 +339,7 @@ void X86::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { write32le(loc + 5, val); } -void X86::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { +void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &, uint64_t val) const { // Convert // leal x@tlsgd(, %ebx, 1), // call __tls_get_addr@plt @@ -352,14 +356,15 @@ void X86::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { // In some conditions, relocations can be optimized to avoid using GOT. // This function does that for Initial Exec to Local Exec case. -void X86::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { +void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { // Ulrich's document section 6.2 says that @gotntpoff can // be used with MOVL or ADDL instructions. // @indntpoff is similar to @gotntpoff, but for use in // position dependent code. 
uint8_t reg = (loc[-1] >> 3) & 7; - if (type == R_386_TLS_IE) { + if (rel.type == R_386_TLS_IE) { if (loc[-1] == 0xa1) { // "movl foo@indntpoff,%eax" -> "movl $foo,%eax" // This case is different from the generic case below because @@ -375,7 +380,7 @@ void X86::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { loc[-1] = 0xc0 | reg; } } else { - assert(type == R_386_TLS_GOTIE); + assert(rel.type == R_386_TLS_GOTIE); if (loc[-2] == 0x8b) { // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg" loc[-2] = 0xc7; @@ -389,8 +394,9 @@ void X86::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { write32le(loc, val); } -void X86::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { - if (type == R_386_TLS_LDO_32) { +void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + if (rel.type == R_386_TLS_LDO_32) { write32le(loc, val); return; } @@ -608,7 +614,7 @@ void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym, write32le(buf + 22, -off - 26); } -TargetInfo *getX86TargetInfo() { +TargetInfo *elf::getX86TargetInfo() { if (config->zRetpolineplt) { if (config->isPic) { static RetpolinePic t; @@ -626,6 +632,3 @@ TargetInfo *getX86TargetInfo() { static X86 t; return &t; } - -} // namespace elf -} // namespace lld diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index 74b72eb91293..24711ec210a4 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "InputFiles.h" +#include "OutputSections.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" @@ -18,9 +19,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class X86_64 : public TargetInfo { @@ -35,20 +35,44 @@ public: void writePltHeader(uint8_t *buf) const override; void writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void applyJumpInstrMod(uint8_t *loc, JumpModType type, + unsigned size) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; - void relaxGot(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override; + void relaxGot(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, uint8_t stOther) const override; + bool deleteFallThruJmpInsn(InputSection &is, InputFile *file, + InputSection *nextIS) const override; }; } // namespace +// This is a vector of NOP
instructions of sizes from 1 to 9 bytes. The + appropriately sized instructions are used to fill the gaps between sections + which are executed during fall through. +static const std::vector<std::vector<uint8_t>> nopInstructions = { + {0x90}, + {0x66, 0x90}, + {0x0f, 0x1f, 0x00}, + {0x0f, 0x1f, 0x40, 0x00}, + {0x0f, 0x1f, 0x44, 0x00, 0x00}, + {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, + {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, + {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}}; + X86_64::X86_64() { copyRel = R_X86_64_COPY; gotRel = R_X86_64_GLOB_DAT; @@ -65,6 +89,7 @@ X86_64::X86_64() { pltEntrySize = 16; ipltEntrySize = 16; trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3 + nopInstrs = nopInstructions; // Align to the large page size (known as a superpage or huge page). // FreeBSD automatically promotes large, superpage-aligned allocations. @@ -73,6 +98,216 @@ int X86_64::getTlsGdRelaxSkip(RelType type) const { return 2; } +// Opcodes for the different X86_64 jmp instructions. +enum JmpInsnOpcode : uint32_t { + J_JMP_32, + J_JNE_32, + J_JE_32, + J_JG_32, + J_JGE_32, + J_JB_32, + J_JBE_32, + J_JL_32, + J_JLE_32, + J_JA_32, + J_JAE_32, + J_UNKNOWN, +}; + +// Given the first (optional) and second byte of the insn's opcode, this +// returns the corresponding enum value. +static JmpInsnOpcode getJmpInsnType(const uint8_t *first, + const uint8_t *second) { + if (*second == 0xe9) + return J_JMP_32; + + if (first == nullptr) + return J_UNKNOWN; + + if (*first == 0x0f) { + switch (*second) { + case 0x84: + return J_JE_32; + case 0x85: + return J_JNE_32; + case 0x8f: + return J_JG_32; + case 0x8d: + return J_JGE_32; + case 0x82: + return J_JB_32; + case 0x86: + return J_JBE_32; + case 0x8c: + return J_JL_32; + case 0x8e: + return J_JLE_32; + case 0x87: + return J_JA_32; + case 0x83: + return J_JAE_32; + } + } + return J_UNKNOWN; +} + +// Return the relocation index for input section IS with a specific Offset. +// Returns the maximum size of the vector if no such relocation is found. +static unsigned getRelocationWithOffset(const InputSection &is, + uint64_t offset) { + unsigned size = is.relocations.size(); + for (unsigned i = size - 1; i + 1 > 0; --i) { + if (is.relocations[i].offset == offset && is.relocations[i].expr != R_NONE) + return i; + } + return size; +} + +// Returns true if R corresponds to a relocation used for a jump instruction. +// TODO: Once special relocations for relaxable jump instructions are available, +// this should be modified to use those relocations. +static bool isRelocationForJmpInsn(Relocation &R) { + return R.type == R_X86_64_PLT32 || R.type == R_X86_64_PC32 || + R.type == R_X86_64_PC8; +} + +// Return true if Relocation R points to the first instruction in the +// next section. +// TODO: Delete this once psABI reserves a new relocation type for fall thru +// jumps. +static bool isFallThruRelocation(InputSection &is, InputFile *file, + InputSection *nextIS, Relocation &r) { + if (!isRelocationForJmpInsn(r)) + return false; + + uint64_t addrLoc = is.getOutputSection()->addr + is.outSecOff + r.offset; + uint64_t targetOffset = InputSectionBase::getRelocTargetVA( + file, r.type, r.addend, addrLoc, *r.sym, r.expr); + + // If this jmp is a fall thru, the target offset is the beginning of the + // next section.
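// (Editorial aside, assuming the usual -4 addend on a direct jmp's
// PC-relative relocation: targetOffset evaluates to S + A - addrLoc, so
// addrLoc + 4 + targetOffset reduces to the target address S, and the
// comparison below asks whether the jump lands exactly on nextIS's start.)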
+ uint64_t nextSectionOffset = + nextIS->getOutputSection()->addr + nextIS->outSecOff; + return (addrLoc + 4 + targetOffset) == nextSectionOffset; +} + +// Return the jmp instruction opcode that is the inverse of the given +// opcode. For example, JE inverted is JNE. +static JmpInsnOpcode invertJmpOpcode(const JmpInsnOpcode opcode) { + switch (opcode) { + case J_JE_32: + return J_JNE_32; + case J_JNE_32: + return J_JE_32; + case J_JG_32: + return J_JLE_32; + case J_JGE_32: + return J_JL_32; + case J_JB_32: + return J_JAE_32; + case J_JBE_32: + return J_JA_32; + case J_JL_32: + return J_JGE_32; + case J_JLE_32: + return J_JG_32; + case J_JA_32: + return J_JBE_32; + case J_JAE_32: + return J_JB_32; + default: + return J_UNKNOWN; + } +} + +// Deletes a direct jump instruction in input sections that jumps to the +// following section, as it is not required. If there are two consecutive jump +// instructions, it checks if they can be flipped and one can be deleted. +// For example: +// .section .text +// a.BB.foo: +// ... +// 10: jne aa.BB.foo +// 16: jmp bar +// aa.BB.foo: +// ... +// +// can be converted to: +// a.BB.foo: +// ... +// 10: je bar #jne flipped to je and the jmp is deleted. +// aa.BB.foo: +// ... +bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file, + InputSection *nextIS) const { + const unsigned sizeOfDirectJmpInsn = 5; + + if (nextIS == nullptr) + return false; + + if (is.getSize() < sizeOfDirectJmpInsn) + return false; + + // If this jmp insn can be removed, it is the last insn and the + // relocation is 4 bytes before the end. + unsigned rIndex = getRelocationWithOffset(is, is.getSize() - 4); + if (rIndex == is.relocations.size()) + return false; + + Relocation &r = is.relocations[rIndex]; + + // Check if the relocation corresponds to a direct jmp. + const uint8_t *secContents = is.data().data(); + // If it is not a direct jmp instruction, there is nothing to do here. + if (*(secContents + r.offset - 1) != 0xe9) + return false; + + if (isFallThruRelocation(is, file, nextIS, r)) { + // This is a fall thru and can be deleted. + r.expr = R_NONE; + r.offset = 0; + is.drop_back(sizeOfDirectJmpInsn); + is.nopFiller = true; + return true; + } + + // Now, check if flip and delete is possible. + const unsigned sizeOfJmpCCInsn = 6; + // To flip, there must be at least one JmpCC and one direct jmp. + if (is.getSize() < sizeOfDirectJmpInsn + sizeOfJmpCCInsn) + return false; + + unsigned rbIndex = + getRelocationWithOffset(is, (is.getSize() - sizeOfDirectJmpInsn - 4)); + if (rbIndex == is.relocations.size()) + return false; + + Relocation &rB = is.relocations[rbIndex]; + + const uint8_t *jmpInsnB = secContents + rB.offset - 1; + JmpInsnOpcode jmpOpcodeB = getJmpInsnType(jmpInsnB - 1, jmpInsnB); + if (jmpOpcodeB == J_UNKNOWN) + return false; + + if (!isFallThruRelocation(is, file, nextIS, rB)) + return false; + + // jmpCC jumps to the fall thru block; the branch can be flipped and the + // jmp can be deleted. + JmpInsnOpcode jInvert = invertJmpOpcode(jmpOpcodeB); + if (jInvert == J_UNKNOWN) + return false; + is.jumpInstrMods.push_back({jInvert, (rB.offset - 1), 4}); + // Move R's values to rB except the offset.
+  rB = {r.expr, r.type, rB.offset, r.addend, r.sym};
+  // Cancel r.
+  r.expr = R_NONE;
+  r.offset = 0;
+  is.drop_back(sizeOfDirectJmpInsn);
+  is.nopFiller = true;
+  return true;
+}
+
 RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
                            const uint8_t *loc) const {
   if (type == R_X86_64_GOTTPOFF)
@@ -177,8 +412,9 @@ RelType X86_64::getDynRel(RelType type) const {
   return R_X86_64_NONE;
 }
 
-void X86_64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const {
-  if (type == R_X86_64_TLSGD) {
+void X86_64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
+                            uint64_t val) const {
+  if (rel.type == R_X86_64_TLSGD) {
     // Convert
     //   .byte 0x66
     //   leaq x@tlsgd(%rip), %rdi
@@ -201,7 +437,7 @@ void X86_64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const {
     //   lea x@tlsgd(%rip), %rax
     //   call *(%rax)
     // to the following two instructions.
-    assert(type == R_X86_64_GOTPC32_TLSDESC);
+    assert(rel.type == R_X86_64_GOTPC32_TLSDESC);
     if (memcmp(loc - 3, "\x48\x8d\x05", 3)) {
       error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
             "in callq *x@tlsdesc(%rip), %rax");
@@ -217,8 +453,9 @@ void X86_64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const {
   }
 }
 
-void X86_64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const {
-  if (type == R_X86_64_TLSGD) {
+void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
+                            uint64_t val) const {
+  if (rel.type == R_X86_64_TLSGD) {
     // Convert
     //   .byte 0x66
     //   leaq x@tlsgd(%rip), %rdi
@@ -241,7 +478,7 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const {
     //   lea x@tlsgd(%rip), %rax
     //   call *(%rax)
     // to the following two instructions.
-    assert(type == R_X86_64_GOTPC32_TLSDESC);
+    assert(rel.type == R_X86_64_GOTPC32_TLSDESC);
     if (memcmp(loc - 3, "\x48\x8d\x05", 3)) {
       error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
            "in callq *x@tlsdesc(%rip), %rax");
@@ -258,7 +495,8 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const {
 
 // In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
 // R_X86_64_TPOFF32 so that it does not use GOT.
-void X86_64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const {
+void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &,
+                            uint64_t val) const {
   uint8_t *inst = loc - 3;
   uint8_t reg = loc[-1] >> 3;
   uint8_t *regSlot = loc - 1;
@@ -299,12 +537,13 @@ void X86_64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const {
   write32le(loc, val + 4);
 }
 
-void X86_64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const {
-  if (type == R_X86_64_DTPOFF64) {
+void X86_64::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
+                            uint64_t val) const {
+  if (rel.type == R_X86_64_DTPOFF64) {
     write64le(loc, val);
     return;
   }
-  if (type == R_X86_64_DTPOFF32) {
+  if (rel.type == R_X86_64_DTPOFF32) {
     write32le(loc, val);
     return;
   }
@@ -347,26 +586,114 @@ void X86_64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const {
         "expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD");
 }
 
-void X86_64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
+// A JumpInstrMod at a specific offset indicates that the jump instruction
+// opcode at that offset must be modified. This is specifically used to relax
+// jump instructions with basic block sections. This function looks at the
+// jump modification and applies the change.
+void X86_64::applyJumpInstrMod(uint8_t *loc, JumpModType type,
+                               unsigned size) const {
+  switch (type) {
+  case J_JMP_32:
+    if (size == 4)
+      *loc = 0xe9;
+    else
+      *loc = 0xeb;
+    break;
+  case J_JE_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x84;
+    } else
+      *loc = 0x74;
+    break;
+  case J_JNE_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x85;
+    } else
+      *loc = 0x75;
+    break;
+  case J_JG_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x8f;
+    } else
+      *loc = 0x7f;
+    break;
+  case J_JGE_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x8d;
+    } else
+      *loc = 0x7d;
+    break;
+  case J_JB_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x82;
+    } else
+      *loc = 0x72;
+    break;
+  case J_JBE_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x86;
+    } else
+      *loc = 0x76;
+    break;
+  case J_JL_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x8c;
+    } else
+      *loc = 0x7c;
+    break;
+  case J_JLE_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x8e;
+    } else
+      *loc = 0x7e;
+    break;
+  case J_JA_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x87;
+    } else
+      *loc = 0x77;
+    break;
+  case J_JAE_32:
+    if (size == 4) {
+      loc[-1] = 0x0f;
+      *loc = 0x83;
+    } else
+      *loc = 0x73;
+    break;
+  case J_UNKNOWN:
+    llvm_unreachable("Unknown Jump Relocation");
+  }
+}
+
+void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
+  switch (rel.type) {
   case R_X86_64_8:
-    checkIntUInt(loc, val, 8, type);
+    checkIntUInt(loc, val, 8, rel);
     *loc = val;
     break;
   case R_X86_64_PC8:
-    checkInt(loc, val, 8, type);
+    checkInt(loc, val, 8, rel);
     *loc = val;
     break;
   case R_X86_64_16:
-    checkIntUInt(loc, val, 16, type);
+    checkIntUInt(loc, val, 16, rel);
     write16le(loc, val);
     break;
   case R_X86_64_PC16:
-    checkInt(loc, val, 16, type);
+    checkInt(loc, val, 16, rel);
     write16le(loc, val);
     break;
   case R_X86_64_32:
-    checkUInt(loc, val, 32, type);
+    checkUInt(loc, val, 32, rel);
     write32le(loc, val);
     break;
   case R_X86_64_32S:
@@ -384,7 +711,7 @@ void X86_64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
   case R_X86_64_TLSLD:
   case R_X86_64_DTPOFF32:
   case R_X86_64_SIZE32:
-    checkInt(loc, val, 32, type);
+    checkInt(loc, val, 32, rel);
     write32le(loc, val);
     break;
   case R_X86_64_64:
@@ -495,7 +822,7 @@ static void relaxGotNoPic(uint8_t *loc, uint64_t val, uint8_t op,
   write32le(loc, val);
 }
 
-void X86_64::relaxGot(uint8_t *loc, RelType type, uint64_t val) const {
+void X86_64::relaxGot(uint8_t *loc, const Relocation &, uint64_t val) const {
   const uint8_t op = loc[-2];
   const uint8_t modRm = loc[-1];
 
@@ -758,7 +1085,4 @@ static TargetInfo *getTargetInfo() {
   return &t;
 }
 
-TargetInfo *getX86_64TargetInfo() { return getTargetInfo(); }
-
-} // namespace elf
-} // namespace lld
+TargetInfo *elf::getX86_64TargetInfo() { return getTargetInfo(); }
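
To make the flip-and-delete transformation above concrete, here is a minimal standalone sketch, not lld code: the helper name flipAndDropFallThruJmp and the raw-buffer setting are invented for illustration. It relies on the same encoding fact that invertJmpOpcode tabulates: in a two-byte 0x0f 0x8x Jcc opcode, toggling the low bit of the second byte inverts the condition (0x84 je <-> 0x85 jne, 0x8c jl <-> 0x8d jge, and so on). Where deleteFallThruJmpInsn retargets the surviving jcc through relocations, the sketch adjusts the rel32 displacement by hand.

#include <cstdint>
#include <cstring>
#include <vector>

// Sketch: `text` ends with a 6-byte jcc (0x0f 0x8x rel32) followed by a
// 5-byte direct jmp (0xe9 rel32), and the jcc targets the fall-through
// section. Invert the jcc, point it at the old jmp destination, and delete
// the jmp. Assumes a little-endian host, as on x86-64 itself.
static bool flipAndDropFallThruJmp(std::vector<uint8_t> &text) {
  size_t n = text.size();
  if (n < 11 || text[n - 5] != 0xe9 || text[n - 11] != 0x0f)
    return false;
  uint8_t cc = text[n - 10];
  if (cc < 0x80 || cc > 0x8f)
    return false; // not a rel32 conditional jump
  text[n - 10] = cc ^ 1; // toggle the low condition bit: je <-> jne, ...
  int32_t rel;
  std::memcpy(&rel, &text[n - 4], sizeof(rel)); // jmp's rel32, from jmp's end
  // After deletion, the jcc ends where the jmp used to start, i.e. 5 bytes
  // earlier than the jmp's end, so the displacement grows by 5.
  rel += 5;
  std::memcpy(&text[n - 9], &rel, sizeof(rel));
  text.resize(n - 5); // drop the now-redundant direct jmp
  return true;
}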
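Once the jump is deleted, is.nopFiller = true asks the writer to pad the vacated bytes with NOPs from the nopInstrs table rather than trap instructions, so execution can fall through the gap. A plausible greedy filler, sketched here with an invented helper and under the assumption that the table is indexed by length minus one exactly like nopInstructions above:

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <vector>

// Sketch (not the actual lld writer): fill `size` bytes at `buf` with x86-64
// NOPs, longest first, so the fall-through path executes as few instructions
// as possible. `nops` holds the 1- to 9-byte sequences, indexed by length - 1.
static void fillGapWithNops(uint8_t *buf, size_t size,
                            const std::vector<std::vector<uint8_t>> &nops) {
  while (size > 0) {
    size_t n = std::min(size, nops.size()); // longest NOP that still fits
    std::memcpy(buf, nops[n - 1].data(), n);
    buf += n;
    size -= n;
  }
}

Emitting the longest NOP first keeps the number of instructions executed across the gap small, which is why the table extends all the way to the 9-byte form.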