1 files changed, 284 insertions, 42 deletions
diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp
index de1023346aa5..fd90557cc4f6 100644
--- a/lld/ELF/Arch/ARM.cpp
+++ b/lld/ELF/Arch/ARM.cpp
@@ -18,9 +18,8 @@
 using namespace llvm;
 using namespace llvm::support::endian;
 using namespace llvm::ELF;
-
-namespace lld {
-namespace elf {
+using namespace lld;
+using namespace lld::elf;
 
 namespace {
 class ARM final : public TargetInfo {
@@ -43,7 +42,8 @@ public:
                   int64_t a) const override;
   uint32_t getThunkSectionSpacing() const override;
   bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
-  void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override;
+  void relocate(uint8_t *loc, const Relocation &rel,
+                uint64_t val) const override;
 };
 } // namespace
 
@@ -64,6 +64,7 @@ ARM::ARM() {
   ipltEntrySize = 16;
   trapInstr = {0xd4, 0xd4, 0xd4, 0xd4};
   needsThunks = true;
+  defaultMaxPageSize = 65536;
 }
 
 uint32_t ARM::calcEFlags() const {
@@ -120,6 +121,8 @@ RelExpr ARM::getRelExpr(RelType type, const Symbol &s,
     return R_TLSGD_PC;
   case R_ARM_TLS_LDM32:
     return R_TLSLD_PC;
+  case R_ARM_TLS_LDO32:
+    return R_DTPREL;
   case R_ARM_BASE_PREL:
     // B(S) + A - P
     // FIXME: currently B(S) assumed to be .got, this may not hold for all
@@ -131,6 +134,19 @@ RelExpr ARM::getRelExpr(RelType type, const Symbol &s,
   case R_ARM_THM_MOVW_PREL_NC:
   case R_ARM_THM_MOVT_PREL:
     return R_PC;
+  case R_ARM_ALU_PC_G0:
+  case R_ARM_LDR_PC_G0:
+  case R_ARM_THM_ALU_PREL_11_0:
+  case R_ARM_THM_PC8:
+  case R_ARM_THM_PC12:
+    return R_ARM_PCA;
+  case R_ARM_MOVW_BREL_NC:
+  case R_ARM_MOVW_BREL:
+  case R_ARM_MOVT_BREL:
+  case R_ARM_THM_MOVW_BREL_NC:
+  case R_ARM_THM_MOVW_BREL:
+  case R_ARM_THM_MOVT_BREL:
+    return R_ARM_SBREL;
   case R_ARM_NONE:
     return R_NONE;
   case R_ARM_TLS_LE32:
@@ -262,7 +278,8 @@ void ARM::addPltSymbols(InputSection &isec, uint64_t off) const {
 }
 
 bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
-                     uint64_t branchAddr, const Symbol &s, int64_t /*a*/) const {
+                     uint64_t branchAddr, const Symbol &s,
+                     int64_t /*a*/) const {
   // If S is an undefined weak symbol and does not have a PLT entry then it
   // will be resolved as a branch to the next instruction.
   if (s.isUndefWeak() && !s.isInPlt())
@@ -275,8 +292,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
   case R_ARM_PLT32:
   case R_ARM_JUMP24:
     // Source is ARM, all PLT entries are ARM so no interworking required.
-    // Otherwise we need to interwork if Symbol has bit 0 set (Thumb).
-    if (expr == R_PC && ((s.getVA() & 1) == 1))
+    // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb).
+    if (s.isFunc() && expr == R_PC && (s.getVA() & 1))
       return true;
     LLVM_FALLTHROUGH;
   case R_ARM_CALL: {
@@ -286,8 +303,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
   case R_ARM_THM_JUMP19:
   case R_ARM_THM_JUMP24:
     // Source is Thumb, all PLT entries are ARM so interworking is required.
-    // Otherwise we need to interwork if Symbol has bit 0 clear (ARM).
-    if (expr == R_PLT_PC || ((s.getVA() & 1) == 0))
+    // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM).
+    if (expr == R_PLT_PC || (s.isFunc() && (s.getVA() & 1) == 0))
       return true;
     LLVM_FALLTHROUGH;
   case R_ARM_THM_CALL: {
@@ -375,8 +392,82 @@ bool ARM::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
   return distance <= range;
 }
 
-void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
-  switch (type) {
+// Helper to produce message text when LLD detects that a CALL relocation to
+// a non STT_FUNC symbol that may result in incorrect interworking between ARM
+// or Thumb.
+static void stateChangeWarning(uint8_t *loc, RelType relt, const Symbol &s) {
+  assert(!s.isFunc());
+  if (s.isSection()) {
+    // Section symbols must be defined and in a section. Users cannot change
+    // the type. Use the section name as getName() returns an empty string.
+    warn(getErrorLocation(loc) + "branch and link relocation: " +
+         toString(relt) + " to STT_SECTION symbol " +
+         cast<Defined>(s).section->name + " ; interworking not performed");
+  } else {
+    // Warn with hint on how to alter the symbol type.
+    warn(getErrorLocation(loc) + "branch and link relocation: " +
+         toString(relt) + " to non STT_FUNC symbol: " + s.getName() +
+         " interworking not performed; consider using directive '.type " +
+         s.getName() +
+         ", %function' to give symbol type STT_FUNC if"
+         " interworking between ARM and Thumb is required");
+  }
+}
+
+// Utility functions taken from ARMAddressingModes.h, only changes are LLD
+// coding style.
+
+// Rotate a 32-bit unsigned value right by a specified amt of bits.
+static uint32_t rotr32(uint32_t val, uint32_t amt) {
+  assert(amt < 32 && "Invalid rotate amount");
+  return (val >> amt) | (val << ((32 - amt) & 31));
+}
+
+// Rotate a 32-bit unsigned value left by a specified amt of bits.
+static uint32_t rotl32(uint32_t val, uint32_t amt) {
+  assert(amt < 32 && "Invalid rotate amount");
+  return (val << amt) | (val >> ((32 - amt) & 31));
+}
+
+// Try to encode a 32-bit unsigned immediate imm with an immediate shifter
+// operand, this form is an 8-bit immediate rotated right by an even number of
+// bits. We compute the rotate amount to use.  If this immediate value cannot be
+// handled with a single shifter-op, determine a good rotate amount that will
+// take a maximal chunk of bits out of the immediate.
+static uint32_t getSOImmValRotate(uint32_t imm) {
+  // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+  // of zero.
+  if ((imm & ~255U) == 0)
+    return 0;
+
+  // Use CTZ to compute the rotate amount.
+  unsigned tz = llvm::countTrailingZeros(imm);
+
+  // Rotate amount must be even.  Something like 0x200 must be rotated 8 bits,
+  // not 9.
+  unsigned rotAmt = tz & ~1;
+
+  // If we can handle this spread, return it.
+  if ((rotr32(imm, rotAmt) & ~255U) == 0)
+    return (32 - rotAmt) & 31; // HW rotates right, not left.
+
+  // For values like 0xF000000F, we should ignore the low 6 bits, then
+  // retry the hunt.
+  if (imm & 63U) {
+    unsigned tz2 = countTrailingZeros(imm & ~63U);
+    unsigned rotAmt2 = tz2 & ~1;
+    if ((rotr32(imm, rotAmt2) & ~255U) == 0)
+      return (32 - rotAmt2) & 31; // HW rotates right, not left.
+  }
+
+  // Otherwise, we have no way to cover this span of bits with a single
+  // shifter_op immediate.  Return a chunk of bits that will be useful to
+  // handle.
+  return (32 - rotAmt) & 31; // HW rotates right, not left.
+}
+
+void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
+  switch (rel.type) {
   case R_ARM_ABS32:
   case R_ARM_BASE_PREL:
   case R_ARM_GOTOFF32:
@@ -397,40 +488,49 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
     write32le(loc, val);
     break;
   case R_ARM_PREL31:
-    checkInt(loc, val, 31, type);
+    checkInt(loc, val, 31, rel);
     write32le(loc, (read32le(loc) & 0x80000000) | (val & ~0x80000000));
     break;
-  case R_ARM_CALL:
-    // R_ARM_CALL is used for BL and BLX instructions, depending on the
-    // value of bit 0 of Val, we must select a BL or BLX instruction
-    if (val & 1) {
-      // If bit 0 of Val is 1 the target is Thumb, we must select a BLX.
+  case R_ARM_CALL: {
+    // R_ARM_CALL is used for BL and BLX instructions, for symbols of type
+    // STT_FUNC we choose whether to write a BL or BLX depending on the
+    // value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is
+    // not of type STT_FUNC then we must preserve the original instruction.
+    // PLT entries are always ARM state so we know we don't need to interwork.
+    assert(rel.sym); // R_ARM_CALL is always reached via relocate().
+    bool bit0Thumb = val & 1;
+    bool isBlx = (read32le(loc) & 0xfe000000) == 0xfa000000;
+    // lld 10.0 and before always used bit0Thumb when deciding to write a BLX
+    // even when type not STT_FUNC.
+    if (!rel.sym->isFunc() && isBlx != bit0Thumb)
+      stateChangeWarning(loc, rel.type, *rel.sym);
+    if (rel.sym->isFunc() ? bit0Thumb : isBlx) {
       // The BLX encoding is 0xfa:H:imm24 where Val = imm24:H:'1'
-      checkInt(loc, val, 26, type);
+      checkInt(loc, val, 26, rel);
       write32le(loc, 0xfa000000 |                    // opcode
                          ((val & 2) << 23) |         // H
                          ((val >> 2) & 0x00ffffff)); // imm24
       break;
     }
-    if ((read32le(loc) & 0xfe000000) == 0xfa000000)
-      // BLX (always unconditional) instruction to an ARM Target, select an
-      // unconditional BL.
-      write32le(loc, 0xeb000000 | (read32le(loc) & 0x00ffffff));
+    // BLX (always unconditional) instruction to an ARM Target, select an
+    // unconditional BL.
+    write32le(loc, 0xeb000000 | (read32le(loc) & 0x00ffffff));
     // fall through as BL encoding is shared with B
+  }
     LLVM_FALLTHROUGH;
   case R_ARM_JUMP24:
   case R_ARM_PC24:
   case R_ARM_PLT32:
-    checkInt(loc, val, 26, type);
+    checkInt(loc, val, 26, rel);
     write32le(loc, (read32le(loc) & ~0x00ffffff) | ((val >> 2) & 0x00ffffff));
     break;
   case R_ARM_THM_JUMP11:
-    checkInt(loc, val, 12, type);
+    checkInt(loc, val, 12, rel);
     write16le(loc, (read32le(loc) & 0xf800) | ((val >> 1) & 0x07ff));
     break;
   case R_ARM_THM_JUMP19:
     // Encoding T3: Val = S:J2:J1:imm6:imm11:0
-    checkInt(loc, val, 21, type);
+    checkInt(loc, val, 21, rel);
     write16le(loc,
               (read16le(loc) & 0xfbc0) |   // opcode cond
                   ((val >> 10) & 0x0400) | // S
@@ -441,20 +541,32 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
                   ((val >> 5) & 0x2000) | // J1
                   ((val >> 1) & 0x07ff)); // imm11
     break;
-  case R_ARM_THM_CALL:
-    // R_ARM_THM_CALL is used for BL and BLX instructions, depending on the
-    // value of bit 0 of Val, we must select a BL or BLX instruction
-    if ((val & 1) == 0) {
-      // Ensure BLX destination is 4-byte aligned. As BLX instruction may
-      // only be two byte aligned. This must be done before overflow check
+  case R_ARM_THM_CALL: {
+    // R_ARM_THM_CALL is used for BL and BLX instructions, for symbols of type
+    // STT_FUNC we choose whether to write a BL or BLX depending on the
+    // value of bit 0 of Val. With bit 0 == 0 denoting ARM, if the symbol is
+    // not of type STT_FUNC then we must preserve the original instruction.
+    // PLT entries are always ARM state so we know we need to interwork.
+    assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate().
+    bool bit0Thumb = val & 1;
+    bool isBlx = (read16le(loc + 2) & 0x1000) == 0;
+    // lld 10.0 and before always used bit0Thumb when deciding to write a BLX
+    // even when type not STT_FUNC. PLT entries generated by LLD are always ARM.
+    if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == bit0Thumb)
+      stateChangeWarning(loc, rel.type, *rel.sym);
+    if (rel.sym->isFunc() || rel.sym->isInPlt() ? !bit0Thumb : isBlx) {
+      // We are writing a BLX. Ensure BLX destination is 4-byte aligned. As
+      // the BLX instruction may only be two byte aligned. This must be done
+      // before overflow check.
       val = alignTo(val, 4);
+      write16le(loc + 2, read16le(loc + 2) & ~0x1000);
+    } else {
+      write16le(loc + 2, (read16le(loc + 2) & ~0x1000) | 1 << 12);
     }
-    // Bit 12 is 0 for BLX, 1 for BL
-    write16le(loc + 2, (read16le(loc + 2) & ~0x1000) | (val & 1) << 12);
     if (!config->armJ1J2BranchEncoding) {
       // Older Arm architectures do not support R_ARM_THM_JUMP24 and have
       // different encoding rules and range due to J1 and J2 always being 1.
-      checkInt(loc, val, 23, type);
+      checkInt(loc, val, 23, rel);
       write16le(loc,
                 0xf000 |                     // opcode
                     ((val >> 12) & 0x07ff)); // imm11
@@ -464,11 +576,12 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
                     ((val >> 1) & 0x07ff));    // imm11
       break;
     }
+  }
     // Fall through as rest of encoding is the same as B.W
     LLVM_FALLTHROUGH;
   case R_ARM_THM_JUMP24:
     // Encoding B  T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0
-    checkInt(loc, val, 25, type);
+    checkInt(loc, val, 25, rel);
     write16le(loc,
               0xf000 |                     // opcode
                   ((val >> 14) & 0x0400) | // S
@@ -481,16 +594,19 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
     break;
   case R_ARM_MOVW_ABS_NC:
   case R_ARM_MOVW_PREL_NC:
+  case R_ARM_MOVW_BREL_NC:
     write32le(loc, (read32le(loc) & ~0x000f0fff) | ((val & 0xf000) << 4) |
                        (val & 0x0fff));
     break;
   case R_ARM_MOVT_ABS:
   case R_ARM_MOVT_PREL:
+  case R_ARM_MOVT_BREL:
     write32le(loc, (read32le(loc) & ~0x000f0fff) |
                        (((val >> 16) & 0xf000) << 4) | ((val >> 16) & 0xfff));
     break;
   case R_ARM_THM_MOVT_ABS:
   case R_ARM_THM_MOVT_PREL:
+  case R_ARM_THM_MOVT_BREL:
     // Encoding T1: A = imm4:i:imm3:imm8
     write16le(loc,
               0xf2c0 |                     // opcode
@@ -503,6 +619,7 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
     break;
   case R_ARM_THM_MOVW_ABS_NC:
   case R_ARM_THM_MOVW_PREL_NC:
+  case R_ARM_THM_MOVW_BREL_NC:
     // Encoding T3: A = imm4:i:imm3:imm8
     write16le(loc,
               0xf240 |                     // opcode
@@ -513,8 +630,92 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
                   ((val << 4) & 0x7000) |    // imm3
                   (val & 0x00ff));           // imm8
     break;
+  case R_ARM_ALU_PC_G0: {
+    // ADR (literal) add = bit23, sub = bit22
+    // literal is a 12-bit modified immediate, made up of a 4-bit even rotate
+    // right and an 8-bit immediate. The code-sequence here is derived from
+    // ARMAddressingModes.h in llvm/Target/ARM/MCTargetDesc. In our case we
+    // want to give an error if we cannot encode the constant.
+    uint32_t opcode = 0x00800000;
+    if (val >> 63) {
+      opcode = 0x00400000;
+      val = ~val + 1;
+    }
+    if ((val & ~255U) != 0) {
+      uint32_t rotAmt = getSOImmValRotate(val);
+      // Error if we cannot encode this with a single shift
+      if (rotr32(~255U, rotAmt) & val)
+        error(getErrorLocation(loc) + "unencodeable immediate " +
+              Twine(val).str() + " for relocation " + toString(rel.type));
+      val = rotl32(val, rotAmt) | ((rotAmt >> 1) << 8);
+    }
+    write32le(loc, (read32le(loc) & 0xff0ff000) | opcode | val);
+    break;
+  }
+  case R_ARM_LDR_PC_G0: {
+    // R_ARM_LDR_PC_G0 is S + A - P, we have ((S + A) | T) - P, if S is a
+    // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear
+    // bottom bit to recover S + A - P.
+    if (rel.sym->isFunc())
+      val &= ~0x1;
+    // LDR (literal) u = bit23
+    int64_t imm = val;
+    uint32_t u = 0x00800000;
+    if (imm < 0) {
+      imm = -imm;
+      u = 0;
+    }
+    checkUInt(loc, imm, 12, rel);
+    write32le(loc, (read32le(loc) & 0xff7ff000) | u | imm);
+    break;
+  }
+  case R_ARM_THM_ALU_PREL_11_0: {
+    // ADR encoding T2 (sub), T3 (add) i:imm3:imm8
+    int64_t imm = val;
+    uint16_t sub = 0;
+    if (imm < 0) {
+      imm = -imm;
+      sub = 0x00a0;
+    }
+    checkUInt(loc, imm, 12, rel);
+    write16le(loc, (read16le(loc) & 0xfb0f) | sub | (imm & 0x800) >> 1);
+    write16le(loc + 2,
+              (read16le(loc + 2) & 0x8f00) | (imm & 0x700) << 4 | (imm & 0xff));
+    break;
+  }
+  case R_ARM_THM_PC8:
+    // ADR and LDR literal encoding T1 positive offset only imm8:00
+    // R_ARM_THM_PC8 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a
+    // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear
+    // bottom bit to recover S + A - Pa.
+    if (rel.sym->isFunc())
+      val &= ~0x1;
+    checkUInt(loc, val, 10, rel);
+    checkAlignment(loc, val, 4, rel);
+    write16le(loc, (read16le(loc) & 0xff00) | (val & 0x3fc) >> 2);
+    break;
+  case R_ARM_THM_PC12: {
+    // LDR (literal) encoding T2, add = (U == '1') imm12
+    // imm12 is unsigned
+    // R_ARM_THM_PC12 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a
+    // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear
+    // bottom bit to recover S + A - Pa.
+    if (rel.sym->isFunc())
+      val &= ~0x1;
+    int64_t imm12 = val;
+    uint16_t u = 0x0080;
+    if (imm12 < 0) {
+      imm12 = -imm12;
+      u = 0;
+    }
+    checkUInt(loc, imm12, 12, rel);
+    write16le(loc, read16le(loc) | u);
+    write16le(loc + 2, (read16le(loc + 2) & 0xf000) | imm12);
+    break;
+  }
   default:
-    error(getErrorLocation(loc) + "unrecognized relocation " + toString(type));
+    error(getErrorLocation(loc) + "unrecognized relocation " +
+          toString(rel.type));
   }
 }
 
@@ -582,14 +783,18 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
   case R_ARM_MOVW_ABS_NC:
   case R_ARM_MOVT_ABS:
   case R_ARM_MOVW_PREL_NC:
-  case R_ARM_MOVT_PREL: {
+  case R_ARM_MOVT_PREL:
+  case R_ARM_MOVW_BREL_NC:
+  case R_ARM_MOVT_BREL: {
     uint64_t val = read32le(buf) & 0x000f0fff;
     return SignExtend64<16>(((val & 0x000f0000) >> 4) | (val & 0x00fff));
   }
   case R_ARM_THM_MOVW_ABS_NC:
   case R_ARM_THM_MOVT_ABS:
   case R_ARM_THM_MOVW_PREL_NC:
-  case R_ARM_THM_MOVT_PREL: {
+  case R_ARM_THM_MOVT_PREL:
+  case R_ARM_THM_MOVW_BREL_NC:
+  case R_ARM_THM_MOVT_BREL: {
     // Encoding T3: A = imm4:i:imm3:imm8
     uint16_t hi = read16le(buf);
     uint16_t lo = read16le(buf + 2);
@@ -598,13 +803,50 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
                             ((lo & 0x7000) >> 4) |  // imm3
                             (lo & 0x00ff));         // imm8
   }
+  case R_ARM_ALU_PC_G0: {
+    // 12-bit immediate is a modified immediate made up of a 4-bit even
+    // right rotation and 8-bit constant. After the rotation the value
+    // is zero-extended. When bit 23 is set the instruction is an add, when
+    // bit 22 is set it is a sub.
+    uint32_t instr = read32le(buf);
+    uint32_t val = rotr32(instr & 0xff, ((instr & 0xf00) >> 8) * 2);
+    return (instr & 0x00400000) ? -val : val;
+  }
+  case R_ARM_LDR_PC_G0: {
+    // ADR (literal) add = bit23, sub = bit22
+    // LDR (literal) u = bit23 unsigned imm12
+    bool u = read32le(buf) & 0x00800000;
+    uint32_t imm12 = read32le(buf) & 0xfff;
+    return u ? imm12 : -imm12;
+  }
+  case R_ARM_THM_ALU_PREL_11_0: {
+    // Thumb2 ADR, which is an alias for a sub or add instruction with an
+    // unsigned immediate.
+    // ADR encoding T2 (sub), T3 (add) i:imm3:imm8
+    uint16_t hi = read16le(buf);
+    uint16_t lo = read16le(buf + 2);
+    uint64_t imm = (hi & 0x0400) << 1 | // i
+                   (lo & 0x7000) >> 4 | // imm3
+                   (lo & 0x00ff);       // imm8
+    // For sub, addend is negative, add is positive.
+    return (hi & 0x00f0) ? -imm : imm;
+  }
+  case R_ARM_THM_PC8:
+    // ADR and LDR (literal) encoding T1
+    // From ELF for the ARM Architecture the initial signed addend is formed
+    // from an unsigned field using expression (((imm8:00 + 4) & 0x3ff) – 4)
+    // this trick permits the PC bias of -4 to be encoded using imm8 = 0xff
+    return ((((read16le(buf) & 0xff) << 2) + 4) & 0x3ff) - 4;
+  case R_ARM_THM_PC12: {
+    // LDR (literal) encoding T2, add = (U == '1') imm12
+    bool u = read16le(buf) & 0x0080;
+    uint64_t imm12 = read16le(buf + 2) & 0x0fff;
+    return u ? imm12 : -imm12;
+  }
   }
 }
 
-TargetInfo *getARMTargetInfo() {
+TargetInfo *elf::getARMTargetInfo() {
   static ARM target;
   return &target;
 }
-
-} // namespace elf
-} // namespace lld