diff options
Diffstat (limited to 'lld/ELF/Arch/ARM.cpp')
-rw-r--r-- | lld/ELF/Arch/ARM.cpp | 326 |
1 files changed, 284 insertions, 42 deletions
diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index de1023346aa5..fd90557cc4f6 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -18,9 +18,8 @@ using namespace llvm; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class ARM final : public TargetInfo { @@ -43,7 +42,8 @@ public: int64_t a) const override; uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; }; } // namespace @@ -64,6 +64,7 @@ ARM::ARM() { ipltEntrySize = 16; trapInstr = {0xd4, 0xd4, 0xd4, 0xd4}; needsThunks = true; + defaultMaxPageSize = 65536; } uint32_t ARM::calcEFlags() const { @@ -120,6 +121,8 @@ RelExpr ARM::getRelExpr(RelType type, const Symbol &s, return R_TLSGD_PC; case R_ARM_TLS_LDM32: return R_TLSLD_PC; + case R_ARM_TLS_LDO32: + return R_DTPREL; case R_ARM_BASE_PREL: // B(S) + A - P // FIXME: currently B(S) assumed to be .got, this may not hold for all @@ -131,6 +134,19 @@ RelExpr ARM::getRelExpr(RelType type, const Symbol &s, case R_ARM_THM_MOVW_PREL_NC: case R_ARM_THM_MOVT_PREL: return R_PC; + case R_ARM_ALU_PC_G0: + case R_ARM_LDR_PC_G0: + case R_ARM_THM_ALU_PREL_11_0: + case R_ARM_THM_PC8: + case R_ARM_THM_PC12: + return R_ARM_PCA; + case R_ARM_MOVW_BREL_NC: + case R_ARM_MOVW_BREL: + case R_ARM_MOVT_BREL: + case R_ARM_THM_MOVW_BREL_NC: + case R_ARM_THM_MOVW_BREL: + case R_ARM_THM_MOVT_BREL: + return R_ARM_SBREL; case R_ARM_NONE: return R_NONE; case R_ARM_TLS_LE32: @@ -262,7 +278,8 @@ void ARM::addPltSymbols(InputSection &isec, uint64_t off) const { } bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, - uint64_t branchAddr, const Symbol &s, int64_t /*a*/) const { + uint64_t branchAddr, const Symbol 
&s, + int64_t /*a*/) const { // If S is an undefined weak symbol and does not have a PLT entry then it // will be resolved as a branch to the next instruction. if (s.isUndefWeak() && !s.isInPlt()) @@ -275,8 +292,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, case R_ARM_PLT32: case R_ARM_JUMP24: // Source is ARM, all PLT entries are ARM so no interworking required. - // Otherwise we need to interwork if Symbol has bit 0 set (Thumb). - if (expr == R_PC && ((s.getVA() & 1) == 1)) + // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb). + if (s.isFunc() && expr == R_PC && (s.getVA() & 1)) return true; LLVM_FALLTHROUGH; case R_ARM_CALL: { @@ -286,8 +303,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: // Source is Thumb, all PLT entries are ARM so interworking is required. - // Otherwise we need to interwork if Symbol has bit 0 clear (ARM). - if (expr == R_PLT_PC || ((s.getVA() & 1) == 0)) + // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM). + if (expr == R_PLT_PC || (s.isFunc() && (s.getVA() & 1) == 0)) return true; LLVM_FALLTHROUGH; case R_ARM_THM_CALL: { @@ -375,8 +392,82 @@ bool ARM::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { return distance <= range; } -void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { - switch (type) { +// Helper to produce message text when LLD detects that a CALL relocation to +// a non STT_FUNC symbol that may result in incorrect interworking between ARM +// or Thumb. +static void stateChangeWarning(uint8_t *loc, RelType relt, const Symbol &s) { + assert(!s.isFunc()); + if (s.isSection()) { + // Section symbols must be defined and in a section. Users cannot change + // the type. Use the section name as getName() returns an empty string. 
+ warn(getErrorLocation(loc) + "branch and link relocation: " + + toString(relt) + " to STT_SECTION symbol " + + cast<Defined>(s).section->name + " ; interworking not performed"); + } else { + // Warn with hint on how to alter the symbol type. + warn(getErrorLocation(loc) + "branch and link relocation: " + + toString(relt) + " to non STT_FUNC symbol: " + s.getName() + + " interworking not performed; consider using directive '.type " + + s.getName() + + ", %function' to give symbol type STT_FUNC if" + " interworking between ARM and Thumb is required"); + } +} + +// Utility functions taken from ARMAddressingModes.h, only changes are LLD +// coding style. + +// Rotate a 32-bit unsigned value right by a specified amt of bits. +static uint32_t rotr32(uint32_t val, uint32_t amt) { + assert(amt < 32 && "Invalid rotate amount"); + return (val >> amt) | (val << ((32 - amt) & 31)); +} + +// Rotate a 32-bit unsigned value left by a specified amt of bits. +static uint32_t rotl32(uint32_t val, uint32_t amt) { + assert(amt < 32 && "Invalid rotate amount"); + return (val << amt) | (val >> ((32 - amt) & 31)); +} + +// Try to encode a 32-bit unsigned immediate imm with an immediate shifter +// operand, this form is an 8-bit immediate rotated right by an even number of +// bits. We compute the rotate amount to use. If this immediate value cannot be +// handled with a single shifter-op, determine a good rotate amount that will +// take a maximal chunk of bits out of the immediate. +static uint32_t getSOImmValRotate(uint32_t imm) { + // 8-bit (or less) immediates are trivially shifter_operands with a rotate + // of zero. + if ((imm & ~255U) == 0) + return 0; + + // Use CTZ to compute the rotate amount. + unsigned tz = llvm::countTrailingZeros(imm); + + // Rotate amount must be even. Something like 0x200 must be rotated 8 bits, + // not 9. + unsigned rotAmt = tz & ~1; + + // If we can handle this spread, return it. 
+ if ((rotr32(imm, rotAmt) & ~255U) == 0) + return (32 - rotAmt) & 31; // HW rotates right, not left. + + // For values like 0xF000000F, we should ignore the low 6 bits, then + // retry the hunt. + if (imm & 63U) { + unsigned tz2 = countTrailingZeros(imm & ~63U); + unsigned rotAmt2 = tz2 & ~1; + if ((rotr32(imm, rotAmt2) & ~255U) == 0) + return (32 - rotAmt2) & 31; // HW rotates right, not left. + } + + // Otherwise, we have no way to cover this span of bits with a single + // shifter_op immediate. Return a chunk of bits that will be useful to + // handle. + return (32 - rotAmt) & 31; // HW rotates right, not left. +} + +void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { case R_ARM_ABS32: case R_ARM_BASE_PREL: case R_ARM_GOTOFF32: @@ -397,40 +488,49 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { write32le(loc, val); break; case R_ARM_PREL31: - checkInt(loc, val, 31, type); + checkInt(loc, val, 31, rel); write32le(loc, (read32le(loc) & 0x80000000) | (val & ~0x80000000)); break; - case R_ARM_CALL: - // R_ARM_CALL is used for BL and BLX instructions, depending on the - // value of bit 0 of Val, we must select a BL or BLX instruction - if (val & 1) { - // If bit 0 of Val is 1 the target is Thumb, we must select a BLX. + case R_ARM_CALL: { + // R_ARM_CALL is used for BL and BLX instructions, for symbols of type + // STT_FUNC we choose whether to write a BL or BLX depending on the + // value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is + // not of type STT_FUNC then we must preserve the original instruction. + // PLT entries are always ARM state so we know we don't need to interwork. + assert(rel.sym); // R_ARM_CALL is always reached via relocate(). + bool bit0Thumb = val & 1; + bool isBlx = (read32le(loc) & 0xfe000000) == 0xfa000000; + // lld 10.0 and before always used bit0Thumb when deciding to write a BLX + // even when type not STT_FUNC. 
+ if (!rel.sym->isFunc() && isBlx != bit0Thumb) + stateChangeWarning(loc, rel.type, *rel.sym); + if (rel.sym->isFunc() ? bit0Thumb : isBlx) { // The BLX encoding is 0xfa:H:imm24 where Val = imm24:H:'1' - checkInt(loc, val, 26, type); + checkInt(loc, val, 26, rel); write32le(loc, 0xfa000000 | // opcode ((val & 2) << 23) | // H ((val >> 2) & 0x00ffffff)); // imm24 break; } - if ((read32le(loc) & 0xfe000000) == 0xfa000000) - // BLX (always unconditional) instruction to an ARM Target, select an - // unconditional BL. - write32le(loc, 0xeb000000 | (read32le(loc) & 0x00ffffff)); + // BLX (always unconditional) instruction to an ARM Target, select an + // unconditional BL. + write32le(loc, 0xeb000000 | (read32le(loc) & 0x00ffffff)); // fall through as BL encoding is shared with B + } LLVM_FALLTHROUGH; case R_ARM_JUMP24: case R_ARM_PC24: case R_ARM_PLT32: - checkInt(loc, val, 26, type); + checkInt(loc, val, 26, rel); write32le(loc, (read32le(loc) & ~0x00ffffff) | ((val >> 2) & 0x00ffffff)); break; case R_ARM_THM_JUMP11: - checkInt(loc, val, 12, type); + checkInt(loc, val, 12, rel); write16le(loc, (read32le(loc) & 0xf800) | ((val >> 1) & 0x07ff)); break; case R_ARM_THM_JUMP19: // Encoding T3: Val = S:J2:J1:imm6:imm11:0 - checkInt(loc, val, 21, type); + checkInt(loc, val, 21, rel); write16le(loc, (read16le(loc) & 0xfbc0) | // opcode cond ((val >> 10) & 0x0400) | // S @@ -441,20 +541,32 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { ((val >> 5) & 0x2000) | // J1 ((val >> 1) & 0x07ff)); // imm11 break; - case R_ARM_THM_CALL: - // R_ARM_THM_CALL is used for BL and BLX instructions, depending on the - // value of bit 0 of Val, we must select a BL or BLX instruction - if ((val & 1) == 0) { - // Ensure BLX destination is 4-byte aligned. As BLX instruction may - // only be two byte aligned. 
This must be done before overflow check + case R_ARM_THM_CALL: { + // R_ARM_THM_CALL is used for BL and BLX instructions, for symbols of type + // STT_FUNC we choose whether to write a BL or BLX depending on the + // value of bit 0 of Val. With bit 0 == 0 denoting ARM, if the symbol is + // not of type STT_FUNC then we must preserve the original instruction. + // PLT entries are always ARM state so we know we need to interwork. + assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate(). + bool bit0Thumb = val & 1; + bool isBlx = (read16le(loc + 2) & 0x1000) == 0; + // lld 10.0 and before always used bit0Thumb when deciding to write a BLX + // even when type not STT_FUNC. PLT entries generated by LLD are always ARM. + if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == bit0Thumb) + stateChangeWarning(loc, rel.type, *rel.sym); + if (rel.sym->isFunc() || rel.sym->isInPlt() ? !bit0Thumb : isBlx) { + // We are writing a BLX. Ensure BLX destination is 4-byte aligned. As + // the BLX instruction may only be two byte aligned. This must be done + // before overflow check. val = alignTo(val, 4); + write16le(loc + 2, read16le(loc + 2) & ~0x1000); + } else { + write16le(loc + 2, (read16le(loc + 2) & ~0x1000) | 1 << 12); } - // Bit 12 is 0 for BLX, 1 for BL - write16le(loc + 2, (read16le(loc + 2) & ~0x1000) | (val & 1) << 12); if (!config->armJ1J2BranchEncoding) { // Older Arm architectures do not support R_ARM_THM_JUMP24 and have // different encoding rules and range due to J1 and J2 always being 1. 
- checkInt(loc, val, 23, type); + checkInt(loc, val, 23, rel); write16le(loc, 0xf000 | // opcode ((val >> 12) & 0x07ff)); // imm11 @@ -464,11 +576,12 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { ((val >> 1) & 0x07ff)); // imm11 break; } + } // Fall through as rest of encoding is the same as B.W LLVM_FALLTHROUGH; case R_ARM_THM_JUMP24: // Encoding B T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0 - checkInt(loc, val, 25, type); + checkInt(loc, val, 25, rel); write16le(loc, 0xf000 | // opcode ((val >> 14) & 0x0400) | // S @@ -481,16 +594,19 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_ARM_MOVW_ABS_NC: case R_ARM_MOVW_PREL_NC: + case R_ARM_MOVW_BREL_NC: write32le(loc, (read32le(loc) & ~0x000f0fff) | ((val & 0xf000) << 4) | (val & 0x0fff)); break; case R_ARM_MOVT_ABS: case R_ARM_MOVT_PREL: + case R_ARM_MOVT_BREL: write32le(loc, (read32le(loc) & ~0x000f0fff) | (((val >> 16) & 0xf000) << 4) | ((val >> 16) & 0xfff)); break; case R_ARM_THM_MOVT_ABS: case R_ARM_THM_MOVT_PREL: + case R_ARM_THM_MOVT_BREL: // Encoding T1: A = imm4:i:imm3:imm8 write16le(loc, 0xf2c0 | // opcode @@ -503,6 +619,7 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { break; case R_ARM_THM_MOVW_ABS_NC: case R_ARM_THM_MOVW_PREL_NC: + case R_ARM_THM_MOVW_BREL_NC: // Encoding T3: A = imm4:i:imm3:imm8 write16le(loc, 0xf240 | // opcode @@ -513,8 +630,92 @@ void ARM::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { ((val << 4) & 0x7000) | // imm3 (val & 0x00ff)); // imm8 break; + case R_ARM_ALU_PC_G0: { + // ADR (literal) add = bit23, sub = bit22 + // literal is a 12-bit modified immediate, made up of a 4-bit even rotate + // right and an 8-bit immediate. The code-sequence here is derived from + // ARMAddressingModes.h in llvm/Target/ARM/MCTargetDesc. In our case we + // want to give an error if we cannot encode the constant. 
+ uint32_t opcode = 0x00800000; + if (val >> 63) { + opcode = 0x00400000; + val = ~val + 1; + } + if ((val & ~255U) != 0) { + uint32_t rotAmt = getSOImmValRotate(val); + // Error if we cannot encode this with a single shift + if (rotr32(~255U, rotAmt) & val) + error(getErrorLocation(loc) + "unencodeable immediate " + + Twine(val).str() + " for relocation " + toString(rel.type)); + val = rotl32(val, rotAmt) | ((rotAmt >> 1) << 8); + } + write32le(loc, (read32le(loc) & 0xff0ff000) | opcode | val); + break; + } + case R_ARM_LDR_PC_G0: { + // R_ARM_LDR_PC_G0 is S + A - P, we have ((S + A) | T) - P, if S is a + // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear + // bottom bit to recover S + A - P. + if (rel.sym->isFunc()) + val &= ~0x1; + // LDR (literal) u = bit23 + int64_t imm = val; + uint32_t u = 0x00800000; + if (imm < 0) { + imm = -imm; + u = 0; + } + checkUInt(loc, imm, 12, rel); + write32le(loc, (read32le(loc) & 0xff7ff000) | u | imm); + break; + } + case R_ARM_THM_ALU_PREL_11_0: { + // ADR encoding T2 (sub), T3 (add) i:imm3:imm8 + int64_t imm = val; + uint16_t sub = 0; + if (imm < 0) { + imm = -imm; + sub = 0x00a0; + } + checkUInt(loc, imm, 12, rel); + write16le(loc, (read16le(loc) & 0xfb0f) | sub | (imm & 0x800) >> 1); + write16le(loc + 2, + (read16le(loc + 2) & 0x8f00) | (imm & 0x700) << 4 | (imm & 0xff)); + break; + } + case R_ARM_THM_PC8: + // ADR and LDR literal encoding T1 positive offset only imm8:00 + // R_ARM_THM_PC8 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a + // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear + // bottom bit to recover S + A - Pa. 
+ if (rel.sym->isFunc()) + val &= ~0x1; + checkUInt(loc, val, 10, rel); + checkAlignment(loc, val, 4, rel); + write16le(loc, (read16le(loc) & 0xff00) | (val & 0x3fc) >> 2); + break; + case R_ARM_THM_PC12: { + // LDR (literal) encoding T2, add = (U == '1') imm12 + // imm12 is unsigned + // R_ARM_THM_PC12 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a + // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear + // bottom bit to recover S + A - Pa. + if (rel.sym->isFunc()) + val &= ~0x1; + int64_t imm12 = val; + uint16_t u = 0x0080; + if (imm12 < 0) { + imm12 = -imm12; + u = 0; + } + checkUInt(loc, imm12, 12, rel); + write16le(loc, read16le(loc) | u); + write16le(loc + 2, (read16le(loc + 2) & 0xf000) | imm12); + break; + } default: - error(getErrorLocation(loc) + "unrecognized relocation " + toString(type)); + error(getErrorLocation(loc) + "unrecognized relocation " + + toString(rel.type)); } } @@ -582,14 +783,18 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const { case R_ARM_MOVW_ABS_NC: case R_ARM_MOVT_ABS: case R_ARM_MOVW_PREL_NC: - case R_ARM_MOVT_PREL: { + case R_ARM_MOVT_PREL: + case R_ARM_MOVW_BREL_NC: + case R_ARM_MOVT_BREL: { uint64_t val = read32le(buf) & 0x000f0fff; return SignExtend64<16>(((val & 0x000f0000) >> 4) | (val & 0x00fff)); } case R_ARM_THM_MOVW_ABS_NC: case R_ARM_THM_MOVT_ABS: case R_ARM_THM_MOVW_PREL_NC: - case R_ARM_THM_MOVT_PREL: { + case R_ARM_THM_MOVT_PREL: + case R_ARM_THM_MOVW_BREL_NC: + case R_ARM_THM_MOVT_BREL: { // Encoding T3: A = imm4:i:imm3:imm8 uint16_t hi = read16le(buf); uint16_t lo = read16le(buf + 2); @@ -598,13 +803,50 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const { ((lo & 0x7000) >> 4) | // imm3 (lo & 0x00ff)); // imm8 } + case R_ARM_ALU_PC_G0: { + // 12-bit immediate is a modified immediate made up of a 4-bit even + // right rotation and 8-bit constant. After the rotation the value + // is zero-extended. 
When bit 23 is set the instruction is an add, when + // bit 22 is set it is a sub. + uint32_t instr = read32le(buf); + uint32_t val = rotr32(instr & 0xff, ((instr & 0xf00) >> 8) * 2); + return (instr & 0x00400000) ? -val : val; + } + case R_ARM_LDR_PC_G0: { + // ADR (literal) add = bit23, sub = bit22 + // LDR (literal) u = bit23 unsigned imm12 + bool u = read32le(buf) & 0x00800000; + uint32_t imm12 = read32le(buf) & 0xfff; + return u ? imm12 : -imm12; + } + case R_ARM_THM_ALU_PREL_11_0: { + // Thumb2 ADR, which is an alias for a sub or add instruction with an + // unsigned immediate. + // ADR encoding T2 (sub), T3 (add) i:imm3:imm8 + uint16_t hi = read16le(buf); + uint16_t lo = read16le(buf + 2); + uint64_t imm = (hi & 0x0400) << 1 | // i + (lo & 0x7000) >> 4 | // imm3 + (lo & 0x00ff); // imm8 + // For sub, addend is negative, add is positive. + return (hi & 0x00f0) ? -imm : imm; + } + case R_ARM_THM_PC8: + // ADR and LDR (literal) encoding T1 + // From ELF for the ARM Architecture the initial signed addend is formed + // from an unsigned field using expression (((imm8:00 + 4) & 0x3ff) – 4) + // this trick permits the PC bias of -4 to be encoded using imm8 = 0xff + return ((((read16le(buf) & 0xff) << 2) + 4) & 0x3ff) - 4; + case R_ARM_THM_PC12: { + // LDR (literal) encoding T2, add = (U == '1') imm12 + bool u = read16le(buf) & 0x0080; + uint64_t imm12 = read16le(buf + 2) & 0x0fff; + return u ? imm12 : -imm12; + } } } -TargetInfo *getARMTargetInfo() { +TargetInfo *elf::getARMTargetInfo() { static ARM target; return &target; } - -} // namespace elf -} // namespace lld |