Diffstat (limited to 'lld/ELF/Arch/X86_64.cpp')
 -rw-r--r--  lld/ELF/Arch/X86_64.cpp | 386
 1 file changed, 355 insertions(+), 31 deletions(-)
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 74b72eb91293..24711ec210a4 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "InputFiles.h"
+#include "OutputSections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
@@ -18,9 +19,8 @@ using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::ELF;
-
-namespace lld {
-namespace elf {
+using namespace lld;
+using namespace lld::elf;
namespace {
class X86_64 : public TargetInfo {
@@ -35,20 +35,44 @@ public:
void writePltHeader(uint8_t *buf) const override;
void writePlt(uint8_t *buf, const Symbol &sym,
uint64_t pltEntryAddr) const override;
- void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override;
+ void relocate(uint8_t *loc, const Relocation &rel,
+ uint64_t val) const override;
+ void applyJumpInstrMod(uint8_t *loc, JumpModType type,
+ unsigned size) const override;
RelExpr adjustRelaxExpr(RelType type, const uint8_t *data,
RelExpr expr) const override;
- void relaxGot(uint8_t *loc, RelType type, uint64_t val) const override;
- void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override;
- void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override;
- void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override;
- void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override;
+ void relaxGot(uint8_t *loc, const Relocation &rel,
+ uint64_t val) const override;
+ void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
+ uint64_t val) const override;
+ void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
+ uint64_t val) const override;
+ void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
+ uint64_t val) const override;
+ void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
+ uint64_t val) const override;
bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
uint8_t stOther) const override;
+ bool deleteFallThruJmpInsn(InputSection &is, InputFile *file,
+ InputSection *nextIS) const override;
};
} // namespace
+// This is a vector of NOP instructions of sizes from 1 to 9 bytes. The
+// appropriately sized instructions are used to fill the gaps between sections
+// which are executed during fall through.
+static const std::vector<std::vector<uint8_t>> nopInstructions = {
+ {0x90},
+ {0x66, 0x90},
+ {0x0f, 0x1f, 0x00},
+ {0x0f, 0x1f, 0x40, 0x00},
+ {0x0f, 0x1f, 0x44, 0x00, 0x00},
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
+ {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
+ {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}};
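+// For example, a three-byte gap between two sections is filled with the
+// single instruction 0f 1f 00, which disassembles to nopl (%rax).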
+
X86_64::X86_64() {
copyRel = R_X86_64_COPY;
gotRel = R_X86_64_GLOB_DAT;
@@ -65,6 +89,7 @@ X86_64::X86_64() {
pltEntrySize = 16;
ipltEntrySize = 16;
trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
+ nopInstrs = nopInstructions;
// Align to the large page size (known as a superpage or huge page).
// FreeBSD automatically promotes large, superpage-aligned allocations.
@@ -73,6 +98,216 @@ X86_64::X86_64() {
int X86_64::getTlsGdRelaxSkip(RelType type) const { return 2; }
+// Opcodes for the different X86_64 jmp instructions.
+enum JmpInsnOpcode : uint32_t {
+ J_JMP_32,
+ J_JNE_32,
+ J_JE_32,
+ J_JG_32,
+ J_JGE_32,
+ J_JB_32,
+ J_JBE_32,
+ J_JL_32,
+ J_JLE_32,
+ J_JA_32,
+ J_JAE_32,
+ J_UNKNOWN,
+};
+
+// Given the first (optional) and second byte of the insn's opcode, this
+// returns the corresponding enum value.
+static JmpInsnOpcode getJmpInsnType(const uint8_t *first,
+ const uint8_t *second) {
+ if (*second == 0xe9)
+ return J_JMP_32;
+
+ if (first == nullptr)
+ return J_UNKNOWN;
+
+ if (*first == 0x0f) {
+ switch (*second) {
+ case 0x84:
+ return J_JE_32;
+ case 0x85:
+ return J_JNE_32;
+ case 0x8f:
+ return J_JG_32;
+ case 0x8d:
+ return J_JGE_32;
+ case 0x82:
+ return J_JB_32;
+ case 0x86:
+ return J_JBE_32;
+ case 0x8c:
+ return J_JL_32;
+ case 0x8e:
+ return J_JLE_32;
+ case 0x87:
+ return J_JA_32;
+ case 0x83:
+ return J_JAE_32;
+ }
+ }
+ return J_UNKNOWN;
+}
+
+// Returns the index of the relocation for input section is at the given
+// offset, or the size of the relocation vector if no such relocation is
+// found.
+static unsigned getRelocationWithOffset(const InputSection &is,
+ uint64_t offset) {
+ unsigned size = is.relocations.size();
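+ // Walk the relocations backwards; the "i + 1 > 0" test stops the loop after
+ // index 0 and also handles an empty vector, where size - 1 wraps around.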
+ for (unsigned i = size - 1; i + 1 > 0; --i) {
+ if (is.relocations[i].offset == offset && is.relocations[i].expr != R_NONE)
+ return i;
+ }
+ return size;
+}
+
+// Returns true if r corresponds to a relocation used for a jump instruction.
+// TODO: Once special relocations for relaxable jump instructions are available,
+// this should be modified to use those relocations.
+static bool isRelocationForJmpInsn(Relocation &r) {
+ return r.type == R_X86_64_PLT32 || r.type == R_X86_64_PC32 ||
+ r.type == R_X86_64_PC8;
+}
+
+// Returns true if relocation r points to the first instruction in the
+// next section.
+// TODO: Delete this once psABI reserves a new relocation type for fall thru
+// jumps.
+static bool isFallThruRelocation(InputSection &is, InputFile *file,
+ InputSection *nextIS, Relocation &r) {
+ if (!isRelocationForJmpInsn(r))
+ return false;
+
+ uint64_t addrLoc = is.getOutputSection()->addr + is.outSecOff + r.offset;
+ uint64_t targetOffset = InputSectionBase::getRelocTargetVA(
+ file, r.type, r.addend, addrLoc, *r.sym, r.expr);
+
+ // If this jmp is a fall thru, the target offset is the beginning of the
+ // next section.
+ uint64_t nextSectionOffset =
+ nextIS->getOutputSection()->addr + nextIS->outSecOff;
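+ // For a PLT32/PC32 jmp the addend is -4 (the displacement is relative to
+ // the end of the instruction), so addrLoc + 4 + targetOffset is the address
+ // the jump transfers control to.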
+ return (addrLoc + 4 + targetOffset) == nextSectionOffset;
+}
+
+// Return the jmp instruction opcode that is the inverse of the given
+// opcode. For example, JE inverted is JNE.
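+// The inverted branch must be taken exactly when the original is not, so JG
+// pairs with JLE rather than JL.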
+static JmpInsnOpcode invertJmpOpcode(const JmpInsnOpcode opcode) {
+ switch (opcode) {
+ case J_JE_32:
+ return J_JNE_32;
+ case J_JNE_32:
+ return J_JE_32;
+ case J_JG_32:
+ return J_JLE_32;
+ case J_JGE_32:
+ return J_JL_32;
+ case J_JB_32:
+ return J_JAE_32;
+ case J_JBE_32:
+ return J_JA_32;
+ case J_JL_32:
+ return J_JGE_32;
+ case J_JLE_32:
+ return J_JG_32;
+ case J_JA_32:
+ return J_JBE_32;
+ case J_JAE_32:
+ return J_JB_32;
+ default:
+ return J_UNKNOWN;
+ }
+}
+
+// Deletes the direct jump instruction in an input section when it jumps to
+// the following section, as it is not required. If there are two consecutive
+// jump instructions, it checks whether they can be flipped so that one can be
+// deleted.
+// For example:
+// .section .text
+// a.BB.foo:
+// ...
+// 10: jne aa.BB.foo
+// 16: jmp bar
+// aa.BB.foo:
+// ...
+//
+// can be converted to:
+// a.BB.foo:
+// ...
+// 10: je bar #jne flipped to je and the jmp is deleted.
+// aa.BB.foo:
+// ...
+bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file,
+ InputSection *nextIS) const {
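+ // A direct jmp is the one-byte opcode 0xe9 followed by a 32-bit PC-relative
+ // displacement.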
+ const unsigned sizeOfDirectJmpInsn = 5;
+
+ if (nextIS == nullptr)
+ return false;
+
+ if (is.getSize() < sizeOfDirectJmpInsn)
+ return false;
+
+ // If this jmp insn can be removed, it is the last insn and the
+ // relocation is 4 bytes before the end.
+ unsigned rIndex = getRelocationWithOffset(is, is.getSize() - 4);
+ if (rIndex == is.relocations.size())
+ return false;
+
+ Relocation &r = is.relocations[rIndex];
+
+ // Check if the relocation corresponds to a direct jmp.
+ const uint8_t *secContents = is.data().data();
+ // If it is not a direct jmp instruction, there is nothing to do here.
+ if (*(secContents + r.offset - 1) != 0xe9)
+ return false;
+
+ if (isFallThruRelocation(is, file, nextIS, r)) {
+ // This is a fall thru and can be deleted.
+ r.expr = R_NONE;
+ r.offset = 0;
+ is.drop_back(sizeOfDirectJmpInsn);
+ is.nopFiller = true;
+ return true;
+ }
+
+ // Now, check if flip and delete is possible.
+ const unsigned sizeOfJmpCCInsn = 6;
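+ // A 32-bit jmpCC is the two-byte opcode 0f 8x followed by a 32-bit
+ // PC-relative displacement.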
+ // To flip, there must be at least one jmpCC and one direct jmp.
+ if (is.getSize() < sizeOfDirectJmpInsn + sizeOfJmpCCInsn)
+ return false;
+
+ unsigned rbIndex =
+ getRelocationWithOffset(is, (is.getSize() - sizeOfDirectJmpInsn - 4));
+ if (rbIndex == is.relocations.size())
+ return false;
+
+ Relocation &rB = is.relocations[rbIndex];
+
+ const uint8_t *jmpInsnB = secContents + rB.offset - 1;
+ JmpInsnOpcode jmpOpcodeB = getJmpInsnType(jmpInsnB - 1, jmpInsnB);
+ if (jmpOpcodeB == J_UNKNOWN)
+ return false;
+
+ if (!isFallThruRelocation(is, file, nextIS, rB))
+ return false;
+
+ // If jmpCC jumps to the fall thru block, the branch can be flipped and the
+ // jmp can be deleted.
+ JmpInsnOpcode jInvert = invertJmpOpcode(jmpOpcodeB);
+ if (jInvert == J_UNKNOWN)
+ return false;
+ is.jumpInstrMods.push_back({jInvert, (rB.offset - 1), 4});
+ // Move r's values to rB except the offset.
+ rB = {r.expr, r.type, rB.offset, r.addend, r.sym};
+ // Cancel r.
+ r.expr = R_NONE;
+ r.offset = 0;
+ is.drop_back(sizeOfDirectJmpInsn);
+ is.nopFiller = true;
+ return true;
+}
+
RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
const uint8_t *loc) const {
if (type == R_X86_64_GOTTPOFF)
@@ -177,8 +412,9 @@ RelType X86_64::getDynRel(RelType type) const {
return R_X86_64_NONE;
}
-void X86_64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const {
- if (type == R_X86_64_TLSGD) {
+void X86_64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
+ uint64_t val) const {
+ if (rel.type == R_X86_64_TLSGD) {
// Convert
// .byte 0x66
// leaq x@tlsgd(%rip), %rdi
@@ -201,7 +437,7 @@ void X86_64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const {
// lea x@tlsgd(%rip), %rax
// call *(%rax)
// to the following two instructions.
- assert(type == R_X86_64_GOTPC32_TLSDESC);
+ assert(rel.type == R_X86_64_GOTPC32_TLSDESC);
if (memcmp(loc - 3, "\x48\x8d\x05", 3)) {
error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
"in callq *x@tlsdesc(%rip), %rax");
@@ -217,8 +453,9 @@ void X86_64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const {
}
}
-void X86_64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const {
- if (type == R_X86_64_TLSGD) {
+void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
+ uint64_t val) const {
+ if (rel.type == R_X86_64_TLSGD) {
// Convert
// .byte 0x66
// leaq x@tlsgd(%rip), %rdi
@@ -241,7 +478,7 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const {
// lea x@tlsgd(%rip), %rax
// call *(%rax)
// to the following two instructions.
- assert(type == R_X86_64_GOTPC32_TLSDESC);
+ assert(rel.type == R_X86_64_GOTPC32_TLSDESC);
if (memcmp(loc - 3, "\x48\x8d\x05", 3)) {
error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
"in callq *x@tlsdesc(%rip), %rax");
@@ -258,7 +495,8 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const {
// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
// R_X86_64_TPOFF32 so that it does not use GOT.
-void X86_64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const {
+void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &,
+ uint64_t val) const {
uint8_t *inst = loc - 3;
uint8_t reg = loc[-1] >> 3;
uint8_t *regSlot = loc - 1;
@@ -299,12 +537,13 @@ void X86_64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const {
write32le(loc, val + 4);
}
-void X86_64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const {
- if (type == R_X86_64_DTPOFF64) {
+void X86_64::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
+ uint64_t val) const {
+ if (rel.type == R_X86_64_DTPOFF64) {
write64le(loc, val);
return;
}
- if (type == R_X86_64_DTPOFF32) {
+ if (rel.type == R_X86_64_DTPOFF32) {
write32le(loc, val);
return;
}
@@ -347,26 +586,114 @@ void X86_64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const {
"expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD");
}
-void X86_64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
+// A JumpInstrMod at a specific offset indicates that the jump instruction
+// opcode at that offset must be modified. This is specifically used to relax
+// jump instructions with basic block sections. This function looks at the
+// JumpInstrMod and effects the change.
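+// For example, flipping jne to je rewrites the two opcode bytes 0f 85 that
+// precede the 4-byte displacement to 0f 84.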
+void X86_64::applyJumpInstrMod(uint8_t *loc, JumpModType type,
+ unsigned size) const {
switch (type) {
+ case J_JMP_32:
+ if (size == 4)
+ *loc = 0xe9;
+ else
+ *loc = 0xeb;
+ break;
+ case J_JE_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x84;
+ } else
+ *loc = 0x74;
+ break;
+ case J_JNE_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x85;
+ } else
+ *loc = 0x75;
+ break;
+ case J_JG_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x8f;
+ } else
+ *loc = 0x7f;
+ break;
+ case J_JGE_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x8d;
+ } else
+ *loc = 0x7d;
+ break;
+ case J_JB_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x82;
+ } else
+ *loc = 0x72;
+ break;
+ case J_JBE_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x86;
+ } else
+ *loc = 0x76;
+ break;
+ case J_JL_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x8c;
+ } else
+ *loc = 0x7c;
+ break;
+ case J_JLE_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x8e;
+ } else
+ *loc = 0x7e;
+ break;
+ case J_JA_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x87;
+ } else
+ *loc = 0x77;
+ break;
+ case J_JAE_32:
+ if (size == 4) {
+ loc[-1] = 0x0f;
+ *loc = 0x83;
+ } else
+ *loc = 0x73;
+ break;
+ case J_UNKNOWN:
+ llvm_unreachable("Unknown Jump Relocation");
+ }
+}
+
+void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
+ switch (rel.type) {
case R_X86_64_8:
- checkIntUInt(loc, val, 8, type);
+ checkIntUInt(loc, val, 8, rel);
*loc = val;
break;
case R_X86_64_PC8:
- checkInt(loc, val, 8, type);
+ checkInt(loc, val, 8, rel);
*loc = val;
break;
case R_X86_64_16:
- checkIntUInt(loc, val, 16, type);
+ checkIntUInt(loc, val, 16, rel);
write16le(loc, val);
break;
case R_X86_64_PC16:
- checkInt(loc, val, 16, type);
+ checkInt(loc, val, 16, rel);
write16le(loc, val);
break;
case R_X86_64_32:
- checkUInt(loc, val, 32, type);
+ checkUInt(loc, val, 32, rel);
write32le(loc, val);
break;
case R_X86_64_32S:
@@ -384,7 +711,7 @@ void X86_64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
case R_X86_64_TLSLD:
case R_X86_64_DTPOFF32:
case R_X86_64_SIZE32:
- checkInt(loc, val, 32, type);
+ checkInt(loc, val, 32, rel);
write32le(loc, val);
break;
case R_X86_64_64:
@@ -495,7 +822,7 @@ static void relaxGotNoPic(uint8_t *loc, uint64_t val, uint8_t op,
write32le(loc, val);
}
-void X86_64::relaxGot(uint8_t *loc, RelType type, uint64_t val) const {
+void X86_64::relaxGot(uint8_t *loc, const Relocation &, uint64_t val) const {
const uint8_t op = loc[-2];
const uint8_t modRm = loc[-1];
@@ -758,7 +1085,4 @@ static TargetInfo *getTargetInfo() {
return &t;
}
-TargetInfo *getX86_64TargetInfo() { return getTargetInfo(); }
-
-} // namespace elf
-} // namespace lld
+TargetInfo *elf::getX86_64TargetInfo() { return getTargetInfo(); }