diff options
Diffstat (limited to 'lld/ELF/Arch/X86_64.cpp')
-rw-r--r-- | lld/ELF/Arch/X86_64.cpp | 386 |
1 files changed, 355 insertions, 31 deletions
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index 74b72eb91293..24711ec210a4 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "InputFiles.h" +#include "OutputSections.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" @@ -18,9 +19,8 @@ using namespace llvm; using namespace llvm::object; using namespace llvm::support::endian; using namespace llvm::ELF; - -namespace lld { -namespace elf { +using namespace lld; +using namespace lld::elf; namespace { class X86_64 : public TargetInfo { @@ -35,20 +35,44 @@ public: void writePltHeader(uint8_t *buf) const override; void writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const override; - void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void applyJumpInstrMod(uint8_t *loc, JumpModType type, + unsigned size) const override; RelExpr adjustRelaxExpr(RelType type, const uint8_t *data, RelExpr expr) const override; - void relaxGot(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override; - void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override; + void relaxGot(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; + void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, uint8_t stOther) const override; + bool deleteFallThruJmpInsn(InputSection &is, InputFile *file, + InputSection *nextIS) const override; }; } // namespace +// This is vector of NOP instructions of sizes from 1 to 8 bytes. The +// appropriately sized instructions are used to fill the gaps between sections +// which are executed during fall through. +static const std::vector<std::vector<uint8_t>> nopInstructions = { + {0x90}, + {0x66, 0x90}, + {0x0f, 0x1f, 0x00}, + {0x0f, 0x1f, 0x40, 0x00}, + {0x0f, 0x1f, 0x44, 0x00, 0x00}, + {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, + {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, + {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}}; + X86_64::X86_64() { copyRel = R_X86_64_COPY; gotRel = R_X86_64_GLOB_DAT; @@ -65,6 +89,7 @@ X86_64::X86_64() { pltEntrySize = 16; ipltEntrySize = 16; trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3 + nopInstrs = nopInstructions; // Align to the large page size (known as a superpage or huge page). // FreeBSD automatically promotes large, superpage-aligned allocations. @@ -73,6 +98,216 @@ X86_64::X86_64() { int X86_64::getTlsGdRelaxSkip(RelType type) const { return 2; } +// Opcodes for the different X86_64 jmp instructions. +enum JmpInsnOpcode : uint32_t { + J_JMP_32, + J_JNE_32, + J_JE_32, + J_JG_32, + J_JGE_32, + J_JB_32, + J_JBE_32, + J_JL_32, + J_JLE_32, + J_JA_32, + J_JAE_32, + J_UNKNOWN, +}; + +// Given the first (optional) and second byte of the insn's opcode, this +// returns the corresponding enum value. +static JmpInsnOpcode getJmpInsnType(const uint8_t *first, + const uint8_t *second) { + if (*second == 0xe9) + return J_JMP_32; + + if (first == nullptr) + return J_UNKNOWN; + + if (*first == 0x0f) { + switch (*second) { + case 0x84: + return J_JE_32; + case 0x85: + return J_JNE_32; + case 0x8f: + return J_JG_32; + case 0x8d: + return J_JGE_32; + case 0x82: + return J_JB_32; + case 0x86: + return J_JBE_32; + case 0x8c: + return J_JL_32; + case 0x8e: + return J_JLE_32; + case 0x87: + return J_JA_32; + case 0x83: + return J_JAE_32; + } + } + return J_UNKNOWN; +} + +// Return the relocation index for input section IS with a specific Offset. +// Returns the maximum size of the vector if no such relocation is found. +static unsigned getRelocationWithOffset(const InputSection &is, + uint64_t offset) { + unsigned size = is.relocations.size(); + for (unsigned i = size - 1; i + 1 > 0; --i) { + if (is.relocations[i].offset == offset && is.relocations[i].expr != R_NONE) + return i; + } + return size; +} + +// Returns true if R corresponds to a relocation used for a jump instruction. +// TODO: Once special relocations for relaxable jump instructions are available, +// this should be modified to use those relocations. +static bool isRelocationForJmpInsn(Relocation &R) { + return R.type == R_X86_64_PLT32 || R.type == R_X86_64_PC32 || + R.type == R_X86_64_PC8; +} + +// Return true if Relocation R points to the first instruction in the +// next section. +// TODO: Delete this once psABI reserves a new relocation type for fall thru +// jumps. +static bool isFallThruRelocation(InputSection &is, InputFile *file, + InputSection *nextIS, Relocation &r) { + if (!isRelocationForJmpInsn(r)) + return false; + + uint64_t addrLoc = is.getOutputSection()->addr + is.outSecOff + r.offset; + uint64_t targetOffset = InputSectionBase::getRelocTargetVA( + file, r.type, r.addend, addrLoc, *r.sym, r.expr); + + // If this jmp is a fall thru, the target offset is the beginning of the + // next section. + uint64_t nextSectionOffset = + nextIS->getOutputSection()->addr + nextIS->outSecOff; + return (addrLoc + 4 + targetOffset) == nextSectionOffset; +} + +// Return the jmp instruction opcode that is the inverse of the given +// opcode. For example, JE inverted is JNE. +static JmpInsnOpcode invertJmpOpcode(const JmpInsnOpcode opcode) { + switch (opcode) { + case J_JE_32: + return J_JNE_32; + case J_JNE_32: + return J_JE_32; + case J_JG_32: + return J_JLE_32; + case J_JGE_32: + return J_JL_32; + case J_JB_32: + return J_JAE_32; + case J_JBE_32: + return J_JA_32; + case J_JL_32: + return J_JGE_32; + case J_JLE_32: + return J_JG_32; + case J_JA_32: + return J_JBE_32; + case J_JAE_32: + return J_JB_32; + default: + return J_UNKNOWN; + } +} + +// Deletes direct jump instruction in input sections that jumps to the +// following section as it is not required. If there are two consecutive jump +// instructions, it checks if they can be flipped and one can be deleted. +// For example: +// .section .text +// a.BB.foo: +// ... +// 10: jne aa.BB.foo +// 16: jmp bar +// aa.BB.foo: +// ... +// +// can be converted to: +// a.BB.foo: +// ... +// 10: je bar #jne flipped to je and the jmp is deleted. +// aa.BB.foo: +// ... +bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file, + InputSection *nextIS) const { + const unsigned sizeOfDirectJmpInsn = 5; + + if (nextIS == nullptr) + return false; + + if (is.getSize() < sizeOfDirectJmpInsn) + return false; + + // If this jmp insn can be removed, it is the last insn and the + // relocation is 4 bytes before the end. + unsigned rIndex = getRelocationWithOffset(is, is.getSize() - 4); + if (rIndex == is.relocations.size()) + return false; + + Relocation &r = is.relocations[rIndex]; + + // Check if the relocation corresponds to a direct jmp. + const uint8_t *secContents = is.data().data(); + // If it is not a direct jmp instruction, there is nothing to do here. + if (*(secContents + r.offset - 1) != 0xe9) + return false; + + if (isFallThruRelocation(is, file, nextIS, r)) { + // This is a fall thru and can be deleted. + r.expr = R_NONE; + r.offset = 0; + is.drop_back(sizeOfDirectJmpInsn); + is.nopFiller = true; + return true; + } + + // Now, check if flip and delete is possible. + const unsigned sizeOfJmpCCInsn = 6; + // To flip, there must be atleast one JmpCC and one direct jmp. + if (is.getSize() < sizeOfDirectJmpInsn + sizeOfJmpCCInsn) + return 0; + + unsigned rbIndex = + getRelocationWithOffset(is, (is.getSize() - sizeOfDirectJmpInsn - 4)); + if (rbIndex == is.relocations.size()) + return 0; + + Relocation &rB = is.relocations[rbIndex]; + + const uint8_t *jmpInsnB = secContents + rB.offset - 1; + JmpInsnOpcode jmpOpcodeB = getJmpInsnType(jmpInsnB - 1, jmpInsnB); + if (jmpOpcodeB == J_UNKNOWN) + return false; + + if (!isFallThruRelocation(is, file, nextIS, rB)) + return false; + + // jmpCC jumps to the fall thru block, the branch can be flipped and the + // jmp can be deleted. + JmpInsnOpcode jInvert = invertJmpOpcode(jmpOpcodeB); + if (jInvert == J_UNKNOWN) + return false; + is.jumpInstrMods.push_back({jInvert, (rB.offset - 1), 4}); + // Move R's values to rB except the offset. + rB = {r.expr, r.type, rB.offset, r.addend, r.sym}; + // Cancel R + r.expr = R_NONE; + r.offset = 0; + is.drop_back(sizeOfDirectJmpInsn); + is.nopFiller = true; + return true; +} + RelExpr X86_64::getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const { if (type == R_X86_64_GOTTPOFF) @@ -177,8 +412,9 @@ RelType X86_64::getDynRel(RelType type) const { return R_X86_64_NONE; } -void X86_64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { - if (type == R_X86_64_TLSGD) { +void X86_64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + if (rel.type == R_X86_64_TLSGD) { // Convert // .byte 0x66 // leaq x@tlsgd(%rip), %rdi @@ -201,7 +437,7 @@ void X86_64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { // lea x@tlsgd(%rip), %rax // call *(%rax) // to the following two instructions. - assert(type == R_X86_64_GOTPC32_TLSDESC); + assert(rel.type == R_X86_64_GOTPC32_TLSDESC); if (memcmp(loc - 3, "\x48\x8d\x05", 3)) { error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used " "in callq *x@tlsdesc(%rip), %rax"); @@ -217,8 +453,9 @@ void X86_64::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const { } } -void X86_64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { - if (type == R_X86_64_TLSGD) { +void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + if (rel.type == R_X86_64_TLSGD) { // Convert // .byte 0x66 // leaq x@tlsgd(%rip), %rdi @@ -241,7 +478,7 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { // lea x@tlsgd(%rip), %rax // call *(%rax) // to the following two instructions. - assert(type == R_X86_64_GOTPC32_TLSDESC); + assert(rel.type == R_X86_64_GOTPC32_TLSDESC); if (memcmp(loc - 3, "\x48\x8d\x05", 3)) { error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used " "in callq *x@tlsdesc(%rip), %rax"); @@ -258,7 +495,8 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const { // In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to // R_X86_64_TPOFF32 so that it does not use GOT. -void X86_64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { +void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &, + uint64_t val) const { uint8_t *inst = loc - 3; uint8_t reg = loc[-1] >> 3; uint8_t *regSlot = loc - 1; @@ -299,12 +537,13 @@ void X86_64::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const { write32le(loc, val + 4); } -void X86_64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { - if (type == R_X86_64_DTPOFF64) { +void X86_64::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + if (rel.type == R_X86_64_DTPOFF64) { write64le(loc, val); return; } - if (type == R_X86_64_DTPOFF32) { + if (rel.type == R_X86_64_DTPOFF32) { write32le(loc, val); return; } @@ -347,26 +586,114 @@ void X86_64::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const { "expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD"); } -void X86_64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { +// A JumpInstrMod at a specific offset indicates that the jump instruction +// opcode at that offset must be modified. This is specifically used to relax +// jump instructions with basic block sections. This function looks at the +// JumpMod and effects the change. +void X86_64::applyJumpInstrMod(uint8_t *loc, JumpModType type, + unsigned size) const { switch (type) { + case J_JMP_32: + if (size == 4) + *loc = 0xe9; + else + *loc = 0xeb; + break; + case J_JE_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x84; + } else + *loc = 0x74; + break; + case J_JNE_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x85; + } else + *loc = 0x75; + break; + case J_JG_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x8f; + } else + *loc = 0x7f; + break; + case J_JGE_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x8d; + } else + *loc = 0x7d; + break; + case J_JB_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x82; + } else + *loc = 0x72; + break; + case J_JBE_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x86; + } else + *loc = 0x76; + break; + case J_JL_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x8c; + } else + *loc = 0x7c; + break; + case J_JLE_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x8e; + } else + *loc = 0x7e; + break; + case J_JA_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x87; + } else + *loc = 0x77; + break; + case J_JAE_32: + if (size == 4) { + loc[-1] = 0x0f; + *loc = 0x83; + } else + *loc = 0x73; + break; + case J_UNKNOWN: + llvm_unreachable("Unknown Jump Relocation"); + } +} + +void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { + switch (rel.type) { case R_X86_64_8: - checkIntUInt(loc, val, 8, type); + checkIntUInt(loc, val, 8, rel); *loc = val; break; case R_X86_64_PC8: - checkInt(loc, val, 8, type); + checkInt(loc, val, 8, rel); *loc = val; break; case R_X86_64_16: - checkIntUInt(loc, val, 16, type); + checkIntUInt(loc, val, 16, rel); write16le(loc, val); break; case R_X86_64_PC16: - checkInt(loc, val, 16, type); + checkInt(loc, val, 16, rel); write16le(loc, val); break; case R_X86_64_32: - checkUInt(loc, val, 32, type); + checkUInt(loc, val, 32, rel); write32le(loc, val); break; case R_X86_64_32S: @@ -384,7 +711,7 @@ void X86_64::relocateOne(uint8_t *loc, RelType type, uint64_t val) const { case R_X86_64_TLSLD: case R_X86_64_DTPOFF32: case R_X86_64_SIZE32: - checkInt(loc, val, 32, type); + checkInt(loc, val, 32, rel); write32le(loc, val); break; case R_X86_64_64: @@ -495,7 +822,7 @@ static void relaxGotNoPic(uint8_t *loc, uint64_t val, uint8_t op, write32le(loc, val); } -void X86_64::relaxGot(uint8_t *loc, RelType type, uint64_t val) const { +void X86_64::relaxGot(uint8_t *loc, const Relocation &, uint64_t val) const { const uint8_t op = loc[-2]; const uint8_t modRm = loc[-1]; @@ -758,7 +1085,4 @@ static TargetInfo *getTargetInfo() { return &t; } -TargetInfo *getX86_64TargetInfo() { return getTargetInfo(); } - -} // namespace elf -} // namespace lld +TargetInfo *elf::getX86_64TargetInfo() { return getTargetInfo(); } |