Skip to content

Commit 1817642

Browse files
committed
[BOLT] Add support for GOTPCRELX relocations
The linker can convert instructions with GOTPCRELX relocations into a form that uses absolute addressing with an immediate. BOLT needs to recognize such conversions and symbolize the immediates. Reviewed By: rafauler Differential Revision: https://reviews.llvm.org/D126747
1 parent ffe86e3 commit 1817642

File tree

5 files changed

+148
-68
lines changed

5 files changed

+148
-68
lines changed

bolt/include/bolt/Core/BinaryFunction.h

+16-2
Original file line numberDiff line numberDiff line change
@@ -842,6 +842,20 @@ class BinaryFunction {
842842
return (RI == Relocations.end()) ? nullptr : &RI->second;
843843
}
844844

845+
/// Return the first relocation in the function that starts at an address in
846+
/// the [StartOffset, EndOffset) range. Return nullptr if no such relocation
847+
/// exists.
848+
const Relocation *getRelocationInRange(uint64_t StartOffset,
849+
uint64_t EndOffset) const {
850+
assert(CurrentState == State::Empty &&
851+
"Relocations unavailable in the current function state.");
852+
auto RI = Relocations.lower_bound(StartOffset);
853+
if (RI != Relocations.end() && RI->first < EndOffset)
854+
return &RI->second;
855+
856+
return nullptr;
857+
}
858+
845859
/// Returns the raw binary encoding of this function.
846860
ErrorOr<ArrayRef<uint8_t>> getData() const;
847861

@@ -1314,11 +1328,11 @@ class BinaryFunction {
13141328
case ELF::R_X86_64_PC8:
13151329
case ELF::R_X86_64_PC32:
13161330
case ELF::R_X86_64_PC64:
1331+
case ELF::R_X86_64_GOTPCRELX:
1332+
case ELF::R_X86_64_REX_GOTPCRELX:
13171333
Relocations[Offset] = Relocation{Offset, Symbol, RelType, Addend, Value};
13181334
return;
13191335
case ELF::R_X86_64_PLT32:
1320-
case ELF::R_X86_64_GOTPCRELX:
1321-
case ELF::R_X86_64_REX_GOTPCRELX:
13221336
case ELF::R_X86_64_GOTPCREL:
13231337
case ELF::R_X86_64_TPOFF32:
13241338
case ELF::R_X86_64_GOTTPOFF:

bolt/include/bolt/Core/Relocation.h

+3
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ struct Relocation {
7777
/// Return true if relocation type implies the creation of a GOT entry
7878
static bool isGOT(uint64_t Type);
7979

80+
/// Return true if relocation type is an x86-64 GOTPCRELX type, which allows the linker to modify the instruction.
81+
static bool isX86GOTPCRELX(uint64_t Type);
82+
8083
/// Return true if relocation type is NONE
8184
static bool isNone(uint64_t Type);
8285

bolt/lib/Core/Relocation.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,12 @@ bool Relocation::isGOT(uint64_t Type) {
562562
return isGOTX86(Type);
563563
}
564564

565+
bool Relocation::isX86GOTPCRELX(uint64_t Type) {
566+
if (Arch != Triple::x86_64)
567+
return false;
568+
return Type == ELF::R_X86_64_GOTPCRELX || Type == ELF::R_X86_64_REX_GOTPCRELX;
569+
}
570+
565571
/// Report whether \p Type equals the relocation type returned by getNone().
bool Relocation::isNone(uint64_t Type) {
  const bool MatchesNone = (Type == getNone());
  return MatchesNone;
}
566572

567573
bool Relocation::isRelative(uint64_t Type) {

bolt/lib/Target/X86/X86MCSymbolizer.cpp

+70-36
Original file line numberDiff line numberDiff line change
@@ -47,54 +47,88 @@ bool X86MCSymbolizer::tryAddingSymbolicOperand(
4747
Inst.addOperand(MCOperand::createExpr(Expr));
4848
};
4949

50-
// Check for relocations against the operand.
51-
const uint64_t InstOffset = InstAddress - Function.getAddress();
52-
if (const Relocation *Relocation =
53-
Function.getRelocationAt(InstOffset + ImmOffset)) {
54-
uint64_t SymbolValue = Relocation->Value - Relocation->Addend;
55-
if (Relocation->isPCRelative())
56-
SymbolValue += InstAddress + ImmOffset;
50+
// Check if the operand being added is a displacement part of a compound
51+
// memory operand that uses PC-relative addressing. If it is, try to symbolize
52+
// it without relocations. Return true on success, false otherwise.
53+
auto processPCRelOperandNoRel = [&]() {
54+
const int MemOp = BC.MIB->getMemoryOperandNo(Inst);
55+
if (MemOp == -1)
56+
return false;
57+
58+
const unsigned DispOp = MemOp + X86::AddrDisp;
59+
if (Inst.getNumOperands() != DispOp)
60+
return false;
5761

58-
// Process reference to the symbol.
59-
BC.handleAddressRef(SymbolValue, Function, Relocation->isPCRelative());
62+
const MCOperand &Base = Inst.getOperand(MemOp + X86::AddrBaseReg);
63+
if (Base.getReg() != BC.MRI->getProgramCounter())
64+
return false;
6065

61-
uint64_t Addend = Relocation->Addend;
62-
// Real addend for pc-relative targets is adjusted with a delta from
63-
// the relocation placement to the next instruction.
64-
if (Relocation->isPCRelative())
65-
Addend += InstOffset + InstSize - Relocation->Offset;
66+
const MCOperand &Scale = Inst.getOperand(MemOp + X86::AddrScaleAmt);
67+
const MCOperand &Index = Inst.getOperand(MemOp + X86::AddrIndexReg);
68+
if (Scale.getImm() != 0 && Index.getReg() != MCRegister::NoRegister)
69+
return false;
70+
71+
const MCSymbol *TargetSymbol;
72+
uint64_t TargetOffset;
73+
std::tie(TargetSymbol, TargetOffset) =
74+
BC.handleAddressRef(Value, Function, /*IsPCRel=*/true);
75+
76+
addOperand(TargetSymbol, TargetOffset);
77+
78+
return true;
79+
};
6680

67-
addOperand(Relocation->Symbol, Addend);
81+
// Check for GOTPCRELX relocations first. Because these relocations allow the
82+
// linker to modify the instruction, we have to check the offset range
83+
// corresponding to the instruction, not the offset of the operand.
84+
// Note that if there is a GOTPCRELX relocation against the instruction, there
85+
// will be no other relocation in this range, since GOTPCRELX applies only to
86+
// certain instruction types.
87+
const uint64_t InstOffset = InstAddress - Function.getAddress();
88+
const Relocation *Relocation =
89+
Function.getRelocationInRange(InstOffset, InstOffset + InstSize);
90+
if (Relocation && Relocation::isX86GOTPCRELX(Relocation->Type)) {
91+
// If the operand is PC-relative, convert it without using the relocation
92+
// information. For GOTPCRELX, it is safe to use the absolute address
93+
// instead of extracting the addend from the relocation, as non-standard
94+
// forms will be rejected by the linker conversion process and the operand
95+
// will always reference GOT which we don't rewrite.
96+
if (processPCRelOperandNoRel())
97+
return true;
98+
99+
// The linker converted the PC-relative address to an absolute one.
100+
// Symbolize this address.
101+
BC.handleAddressRef(Value, Function, /*IsPCRel=*/false);
102+
const BinaryData *Target = BC.getBinaryDataAtAddress(Value);
103+
assert(Target &&
104+
"BinaryData should exist at converted GOTPCRELX destination");
105+
106+
addOperand(Target->getSymbol(), /*Addend=*/0);
68107

69108
return true;
70109
}
71110

72-
// Check if the operand being added is a displacement part of a compound
73-
// memory operand that uses PC-relative addressing. If it is, try to symbolize
74-
// it without relocations.
75-
const int MemOp = BC.MIB->getMemoryOperandNo(Inst);
76-
if (MemOp == -1)
77-
return false;
111+
// Check for relocations against the operand.
112+
if (!Relocation || Relocation->Offset != InstOffset + ImmOffset)
113+
Relocation = Function.getRelocationAt(InstOffset + ImmOffset);
78114

79-
const unsigned DispOp = MemOp + X86::AddrDisp;
80-
if (Inst.getNumOperands() != DispOp)
81-
return false;
115+
if (!Relocation)
116+
return processPCRelOperandNoRel();
82117

83-
const MCOperand &Base = Inst.getOperand(MemOp + X86::AddrBaseReg);
84-
if (Base.getReg() != BC.MRI->getProgramCounter())
85-
return false;
118+
uint64_t SymbolValue = Relocation->Value - Relocation->Addend;
119+
if (Relocation->isPCRelative())
120+
SymbolValue += InstAddress + ImmOffset;
86121

87-
const MCOperand &Scale = Inst.getOperand(MemOp + X86::AddrScaleAmt);
88-
const MCOperand &Index = Inst.getOperand(MemOp + X86::AddrIndexReg);
89-
if (Scale.getImm() != 0 && Index.getReg() != MCRegister::NoRegister)
90-
return false;
122+
// Process reference to the symbol.
123+
BC.handleAddressRef(SymbolValue, Function, Relocation->isPCRelative());
91124

92-
const MCSymbol *TargetSymbol;
93-
uint64_t TargetOffset;
94-
std::tie(TargetSymbol, TargetOffset) =
95-
BC.handleAddressRef(Value, Function, /*IsPCRel*/ true);
125+
uint64_t Addend = Relocation->Addend;
126+
// Real addend for pc-relative targets is adjusted with a delta from
127+
// the relocation placement to the next instruction.
128+
if (Relocation->isPCRelative())
129+
Addend += InstOffset + InstSize - Relocation->Offset;
96130

97-
addOperand(TargetSymbol, TargetOffset);
131+
addOperand(Relocation->Symbol, Addend);
98132

99133
return true;
100134
}

bolt/test/X86/gotpcrelx.s

+53-30
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,69 @@
1-
# This reproduces a bug with misinterpreting the gotpcrelx reloc
2-
3-
# Here we use llvm-mc -relax-relocations to produce R_X86_64_REX_GOTPCRELX
4-
# and ld.lld to consume it and optimize it, transforming a CMP <mem, reg>
5-
# into CMP <imm, reg>.
6-
# Then we check that BOLT updates correctly the imm operand that references
7-
# a function address. Currently XFAIL as we do not support it.
8-
91
# REQUIRES: system-linux
10-
# XFAIL: *
2+
3+
## Check that BOLT correctly handles different types of instructions with
4+
## R_X86_64_GOTPCRELX or R_X86_64_REX_GOTPCRELX relocations and different
5+
## kinds of handling of the relocation by the linker (no relaxation, pic, and
6+
## non-pic).
117

128
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux \
139
# RUN: -relax-relocations %s -o %t.o
14-
# RUN: llvm-strip --strip-unneeded %t.o
1510
# RUN: ld.lld %t.o -o %t.exe -q
16-
# RUN: llvm-readobj -r %t.exe | FileCheck --check-prefix=READOBJ %s
17-
# RUN: llvm-bolt %t.exe -relocs -o %t.out -lite=0
11+
# RUN: ld.lld %t.o -o %t.pie.exe -q -pie
12+
# RUN: ld.lld %t.o -o %t.no-relax.exe -q --no-relax
13+
# RUN: llvm-bolt %t.exe -relocs -o %t.out -print-cfg -print-only=_start \
14+
# RUN: |& FileCheck --check-prefix=BOLT %s
15+
# RUN: llvm-bolt %t.pie.exe -o /dev/null -print-cfg -print-only=_start \
16+
# RUN: |& FileCheck --check-prefix=PIE-BOLT %s
17+
# RUN: llvm-bolt %t.no-relax.exe -o /dev/null -print-cfg -print-only=_start \
18+
# RUN: |& FileCheck --check-prefix=NO-RELAX-BOLT %s
1819
# RUN: llvm-objdump -d --no-show-raw-insn --print-imm-hex \
1920
# RUN: %t.out | FileCheck --check-prefix=DISASM %s
2021

21-
# Check that R_X86_64_REX_GOTPCRELX is present in the input binary
22-
# READOBJ: 0x[[#%X,]] R_X86_64_REX_GOTPCRELX foo 0x[[#%X,]]
23-
24-
# DISASM: Disassembly of section .text:
25-
# DISASM-EMPTY:
26-
# DISASM-NEXT: <_start>:
27-
# DISASM-NEXT: leaq 0x[[#%x,ADDR:]], %rax
28-
# DISASM-NEXT: cmpq 0x[[#ADDR]], %rax
29-
3022
.text
3123
.globl _start
3224
.type _start, %function
3325
_start:
3426
.cfi_startproc
35-
leaq foo, %rax
36-
cmpq foo@GOTPCREL(%rip), %rax
37-
je b
38-
c:
39-
mov $1, %rdi
40-
callq foo
41-
b:
42-
xorq %rdi, %rdi
43-
callq foo
27+
# DISASM: Disassembly of section .text:
28+
# DISASM-EMPTY:
29+
# DISASM-NEXT: <_start>:
30+
31+
call *foo@GOTPCREL(%rip)
32+
# NO-RELAX-BOLT: callq *{{.*}}(%rip)
33+
# BOLT: callq foo
34+
# PIE-BOLT: callq foo
35+
# DISASM-NEXT: callq 0x[[#%x,ADDR:]]
36+
37+
movq foo@GOTPCREL(%rip), %rdi
38+
# NO-RELAX-BOLT-NEXT: movq {{.*}}(%rip), %rdi
39+
# BOLT-NEXT: leaq foo(%rip), %rdi
40+
# PIE-BOLT-NEXT: leaq foo(%rip), %rdi
41+
# DISASM-NEXT: leaq {{.*}}(%rip), %rdi # 0x[[#ADDR]]
42+
43+
movl foo@GOTPCREL+4(%rip), %edi
44+
# NO-RELAX-BOLT-NEXT: movl {{.*}}(%rip), %edi
45+
# BOLT-NEXT: movl {{.*}}(%rip), %edi
46+
# PIE-BOLT-NEXT: movl {{.*}}(%rip), %edi
47+
# DISASM-NEXT: movl {{.*}}(%rip), %edi
48+
49+
test %rdi, foo@GOTPCREL(%rip)
50+
# NO-RELAX-BOLT-NEXT: testq %rdi, DATA{{.*}}(%rip)
51+
# BOLT-NEXT: testq $foo, %rdi
52+
# PIE-BOLT-NEXT: testq %rdi, DATA{{.*}}(%rip)
53+
# DISASM-NEXT: testq $0x[[#ADDR]], %rdi
54+
55+
cmpq foo@GOTPCREL(%rip), %rax
56+
# NO-RELAX-BOLT-NEXT: cmpq DATA{{.*}}(%rip), %rax
57+
# BOLT-NEXT: cmpq $foo, %rax
58+
# PIE-BOLT-NEXT: cmpq DATA{{.*}}(%rip), %rax
59+
# DISASM-NEXT: cmpq $0x[[#ADDR]], %rax
60+
61+
jmp *foo@GOTPCREL(%rip)
62+
# NO-RELAX-BOLT-NEXT: jmpq *DATA{{.*}}(%rip)
63+
# BOLT-NEXT: jmp foo
64+
# PIE-BOLT-NEXT: jmp foo
65+
# DISASM-NEXT: jmp 0x[[#ADDR]]
66+
4467
ret
4568
.cfi_endproc
4669
.size _start, .-_start

0 commit comments

Comments
 (0)