Skip to content

Commit 5b3d19b

Browse files
Xu Kuohai authored and Daniel Borkmann committed
bpf, arm64: Adjust the offset of str/ldr(immediate) to positive number
The BPF STX/LDX instruction uses offset relative to the FP to address stack space. Since the BPF_FP locates at the top of the frame, the offset is usually a negative number. However, arm64 str/ldr immediate instruction requires that offset be a positive number. Therefore, this patch tries to convert the offsets. The method is to find the negative offset furthest from the FP firstly. Then add it to the FP, calculate a bottom position, called FPB, and then adjust the offsets in other STR/LDX instructions relative to FPB. FPB is saved using the callee-saved register x27 of arm64 which is not used yet. Before adjusting the offset, the patch checks every instruction to ensure that the FP does not change in run-time. If the FP may change, no offset is adjusted. For example, for the following bpftrace command: bpftrace -e 'kprobe:do_sys_open { printf("opening: %s\n", str(arg1)); }' Without this patch, jited code(fragment): 0: bti c 4: stp x29, x30, [sp, #-16]! 8: mov x29, sp c: stp x19, x20, [sp, #-16]! 10: stp x21, x22, [sp, #-16]! 14: stp x25, x26, [sp, #-16]! 
18: mov x25, sp 1c: mov x26, #0x0 // #0 20: bti j 24: sub sp, sp, #0x90 28: add x19, x0, #0x0 2c: mov x0, #0x0 // #0 30: mov x10, #0xffffffffffffff78 // #-136 34: str x0, [x25, x10] 38: mov x10, #0xffffffffffffff80 // #-128 3c: str x0, [x25, x10] 40: mov x10, #0xffffffffffffff88 // #-120 44: str x0, [x25, x10] 48: mov x10, #0xffffffffffffff90 // #-112 4c: str x0, [x25, x10] 50: mov x10, #0xffffffffffffff98 // #-104 54: str x0, [x25, x10] 58: mov x10, #0xffffffffffffffa0 // #-96 5c: str x0, [x25, x10] 60: mov x10, #0xffffffffffffffa8 // #-88 64: str x0, [x25, x10] 68: mov x10, #0xffffffffffffffb0 // #-80 6c: str x0, [x25, x10] 70: mov x10, #0xffffffffffffffb8 // #-72 74: str x0, [x25, x10] 78: mov x10, #0xffffffffffffffc0 // #-64 7c: str x0, [x25, x10] 80: mov x10, #0xffffffffffffffc8 // #-56 84: str x0, [x25, x10] 88: mov x10, #0xffffffffffffffd0 // #-48 8c: str x0, [x25, x10] 90: mov x10, #0xffffffffffffffd8 // #-40 94: str x0, [x25, x10] 98: mov x10, #0xffffffffffffffe0 // #-32 9c: str x0, [x25, x10] a0: mov x10, #0xffffffffffffffe8 // #-24 a4: str x0, [x25, x10] a8: mov x10, #0xfffffffffffffff0 // #-16 ac: str x0, [x25, x10] b0: mov x10, #0xfffffffffffffff8 // #-8 b4: str x0, [x25, x10] b8: mov x10, #0x8 // #8 bc: ldr x2, [x19, x10] [...] With this patch, jited code(fragment): 0: bti c 4: stp x29, x30, [sp, #-16]! 8: mov x29, sp c: stp x19, x20, [sp, #-16]! 10: stp x21, x22, [sp, #-16]! 14: stp x25, x26, [sp, #-16]! 18: stp x27, x28, [sp, #-16]! 
1c: mov x25, sp 20: sub x27, x25, #0x88 24: mov x26, #0x0 // #0 28: bti j 2c: sub sp, sp, #0x90 30: add x19, x0, #0x0 34: mov x0, #0x0 // #0 38: str x0, [x27] 3c: str x0, [x27, #8] 40: str x0, [x27, #16] 44: str x0, [x27, #24] 48: str x0, [x27, #32] 4c: str x0, [x27, #40] 50: str x0, [x27, #48] 54: str x0, [x27, #56] 58: str x0, [x27, #64] 5c: str x0, [x27, #72] 60: str x0, [x27, #80] 64: str x0, [x27, #88] 68: str x0, [x27, #96] 6c: str x0, [x27, #104] 70: str x0, [x27, #112] 74: str x0, [x27, #120] 78: str x0, [x27, #128] 7c: ldr x2, [x19, #8] [...] Signed-off-by: Xu Kuohai <xukuohai@huawei.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20220321152852.2334294-4-xukuohai@huawei.com
1 parent 7db6c0f commit 5b3d19b

File tree

1 file changed

+138
-27
lines changed

1 file changed

+138
-27
lines changed

arch/arm64/net/bpf_jit_comp.c

+138-27
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
2727
#define TCALL_CNT (MAX_BPF_JIT_REG + 2)
2828
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
29+
#define FP_BOTTOM (MAX_BPF_JIT_REG + 4)
2930

3031
#define check_imm(bits, imm) do { \
3132
if ((((imm) > 0) && ((imm) >> (bits))) || \
@@ -63,6 +64,7 @@ static const int bpf2a64[] = {
6364
[TCALL_CNT] = A64_R(26),
6465
/* temporary register for blinding constants */
6566
[BPF_REG_AX] = A64_R(9),
67+
[FP_BOTTOM] = A64_R(27),
6668
};
6769

6870
struct jit_ctx {
@@ -73,6 +75,7 @@ struct jit_ctx {
7375
int exentry_idx;
7476
__le32 *image;
7577
u32 stack_size;
78+
int fpb_offset;
7679
};
7780

7881
static inline void emit(const u32 insn, struct jit_ctx *ctx)
@@ -218,7 +221,7 @@ static bool is_addsub_imm(u32 imm)
218221
*
219222
* offset = (u64)imm12 << scale
220223
*/
221-
static bool is_lsi_offset(s16 offset, int scale)
224+
static bool is_lsi_offset(int offset, int scale)
222225
{
223226
if (offset < 0)
224227
return false;
@@ -234,9 +237,9 @@ static bool is_lsi_offset(s16 offset, int scale)
234237

235238
/* Tail call offset to jump into */
236239
#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
237-
#define PROLOGUE_OFFSET 8
240+
#define PROLOGUE_OFFSET 9
238241
#else
239-
#define PROLOGUE_OFFSET 7
242+
#define PROLOGUE_OFFSET 8
240243
#endif
241244

242245
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
@@ -248,6 +251,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
248251
const u8 r9 = bpf2a64[BPF_REG_9];
249252
const u8 fp = bpf2a64[BPF_REG_FP];
250253
const u8 tcc = bpf2a64[TCALL_CNT];
254+
const u8 fpb = bpf2a64[FP_BOTTOM];
251255
const int idx0 = ctx->idx;
252256
int cur_offset;
253257

@@ -286,6 +290,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
286290
emit(A64_PUSH(r6, r7, A64_SP), ctx);
287291
emit(A64_PUSH(r8, r9, A64_SP), ctx);
288292
emit(A64_PUSH(fp, tcc, A64_SP), ctx);
293+
emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx);
289294

290295
/* Set up BPF prog stack base register */
291296
emit(A64_MOV(1, fp, A64_SP), ctx);
@@ -306,6 +311,8 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
306311
emit(A64_BTI_J, ctx);
307312
}
308313

314+
emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx);
315+
309316
/* Stack must be multiples of 16B */
310317
ctx->stack_size = round_up(prog->aux->stack_depth, 16);
311318

@@ -553,10 +560,13 @@ static void build_epilogue(struct jit_ctx *ctx)
553560
const u8 r8 = bpf2a64[BPF_REG_8];
554561
const u8 r9 = bpf2a64[BPF_REG_9];
555562
const u8 fp = bpf2a64[BPF_REG_FP];
563+
const u8 fpb = bpf2a64[FP_BOTTOM];
556564

557565
/* We're done with BPF stack */
558566
emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
559567

568+
/* Restore x27 and x28 */
569+
emit(A64_POP(fpb, A64_R(28), A64_SP), ctx);
560570
/* Restore fs (x25) and x26 */
561571
emit(A64_POP(fp, A64_R(26), A64_SP), ctx);
562572

@@ -650,6 +660,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
650660
const u8 src = bpf2a64[insn->src_reg];
651661
const u8 tmp = bpf2a64[TMP_REG_1];
652662
const u8 tmp2 = bpf2a64[TMP_REG_2];
663+
const u8 fp = bpf2a64[BPF_REG_FP];
664+
const u8 fpb = bpf2a64[FP_BOTTOM];
653665
const s16 off = insn->off;
654666
const s32 imm = insn->imm;
655667
const int i = insn - ctx->prog->insnsi;
@@ -658,6 +670,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
658670
u8 jmp_cond;
659671
s32 jmp_offset;
660672
u32 a64_insn;
673+
u8 src_adj;
674+
u8 dst_adj;
675+
int off_adj;
661676
int ret;
662677

663678
switch (code) {
@@ -1012,34 +1027,41 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
10121027
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
10131028
case BPF_LDX | BPF_PROBE_MEM | BPF_H:
10141029
case BPF_LDX | BPF_PROBE_MEM | BPF_B:
1030+
if (ctx->fpb_offset > 0 && src == fp) {
1031+
src_adj = fpb;
1032+
off_adj = off + ctx->fpb_offset;
1033+
} else {
1034+
src_adj = src;
1035+
off_adj = off;
1036+
}
10151037
switch (BPF_SIZE(code)) {
10161038
case BPF_W:
1017-
if (is_lsi_offset(off, 2)) {
1018-
emit(A64_LDR32I(dst, src, off), ctx);
1039+
if (is_lsi_offset(off_adj, 2)) {
1040+
emit(A64_LDR32I(dst, src_adj, off_adj), ctx);
10191041
} else {
10201042
emit_a64_mov_i(1, tmp, off, ctx);
10211043
emit(A64_LDR32(dst, src, tmp), ctx);
10221044
}
10231045
break;
10241046
case BPF_H:
1025-
if (is_lsi_offset(off, 1)) {
1026-
emit(A64_LDRHI(dst, src, off), ctx);
1047+
if (is_lsi_offset(off_adj, 1)) {
1048+
emit(A64_LDRHI(dst, src_adj, off_adj), ctx);
10271049
} else {
10281050
emit_a64_mov_i(1, tmp, off, ctx);
10291051
emit(A64_LDRH(dst, src, tmp), ctx);
10301052
}
10311053
break;
10321054
case BPF_B:
1033-
if (is_lsi_offset(off, 0)) {
1034-
emit(A64_LDRBI(dst, src, off), ctx);
1055+
if (is_lsi_offset(off_adj, 0)) {
1056+
emit(A64_LDRBI(dst, src_adj, off_adj), ctx);
10351057
} else {
10361058
emit_a64_mov_i(1, tmp, off, ctx);
10371059
emit(A64_LDRB(dst, src, tmp), ctx);
10381060
}
10391061
break;
10401062
case BPF_DW:
1041-
if (is_lsi_offset(off, 3)) {
1042-
emit(A64_LDR64I(dst, src, off), ctx);
1063+
if (is_lsi_offset(off_adj, 3)) {
1064+
emit(A64_LDR64I(dst, src_adj, off_adj), ctx);
10431065
} else {
10441066
emit_a64_mov_i(1, tmp, off, ctx);
10451067
emit(A64_LDR64(dst, src, tmp), ctx);
@@ -1070,36 +1092,43 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
10701092
case BPF_ST | BPF_MEM | BPF_H:
10711093
case BPF_ST | BPF_MEM | BPF_B:
10721094
case BPF_ST | BPF_MEM | BPF_DW:
1095+
if (ctx->fpb_offset > 0 && dst == fp) {
1096+
dst_adj = fpb;
1097+
off_adj = off + ctx->fpb_offset;
1098+
} else {
1099+
dst_adj = dst;
1100+
off_adj = off;
1101+
}
10731102
/* Load imm to a register then store it */
10741103
emit_a64_mov_i(1, tmp, imm, ctx);
10751104
switch (BPF_SIZE(code)) {
10761105
case BPF_W:
1077-
if (is_lsi_offset(off, 2)) {
1078-
emit(A64_STR32I(tmp, dst, off), ctx);
1106+
if (is_lsi_offset(off_adj, 2)) {
1107+
emit(A64_STR32I(tmp, dst_adj, off_adj), ctx);
10791108
} else {
10801109
emit_a64_mov_i(1, tmp2, off, ctx);
10811110
emit(A64_STR32(tmp, dst, tmp2), ctx);
10821111
}
10831112
break;
10841113
case BPF_H:
1085-
if (is_lsi_offset(off, 1)) {
1086-
emit(A64_STRHI(tmp, dst, off), ctx);
1114+
if (is_lsi_offset(off_adj, 1)) {
1115+
emit(A64_STRHI(tmp, dst_adj, off_adj), ctx);
10871116
} else {
10881117
emit_a64_mov_i(1, tmp2, off, ctx);
10891118
emit(A64_STRH(tmp, dst, tmp2), ctx);
10901119
}
10911120
break;
10921121
case BPF_B:
1093-
if (is_lsi_offset(off, 0)) {
1094-
emit(A64_STRBI(tmp, dst, off), ctx);
1122+
if (is_lsi_offset(off_adj, 0)) {
1123+
emit(A64_STRBI(tmp, dst_adj, off_adj), ctx);
10951124
} else {
10961125
emit_a64_mov_i(1, tmp2, off, ctx);
10971126
emit(A64_STRB(tmp, dst, tmp2), ctx);
10981127
}
10991128
break;
11001129
case BPF_DW:
1101-
if (is_lsi_offset(off, 3)) {
1102-
emit(A64_STR64I(tmp, dst, off), ctx);
1130+
if (is_lsi_offset(off_adj, 3)) {
1131+
emit(A64_STR64I(tmp, dst_adj, off_adj), ctx);
11031132
} else {
11041133
emit_a64_mov_i(1, tmp2, off, ctx);
11051134
emit(A64_STR64(tmp, dst, tmp2), ctx);
@@ -1113,34 +1142,41 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
11131142
case BPF_STX | BPF_MEM | BPF_H:
11141143
case BPF_STX | BPF_MEM | BPF_B:
11151144
case BPF_STX | BPF_MEM | BPF_DW:
1145+
if (ctx->fpb_offset > 0 && dst == fp) {
1146+
dst_adj = fpb;
1147+
off_adj = off + ctx->fpb_offset;
1148+
} else {
1149+
dst_adj = dst;
1150+
off_adj = off;
1151+
}
11161152
switch (BPF_SIZE(code)) {
11171153
case BPF_W:
1118-
if (is_lsi_offset(off, 2)) {
1119-
emit(A64_STR32I(src, dst, off), ctx);
1154+
if (is_lsi_offset(off_adj, 2)) {
1155+
emit(A64_STR32I(src, dst_adj, off_adj), ctx);
11201156
} else {
11211157
emit_a64_mov_i(1, tmp, off, ctx);
11221158
emit(A64_STR32(src, dst, tmp), ctx);
11231159
}
11241160
break;
11251161
case BPF_H:
1126-
if (is_lsi_offset(off, 1)) {
1127-
emit(A64_STRHI(src, dst, off), ctx);
1162+
if (is_lsi_offset(off_adj, 1)) {
1163+
emit(A64_STRHI(src, dst_adj, off_adj), ctx);
11281164
} else {
11291165
emit_a64_mov_i(1, tmp, off, ctx);
11301166
emit(A64_STRH(src, dst, tmp), ctx);
11311167
}
11321168
break;
11331169
case BPF_B:
1134-
if (is_lsi_offset(off, 0)) {
1135-
emit(A64_STRBI(src, dst, off), ctx);
1170+
if (is_lsi_offset(off_adj, 0)) {
1171+
emit(A64_STRBI(src, dst_adj, off_adj), ctx);
11361172
} else {
11371173
emit_a64_mov_i(1, tmp, off, ctx);
11381174
emit(A64_STRB(src, dst, tmp), ctx);
11391175
}
11401176
break;
11411177
case BPF_DW:
1142-
if (is_lsi_offset(off, 3)) {
1143-
emit(A64_STR64I(src, dst, off), ctx);
1178+
if (is_lsi_offset(off_adj, 3)) {
1179+
emit(A64_STR64I(src, dst_adj, off_adj), ctx);
11441180
} else {
11451181
emit_a64_mov_i(1, tmp, off, ctx);
11461182
emit(A64_STR64(src, dst, tmp), ctx);
@@ -1167,6 +1203,79 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
11671203
return 0;
11681204
}
11691205

1206+
/*
1207+
* Return 0 if FP may change at runtime, otherwise find the minimum negative
1208+
* offset to FP, converts it to positive number, and align down to 8 bytes.
1209+
*/
1210+
static int find_fpb_offset(struct bpf_prog *prog)
1211+
{
1212+
int i;
1213+
int offset = 0;
1214+
1215+
for (i = 0; i < prog->len; i++) {
1216+
const struct bpf_insn *insn = &prog->insnsi[i];
1217+
const u8 class = BPF_CLASS(insn->code);
1218+
const u8 mode = BPF_MODE(insn->code);
1219+
const u8 src = insn->src_reg;
1220+
const u8 dst = insn->dst_reg;
1221+
const s32 imm = insn->imm;
1222+
const s16 off = insn->off;
1223+
1224+
switch (class) {
1225+
case BPF_STX:
1226+
case BPF_ST:
1227+
/* fp holds atomic operation result */
1228+
if (class == BPF_STX && mode == BPF_ATOMIC &&
1229+
((imm == BPF_XCHG ||
1230+
imm == (BPF_FETCH | BPF_ADD) ||
1231+
imm == (BPF_FETCH | BPF_AND) ||
1232+
imm == (BPF_FETCH | BPF_XOR) ||
1233+
imm == (BPF_FETCH | BPF_OR)) &&
1234+
src == BPF_REG_FP))
1235+
return 0;
1236+
1237+
if (mode == BPF_MEM && dst == BPF_REG_FP &&
1238+
off < offset)
1239+
offset = insn->off;
1240+
break;
1241+
1242+
case BPF_JMP32:
1243+
case BPF_JMP:
1244+
break;
1245+
1246+
case BPF_LDX:
1247+
case BPF_LD:
1248+
/* fp holds load result */
1249+
if (dst == BPF_REG_FP)
1250+
return 0;
1251+
1252+
if (class == BPF_LDX && mode == BPF_MEM &&
1253+
src == BPF_REG_FP && off < offset)
1254+
offset = off;
1255+
break;
1256+
1257+
case BPF_ALU:
1258+
case BPF_ALU64:
1259+
default:
1260+
/* fp holds ALU result */
1261+
if (dst == BPF_REG_FP)
1262+
return 0;
1263+
}
1264+
}
1265+
1266+
if (offset < 0) {
1267+
/*
1268+
* safely be converted to a positive 'int', since insn->off
1269+
* is 's16'
1270+
*/
1271+
offset = -offset;
1272+
/* align down to 8 bytes */
1273+
offset = ALIGN_DOWN(offset, 8);
1274+
}
1275+
1276+
return offset;
1277+
}
1278+
11701279
static int build_body(struct jit_ctx *ctx, bool extra_pass)
11711280
{
11721281
const struct bpf_prog *prog = ctx->prog;
@@ -1288,6 +1397,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
12881397
goto out_off;
12891398
}
12901399

1400+
ctx.fpb_offset = find_fpb_offset(prog);
1401+
12911402
/*
12921403
* 1. Initial fake pass to compute ctx->idx and ctx->offset.
12931404
*

0 commit comments

Comments
 (0)