1. 编译选项

-fomit-frame-pointer表示在函数调用时不需要存储堆栈帧指针。此选项可减小生成的代码镜像大小, 一般在启用-On优化后会自动打开,也有的平台是默认打开的。

-fno-omit-frame-pointer表示将堆栈帧指针存储在寄存器中,帧指针是用来指示当前函数的栈帧(stack frame)的指针,在调试时可以帮助跟踪函数调用的堆栈信息。

在 AArch32 架构中,堆栈帧指针存储在寄存器 R11(A32 代码)或寄存器 R7(T32 代码)中。

在AArch64架构中,堆栈帧指针存储在寄存器X29中。作为帧指针使用的寄存器不能用作通用寄存器,但如果使用 -fomit-frame-pointer选项编译,则可用作通用寄存器。

2. ARM64

ARM64编译器默认使用-fno-omit-frame-pointer

x29、x30 分别是 fp、lr。先将 fp、lr 分别存入当前函数栈 (sp-64)+0、(sp-64)+8 的位置,然后将 sp 减掉 64。

C语言伪代码

*(sp - 64) = fp;
*(sp - 64 + 8) = lr;
sp -= 64;
fp = sp;

实际汇编代码

 1 000000000002db98 <find_cmd_tbl>:
 2find_cmd_tbl():
 3u-boot-2024.01/common/command.c:95
 4   2db98:   a9bc7bfd    stp x29, x30, [sp, #-64]!
 5   2db9c:   910003fd    mov x29, sp
 6   2dba0:   a90153f3    stp x19, x20, [sp, #16]
 7   2dba4:   a9025bf5    stp x21, x22, [sp, #32]
 8   2dba8:   a90363f7    stp x23, x24, [sp, #48]
 9u-boot-2024.01/common/command.c:103
10   2dbac:   b5000100    cbnz    x0, 2dbcc <find_cmd_tbl+0x34>
11u-boot-2024.01/common/command.c:104
12   2dbb0:   d2800017    mov x23, #0x0                       // #0
13u-boot-2024.01/common/command.c:126
14   2dbb4:   aa1703e0    mov x0, x23
15   2dbb8:   a94153f3    ldp x19, x20, [sp, #16]
16   2dbbc:   a9425bf5    ldp x21, x22, [sp, #32]
17   2dbc0:   a94363f7    ldp x23, x24, [sp, #48]
18   2dbc4:   a8c47bfd    ldp x29, x30, [sp], #64
19   2dbc8:   d65f03c0    ret
20   2dbcc:   aa0103f4    mov x20, x1
21   2dbd0:   aa0003f5    mov x21, x0
22   2dbd4:   2a0203f6    mov w22, w2
23/// ... ...
24u-boot-2024.01/common/command.c:111 (discriminator 2)
25   2dc48:   9100e294    add x20, x20, #0x38
26   2dc4c:   17ffffed    b   2dc00 <find_cmd_tbl+0x68>
27   2dc50:   aa1403f7    mov x23, x20
28   2dc54:   17ffffd8    b   2dbb4 <find_cmd_tbl+0x1c>

3. riscv64

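riscv64编译器默认-fomit-frame-pointer,手动添加-fno-omit-frame-pointer后,反汇编如下:(注:下面这段反汇编中没有出现建立帧指针的 addi s0,sp,64 指令,并且 s0 被当作普通寄存器使用(mv s0,a1),看起来更像是未启用帧指针时的输出,建议用 -fno-omit-frame-pointer 重新编译后核对。)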

s0、ra 分别是 fp、lr。

 1 000000008001d39e <find_cmd_tbl>:
 2find_cmd_tbl():
 3u-boot-2024.01/common/command.c:95
 4    8001d39e:   7139                    addi    sp,sp,-64
 5    8001d3a0:   fc06                    sd  ra,56(sp)
 6    8001d3a2:   f822                    sd  s0,48(sp)
 7    8001d3a4:   f426                    sd  s1,40(sp)
 8    8001d3a6:   f04a                    sd  s2,32(sp)
 9    8001d3a8:   ec4e                    sd  s3,24(sp)
10    8001d3aa:   e852                    sd  s4,16(sp)
11    8001d3ac:   e456                    sd  s5,8(sp)
12u-boot-2024.01/common/command.c:103
13    8001d3ae:   ed01                    bnez    a0,8001d3c6 <find_cmd_tbl+0x28>
14u-boot-2024.01/common/command.c:104
15    8001d3b0:   4a01                    li  s4,0
16u-boot-2024.01/common/command.c:126
17    8001d3b2:   70e2                    ld  ra,56(sp)
18    8001d3b4:   7442                    ld  s0,48(sp)
19    8001d3b6:   74a2                    ld  s1,40(sp)
20    8001d3b8:   7902                    ld  s2,32(sp)
21    8001d3ba:   69e2                    ld  s3,24(sp)
22    8001d3bc:   6aa2                    ld  s5,8(sp)
23    8001d3be:   8552                    mv  a0,s4
24    8001d3c0:   6a42                    ld  s4,16(sp)
25    8001d3c2:   6121                    addi    sp,sp,64
26    8001d3c4:   8082                    ret
27    8001d3c6:   842e                    mv  s0,a1
28/// ... ...
29u-boot-2024.01/common/command.c:111 (discriminator 2)
30    8001d41a:   03840413            addi    s0,s0,56
31    8001d41e:   bfc1                    j   8001d3ee <find_cmd_tbl+0x50>
32    8001d420:   8a22                    mv  s4,s0
33    8001d422:   bf41                    j   8001d3b2 <find_cmd_tbl+0x14>

C语言伪代码

#define STACK_FRAME_SIZE 64
sp -= STACK_FRAME_SIZE;
*(sp + STACK_FRAME_SIZE - 8) = ra;
*(sp + STACK_FRAME_SIZE - 16) = s0;
fp = sp + STACK_FRAME_SIZE;

4. 总结

假设是 find_cmd 调用了 find_cmd_tbl:fp 记录的是 find_cmd 用到的 sp,lr 记录的是 find_cmd 中 bl find_cmd_tbl 之后下一条指令的地址(即返回地址)。

arm64 编译器先将 fp、lr 存入当前函数栈低地址中,然后将 sp 减掉 64,再将 sp 存入 fp,得到新的 fp。

riscv64 编译器先将 sp 减掉 64,再将 fp、lr 存入当前函数栈高地址中,再使用 sp+64 得到新的 fp。

find_cmd_tbl栈增长后,栈布局如下

high addr
      |           | <---+---+  <-- prev sp(find_cmd)
   /  | ra(riscv) |     |   |
   |  | fp(riscv) | ----+   |
  64B |  ... ...  |         |
   |  | lr(arm64) |         |
   \  | fp(arm64) | --------+  <-- current sp(find_cmd_tbl)
low addr

5. dump_backtrace

ARM64

代码来自U-Boot。

 1struct stackframe {
 2    unsigned long fp;
 3    unsigned long pc;
 4};
 5
 6static inline void start_backtrace(struct stackframe *frame,
 7                   unsigned long fp, unsigned long pc)
 8{
 9    frame->fp = fp;
10    frame->pc = pc;
11}
12
13static int unwind_frame(struct stackframe *frame)
14{
15    unsigned long fp = frame->fp;
16
17    if (fp & 0x7)
18        return 1;
19
20    frame->fp = *(unsigned long *)fp;
21    frame->pc = *(unsigned long *)(fp + 8);
22
23    if (frame->fp < fp)
24        return 1;
25
26    /* irq_sp == start_addr_sp */
27    if (frame->fp > gd->start_addr_sp)
28        return 1;
29
30    if (!frame->fp && !frame->pc)
31        return 1;
32
33    return 0;
34}
35
36static void dump_backtrace(struct pt_regs *regs)
37{
38    struct stackframe frame;
39    unsigned long offset = 0;
40
41    if (gd->flags & GD_FLG_RELOC)
42        offset = gd->reloc_off;
43
44    if (regs) {
45        start_backtrace(&frame, regs->regs[29], regs->regs[30]);
46    } else {
47        start_backtrace(&frame,
48                (unsigned long)__builtin_frame_address(0),
49                (unsigned long)dump_backtrace);
50    }
51
52    printf("stack: %p, irq stack: %p\n", (void*)gd->start_addr_sp, (void*)gd->irq_sp);
53    printf("Call trace:\n");
54    printf("  %-16s  %-16s\n", "fp", "pc");
55    do {
56        printf("  %p", (void *)frame.fp);
57        printf("  %p\n", (void *)frame.pc - offset);
58    } while (!unwind_frame(&frame));
59}
60
61void dump_stack(void)
62{
63    dump_backtrace(NULL);
64}
65
66void show_stack(struct pt_regs *regs)
67{
68    dump_backtrace(regs);
69}