1. 内核栈

在每一个进程的生命周期中,必然会通过系统调用陷入内核。在执行系统调用陷入内核之后,这些内核代码所使用的栈并不是原先进程用户空间中的栈,而是一个单独的、位于内核空间的栈,称作进程内核栈。

1.1. init_task

init_task,也就是bootcpu的swapper进程对应的task_struct,其内核栈是静态初始化的。

 1/// init/init_task.c
 2/*
 3 * Set up the first task table, touch at your own risk!. Base=0,
 4 * limit=0x1fffff (=2MB)
 5 */
 6struct task_struct init_task
 7#ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK
 8    __init_task_data
 9#endif
10    __aligned(L1_CACHE_BYTES)
11= {
12#ifdef CONFIG_THREAD_INFO_IN_TASK
13    .thread_info	= INIT_THREAD_INFO(init_task),
14    .stack_refcount	= REFCOUNT_INIT(1),
15#endif
16    .__state	= 0,
17    .stack		= init_stack,
18    .usage		= REFCOUNT_INIT(2),
19    .flags		= PF_KTHREAD,
20    .prio		= MAX_PRIO - 20,
21    .static_prio	= MAX_PRIO - 20,
22    .normal_prio	= MAX_PRIO - 20,
23    .policy		= SCHED_NORMAL,
24    /// ... ...
1/// include/linux/sched.h
2extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)];

从include/asm-generic/vmlinux.lds.h可以看到,init_stack是在链接时分配的,大小为THREAD_SIZE,且与init_thread_union共用同一段内存。init_thread_union在内核栈空间的低地址。

 1/// include/asm-generic/vmlinux.lds.h
 2#define INIT_TASK_DATA(align)						\
 3    . = ALIGN(align);						\
 4    __start_init_task = .;						\
 5    init_thread_union = .;						\
 6    init_stack = .;							\
 7    KEEP(*(.data..init_task))					\
 8    KEEP(*(.data..init_thread_info))				\
 9    . = __start_init_task + THREAD_SIZE;				\
10    __end_init_task = .;

1.2. 非init_task的内核栈创建

除了init_task的内核栈,其他所有内核线程和进程(包括非bootcpu的swapper进程)的内核栈,都是在copy_process中动态申请的:非bootcpu的swapper进程通过fork_idle调用copy_process,其他进程和线程则通过kernel_clone等路径调用copy_process。

调用路径,copy_process -> dup_task_struct -> alloc_thread_stack_node

以ARM64为例:不论是用户进程还是内核线程,都会在copy_thread中,将cpu_context.sp指向childregs,也就是task_pt_regs(p)返回的、位于内核栈顶部(高地址端)的pt_regs。

 1/// arch/arm64/kernel/process.c
 2int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 3{
 4    unsigned long clone_flags = args->flags;
 5    unsigned long stack_start = args->stack;
 6    unsigned long tls = args->tls;
 7    struct pt_regs *childregs = task_pt_regs(p);
 8
 9    memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
10
11    /// ... ...
12    p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
13    p->thread.cpu_context.sp = (unsigned long)childregs;
14    /*
15     * For the benefit of the unwinder, set up childregs->stackframe
16     * as the final frame for the new task.
17     */
18    p->thread.cpu_context.fp = (unsigned long)childregs->stackframe;
19
20    ptrace_hw_copy_thread(p);
21
22    return 0;
23}

2. 中断栈

ARM64默认不使用中断栈,而是直接复用当前进程的内核栈。

在特殊情况下,会用到call_on_irq_stack,如do_softirq_own_stack,以及do_interrupt_handler中on_thread_stack()返回true的情况,此时会用到专门的中断栈。

2.1. 中断栈的初始化

中断栈空间的低地址保存在per_cpu(irq_stack_ptr, cpu)中。中断栈的大小IRQ_STACK_SIZE与THREAD_SIZE相同。

 1/// arch/arm64/kernel/irq.c
 2#ifdef CONFIG_VMAP_STACK
 3static void init_irq_stacks(void)
 4{
 5    int cpu;
 6    unsigned long *p;
 7
 8    for_each_possible_cpu(cpu) {
 9        p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, cpu_to_node(cpu));
10        per_cpu(irq_stack_ptr, cpu) = p;
11    }
12}
13#else
14/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */
15DEFINE_PER_CPU_ALIGNED(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
16
17static void init_irq_stacks(void)
18{
19    int cpu;
20
21    for_each_possible_cpu(cpu)
22        per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack, cpu);
23}
24#endif
25
26#ifndef CONFIG_PREEMPT_RT
27static void ____do_softirq(struct pt_regs *regs)
28{
29    __do_softirq();
30}
31
32void do_softirq_own_stack(void)
33{
34    call_on_irq_stack(NULL, ____do_softirq);
35}
36#endif

2.2. 中断栈的切换

在call_on_irq_stack中,先读取当前cpu的irq_stack_ptr,然后加上IRQ_STACK_SIZE,作为中断栈的栈顶地址。

 1/// arch/arm64/kernel/entry.S
 2/*
 3 * void call_on_irq_stack(struct pt_regs *regs,
 4 * 		          void (*func)(struct pt_regs *));
 5 *
 6 * Calls func(regs) using this CPU's irq stack and shadow irq stack.
 7 */
 8SYM_FUNC_START(call_on_irq_stack)
 9#ifdef CONFIG_SHADOW_CALL_STACK
10    get_current_task x16
11    scs_save x16
12    ldr_this_cpu scs_sp, irq_shadow_call_stack_ptr, x17
13#endif
14
15    /* Create a frame record to save our LR and SP (implicit in FP) */
16    stp	x29, x30, [sp, #-16]!
17    mov	x29, sp
18
19    /// 读取irq_stack_ptr,获取当前cpu的中断栈地址
20    ldr_this_cpu x16, irq_stack_ptr, x17
21
22    /* Move to the new stack and call the function there */
23    add	sp, x16, #IRQ_STACK_SIZE
24    blr	x1
25
26    /*
27     * Restore the SP from the FP, and restore the FP and LR from the frame
28     * record.
29     */
30    mov	sp, x29
31    ldp	x29, x30, [sp], #16
32    scs_load_current
33    ret
34SYM_FUNC_END(call_on_irq_stack)
35NOKPROBE(call_on_irq_stack)

3. 参考资料

- 浅谈Linux 中的进程栈、线程栈、内核栈、中断栈
- Linux中的进程栈、线程栈、内核栈以及中断栈