1. 2号线程kthreadd

Linux内核规定,所有的线程必须由已存在的线程创建出来,也就是要求所有的task_struct都需要在已有的task_struct上复制出来。Linux可以通过kernel_thread来创建内核线程,这个函数会复制当前线程的task_struct。如果任由各个子系统或驱动自己调用kernel_thread来创建线程,那在创建内核线程时current所指向的task_struct是不确定的。为了解决这一问题,Linux内核将所有内核线程的创建交给固定的线程来做,这个线程就是2号线程kthreadd。

2. 内核线程创建的接口

内核为开发者提供了两个创建内核线程的宏:kthread_create和kthread_run。kthread_create只负责创建内核线程,而kthread_run则是在创建之后立即唤醒该内核线程。

 1/// include/linux/kthread.h
 2/**
 3 * kthread_create - create a kthread on the current node
 4 * @threadfn: the function to run in the thread
 5 * @data: data pointer for @threadfn()
 6 * @namefmt: printf-style format string for the thread name
 7 * @arg: arguments for @namefmt.
 8 *
 9 * This macro will create a kthread on the current node, leaving it in
10 * the stopped state.  It expands to kthread_create_on_node() with
11 * NUMA_NO_NODE as the node argument; see the documentation there.
12 */
13#define kthread_create(threadfn, data, namefmt, arg...) \
14    kthread_create_on_node(threadfn, data, NUMA_NO_NODE, namefmt, ##arg)
15
16/**
17 * kthread_run - create and wake a thread.
18 * @threadfn: the function to run until signal_pending(current).
19 * @data: data ptr for @threadfn.
20 * @namefmt: printf-style name for the thread.
21 *
22 * Description: Calls kthread_create(), then wake_up_process() unless the
23 * result is IS_ERR().  Returns the kthread or ERR_PTR(-ENOMEM).
24 */
25#define kthread_run(threadfn, data, namefmt, ...)			   \
26({									   \
27    struct task_struct *__k						   \
28        = kthread_create(threadfn, data, namefmt, ## __VA_ARGS__); \
29    if (!IS_ERR(__k))						   \
30        wake_up_process(__k);					   \
31    __k;								   \
32})

创建内核线程的核心函数是__kthread_create_on_node,其主要工作就是将需要创建的线程数据挂载到kthread_create_list链表,然后唤醒kthreadd,等待线程创建完成,最后为线程设置名字。

 1/// kernel/kthread.c -- excerpt: fills in a creation request, queues it on kthread_create_list and wakes kthreadd_task
 2static __printf(4, 0)
 3struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
 4                            void *data, int node,
 5                            const char namefmt[],
 6                            va_list args)
 7{
 8    /// ... ... (code omitted from this excerpt)
 9    create->threadfn = threadfn;        /// function the new thread will run
10    create->data = data;                /// argument handed to threadfn
11    create->node = node;                /// node value forwarded with the request
12    create->done = &done;               /// completion stored in the request for later signalling
13
14    spin_lock(&kthread_create_lock);    /// the request list is modified under kthread_create_lock
15    list_add_tail(&create->list, &kthread_create_list);  /// append at the tail of the list
16    spin_unlock(&kthread_create_lock);
17
18    wake_up_process(kthreadd_task);     /// wake kthreadd so it processes the queued request
19    /// ... ... (code omitted from this excerpt)
20}

3. kthreadd线程的工作

kthreadd线程被唤醒后,从kthread_create_list取出创建线程所需要的数据,将创建线程的工作委托给create_kthread。

 1/// kernel/kthread.c -- main loop of the kthreadd thread: creates the kthreads queued on kthread_create_list
 2int kthreadd(void *unused)
 3{
 4    struct task_struct *tsk = current;
 5
 6    /// Reset the context; other kernel threads will inherit these settings
 7    /* Setup a clean context for our children to inherit. */
 8    set_task_comm(tsk, "kthreadd");
 9    ignore_signals(tsk);
10    set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_TYPE_KTHREAD));
11    set_mems_allowed(node_states[N_MEMORY]);
12
13    current->flags |= PF_NOFREEZE;
14    cgroup_init_kthreadd();
15
16    for (;;) {
17        set_current_state(TASK_INTERRUPTIBLE);      /// state is set before the emptiness check below
18        if (list_empty(&kthread_create_list))
19            schedule();                             /// nothing queued: give up the CPU until woken
20        __set_current_state(TASK_RUNNING);
21
22        spin_lock(&kthread_create_lock);
23        while (!list_empty(&kthread_create_list)) { /// drain every pending request
24            struct kthread_create_info *create;
25
26            create = list_entry(kthread_create_list.next,
27                        struct kthread_create_info, list);
28            list_del_init(&create->list);           /// unlink the first request while holding the lock
29            spin_unlock(&kthread_create_lock);      /// the lock is dropped around create_kthread()
30
31            create_kthread(create);
32
33            spin_lock(&kthread_create_lock);        /// re-take the lock before re-checking the list
34        }
35        spin_unlock(&kthread_create_lock);
36    }
37
38    return 0;
39}

3.1. create_kthread

create_kthread使用kernel_thread来创建一个新的内核线程,新的内核线程通过kthread来间接调用在创建线程时指定的函数。如果kernel_thread失败,则检查kthread_create的调用者是否已经被致命信号杀死(即create->done已为NULL):如果是,则释放创建线程所需要的数据;否则将错误码写入create->result,并通知kthread_create的调用者创建失败。

 1/// kernel/kthread.c -- handles one creation request: spawns the thread via kernel_thread() and reports failure
 2static void create_kthread(struct kthread_create_info *create)
 3{
 4    int pid;
 5
 6#ifdef CONFIG_NUMA
 7    current->pref_node_fork = create->node;     /// record the requested node on the current task before forking
 8#endif
 9    /* We want our own signal handler (we take no signals by default). */
10    pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);  /// kthread() receives create as its argument
11    if (pid < 0) {
12        /* Release the structure when caller killed by a fatal signal. */
13        struct completion *done = xchg(&create->done, NULL);  /// take create->done and leave NULL behind
14
15        if (!done) {                            /// done was already NULL: free the request here
16            kfree(create);
17            return;
18        }
19        create->result = ERR_PTR(pid);          /// pass the negative pid back as an error pointer
20        complete(done);                         /// signal the completion taken from the request
21    }
22}

3.2. kthread

kthread运行在新线程的上下文中,主要流程见代码注释。注意:新线程在通过complete(done)通知创建者之后会主动让出CPU进入睡眠,需要被唤醒(例如kthread_run中的wake_up_process)之后才会真正执行threadfn。

 1/// kernel/kthread.c -- entry function of a newly created kernel thread; eventually calls the requested threadfn
 2static int kthread(void *_create)
 3{
 4    static const struct sched_param param = { .sched_priority = 0 };
 5    /* Copy data: it's on kthread's stack */
 6    struct kthread_create_info *create = _create;
 7    int (*threadfn)(void *data) = create->threadfn;
 8    void *data = create->data;
 9    struct completion *done;
10    struct kthread *self;
11    int ret;
12
13    self = to_kthread(current);
14
15    /* Release the structure when caller killed by a fatal signal. */
16    done = xchg(&create->done, NULL);       /// take create->done; NULL means the caller was killed by a fatal signal
17    if (!done) {
18        kfree(create);
19        kthread_exit(-EINTR);
20    }
21
22    self->threadfn = threadfn;
23    self->data = data;
24
25    /// Reset the new thread's scheduling policy and CPU affinity
26    /*
27     * The new thread inherited kthreadd's priority and CPU mask. Reset
28     * back to default in case they have been changed.
29     */
30    sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
31    set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_KTHREAD));
32
33    /* OK, tell user we're spawned, wait for stop or wakeup */
34    __set_current_state(TASK_UNINTERRUPTIBLE);      /// the newly created thread must be woken up before it runs
35    create->result = current;
36    /*
37     * Thread is going to call schedule(), do not preempt it,
38     * or the creator may spend more time in wait_task_inactive().
39     */
40    preempt_disable();                  /// disable preemption
41    complete(done);                     /// creation succeeded: notify the caller of kthread_create
42    schedule_preempt_disabled();        /// schedule with preemption disabled so the kthread_create caller does not wait too long
43    preempt_enable();                   /// re-enable preemption (pairs with preempt_disable() above)
44
45    ret = -EINTR;
46    if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
47        /// Comment on cgroup_kthread_ready, quoted from the kernel source
48        /*
49         * This kthread finished initialization.  The creator should have
50         * set PF_NO_SETAFFINITY if this kthread should stay in the root.
51         */
52        cgroup_kthread_ready();
53        __kthread_parkme(self);         /// checks whether KTHREAD_SHOULD_PARK is set; if not, execution continues
54        ret = threadfn(data);           /// the actual thread function supplied by the creator
55    }
56    kthread_exit(ret);                  /// the thread exits and its resources are reclaimed
57}