1. 2号线程kthreadd
Linux内核规定,所有的线程必须由已存在的线程创建出来,也就是要求所有的task_struct都需要在已有的task_struct上复制出来。Linux可以通过kernel_thread来创建内核线程,这个函数会复制当前线程的task_struct。如果任由各个子系统或驱动自己调用kernel_thread来创建线程,那在创建内核线程时current所指向的task_struct是不确定的。为了解决这一问题,Linux内核将所有内核线程的创建交给固定的线程来做,这个线程就是2号线程kthreadd。
2. 内核线程创建的接口
内核为开发者提供了两个创建内核线程的宏kthread_create和kthread_run:kthread_create只负责创建内核线程,而kthread_run则在创建内核线程之后立即将其唤醒。
/// include/linux/kthread.h
/**
 * kthread_create - create a kthread on the current node
 * @threadfn: the function to run in the thread
 * @data: data pointer for @threadfn()
 * @namefmt: printf-style format string for the thread name
 * @arg: arguments for @namefmt.
 *
 * This macro will create a kthread on the current node, leaving it in
 * the stopped state.  This is just a helper for kthread_create_on_node();
 * see the documentation there for more details.
 *
 * Note: "##arg" is the GNU comma-elision form, so the macro may be used
 * with a plain name and no format arguments.
 */
#define kthread_create(threadfn, data, namefmt, arg...) \
	kthread_create_on_node(threadfn, data, NUMA_NO_NODE, namefmt, ##arg)

/**
 * kthread_run - create and wake a thread.
 * @threadfn: the function to run until signal_pending(current).
 * @data: data ptr for @threadfn.
 * @namefmt: printf-style name for the thread.
 *
 * Description: Convenient wrapper for kthread_create() followed by
 * wake_up_process().  Returns the kthread or ERR_PTR(-ENOMEM).
 *
 * Implemented as a GNU statement expression: its value is __k, the result
 * of kthread_create().  wake_up_process() is skipped when IS_ERR(__k).
 */
#define kthread_run(threadfn, data, namefmt, ...)			   \
({									   \
	struct task_struct *__k						   \
		= kthread_create(threadfn, data, namefmt, ## __VA_ARGS__); \
	if (!IS_ERR(__k))						   \
		wake_up_process(__k);					   \
	__k;								   \
})
创建内核线程的核心函数是__kthread_create_on_node,其主要工作就是将需要创建的线程数据挂载到kthread_create_list链表,然后唤醒kthreadd,等待线程创建完成,最后为线程设置名字。
1/// kernel/kthread.c
2static __printf(4, 0)
3struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
4 void *data, int node,
5 const char namefmt[],
6 va_list args)
7{
8 /// ... ...
9 create->threadfn = threadfn;
10 create->data = data;
11 create->node = node;
12 create->done = &done;
13
14 spin_lock(&kthread_create_lock);
15 list_add_tail(&create->list, &kthread_create_list);
16 spin_unlock(&kthread_create_lock);
17
18 wake_up_process(kthreadd_task);
19 /// ... ...
20}
3. kthreadd线程的工作
kthreadd线程被唤醒后,从kthread_create_list取出创建线程所需要的数据,将创建线程的工作委托给create_kthread。
1/// kernel/kthread.c
2int kthreadd(void *unused)
3{
4 struct task_struct *tsk = current;
5
6 /// 重置上下文,其他内核线程会继承这些特性
7 /* Setup a clean context for our children to inherit. */
8 set_task_comm(tsk, "kthreadd");
9 ignore_signals(tsk);
10 set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_TYPE_KTHREAD));
11 set_mems_allowed(node_states[N_MEMORY]);
12
13 current->flags |= PF_NOFREEZE;
14 cgroup_init_kthreadd();
15
16 for (;;) {
17 set_current_state(TASK_INTERRUPTIBLE);
18 if (list_empty(&kthread_create_list))
19 schedule();
20 __set_current_state(TASK_RUNNING);
21
22 spin_lock(&kthread_create_lock);
23 while (!list_empty(&kthread_create_list)) {
24 struct kthread_create_info *create;
25
26 create = list_entry(kthread_create_list.next,
27 struct kthread_create_info, list);
28 list_del_init(&create->list);
29 spin_unlock(&kthread_create_lock);
30
31 create_kthread(create);
32
33 spin_lock(&kthread_create_lock);
34 }
35 spin_unlock(&kthread_create_lock);
36 }
37
38 return 0;
39}
3.1. create_kthread
create_kthread使用kernel_thread来创建一个新的内核线程,新的内核线程通过kthread来间接调用在创建线程时指定的函数。如果kernel_thread失败,则检查kthread_create的调用者是否已经因为收到致命信号而放弃等待(此时create->done已被置为NULL):如果是,则释放创建线程所需要的数据;否则把错误码写入create->result,并通知kthread_create的调用者创建失败。
1/// kernel/kthread.c
2static void create_kthread(struct kthread_create_info *create)
3{
4 int pid;
5
6#ifdef CONFIG_NUMA
7 current->pref_node_fork = create->node;
8#endif
9 /* We want our own signal handler (we take no signals by default). */
10 pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
11 if (pid < 0) {
12 /* Release the structure when caller killed by a fatal signal. */
13 struct completion *done = xchg(&create->done, NULL);
14
15 if (!done) {
16 kfree(create);
17 return;
18 }
19 create->result = ERR_PTR(pid);
20 complete(done);
21 }
22}
3.2. kthread
kthread运行在新线程的上下文中,主要流程见代码注释。注意:新创建的内核线程在通知创建者之后会先进入睡眠,需要被唤醒(例如kthread_run中的wake_up_process)后才会执行threadfn。
1/// kernel/kthread.c
2static int kthread(void *_create)
3{
4 static const struct sched_param param = { .sched_priority = 0 };
5 /* Copy data: it's on kthread's stack */
6 struct kthread_create_info *create = _create;
7 int (*threadfn)(void *data) = create->threadfn;
8 void *data = create->data;
9 struct completion *done;
10 struct kthread *self;
11 int ret;
12
13 self = to_kthread(current);
14
15 /* Release the structure when caller killed by a fatal signal. */
16 done = xchg(&create->done, NULL); /// 再次检查是否有致命信号
17 if (!done) {
18 kfree(create);
19 kthread_exit(-EINTR);
20 }
21
22 self->threadfn = threadfn;
23 self->data = data;
24
25 /// 重置新进程的调度策略和cpu亲和性
26 /*
27 * The new thread inherited kthreadd's priority and CPU mask. Reset
28 * back to default in case they have been changed.
29 */
30 sched_setscheduler_nocheck(current, SCHED_NORMAL, ¶m);
31 set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_KTHREAD));
32
33 /* OK, tell user we're spawned, wait for stop or wakeup */
34 __set_current_state(TASK_UNINTERRUPTIBLE); /// 新创建的进程需要被唤醒才能运行
35 create->result = current;
36 /*
37 * Thread is going to call schedule(), do not preempt it,
38 * or the creator may spend more time in wait_task_inactive().
39 */
40 preempt_disable(); /// 禁用抢占
41 complete(done); /// 创建成功,通知kthread_create的调用者
42 schedule_preempt_disabled(); /// 在禁用抢占的情况下调度,防止kthread_create的调用者等待时间过长
43 preempt_enable(); /// 重新调度
44
45 ret = -EINTR;
46 if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
47 /// 内核中cgroup_kthread_ready的注释
48 /*
49 * This kthread finished initialization. The creator should have
50 * set PF_NO_SETAFFINITY if this kthread should stay in the root.
51 */
52 cgroup_kthread_ready();
53 __kthread_parkme(self); /// 检查是否设置KTHREAD_SHOULD_PARK标志,如果没有,继续执行
54 ret = threadfn(data); /// 真正的回调函数
55 }
56 kthread_exit(ret); /// 线程退出,回收资源
57}