Linux內核進程創建do_fork()解析(一文解決~)
本文分析內核創建進程時所進行的工作,閱讀的內核代碼為Linux kernel 2.6。
進程創建的大部分工作由do_fork這個函數完成,函數原型如下:
long do_fork(unsigned long clone_flags,
             unsigned long stack_start,
             struct pt_regs *regs,
             unsigned long stack_size,
             int __user *parent_tidptr,
             int __user *child_tidptr)
1、首先進行一些參數及權限的檢查。
if (clone_flags & CLONE_NEWUSER) {
    if (clone_flags & CLONE_THREAD)
        return -EINVAL;
    /* hopefully this check will go away when userns support is
     * complete
     */
    if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
            !capable(CAP_SETGID))
        return -EPERM;
}
2、進行狀態的檢查。這裡主要是進行進程停止狀態的檢查。
if (unlikely(clone_flags & CLONE_STOPPED)) {
    static int __read_mostly count = 100;
    if (count > 0 && printk_ratelimit()) {
        char comm[TASK_COMM_LEN];
        count--;
        printk(KERN_INFO "fork(): process `%s' used deprecated "
                "clone flags 0x%lx\n",
            get_task_comm(comm, current),
            clone_flags & CLONE_STOPPED);
    }
}
3、用戶空間檢查,下面這段代碼比較有用。主要是先完成復制前的準備工作,然後復制當前進程。
/*
 * When called from kernel_thread, don't do user tracing stuff.
 */
if (likely(user_mode(regs)))
    trace = tracehook_prepare_clone(clone_flags);
p = copy_process(clone_flags, stack_start, regs, stack_size,
                 child_tidptr, NULL, trace);
首先是用戶空間壓棧操作,保存寄存器。其中regs是這么一個參數。
/*
 * Saved register state that do_fork() receives through its regs argument.
 *
 * Field order and sizes are part of the layout and must not be changed.
 * NOTE(review): the field meanings below are inferred from the names only;
 * confirm them against the architecture's ptrace header.
 */
struct pt_regs {
    unsigned long long pc;          /* program counter */
    unsigned long long sr;          /* presumably the status register - TODO confirm */
    long long syscall_nr;           /* signed; presumably the system call number */
    unsigned long long regs[63];    /* presumably the general-purpose registers */
    unsigned long long tregs[8];    /* presumably the target registers - TODO confirm */
    unsigned long long pad[2];      /* padding */
};
從結構體中的成員可以看到,包含
pc:程序計數器
sr:狀態寄存器(status register)
syscall_nr:系統調用號
總之,這裡包含的是進程在退出cpu時所需保存的最小信息。
p = copy_process(clone_flags, stack_start, regs, stack_size, child_tidptr, NULL, trace);
這裡即復制一個進程。返回值為指向task_struct結構體的指針,該結構描述了一個進程的基本狀態。這裡并不進行詳細的介紹。
【文章福利】小編推薦自己的Linux內核技術交流群:【891587639】整理了一些個人覺得比較好的學習書籍、視頻資料共享在群文件里面,有需要的可以自行添加哦!!!(含視頻教程、電子書、實戰項目及代碼)


4、對創建的進程進行一些錯誤檢查。這裡發生錯誤的可能性不大,可以暫時先不管,把握住我們的主線。
if (!IS_ERR(p)) {
    struct completion vfork;
    trace_sched_process_fork(current, p);
    nr = task_pid_vnr(p);
    if (clone_flags & CLONE_PARENT_SETTID)
        put_user(nr, parent_tidptr);
    if (clone_flags & CLONE_VFORK) {
        p->vfork_done = &vfork;
        init_completion(&vfork);
    }
5、然后接著下面兩個函數。
audit_finish_fork(p); // 主要是檢查完成的進程的狀態。
tracehook_report_clone(regs, clone_flags, nr, p); // 主要是阻塞剛剛創建的子進程,因為現在還是在父進程中,子進程并未開始執行,暫時掛起子進程。下面為函數解釋。
/**
* tracehook_report_clone - in parent, new child is about to start running
* @regs: ?parent's user register state
* @clone_flags: flags from parent's system call
* @pid: ?new child's PID in the parent's namespace
* @child: ?new child task
*
* Called after a child is set up, but before it has been started running.
* This is not a good place to block, because the child has not started
* yet. ?Suspend the child here if desired, and then block in
* tracehook_report_clone_complete(). ?This must prevent the child from
* self-reaping if tracehook_report_clone_complete() uses the @child
* pointer; otherwise it might have died and been released by the time
* tracehook_report_clone_complete() is called.
*
* Called with no locks held, but the child cannot run until this returns.
*/
6、設置進程標志位。
/*
 * We set PF_STARTING at creation in case tracing wants to
 * use this to distinguish a fully live task from one that
 * hasn't gotten to tracehook_report_clone() yet.  Now we
 * clear it and set the child going.
 */
p->flags &= ~PF_STARTING;
(PF_STARTING宏定義解釋為:#define PF_STARTING 0x00000002 /* being created */,表明該進程正在創建中;此處清除該標志,表示創建過程已經完成)
7、喚醒進程。這裡先判斷復制標志中是否設置了CLONE_STOPPED,但是大多數情形下并不會設置CLONE_STOPPED。
if (unlikely(clone_flags & CLONE_STOPPED)) {
    /*
     * We'll start up with an immediate SIGSTOP.
     */
    sigaddset(&p->pending.signal, SIGSTOP);
    set_tsk_thread_flag(p, TIF_SIGPENDING);
    __set_task_state(p, TASK_STOPPED);
} else {
    wake_up_new_task(p, clone_flags);
}
tracehook_report_clone_complete(trace, regs,
                                clone_flags, nr, p); /* 這個函數主要是報告之前阻塞的子進程已經開始運行 */
下面是wake_up_new_task函數功能解釋。主要功能是首次喚醒新創建的進程,同時完成一些調度初始化所需的工作,并將進程放入運行隊列中。
關於一個進程如何添加到隊列中去,可以從這裡進行研究。本文先不對此進行研究,還是放在進程的創建上來。
/*
* wake_up_new_task - wake up a newly created task for the first time.
*
* This function will do some initial scheduler statistics housekeeping
* that must be done for every newly created context, then puts the task
* on the runqueue and wakes it.
*/
8、檢查clone_flags標志位。若設置了CLONE_VFORK(#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */),該標志位表明父進程希望子進程在釋放內存空間(mm_release)時將其喚醒。wait_for_completion(&vfork);內部有自旋鎖的操作,父進程在此等待,直到前面通過p->vfork_done登記的completion被完成為止。
if (clone_flags & CLONE_VFORK) {
    freezer_do_not_count();
    wait_for_completion(&vfork);
    freezer_count();
    tracehook_report_vfork_done(p, nr);
}
9、完成所有操作,返回。其中返回值為新的進程的pid。
} else {
    nr = PTR_ERR(p);
}
return nr;
10、下面是完整的程序。
/*
* ?Ok, this is the main fork-routine.
*
* It copies the process, and if successful kick-starts
* it and waits for it to finish using the VM if required.
*/
long do_fork(unsigned long clone_flags,
? ? ? ? ?unsigned long stack_start,
? ? ? ? ?struct pt_regs *regs,
? ? ? ? ?unsigned long stack_size,
? ? ? ? ?int __user *parent_tidptr,
? ? ? ? ?int __user *child_tidptr)
{
? ?struct task_struct *p;
? ?int trace = 0;
? ?long nr;
? ?/*
? ? * Do some preliminary argument and permissions checking before we
? ? * actually start allocating stuff
? ? */
? ?if (clone_flags & CLONE_NEWUSER) {
? ? ? ?if (clone_flags & CLONE_THREAD)
? ? ? ? ? ?return -EINVAL;
? ? ? ?/* hopefully this check will go away when userns support is
? ? ? ? * complete
? ? ? ? */
? ? ? ?if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
? ? ? ? ? ? ? ?!capable(CAP_SETGID))
? ? ? ? ? ?return -EPERM;
? ?}
? ?/*
? ? * We hope to recycle these flags after 2.6.26
? ? */
? ?if (unlikely(clone_flags & CLONE_STOPPED)) {
? ? ? ?static int __read_mostly count = 100;
? ? ? ?if (count > 0 && printk_ratelimit()) {
? ? ? ? ? ?char comm[TASK_COMM_LEN];
? ? ? ? ? ?count--;
? ? ? ? ? ?printk(KERN_INFO "fork(): process `%s' used deprecated "
? ? ? ? ? ? ? ? ? ?"clone flags 0x%lx\n",
? ? ? ? ? ? ? ?get_task_comm(comm, current),
? ? ? ? ? ? ? ?clone_flags & CLONE_STOPPED);
? ? ? ?}
? ?}
? ?/*
? ? * When called from kernel_thread, don't do user tracing stuff.
? ? */
? ?if (likely(user_mode(regs)))
? ? ? ?trace = tracehook_prepare_clone(clone_flags);
? ?p = copy_process(clone_flags, stack_start, regs, stack_size,
? ? ? ? ? ? child_tidptr, NULL, trace);
? ?/*
? ? * Do this prior waking up the new thread - the thread pointer
? ? * might get invalid after that point, if the thread exits quickly.
? ? */
? ?if (!IS_ERR(p)) {
? ? ? ?struct completion vfork;
? ? ? ?trace_sched_process_fork(current, p);
? ? ? ?nr = task_pid_vnr(p);
? ? ? ?if (clone_flags & CLONE_PARENT_SETTID)
? ? ? ? ? ?put_user(nr, parent_tidptr);
? ? ? ?if (clone_flags & CLONE_VFORK) {
? ? ? ? ? ?p->vfork_done = &vfork;
? ? ? ? ? ?init_completion(&vfork);
? ? ? ?}
? ? ? ?audit_finish_fork(p);
? ? ? ?tracehook_report_clone(regs, clone_flags, nr, p);
? ? ? ?/*
? ? ? ? * We set PF_STARTING at creation in case tracing wants to
? ? ? ? * use this to distinguish a fully live task from one that
? ? ? ? * hasn't gotten to tracehook_report_clone() yet. ?Now we
? ? ? ? * clear it and set the child going.
? ? ? ? */
? ? ? ?p->flags &= ~PF_STARTING;
? ? ? ?if (unlikely(clone_flags & CLONE_STOPPED)) {
? ? ? ? ? ?/*
? ? ? ? ? ? * We'll start up with an immediate SIGSTOP.
? ? ? ? ? ? */
? ? ? ? ? ?sigaddset(&p->pending.signal, SIGSTOP);
? ? ? ? ? ?set_tsk_thread_flag(p, TIF_SIGPENDING);
? ? ? ? ? ?__set_task_state(p, TASK_STOPPED);
? ? ? ?} else {
? ? ? ? ? ?wake_up_new_task(p, clone_flags);
? ? ? ?}
? ? ? ?tracehook_report_clone_complete(trace, regs,
? ? ? ? ? ? ? ? ? ? ? ?clone_flags, nr, p);
? ? ? ?if (clone_flags & CLONE_VFORK) {
? ? ? ? ? ?freezer_do_not_count();
? ? ? ? ? ?wait_for_completion(&vfork);
? ? ? ? ? ?freezer_count();
? ? ? ? ? ?tracehook_report_vfork_done(p, nr);
? ? ? ?}
? ?} else {
? ? ? ?nr = PTR_ERR(p);
? ?}
? ?return nr;
}
