rcu_scheduler_starting(); //启动RCU机制,这个与后面的rcu_read_lock和rcu_read_unlock是配套的,用于多核同步 /* * We need to spawn init first so that it obtains pid 1, however * the init task will end up wanting to create kthreads, which, if * we schedule it before we create kthreadd, will OOPS. */
/* * The boot idle thread must execute schedule() * at least once to get things moving: */ init_idle_bootup_task(current);//current表示当前进程,当前0号进程init_task设置为idle进程 schedule_preempt_disabled(); //0号进程主动请求调度,让出cpu,1号进程kernel_init将会运行,并且禁止抢占 /* Call into cpu_idle with preempt disabled */ cpu_startup_entry(CPUHP_ONLINE);// 这个函数会调用cpu_idle_loop()使得idle进程进入自己的事件处理循环 }
/* * This function is invoked towards the end of the scheduler's initialization * process. Before this is called, the idle task might contain * RCU read-side critical sections (during which time, this idle * task is booting the system). After this function is called, the * idle tasks are prohibited from containing RCU read-side critical * sections. This function also enables RCU lockdep checking. */ void rcu_scheduler_starting(void) { WARN_ON(num_online_cpus() != 1); //WARN_ON相当于警告,会打印出当前栈信息,不会重启, //num_online_cpus表示当前启动的cpu数
/*
 * Reset policy of current process to default.
 *
 * Takes no arguments and returns nothing; it forwards to
 * do_set_mempolicy() with MPOL_DEFAULT, passing 0 and NULL for the
 * remaining two arguments.
 */
void numa_default_policy(void)
{
	/* Select MPOL_DEFAULT as the NUMA memory access policy. */
	do_set_mempolicy(MPOL_DEFAULT, 0, NULL);
}
void cpu_startup_entry(enum cpuhp_state state) { /* * This #ifdef needs to die, but it's too late in the cycle to * make this generic (arm and sh have never invoked the canary * init for the non boot cpus!). Will be fixed in 3.11 */ /* * 1.C语言中#ifdef和#else、#endif是条件编译语句,也就是说在满足某些条件的时候, * 夹在这几个关键字中间的代码才编译,不满足就不编译 * 2.下面这句话的意思就是如果定义了CONFIG_X86这个宏,就把boot_init_stack_canary这个代码编译进去 */ #ifdef CONFIG_X86 /* * If we're the non-boot CPU, nothing set the stack canary up * for us. The boot CPU already has it initialized but no harm * in doing it again. This is a good place for updating it, as * we wont ever return from this function (so the invalid * canaries already on the stack wont ever trigger). */ boot_init_stack_canary();//只有在x86这种non-boot CPU机器上执行,该函数主要用于初始化stack_canary的值,用于防止栈溢出 #endif __current_set_polling(); //设置本架构下面有标示轮询poll的bit位,保证cpu进行重新调度。 arch_cpu_idle_prepare(); //进行idle前的准备工作,ARM64中没有实现 per_cpu(idle_force_poll, smp_processor_id()) = 0; cpu_idle_loop(); //进入idle进程的事件循环 }
/* * In poll mode we reenable interrupts and spin. * * Also if we detected in the wakeup from idle * path that the tick broadcast device expired * for us, we don't want to go deep idle as we * know that the IPI is going to arrive right * away */ if (cpu_idle_force_poll || tick_check_broadcast_expired() || __get_cpu_var(idle_force_poll)) { cpu_idle_poll(); //进入 CPU 的poll mode模式,避免进入深度睡眠,可以处理 处理器间中断 } else { if (!current_clr_polling_and_test()) { stop_critical_timings(); rcu_idle_enter(); arch_cpu_idle(); //进入 CPU 的 idle 模式,省电 WARN_ON_ONCE(irqs_disabled()); rcu_idle_exit(); start_critical_timings(); } else { local_irq_enable(); } __current_set_polling(); } arch_cpu_idle_exit(); } tick_nohz_idle_exit(); //如果有进程需要调度,则先开启周期时钟 schedule_preempt_disabled(); //让出cpu,执行调度 if (cpu_is_offline(smp_processor_id())) //如果当前cpu处理offline状态,关闭idle进程 arch_cpu_idle_dead();
int kthreadd(void *unused) { struct task_struct *tsk = current;
/* Setup a clean context for our children to inherit. */ set_task_comm(tsk, "kthreadd"); ignore_signals(tsk); set_cpus_allowed_ptr(tsk, cpu_all_mask); // 允许kthreadd在任意CPU上运行 set_mems_allowed(node_states[N_MEMORY]);
current->flags |= PF_NOFREEZE;
for (;;) { set_current_state(TASK_INTERRUPTIBLE); //首先将线程状态设置为 TASK_INTERRUPTIBLE, //如果当前没有要创建的线程则主动放弃 CPU 完成调度.此进程变为阻塞态
/* OK, tell user we're spawned, wait for stop or wakeup */ __set_current_state(TASK_UNINTERRUPTIBLE); create->result = current; complete(&create->done); //表示线程创建完毕 schedule(); //让出CPU,注意这里并没有执行新线程的threadfn函数就直接进入睡眠了,然后等待线程被手动唤醒,然后才执行threadfn
ret = -EINTR;
if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) { __kthread_parkme(&self); ret = threadfn(data); } /* we can't just return, we must preserve "self" on stack */ do_exit(ret); }
/** * kthread_create_on_node - create a kthread. * @threadfn: the function to run until signal_pending(current). * @data: data ptr for @threadfn. * @node: memory node number. * @namefmt: printf-style name for the thread. * * Description: This helper function creates and names a kernel * thread. The thread will be stopped: use wake_up_process() to start * it. See also kthread_run(). * * If thread is going to be bound on a particular cpu, give its node * in @node, to get NUMA affinity for kthread stack, or else give -1. * When woken, the thread will run @threadfn() with @data as its * argument. @threadfn() can either call do_exit() directly if it is a * standalone thread for which no one will call kthread_stop(), or * return when 'kthread_should_stop()' is true (which means * kthread_stop() has been called). The return value should be zero * or a negative error number; it will be passed to kthread_stop(). * * Returns a task_struct or ERR_PTR(-ENOMEM). */ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void *data, int node, const char namefmt[], ...) { struct kthread_create_info create;
va_start(args, namefmt); vsnprintf(create.result->comm, sizeof(create.result->comm), namefmt, args); va_end(args); /* * root may have changed our (kthreadd's) priority or CPU mask. * The kernel thread should not inherit these properties. */ sched_setscheduler_nocheck(create.result, SCHED_NORMAL, &param); // create.result is the new thread's task_struct; this call sets its scheduling policy to SCHED_NORMAL, the ordinary non-real-time policy, which ranks below the real-time policies SCHED_FIFO and SCHED_RR; per the original note, the priority in param is set to 0 earlier in the function
/* * __ref 这个跟之前讲的__init作用一样 */ static int __ref kernel_init(void *unused) { kernel_init_freeable(); //进行init进程的一些初始化操作 /* need to finish all async __init code before freeing the memory */ async_synchronize_full();// 等待所有异步调用执行完成,,在释放内存前,必须完成所有的异步 __init 代码 free_initmem();// 释放所有init.* 段中的内存 mark_rodata_ro(); //arm64空实现 system_state = SYSTEM_RUNNING;// 设置系统状态为运行状态 numa_default_policy(); // 设定NUMA系统的默认内存访问策略
flush_delayed_fput(); // 释放所有延时的struct file结构体
if (ramdisk_execute_command) { //ramdisk_execute_command的值为"/init" if (!run_init_process(ramdisk_execute_command)) //运行根目录下的init程序 return 0; pr_err("Failed to execute %s\n", ramdisk_execute_command); }
/* * We try each of these until one succeeds. * * The Bourne shell can be used instead of init if we are * trying to recover a really broken machine. */ if (execute_command) { //execute_command的值如果有定义就去根目录下找对应的应用程序,然后启动 if (!run_init_process(execute_command)) return 0; pr_err("Failed to execute %s. Attempting defaults...\n", execute_command); } if (!run_init_process("/sbin/init") || //如果ramdisk_execute_command和execute_command定义的应用程序都没有找到, //就到根目录下找 /sbin/init,/etc/init,/bin/init,/bin/sh 这四个应用程序进行启动
/*
 * kernel_init_freeable - setup steps run by the init task.
 *
 * In order: waits for kthreadd_done to complete, opens up the GFP mask,
 * lets init allocate on any memory node and run on any CPU, opens
 * /dev/console and duplicates that descriptor twice, defaults
 * ramdisk_execute_command to "/init" when it is unset, and finally loads
 * the default modules.
 *
 * NOTE(review): this excerpt goes straight from the CPU-affinity setup to
 * opening the console, and do_basic_setup() is not called here.  Confirm
 * against upstream whether intermediate calls were elided.
 */
static noinline void __init kernel_init_freeable(void)
{
	/*
	 * Wait until kthreadd is all set-up.  Per the original annotation,
	 * rest_init() signals kthreadd_done once kthreadd has been started.
	 */
	wait_for_completion(&kthreadd_done);

	/* Now the scheduler is fully set up and can do blocking allocations */
	gfp_allowed_mask = __GFP_BITS_MASK;

	/*
	 * init can allocate pages on any node
	 */
	set_mems_allowed(node_states[N_MEMORY]);

	/*
	 * init can run on any cpu.
	 */
	set_cpus_allowed_ptr(current, cpu_all_mask);

	/*
	 * Open the /dev/console on the rootfs, this should never fail.
	 * Per the original annotation this becomes descriptor 0 (stdin).
	 */
	if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
		pr_err("Warning: unable to open an initial console.\n");

	/*
	 * Duplicate descriptor 0 twice; the results are deliberately
	 * ignored.  Presumably these become descriptors 1 (stdout) and
	 * 2 (stderr) -- the original annotation labelled them one slot
	 * too low.
	 */
	(void) sys_dup(0);
	(void) sys_dup(0);

	/*
	 * check if there is an early userspace init.  If yes, let it do all
	 * the work
	 */

	/* Fall back to "/init" when no ramdisk init command was supplied. */
	if (!ramdisk_execute_command)
		ramdisk_execute_command = "/init";

	/*
	 * Ok, we have completed the initial bootup, and
	 * we're essentially up and running. Get rid of the
	 * initmem segments and start the user-mode stuff..
	 */

	/*
	 * rootfs is available now, try loading default modules (the
	 * original annotation says this loads the I/O scheduler elevator).
	 */
	load_default_modules();
}
/* * Ok, the machine is now initialized. None of the devices * have been touched yet, but the CPU subsystem is up and * running, and memory and process management works. * * Now we can finally start doing some real work.. */ static void __init do_basic_setup(void) { cpuset_init_smp();//针对SMP系统,初始化内核control group的cpuset子系统。 usermodehelper_init();// 创建khelper单线程工作队列,用于协助新建和运行用户空间程序 shmem_init();// 初始化共享内存 driver_init();// 初始化设备驱动,比较重要下面单独讲 init_irq_proc();//创建/proc/irq目录, 并初始化系统中所有中断对应的子目录 do_ctors();// 执行内核的构造函数 usermodehelper_enable();// 启用usermodehelper do_initcalls();//遍历initcall_levels数组,调用里面的initcall函数,这里主要是对设备、驱动、文件系统进行初始化, //之所有将函数封装到数组进行遍历,主要是为了好扩展
/** * driver_init - initialize driver model. * * Call the driver model init functions to initialize their * subsystems. Called early from init/main.c. */ void __init driver_init(void) { /* These are the core pieces */ devtmpfs_init();// 注册devtmpfs文件系统,启动kdevtmpfs进程 devices_init();// 初始化驱动模型中的部分子系统,kset:devices 和 kobject:dev、 dev/block、 dev/char buses_init();// 初始化驱动模型中的bus子系统,kset:bus、devices/system classes_init();// 初始化驱动模型中的class子系统,kset:class firmware_init();// 初始化驱动模型中的firmware子系统 ,kobject:firmware hypervisor_init();// 初始化驱动模型中的hypervisor子系统,kobject:hypervisor
/* These are also core pieces, but must come after the * core core pieces. */ platform_bus_init();// 初始化驱动模型中的bus/platform子系统,这个节点是所有platform设备和驱动的总线类型, //即所有platform设备和驱动都会挂载到这个总线上