基于mykernel完成多进程的简单内核
本文引用: https://github.com/mengning/linuxkernel/ 本文感谢孟宁老师的指导,让我收获颇多。 署名:426
完成一个简单的时间片轮转多道程序内核,可以连接对于内核的工作原理。
实验环境:Ubuntu16.04
1.搭建内核的编译环境:
- sudo apt-get install qemu # install QEMU
- sudo ln -s /usr/bin/qemu-system-i386 /usr/bin/qemu
- wget https://www.kernel.org/pub/linux/kernel/v3.x/linux-3.9.4.tar.xz # download Linux Kernel 3.9.4 source code
- wget https://raw.github.com/mengning/mykernel/master/mykernel_for_linux3.9.4sc.patch # download mykernel_for_linux3.9.4sc.patch
- xz -d linux-3.9.4.tar.xz
- tar -xvf linux-3.9.4.tar
- cd linux-3.9.4
- patch -p1 < ../mykernel_for_linux3.9.4sc.patch
- make allnoconfig
-
make
- qemu -kernel arch/x86/boot/bzImage
下载搭建环境必要的包,一次运行如上指令。可以看到一个基本的内核运行。
从qemu窗口中可以看到my_start_kernel在执行,同时my_timer_handler时钟中断处理程序周期性执行。
cd mykernel 可以看到mymain.c和myinterrupt.c两个文件:
mymain.c
1 #ifdef CONFIG_X86_LOCAL_APIC 2 #include <asm/smp.h> 3 #endif 4 5 void __init my_start_kernel(void) 6 { 7 int i = 0; 8 while(1) 9 { 10 i++; 11 if(i%100000 == 0) 12 printk(KERN_NOTICE "my_start_kernel here %d \n",i); 13 14 } 15 }
__init my_start_kernel(void) 是内核启动的入口, 执行十万次循环打印一次。
myinterrupt.c
#define CREATE_TRACE_POINTS #include <trace/events/timer.h> /* * Called by timer interrupt. */ void my_timer_handler(void) { printk(KERN_NOTICE "\n>>>>>>>>>>>>>>>>>my_timer_handler here<<<<<<<<<<<<<<<<<<\n\n"); }
这是时钟中断,中断一次输出一次。
要实现时间片轮转调度。可以参考代码:https://github.com/mengning/mykernel/tree/master/mykernel-1.1
将上述拷贝到mykernel文件下,删除原来目录下的.o文件,重新编译。
mypcb.c
1 /* 2 *定义PCB进程信息 3 */ 4 #define MAX_TASK_NUM 10 // max num of task in system 5 #define KERNEL_STACK_SIZE 1024*8 6 #define PRIORITY_MAX 30 //priority range from 0 to 30 7 8 /* CPU-specific state of this task */ 9 struct Thread { 10 unsigned long ip;//point to cpu run address 11 unsigned long sp;//point to the thread stack's top address 12 //todo add other attrubte of system thread 13 }; 14 //PCB Struct 15 typedef struct PCB{ 16 int pid; // pcb id 17 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ 18 char stack[KERNEL_STACK_SIZE];// each pcb stack size is 1024*8 19 /* CPU-specific state of this task */ 20 struct Thread thread; 21 unsigned long task_entry;//the task execute entry memory address 22 struct PCB *next;//pcb is a circular linked list 23 unsigned long priority;// task priority //////// 24 //todo add other attrubte of process control block 25 }tPCB; 26 27 //void my_schedule(int pid); 28 void my_schedule(void);
mypcb.h是一个头文件,定义了一些常量,最大进程数量,pcb栈的大小,优先级。还有两个结构体,一个Thread,一个PCB(进程控制块,Processing Control Block),PCB存放的是一个任务进程的必要信息。进程控制块是系统为了管理进程设置的一个专门的数据结构,主要表示进程状态。每一个进程都对应一个PCB来维护进程相关的信息。声明了my_schedule()函数。
mymain.c
1 #include <linux/types.h> 2 #include <linux/module.h> 3 #include <linux/proc_fs.h> 4 #include <linux/kernel.h> 5 #include <linux/syscalls.h> 6 #include <linux/stackprotector.h> 7 #include <linux/string.h> 8 #include <linux/ctype.h> 9 #include <linux/delay.h> 10 #include <linux/ioport.h> 11 #include <linux/init.h> 12 #include <linux/initrd.h> 13 #include <linux/bootmem.h> 14 #include <linux/acpi.h> 15 #include <linux/tty.h> 16 #include <linux/percpu.h> 17 #include <linux/kmod.h> 18 #include <linux/vmalloc.h> 19 #include <linux/kernel_stat.h> 20 #include <linux/start_kernel.h> 21 #include <linux/security.h> 22 #include <linux/smp.h> 23 #include <linux/profile.h> 24 #include <linux/rcupdate.h> 25 #include <linux/moduleparam.h> 26 #include <linux/kallsyms.h> 27 #include <linux/writeback.h> 28 #include <linux/cpu.h> 29 #include <linux/cpuset.h> 30 #include <linux/cgroup.h> 31 #include <linux/efi.h> 32 #include <linux/tick.h> 33 #include <linux/interrupt.h> 34 #include <linux/taskstats_kern.h> 35 #include <linux/delayacct.h> 36 #include <linux/unistd.h> 37 #include <linux/rmap.h> 38 #include <linux/mempolicy.h> 39 #include <linux/key.h> 40 #include <linux/buffer_head.h> 41 #include <linux/page_cgroup.h> 42 #include <linux/debug_locks.h> 43 #include <linux/debugobjects.h> 44 #include <linux/lockdep.h> 45 #include <linux/kmemleak.h> 46 #include <linux/pid_namespace.h> 47 #include <linux/device.h> 48 #include <linux/kthread.h> 49 #include <linux/sched.h> 50 #include <linux/signal.h> 51 #include <linux/idr.h> 52 #include <linux/kgdb.h> 53 #include <linux/ftrace.h> 54 #include <linux/async.h> 55 #include <linux/kmemcheck.h> 56 #include <linux/sfi.h> 57 #include <linux/shmem_fs.h> 58 #include <linux/slab.h> 59 #include <linux/perf_event.h> 60 #include <linux/file.h> 61 #include <linux/ptrace.h> 62 #include <linux/blkdev.h> 63 #include <linux/elevator.h> 64 65 #include <asm/io.h> 66 #include <asm/bugs.h> 67 #include <asm/setup.h> 68 #include <asm/sections.h> 69 #include <asm/cacheflush.h> 70 71 #include <linux/vmalloc.h> 72 73 #include <linux/module.h> 74 #include <linux/kernel.h> 75 76 #ifdef CONFIG_X86_LOCAL_APIC 77 #include <asm/smp.h> 78 #endif 79 #include "mypcb.h" 80 81 tPCB task[MAX_TASK_NUM]; 82 tPCB * my_current_task = NULL; 83 volatile int my_need_sched = 0; 84 85 void my_process(void); 86 unsigned long get_rand(int ); 87 88 void sand_priority(void) 89 { 90 int i; 91 for(i=0;i<MAX_TASK_NUM;i++) 92 task[i].priority=get_rand(PRIORITY_MAX); 93 } 94 void __init my_start_kernel(void) 95 { 96 int pid = 0; 97 /* Initialize process 0*/ 98 task[pid].pid = pid; 99 task[pid].state = 0;/* -1 unrunnable, 0 runnable, >0 stopped */ 100 // set task 0 execute entry address to my_process 101 task[pid].task_entry = task[pid].thread.ip = (unsigned long)my_process; 102 task[pid].thread.sp = (unsigned long)&task[pid].stack[KERNEL_STACK_SIZE-1]; 103 task[pid].next = &task[pid]; 104 /*fork more process */ 105 for(pid=1;pid<MAX_TASK_NUM;pid++) 106 { 107 memcpy(&task[pid],&task[0],sizeof(tPCB)); 108 task[pid].pid = pid; 109 task[pid].state = -1; 110 task[pid].thread.sp = (unsigned long)&task[pid].stack[KERNEL_STACK_SIZE-1]; 111 task[pid].priority=get_rand(PRIORITY_MAX);//each time all tasks get a random priority 112 } 113 task[MAX_TASK_NUM-1].next=&task[0]; 114 printk(KERN_NOTICE "\n\n\n\n\n\n system begin :>>>process 0 running!!!<<<\n\n"); 115 /* start process 0 by task[0] */ 116 pid = 0; 117 my_current_task = &task[pid]; 118 asm volatile( 119 "movl %1,%%esp\n\t" /* set task[pid].thread.sp to esp */ 120 "pushl %1\n\t" /* push ebp */ 121 "pushl %0\n\t" /* push task[pid].thread.ip */ 122 "ret\n\t" /* pop task[pid].thread.ip to eip */ 123 "popl %%ebp\n\t" 124 : 125 : "c" (task[pid].thread.ip),"d" (task[pid].thread.sp) /* input c or d mean %ecx/%edx*/ 126 ); 127 } 128 void my_process(void) 129 { 130 int i = 0; 131 while(1) 132 { 133 i++; 134 if(i%10000000 == 0) 135 { 136 137 if(my_need_sched == 1) 138 { 139 my_need_sched = 0; 140 sand_priority(); 141 my_schedule(); 142 143 } 144 } 145 } 146 }//end of my_process 147 148 //produce a random priority to a task 149 unsigned long get_rand(max) 150 { 151 unsigned long a; 152 unsigned long umax; 153 umax=(unsigned long)max; 154 get_random_bytes(&a, sizeof(unsigned long )); 155 a=(a+umax)%umax; 156 return a; 157 }
先定义一个PCB数组task,一个指向当前进程的指针,一个volatile常量。
sand_priority(void)函数是一个变换进程优先级的函数。里面用到了一个自定义函数unsigned long get_rand(max)。
unsigned long get_rand(max)函数是一个得到随机数的函数,调用了get_random_bytes()系统函数。
void get_random_bytes(void *buf, int nbytes)函数是内核获取随机数的函数。
__init my_start_kernel(void)是一个主函数,系统入口。先初始化了一个PCB,即task[0],然后用for循环,用memcpy()系统函数复制PCB,再修改一些信息,初始化整个PCB数组。
memcpy函数的功能是从源src所指的内存地址的起始位置开始拷贝n个字节到目标dest所指的内存地址的起始位置中。task[pid].task_entry = task[pid].thread.ip = (unsigned long)my_process让当前进程运行时调用my_process(void)。
下面是一些内联汇编,作用是调用函数框架,call eip改变eip,执行第一个进程,调用my_process(void)。
my_process(void)函数是一个无限循环,每当循环10000000判断一次,my_need_sched常量,为1置0,再sand_priority(),my_schedule()任务调度(在myinterrupt.c); 否则跳过。
myinterrupt.c
1 #include <linux/kernel_stat.h> 2 #include <linux/export.h> 3 #include <linux/interrupt.h> 4 #include <linux/percpu.h> 5 #include <linux/init.h> 6 #include <linux/mm.h> 7 #include <linux/swap.h> 8 #include <linux/pid_namespace.h> 9 #include <linux/notifier.h> 10 #include <linux/thread_info.h> 11 #include <linux/time.h> 12 #include <linux/jiffies.h> 13 #include <linux/posix-timers.h> 14 #include <linux/cpu.h> 15 #include <linux/syscalls.h> 16 #include <linux/delay.h> 17 #include <linux/tick.h> 18 #include <linux/kallsyms.h> 19 #include <linux/irq_work.h> 20 #include <linux/sched.h> 21 #include <linux/sched/sysctl.h> 22 #include <linux/slab.h> 23 24 #include <asm/uaccess.h> 25 #include <asm/unistd.h> 26 #include <asm/div64.h> 27 #include <asm/timex.h> 28 #include <asm/io.h> 29 30 #include <linux/types.h> 31 #include <linux/string.h> 32 #include <linux/ctype.h> 33 #include <linux/tty.h> 34 #include <linux/vmalloc.h> 35 36 #include "mypcb.h" 37 38 #define CREATE_TRACE_POINTS 39 #include <trace/events/timer.h> 40 41 extern tPCB task[MAX_TASK_NUM]; 42 extern tPCB * my_current_task; 43 extern volatile int my_need_sched; 44 volatile int time_count = 0; 45 46 /* 47 * Called by timer interrupt. 48 * it runs in the name of current running process, 49 * so it use kernel stack of current running process 50 */ 51 void my_timer_handler(void) 52 { 53 #if 1 54 // make sure need schedule after system circle 2000 times. 55 if(time_count%2000 == 0 && my_need_sched != 1) 56 { 57 my_need_sched = 1; 58 //time_count=0; 59 } 60 time_count ++ ; 61 #endif 62 return; 63 } 64 65 void all_task_print(void); 66 67 tPCB * get_next(void) 68 { 69 int pid,i; 70 tPCB * point=NULL; 71 tPCB * hig_pri=NULL;//points to the the hightest task 72 all_task_print(); 73 hig_pri=my_current_task; 74 for(i=0;i<MAX_TASK_NUM;i++) 75 if(task[i].priority<hig_pri->priority) 76 hig_pri=&task[i]; 77 printk(" higst process is:%d priority is:%d\n",hig_pri->pid,hig_pri->priority); 78 return hig_pri; 79 80 }//end of priority_schedule 81 82 void my_schedule(void) 83 { 84 tPCB * next; 85 tPCB * prev; 86 // if there no task running or only a task ,it shouldn't need schedule 87 if(my_current_task == NULL 88 || my_current_task->next == NULL) 89 { 90 printk(KERN_NOTICE " time out!!!,but no more than 2 task,need not schedule\n"); 91 return; 92 } 93 /* schedule */ 94 95 next = get_next(); 96 prev = my_current_task; 97 printk(KERN_NOTICE " the next task is %d priority is %u\n",next->pid,next->priority); 98 if(next->state == 0)/* -1 unrunnable, 0 runnable, >0 stopped */ 99 {//save current scene 100 /* switch to next process */ 101 asm volatile( 102 "pushl %%ebp\n\t" /* save ebp */ 103 "movl %%esp,%0\n\t" /* save esp */ 104 "movl %2,%%esp\n\t" /* restore esp */ 105 "movl $1f,%1\n\t" /* save eip */ 106 "pushl %3\n\t" 107 "ret\n\t" /* restore eip */ 108 "1:\t" /* next process start here */ 109 "popl %%ebp\n\t" 110 : "=m" (prev->thread.sp),"=m" (prev->thread.ip) 111 : "m" (next->thread.sp),"m" (next->thread.ip) 112 ); 113 my_current_task = next;//switch to the next task 114 printk(KERN_NOTICE " switch from %d process to %d process\n >>>process %d running!!!<<<\n\n",prev->pid,next->pid,next->pid); 115 116 } 117 else 118 { 119 next->state = 0; 120 my_current_task = next; 121 printk(KERN_NOTICE " switch from %d process to %d process\n >>>process %d running!!!<<<\n\n\n",prev->pid,next->pid,next->pid); 122 123 /* switch to new process */ 124 asm volatile( 125 "pushl %%ebp\n\t" /* save ebp */ 126 "movl %%esp,%0\n\t" /* save esp */ 127 "movl %2,%%esp\n\t" /* restore esp */ 128 "movl %2,%%ebp\n\t" /* restore ebp */ 129 "movl $1f,%1\n\t" /* save eip */ 130 "pushl %3\n\t" 131 "ret\n\t" /* restore eip */ 132 : "=m" (prev->thread.sp),"=m" (prev->thread.ip) 133 : "m" (next->thread.sp),"m" (next->thread.ip) 134 ); 135 } 136 return; 137 }//end of my_schedule 138 139 void all_task_print(void) 140 { 141 int i,cnum=62;// 142 printk(KERN_NOTICE "\n current task is:%d all task in OS are:\n",my_current_task->pid); 143 144 printk(" "); 145 for(i=0;i<cnum;i++) 146 printk("-"); 147 printk("\n | process:"); 148 for(i=0;i< MAX_TASK_NUM;i++) 149 printk("| %2d ",i); 150 printk("|\n | priority:"); 151 for(i=0;i<MAX_TASK_NUM;i++) 152 printk("| %2d ",task[i].priority); 153 154 printk("|\n "); 155 for(i=0;i<cnum;i++) 156 printk("-"); 157 printk("\n"); 158 }
extern可置于变量或者函数前,以表示变量或者函数的定义在别的文件中,提示编译器遇到此变量和函数时在其他模块中寻找其定义。另外,extern也可用来进行链接指定。
my_timer_handler(void),时钟中断,中断2000次,看一下my_need_sched,不是1改为1。
get_next(void)根据优先级返回下一个要执行的进程(小)。
my_schedule(void)进程调度。只一个进程,没有进程直接返回,判断进程状态,可执行状态,执行一些内联汇编,新建进程,执行另一些内联汇编。
all_task_print(void)打印所有任务信息。
两个正在运行的进程之间做进程上下文切换
"pushl %%ebp\n\t" //保存当前进程的ebp
“movl %%esp,%0\n\t" //把当前进程的esp赋值到这个0,0就是prev-thread.sp
"movl %2,%%esp\n\t" //把2也就是这个下一个进程的eip,2就是next->thread.sp
"movl $1f,%1\n\t" // $1f是指接下来的标号1:的位置,把eip给保存起来
"pushl %3\n\t" //把下一个进程的eip给push进来,也就是next->thread.ip
"ret\n\t" //return之后下一个进程就开始执行了
"1:\t" //下一个进程从这里开始
计算机硬件的三个法宝,讲到程序存储计算机,函数调用中堆栈和中断,实际上操作系统也有两个非常关键的东西,一个是保存现场和恢复现场和中断对应的中断处理程序,另一个是进程上下文的切换,这段代码就是比较关键的代码 ,这个地方有两种处理,一个是下一个进程为0的时候是正在执行的时候,还有一种情况是这个进程是一个新的进程,还从来没有执行过。