实现一个Linux系统调用，返回相关进程信息

记录操作系统课程实验，实验要求：新增一个系统调用，输出给定进程的父子进程相关信息，信息包括：进程号（pid），状态，运行时间，父进程以及第一个子进程pid。该系统调用有三个参数：给定进程pid，存储这些数据的缓冲，缓冲大小。

一、前期准备
安装VMware和Linux虚拟机的过程网上很多方法，在此不赘述，本人使用的是VMware15pro和ubuntu18.04，这里有一点需要注意的是给虚拟机分配大一点的磁盘空间用于内核编译，初始的20G不够用，本人分配60G可顺利完成。到Linux内核官网下载最新稳定版的内核代码（本人用的是Linux-5.5.7），将内核压缩包解压到/usr/src/目录下。然后安装好内核编译的一些依赖：

# Install the packages needed to configure and build the kernel.
sudo apt-get install libncurses5-dev libssl-dev build-essential openssl bison flex

有可能有的虚拟机gcc之类的没有安装，看缺什么补什么

二、实验过程
实验的基本想法是在用户空间开辟一个缓存空间传到内核空间，内核根据进程号获取进程信息，将信息存放到内核缓冲空间，然后将内核空间信息传回用户空间。

首先内核如何根据进程号查询进程的信息参考了部分“咸鱼的自留地”的代码，修改为系统调用能使用的代码格式，其核心思想如下

struct pid *ppid;           // pointer to the struct pid of the target process
struct task_struct *p;      // task descriptor (process control block) of the target
struct task_struct *pos;    // cursor used while walking the children list

ppid = find_get_pid(_pid);  // _pid is the process number passed in from user space; look up its struct pid
p = pid_task(ppid, PIDTYPE_PID);    // map the struct pid to the task descriptor

// From here on the task descriptor can be used to print process information.
printk(KERN_INFO"%s", p->comm);
// printk is the kernel logging function and comm is the process name. Likewise
// p->pid is the process number, p->state the process state, p->utime the
// user-mode run time and p->stime the kernel-mode run time.

// The parent is reached through the real_parent pointer.
// pid is an integer, so it must be printed with %d (not %s).
printk(KERN_INFO"%s%d", p->real_parent->comm, p->real_parent->pid);

// The children are linked through the parent's `children` list head and each
// child's `sibling` node; pos is set to every child in turn.
list_for_each_entry(pos, &(p->children), sibling)
{
    printk(KERN_INFO"%s%d", pos->comm, pos->pid);
}

有了这些核心信息，就能开始写系统调用的c文件了，首先cd /usr/src/linux-5.5.7/kernel/目录下，新建一个show_process_family.c的文件，代码如下

// show_process_family.c
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/pid.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>

#include <uapi/linux/show_process_family.h> // defines struct proc_info_struct, which holds the per-process information

//SYSCALL_DEFINE3是一个宏定义，第一个参数是函数名称，3指的是除了函数名称外后面接3个<类型,参数名>的参数对。参数里面来自用户空间的指针需要加上__user标识
SYSCALL_DEFINE3(show_process_family, pid_t, _pid, struct proc_info_struct __user *, buffer, int __user *, len)
{
    long copied;
    struct proc_info_struct *k_buf;     //定义存放进程信息的内核缓冲
    int k_len;      //定义内核缓冲长度

    if(len < 0 || buffer == NULL) return -EINVAL;

    copied = copy_from_user(&k_len, len, sizeof(int));  //复制用户空间缓冲长度
    if(copied != 0) return -EFAULT;

    k_buf = kcalloc(k_len, sizeof(struct proc_info_struct), GFP_KERNEL);    //根据缓冲大小分配内核缓冲空间
    if(k_buf == NULL) return -ENOMEM;

    copied = copy_from_user(k_buf, buffer, sizeof(struct proc_info_struct) * k_len);    //复制用户空间缓冲信息
    if(copied != 0) return -EFAULT;

    struct pid *ppid;
    struct task_struct *p;
    struct task_struct *pos;
    char *ptype[3] = {"[I]", "[P]", "[C]"}; //预定义三种进程类型标识，Itself，Parent，Children

    // 通过进程的PID号pid一步步找到进程的进程控制块p
    ppid = find_get_pid(_pid);
   
    if (ppid == NULL)
    {
        printk("[ShowProcessFamily] Error, PID not exists.\n");
        return -1;
    }
    p = pid_task(ppid, PIDTYPE_PID);

    // 格式化输出表头
    printk(KERN_INFO"[I]:process itself [P]:parent process [C]:children process\n");
    printk(KERN_INFO"%-6s%-20s%-6s%-6s%-20s\n", "Type", "Name", "PID", "State", "Running_time");
    printk(KERN_INFO"------------------------------------------\n");

    int buf_idx = 0;

    // Itself
    // 打印自身信息
    unsigned rt_i = (p->utime + p->stime)/1000000;  //将utime和stime相加作为进程运行时间，除以1000000转换为秒单位
    printk(KERN_INFO"%-6s%-20s%-6d%-6ld%us\n", ptype[0], p->comm, p->pid, p->state, rt_i);
    strncpy(k_buf[buf_idx].type, ptype[0], 6);  //以下将信息复制到内核缓冲
    strncpy(k_buf[buf_idx].name, p->comm, 20);
    k_buf[buf_idx].pid = p->pid;
    k_buf[buf_idx].state = p->state;
    k_buf[buf_idx].running_time = rt_i;
    buf_idx++;

    // Parent
    // 打印父进程信息
    unsigned rt_p = (p->real_parent->utime + p->real_parent->stime)/1000000;
    printk(KERN_INFO"%-6s%-20s%-6d%-6ld%us\n", ptype[1], p->real_parent->comm, p->real_parent->pid, p->real_parent->state, rt_p);
    strncpy(k_buf[buf_idx].type, ptype[1], 6);
    strncpy(k_buf[buf_idx].name, p->real_parent->comm, 20);
    k_buf[buf_idx].pid = p->real_parent->pid;
    k_buf[buf_idx].state = p->real_parent->state;
    k_buf[buf_idx].running_time = rt_p;
    buf_idx++;

    // Children
    // 遍历”我“的子进程，输出信息
    list_for_each_entry(pos, &(p->children), sibling)
    {
        unsigned rt_c = (pos->utime + pos->stime)/1000000;
        printk(KERN_INFO"%-6s%-20s%-6d%-6ld%us\n", ptype[2], pos->comm, pos->pid, pos->state, rt_c);
        strncpy(k_buf[buf_idx].type, ptype[2], 6);
        strncpy(k_buf[buf_idx].name, pos->comm, 20);
        k_buf[buf_idx].pid = pos->pid;
        k_buf[buf_idx].state = pos->state;
        k_buf[buf_idx].running_time = rt_c;
        buf_idx++;
    }
   
    copied = copy_to_user(buffer, k_buf, sizeof(struct proc_info_struct) * k_len);  //将内核缓冲中的信息复制到用户空间缓冲区
    if(copied != 0) return -EFAULT;

    kfree(k_buf);  //释放内核缓冲区
   
    return 0;
}

上面show_process_family的函数中借鉴了jervisfm的想法，使用了三个参数，分别为pid_t _pid, struct proc_info_struct __user * buffer, int __user * len（注意len是指向缓冲长度的用户空间指针）。其中利用proc_info_struct结构体来存放进程信息，该结构体在show_process_family.h中定义。cd /usr/src/linux-5.5.7/include/uapi/linux/目录下，新建show_process_family.h文件，内容如下

#ifndef _SHOW_PROCESS_FAMILY_H_
#define _SHOW_PROCESS_FAMILY_H_

/*
 * Pull in __kernel_pid_t so this header is self-contained: it compiles both
 * in the kernel and in user space without relying on the includer having
 * declared pid_t beforehand.
 */
#include <linux/types.h>

/* One record describing a process, shared between kernel and user space. */
struct proc_info_struct
{
    char type[6];           /* role tag: "[I]" itself, "[P]" parent, "[C]" child */
    char name[20];          /* process name */
    __kernel_pid_t pid;     /* process number */
    long state;             /* process state */
    unsigned running_time;  /* process run time */
};

#endif

至于为什么要放在/include/uapi/linux/目录下倒是吃了不少苦头才找到的资料，这里参考了stackoverflow的一个问题。该结构体要在内核中使用，首先要对内核可见，而在c文件里，该结构体也出现在用户空间的传入参数里面，因此也要对用户空间可见。要实现这一点就必须将结构体定义放入该目录下，内核才会为用户空间提供一个可见的api。
而仅仅放在该目录下还不行，还需要修改Kbuild文件，cd /usr/src/linux-5.5.7/include/uapi/修改该目录下的Kbuild文件（没有就新建一个），添加一行，如图

# Adds the new header to the header-y list of this Kbuild file.
# NOTE(review): header-y may no longer be honoured by recent kernels, which
# appear to export everything under include/uapi automatically - confirm for 5.5.7.
header-y += linux/show_process_family.h

之后在编译内核的时候加上一句make headers_install INSTALL_HDR_PATH=/usr就能将该头文件添加到/usr/include/linux这个用户可引用的目录下。

回到我们的系统调用部分，现在完成了c文件和结构体，但系统调用的流程还没有配置完。cd /usr/src/linux-5.5.7/include/linux/目录，修改该目录下的syscalls.h文件。在文件的末尾（最后的#endif之前）添加一句

/* Declaration of the new system call's kernel entry point. */
asmlinkage long sys_show_process_family(pid_t _pid,
                                        struct proc_info_struct __user *buffer,
                                        int __user *len);

asmlinkage告诉编译器在CPU栈中寻找系统调用函数参数，相当于一个系统调用的声明，这里面long是常用的返回类型不用管，函数名前面要加上sys_。由于此处也使用了proc_info_struct，因此在该文件前面头文件上也要增加该结构体的头文件，如图

接下来为我们自定义的系统调用定义调用号，cd /usr/src/linux-5.5.7/arch/x86/entry/syscalls/修改该目录下的syscall_64.tbl文件，在64位系统调用末尾添加一行，如图

# <number> <abi> <name> <entry point>  (the entry point is the name prefixed with __x64_sys_)
436 common show_process_family __x64_sys_show_process_family

436是顺延的系统调用号，common照写，后面是系统调用函数名，再后面是函数名前面加上__x64_sys_。

到此基本完成新建系统调用的工作，还有一点收尾工作是要让c文件编译生成的目标文件（show_process_family.o）被链接进内核，因此cd /usr/src/linux-5.5.7/kernel/也就是c文件所在目录下，修改Makefile文件，找到obj-y处，在后面加上show_process_family.o，如图

好了，接下来便可启动内核编译工作了，cd到内核代码根目录，cd /usr/src/linux-5.5.7/，输入

# Open the kernel build configuration interface.
sudo make menuconfig

调出编译内核配置界面，有兴趣可以了解，也可以不调配置使用默认的，这里直接选择Exit然后ok保存默认配置就好

接下来输入

# Build the kernel with 4 parallel jobs.
sudo make -j4
# Export the uapi headers to /usr/include; run once, later rebuilds can skip it.
sudo make headers_install INSTALL_HDR_PATH=/usr
# Install the modules, then the kernel itself.
sudo make modules_install -j4
sudo make install -j4

完成内核编译流程，编译完后重启虚拟机输入uname -r可看到内核版本已更改。此外这里-j后面的4指的是并行编译的任务数，一般设置为分配给虚拟机的CPU核心数，核心越多可以设得越大、编译也越快，本人第一次编译经过了漫长的两个半小时。后面再改动内核代码编译的话就不需要make menuconfig了，直接输入上面的编译指令即可，后面的编译过程本人大概每次编译要半小时。

三、验证系统调用
在任意目录新建一个test.c文件，代码如下

#include <stdio.h>
#include <stdlib.h>
#include <linux/kernel.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/show_process_family.h>  //此头文件便是make headers_install后将内核中的结构体提供一份头文件接口到/usr/include/linux/目录下

/* Prints the first `len` records of `buffer` as a table; defined below main(). */
void print_buffer(struct proc_info_struct *buffer, const int len);

/*
 * Test driver for the show_process_family system call.
 *
 * Reads a pid and a buffer length (in records) from stdin, invokes system
 * call number 436 and prints the records it returns.
 * Returns 0 on success and a non-zero status on bad input, allocation
 * failure or a failed system call.
 */
int main(){
    int pid;    // process number to query
    struct proc_info_struct *buffer;    // user-space buffer for the process records
    int len;    // buffer capacity, in records

    printf("Input <pid> <buffer_len>: ");
    // Reject unparsable input and non-positive lengths before they reach calloc or the kernel.
    if(scanf("%d%d", &pid, &len) != 2 || len <= 0){
        fprintf(stderr, "Invalid input: expected <pid> and a positive <buffer_len>\n");
        return EXIT_FAILURE;
    }
    printf("\n");

    buffer = calloc(len, sizeof(*buffer));  // zeroed, so unused records keep pid == 0
    if(buffer == NULL){
        printf("Could not allocate buffer to store processes infomation\n");
        exit(-1);
    }

    long state = syscall(436, pid, buffer, &len);    // invoke the new system call
    if(state != 0){
        // syscall() returns -1 and sets errno on failure.
        perror("show_process_family");
        free(buffer);
        return EXIT_FAILURE;
    }

    print_buffer(buffer, len);  // print the returned process records
    free(buffer);
    return 0;
}

void print_buffer(struct proc_info_struct *buffer, const int len){
    printf("[I]:process itself [P]:parent process [C]:children process\n");
    printf("%-6s%-20s%-6s%-6s%-20s\n", "Type", "Name", "PID", "State", "Running_time");
    printf("---------------------------------------------------\n");
    for(int i = 0; i < len; ++i){
        if(buffer[i].pid != 0){     //空白缓冲区部分不输出
            printf("%-6s%-20s%-6d%-6ld%us\n", buffer[i].type, buffer[i].name, buffer[i].pid, buffer[i].state, buffer[i].running_time);
        }
    }
}

进程树以及进程号可以输入