《Linux系统编程训练营》1-1_初探 Linux 系统调度

进程调度策略分析

已知：父进程创建子进程后，父子进程同时运行(应用程序从可执行文件变成进程的过程)
问题：如果计算机只有一个处理器，父子进程以什么方式执行（拿到了处理器的时间片资源，执行指令）？

Linux 系统调度

内核具有进程调度的能力，多个进程可同时运行
微观上，处理器同一时间只能执行一个进程
同时运行多个进程时，每个进程都会获得适当的执行时间片
当执行时间片用完，内核调度下一个进程执行

进程调度原理

n 个进程（n >= 2）同时位于内存中
处理器执行每个进程，每个进程拥有一个时间片
时间片用完，通过时钟中断完成进程切换（调度）

/*
 * Select the next task and switch to it.
 *
 * The slot is chosen by taking the running counter `index` modulo 4
 * (presumably the number of entries in gTaskBuff -- confirm against its
 * declaration), so successive calls cycle through the slots in order.
 */
void Schedule()
{
    /* Address of the slot for this round. */
    gCTaskAddr = gTaskBuff + (index % 4);

    /* Advance the counter so the following call picks the next slot. */
    index += 1;

    PrepareForRun(gCTaskAddr);
    LoadTask(gCTaskAddr);
}

Linux 系统调度策略

普通调度策略
- SCHED_OTHER, Linux 默认的调度策略，由 CFS (Completely Fair Scheduler，完全公平调度器) 实现, 给每个进程动态计算优先级，根据优先级和进程执行的历史记录来确定下一个执行的进程
实时调度策略
- SCHED_FIFO, 基于优先级顺序调度进程，并在一个进程获得 CPU 时一直执行，直到进程主动释放（阻塞或主动让出 CPU）或被更高优先级的实时进程抢占
- SCHED_RR, 基于“时间片轮转”的调度策略，给每个进程设置一个固定的时间片，并按照优先级顺序对进程进行轮流调度

如何验证 Linux 中的进程调度？

实验目标
- 验证同一时刻只有一个进程在执行
- 验证不同调度策略，进程执行的连续性不同
实验设计：
- n 个进程同时运行，统计各个进程的执行时刻
- 记录运行方式
  - 每个 slice 时间记录如下值：进程编号，当前时间值，完成度
  - 在 total 时间后运行结束，并输出记录的数据
  - 通过记录的数据分析进程调度策略

进程调度实验设计

实验中需要解决的问题

如何让进程每次 “固定” 工作 slice 时间（单位毫秒）？
如何获取和改变进程的调度策略？
如何记录数据并输出数据（需要保存数据）？
如何图形化显示数据？

Linux 中的时间获取

#include <time.h>

/* A point in time (or a duration) expressed as whole seconds plus a
 * nanosecond remainder. */
struct timespec {
    time_t tv_sec;    /* seconds */
    long tv_nsec;    /* nanoseconds */
};

/* Reads the clock selected by clk_id and stores its current value in *tp.
 * NOTE(review): the meaning of the int return value is not shown in this
 * excerpt -- consult the clock_gettime manual page. */
int clock_gettime(clockid_t clk_id, struct timespec *tp);

clk_id ：
    CLOCK_MONOTONIC : 系统启动后到当前的时间
    CLOCK_REALTIME  : 1970-01-01 00:00:00 (UTC) 到当前的时间

“固定”时间工作量估算

/* Number of busy-loop iterations timed during calibration. */
#define NLOOP_FOR_ESTIMATION 1000000000UL
/* Unit conversion factors. */
#define NSECS_PER_MSEC 1000000UL
#define NSECS_PER_SEC 1000000000UL

/*
 * Elapsed time from `begin` to `end` (both struct timespec values) in
 * nanoseconds.  The arguments are parenthesized so that any expression
 * yielding a struct timespec -- e.g. `*ptr` or `arr[i]` -- expands
 * correctly.  Each argument is evaluated twice, so do not pass
 * expressions with side effects.
 */
#define DiffNS(begin, end) (((end).tv_sec - (begin).tv_sec) * NSECS_PER_SEC \
                            + ((end).tv_nsec - (begin).tv_nsec))

/* Busy-loop iterations that make up one work slice. */
static unsigned long g_load_per_slice;
/* Reference timestamp that recorded times are measured against. */
static struct timespec g_time_begin;

/*
 * Calibrate the busy loop: time NLOOP_FOR_ESTIMATION iterations of an
 * empty counting loop and return how many iterations fit in one
 * millisecond on this machine.
 */
static unsigned long estimate_loops_per_msec(void)
{
    struct timespec begin = {0};
    struct timespec end = {0};
    /* volatile: the loop has no other side effect, so without it an
     * optimizing compiler may delete the loop and the measurement
     * becomes meaningless. */
    volatile unsigned long i = 0;
    unsigned long long elapsed_ns = 0;

    clock_gettime(CLOCK_MONOTONIC, &begin);

    while( i < NLOOP_FOR_ESTIMATION ) i++;

    clock_gettime(CLOCK_MONOTONIC, &end);

    elapsed_ns = DiffNS(begin, end);

    /* Never divide by zero, even if the clock did not advance. */
    if( elapsed_ns == 0 ) {
        elapsed_ns = 1;
    }

    /* The product is 10^15, which needs a 64-bit intermediate. */
    return (unsigned long)((unsigned long long)NLOOP_FOR_ESTIMATION
                           * NSECS_PER_MSEC / elapsed_ns);
}

获取 / 改变进程调度策略

#include <sched.h>

/* Scheduling parameters passed together with a policy.  Only the member
 * relevant here is shown; the real structure may contain more. */
struct sched_param {
    // ...
    int sched_priority;   /* priority used with the chosen policy */
    // ...
};

/* Set the scheduling policy (SCHED_OTHER, SCHED_FIFO, SCHED_RR, ...) and
 * parameters of process `pid`. */
int sched_setscheduler(pid_t pid, int policy, const struct sched_param *param);
/* Query the scheduling policy of process `pid`; pid 0 refers to the
 * calling process. */
int sched_getscheduler(pid_t pid);

chrt 命令简介
- Linux 系统中可以使用 chrt 命令来查看、设置一个进程的优先级和调度策略
命令用法
- chrt [option] [prio] [[pid] | command [arg]...]
主要参数
- -p, --pid 操作一个已存在的 PID, 不启动一个新的任务
- -f, --fifo 设置调度策略为 SCHED_FIFO
- -m, --max 显示最小和最大有效优先级，然后退出
- -o, --other 设置调度策略为 SCHED_OTHER
- -r, --rr 设置调度策略为 SCHED_RR

示例：

# 指定目的进程的 PID 来更改调度策略
chrt -p -r 99 1328

# 更改 bash 为实时进程
chrt -f 10 bash

记录进程运行后产生的数据

/* Create (or truncate) the log file named in buf.  O_CREAT requires the
 * third "mode" argument, otherwise the new file gets arbitrary permissions. */
fd = open(buf, O_WRONLY | O_CREAT | O_TRUNC, 0644);

/* Only write when the file was opened successfully. */
if (fd != -1) {
    for (i=0; i<nrecord; ++i) {
        /* One line per record: process id, elapsed milliseconds since
         * g_time_begin, completion percentage -- tab separated. */
        sprintf(buf, "%d\t%ld\t%d\n",
                        id,
                        DiffNS(g_time_begin, tss[i]) / NSECS_PER_MSEC,
                        (i + 1) * 100 / nrecord);

        write(fd, buf, strlen(buf));
    }

    /* Close only a descriptor that is actually valid. */
    close(fd);
}

图形化数据显示与分析

进程调度实验分析

编程实验

#include <sys/types.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
 
/* Number of busy-loop iterations timed during calibration. */
#define NLOOP_FOR_ESTIMATION 1000000000UL
/* Unit conversion factors. */
#define NSECS_PER_MSEC 1000000UL
#define NSECS_PER_SEC 1000000000UL

/*
 * Elapsed time from `begin` to `end` (both struct timespec values) in
 * nanoseconds.  The arguments are parenthesized so that any expression
 * yielding a struct timespec -- e.g. `*ptr` or `arr[i]` -- expands
 * correctly.  Each argument is evaluated twice, so do not pass
 * expressions with side effects.
 */
#define DiffNS(begin, end) (((end).tv_sec - (begin).tv_sec) * NSECS_PER_SEC \
                            + ((end).tv_nsec - (begin).tv_nsec))

/* Busy-loop iterations that make up one work slice. */
static unsigned long g_load_per_slice;
/* Reference timestamp that recorded times are measured against. */
static struct timespec g_time_begin;

/*
 * Calibrate the busy loop: time NLOOP_FOR_ESTIMATION iterations of an
 * empty counting loop and return how many iterations fit in one
 * millisecond on this machine.
 *
 * The loop here and the one in work() must compile to the same code for
 * the calibration to be valid, so both use a volatile counter.
 */
static unsigned long estimate_loops_per_msec(void)
{
    struct timespec begin = {0};
    struct timespec end = {0};
    /* volatile: the loop has no other side effect, so without it an
     * optimizing compiler may delete the loop entirely. */
    volatile unsigned long i = 0;
    unsigned long long elapsed_ns = 0;

    clock_gettime(CLOCK_MONOTONIC, &begin);

    while( i < NLOOP_FOR_ESTIMATION ) i++;

    clock_gettime(CLOCK_MONOTONIC, &end);

    elapsed_ns = DiffNS(begin, end);

    /* Never divide by zero, even if the clock did not advance. */
    if( elapsed_ns == 0 ) {
        elapsed_ns = 1;
    }

    /* The product is 10^15, which needs a 64-bit intermediate. */
    return (unsigned long)((unsigned long long)NLOOP_FOR_ESTIMATION
                           * NSECS_PER_MSEC / elapsed_ns);
}

/*
 * Burn CPU for one slice: spin for g_load_per_slice iterations of the
 * same counting loop that estimate_loops_per_msec() timed.
 */
static inline void work(void)
{
    /* volatile for the same reason as in the calibration loop. */
    volatile unsigned long i = 0;

    while( i < g_load_per_slice ) i++;
}

/*
 * Workload of one child process.
 *
 * Performs `nrecord` work slices, storing the CLOCK_MONOTONIC time at the
 * end of each slice in tss[], then writes the timeline to "./<id>-proc.log".
 * Each log line is "<id>\t<ms since g_time_begin>\t<percent done>\n".
 *
 * id      - task number, used in the file name and in every record
 * tss     - caller-provided buffer with room for at least nrecord entries
 * nrecord - number of slices to run / timestamps to record
 *
 * Logging happens only after all slices are done, so the file I/O does
 * not disturb the timings being measured.
 */
static void test(int id, struct timespec* tss, int nrecord)
{
    char buf[128] = {0};
    int fd = -1;
    int i = 0;

    for(i=0; i<nrecord; i++) {
        work();
        clock_gettime(CLOCK_MONOTONIC, &tss[i]);
    }

    snprintf(buf, sizeof(buf), "./%d-proc.log", id);

    printf("%s\n", buf);

    /* O_CREAT requires an explicit mode; without it the permissions of
     * the new file are whatever happens to be on the stack. */
    fd = open(buf, O_WRONLY|O_CREAT|O_TRUNC, 0644);

    if( fd == -1 ) {
        perror("open");
        return;
    }

    for(i=0; i<nrecord; i++) {
        /* DiffNS yields an unsigned long, hence %lu. */
        int len = snprintf(buf, sizeof(buf), "%d\t%lu\t%d\n",
                           id,
                           (unsigned long)(DiffNS(g_time_begin, tss[i]) / NSECS_PER_MSEC),
                           (i + 1) * 100 / nrecord);

        if( (len < 0) || (write(fd, buf, (size_t)len) != (ssize_t)len) ) {
            perror("write");
            break;
        }
    }

    close(fd);
}

/*
 * Scheduling experiment driver.
 *
 * Usage: a.out <nproc> <total> <slice>
 *   nproc - number of child processes to fork
 *   total - run time of every child, in milliseconds
 *   slice - length of one work slice, in milliseconds
 *
 * Calibrates the busy loop, forks nproc children that each run
 * total / slice work slices and log their timeline via test(), then
 * waits for all of them.  Returns 0 on success, 1 on invalid arguments
 * or allocation failure.
 */
int main(int argc, char *argv[])
{
    int nproc = 0;      // number of processes to create
    int total = 0;      // total time every created process has to run
    int slice = 0;      // length of one time slice of work
    int nrecord = 0;    // how many timestamps each process records
    int ret = 1;
    struct timespec* logbuf = NULL;
    pid_t* pids = NULL;

    if( argc < 4 ) {
        fprintf(stderr, "usage: %s <nproc> <total> <slice>\n",
                        (argc > 0) ? argv[0] : "a.out");
        return 1;
    }

    nproc = atoi(argv[1]);
    total = atoi(argv[2]);
    slice = atoi(argv[3]);

    /* slice == 0 would divide by zero below; non-positive counts would
     * turn into huge or zero-sized allocations. */
    if( (nproc <= 0) || (slice <= 0) || (total < slice) ) {
        fprintf(stderr, "invalid arguments: nproc > 0, slice > 0 and total >= slice are required\n");
        return 1;
    }

    nrecord = total / slice;
    total = nrecord * slice;    // round total down to a whole number of slices

    logbuf = malloc((size_t)nrecord * sizeof(*logbuf));
    pids = malloc((size_t)nproc * sizeof(*pids));

    if( logbuf && pids ) {
        int i = 0;
        int n = 0;      // number of children successfully created

        printf("nproc = %d\n", nproc);
        printf("total = %d\n", total);
        printf("slice = %d\n", slice);

        printf("SCHED_OTHER = %d\n", SCHED_OTHER);
        printf("SCHED_FIFO = %d\n", SCHED_FIFO);
        printf("SCHED_RR = %d\n", SCHED_RR);

        printf("estimating the workload for one slice...\n");

        // number of loop iterations that make up one time slice
        g_load_per_slice = estimate_loops_per_msec() * slice;

        printf("g_load_per_slice = %lu\n", g_load_per_slice);

        /* Flush before forking: otherwise text still sitting in the stdio
         * buffer is inherited by every child and printed again on exit. */
        fflush(stdout);

        clock_gettime(CLOCK_MONOTONIC, &g_time_begin);

        for(i=0; i<nproc; i++) {
            pids[i] = fork();

            if( pids[i] < 0 ) {
                int j = 0;

                /* Could not create all children: stop the ones that
                 * already exist; they are reaped by the wait loop below. */
                while( j < n ) {
                    kill(pids[j++], SIGKILL);
                }

                printf("process create error...\n");

                break;
            }
            else if( pids[i] == 0 ) {
                int sched = sched_getscheduler(0);
                /* NOTE: getpriority() reports the nice value of the
                 * process, not the sched_priority of a real-time policy. */
                int pri = getpriority(PRIO_PROCESS, 0);

                printf("task %d ==> schedule policy: %d\n", i, sched);
                printf("task %d ==> schedule priority: %d\n", i, pri);

                test(i, logbuf, nrecord);
                exit(0);
            }
            else {
                n++;
            }
        }

        for(i=0; i<n; i++) {
            wait(NULL);
        }

        ret = 0;
    }
    else {
        fprintf(stderr, "out of memory\n");
    }

    free(logbuf);
    free(pids);

    return ret;
}

进程调度数据收集

关于 taskset

helloworld.c

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Print this process's pid, parent pid and process-group id, greet, and
 * then idle forever so the process stays around for inspection (e.g.
 * with taskset -p).
 */
int main(void)
{
    printf("child = %d, ppid = %d, pgid = %d\n",
           getpid(),
           getppid(),
           getpgrp());

    printf("hello world\n");

    /* Sleep in one-second steps, never terminating on its own. */
    for (;;) {
        sleep(1);
    }

    return 0;
}

输出：

wu_tiansong@ubuntu-server:~/test$ gcc helloworld.c -o helloworld.out
wu_tiansong@ubuntu-server:~/test$ ./helloworld.out &
[1] 442336
child = 442336, ppid = 438452, pgid = 442336
hello world
wu_tiansong@ubuntu-server:~/test$ taskset -p 442336
pid 442336's current affinity mask: f  // 当前进程可以在 1111 四颗处理器中的任意一颗调度执行

wu_tiansong@ubuntu-server:~/test$ taskset -c 0 ./helloworld.out &  // 指定进程在 0 号处理器中执行
[1] 445202
child = 445202, ppid = 438452, pgid = 445202
hello world

wu_tiansong@ubuntu-server:~/test$ taskset -p 445202
pid 445202's current affinity mask: 1  // 0001， 仅能被 0 号处理器调度

调度实验数据分析

proc-graph.py

import sys
import numpy as np
import matplotlib.pyplot as plt

# Usage: python proc-graph.py <id-time|work-time> <nproc>
#
# Reads 0-proc.log .. <nproc-1>-proc.log from the current directory. Each line
# holds three tab-separated integers: process id, time in ms, completion in
# percent. The records are drawn as a scatter plot with one colour per process.
if len(sys.argv) < 3 :
    sys.exit('usage: ' + sys.argv[0] + ' <id-time|work-time> <nproc>')

flag = sys.argv[1]
nproc = int(sys.argv[2])

data = []
colors = []

for fid in range(0, nproc) :
    fname = str(fid) + '-proc.log'

    # 'with' closes the file even if a line fails to parse
    with open(fname, 'r') as fd :
        for s in fd :
            s = s.strip()

            # tolerate blank lines instead of crashing in int('')
            if not s :
                continue

            s = s.split('\t')
            data.append([int(s[0]), int(s[1]), int(s[2])])

    # random '#RRGGBB' colour for this process; the upper bound of randint is
    # exclusive, so 256 is needed for the full 0..255 range
    color = '#'

    for c in np.random.randint(0, 256, 3) :
        color += format(int(c), '02X')

    colors.append(color)

x_value = []
y_value = []
c_value = []

# plot kind -> (record column used for the Y axis, Y axis label)
plots = {
    'id-time'   : (0, "Process ID"),
    'work-time' : (2, "Work Load"),
}

if flag in plots :
    column, ylabel = plots[flag]

    for d in data :
        y_value.append(d[column])
        x_value.append(d[1])
        c_value.append(colors[d[0]])

    plt.scatter(x_value, y_value, c=c_value)
    plt.title("Data Analysis")
    plt.ylabel(ylabel)
    plt.xlabel("Time(ms)")
    plt.show()
else :
    sys.exit('unknown plot kind: ' + flag + ' (expected id-time or work-time)')

第一次输出，SCHED_OTHER 调度：

tiansong@tiansong:~/Desktop$ chrt -p $$
pid 2632's current scheduling policy: SCHED_OTHER
pid 2632's current scheduling priority: 0

tiansong@tiansong:~/Desktop$ taskset -c 0 ./a.out 3 1000 5
nproc = 3
total = 1000
slice = 5
SCHED_OTHER = 0
SCHED_FIFO = 1
SCHED_RR = 2
estimating the workload for one slice...
g_load_per_slice = 2566015
task 2 ==> schedule policy: 0
task 2 ==> schedule priority: 0
task 1 ==> schedule policy: 0
task 1 ==> schedule priority: 0
task 0 ==> schedule policy: 0
task 0 ==> schedule priority: 0
./2-proc.log
./0-proc.log
./1-proc.log

tiansong@tiansong:~/Desktop$ python proc-graph.py id-time 3 
tiansong@tiansong:~/Desktop$ python proc-graph.py work-time 3

进程 ID 与时间

完成度与时间

第二次输出，SCHED_FIFO 调度：

tiansong@tiansong:~/Desktop$ sudo chrt -f 1 bash

tiansong@tiansong:~/Desktop$ chrt -p $$
pid 2632's current scheduling policy: SCHED_FIFO
pid 2632's current scheduling priority: 1

tiansong@tiansong:~/Desktop$ taskset -c 0 ./a.out 3 1000 5

tiansong@tiansong:~/Desktop$ python proc-graph.py id-time 3 
tiansong@tiansong:~/Desktop$ python proc-graph.py work-time 3

进程 ID 与时间
完成度与时间

第三次输出，SCHED_RR 调度：

tiansong@tiansong:~/Desktop$ sudo chrt -r 1 bash

tiansong@tiansong:~/Desktop$ chrt -p $$
pid 2632's current scheduling policy: SCHED_RR
pid 2632's current scheduling priority: 1

tiansong@tiansong:~/Desktop$ taskset -c 0 ./a.out 3 1000 5

tiansong@tiansong:~/Desktop$ python proc-graph.py id-time 3 
tiansong@tiansong:~/Desktop$ python proc-graph.py work-time 3

进程 ID 与时间
完成度与时间

《Linux系统编程训练营》1-1_初探 Linux 系统调度

进程调度策略分析

Linux 系统调度

进程调度原理

Linux 系统调度策略

如何验证 Linux 中的进程调度？

进程调度实验设计

实验中需要解决的问题

Linux 中的时间获取

“固定”时间工作量估算

获取 / 改变进程调度策略

记录进程运行后产生的数据

图形化数据显示与分析

进程调度实验分析

编程实验

进程调度数据收集

关于 taskset

调度实验数据分析

TianSong

引用和评论

《Linux系统编程训练营》1-2_多核调度预备知识

OpenInfra 基金会董事会宣布加入 Linux 基金会意向，增强开源全球影响力

rocky linux 使用记录

linux替换原有java

发现一款出色的通用主机监控系统【WGCLOUD】免费

WGCLOUD支持在信创系统部署使用吗

资产盘点系统 WGFIX v1.1 更新特性详解

《Linux系统编程训练营》1-1_初探 Linux 系统调度

进程调度策略分析

Linux 系统调度

进程调度原理

Linux 系统调度策略

如何验证 Linux 中的进程调度？

进程调度实验设计

实验中需要解决的问题

Linux 中的时间获取

“固定”时间工作量估算

获取 / 改变 进程调度策略

记录进程运行后产生的数据

图形化数据显示与分析

进程调度实验分析

编程实验

进程调度数据收集

关于 taskset

调度实验数据分析

TianSong

引用和评论

《Linux系统编程训练营》1-2_多核调度预备知识

OpenInfra 基金会董事会宣布加入 Linux 基金会意向，增强开源全球影响力

rocky linux 使用记录

linux替换原有java

发现一款出色的通用主机监控系统 【WGCLOUD】免费

WGCLOUD支持在信创系统部署使用吗

资产盘点系统 WGFIX v1.1 更新特性详解

获取 / 改变进程调度策略

发现一款出色的通用主机监控系统【WGCLOUD】免费