Seccomp-bpf 使用execv时出现 Bad System Call

最近在学习linux内置的沙盒:Seccomp-bpf,资料略少,最后找到了这个网站提供的例子。
这个例子我已实验成功,能够正常阻止禁止的系统调用并结束程序,去掉禁止的系统调用后也能正常运行。
但是当我尝试用execv运行另外一个程序时却失败了,显示Bad System Call
我已经使用syscall-reporter检查了需要允许的System Call,但是仍旧无法运行。

源代码如下:

// main.c
/*
 * seccomp example with syscall reporting
 *
 * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org>
 * Authors:
 *  Kees Cook <keescook@chromium.org>
 *  Will Drewry <wad@chromium.org>
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <unistd.h>
#include "seccomp-bpf.h"
#include "syscall-reporter.h"

static int install_syscall_filter(void)
{
    struct sock_filter filter[] = {
        /* Validate architecture. */
        VALIDATE_ARCHITECTURE,
        /* Grab the system call number. */
        EXAMINE_SYSCALL,
        /* List allowed syscalls. */
        ALLOW_SYSCALL(rt_sigreturn),
#ifdef __NR_sigreturn
        ALLOW_SYSCALL(sigreturn),
#endif
        ALLOW_SYSCALL(exit_group),
        ALLOW_SYSCALL(exit),
        ALLOW_SYSCALL(read),
        ALLOW_SYSCALL(write),
        ALLOW_SYSCALL(close),
        ALLOW_SYSCALL(fstat),
        ALLOW_SYSCALL(brk),
        ALLOW_SYSCALL(execve),
        KILL_PROCESS,
    };
    struct sock_fprog prog = {
        .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
        .filter = filter,
    };

    if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
        perror("prctl(NO_NEW_PRIVS)");
        goto failed;
    }
    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
        perror("prctl(SECCOMP)");
        goto failed;
    }
    return 0;

failed:
    if (errno == EINVAL)
        fprintf(stderr, "SECCOMP_FILTER is not available. :(\n");
    return 1;
}

/*
 * Install the syscall reporter and the seccomp filter, print a greeting,
 * then replace this process image with ./test.
 *
 * Returns 1 if either installer fails or if execv() itself fails; on a
 * successful execv() this function never returns.
 */
int main(void)
{
    /*
     * execv() expects a NULL-terminated argument vector; passing a NULL
     * argv is non-portable, so hand the child a conventional argv[0].
     */
    char *const child_argv[] = { "./test", NULL };

    /*
     * No separate prctl(PR_SET_NO_NEW_PRIVS, 1) here: the two-argument call
     * left the remaining arguments unspecified and failed with EINVAL, and
     * install_syscall_filter() already issues the call with all five
     * arguments.
     */
    if (install_syscall_reporter())
        return 1;
    if (install_syscall_filter())
        return 1;
    printf("Hello World!\n");
    /* Buffered stdio data is discarded by a successful exec; flush first. */
    fflush(stdout);
    execv("./test", child_argv);
    /* Only reached when execv() failed. */
    perror("execv");
    return 1;
}
//test.c
#include <stdio.h>
/* Minimal exec target: writes one greeting line to stdout and exits with 0. */
int main(void)
{
    fputs("Hello World!!!\n", stdout);
    return 0;
}

以下文件我没有修改,就不放源代码了,直接放下载链接。
seccomp-bpf.h
syscall-reporter.c
syscall-reporter.h

编译指令:

# Generate syscall-names.h: dump the preprocessor macros of <syscall.h>, keep
# the "#define __NR_<name> <number>" lines, and rewrite each one as a
# designated initializer ` [<number>] = "<name>",` inside syscall_names[].
echo "static const char *syscall_names[] = {" > syscall-names.h ;\
        echo "#include <syscall.h>" | cpp -dM | grep '^#define __NR_' | \
                LC_ALL=C sed -r -n -e 's/^\#define[ \t]+__NR_([a-z0-9_]+)[ \t]+([0-9]+)(.*)/ [\2] = "\1",/p' >> syscall-names.h;\
        echo "};" >> syscall-names.h
# Build the exec target, then the sandboxing program together with the reporter.
gcc test.c -o test
gcc main.c syscall-reporter.c -o main

修改过程

# 从最初下载的源代码加上execv函数
ubuntu@ubuntu:~/seccomp$ ./main 
Looks like you also need syscall: brk(12)
# 此处添加系统调用白名单:brk
ubuntu@ubuntu:~/seccomp$ ./main 
Looks like you also need syscall: execve(59)
# 此处添加系统调用白名单:execve
ubuntu@ubuntu:~/seccomp$ ./main 
Hello World!
Bad system call
ubuntu@ubuntu:~/seccomp$ sudo ./main 
Hello World!
Bad system call (core dumped)

上面提供的源代码已经是最后一次修改的版本,运行环境为Ubuntu 16.04

ubuntu@ubuntu:~/seccomp$ uname -a
Linux ubuntu 4.4.0-53-generic #74-Ubuntu SMP Fri Dec 2 15:59:10 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux

小弟初学,希望各位神犇不吝赐教,谢谢!

17.08.20补充内容:
除了 prctl(PR_SET_SECCOMP) 本身之外,程序运行过程中用到的系统调用大致有以下几种:

execve
brk
access
mmap
open
fstat
close
read
mprotect
arch_prctl
munmap
write
exit_group

于是main.c新允许以下调用,仍然无法运行:

ALLOW_SYSCALL(open),
ALLOW_SYSCALL(close),
ALLOW_SYSCALL(fstat),
ALLOW_SYSCALL(execve),
ALLOW_SYSCALL(mmap),
ALLOW_SYSCALL(mprotect),
ALLOW_SYSCALL(munmap),
ALLOW_SYSCALL(uname),
ALLOW_SYSCALL(arch_prctl),
ALLOW_SYSCALL(brk),
ALLOW_SYSCALL(access),
ALLOW_SYSCALL(readlink),
ALLOW_SYSCALL(sysinfo),
ALLOW_SYSCALL(writev),
ALLOW_SYSCALL(lseek),

使用strace ./main得到以下信息

execve("./main", ["./main"], [/* 23 vars */]) = 0
brk(NULL)                               = 0x706000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb318b15000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=23771, ...}) = 0
mmap(NULL, 23771, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fb318b0f000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
open("/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P\t\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=1864888, ...}) = 0
mmap(NULL, 3967488, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fb318529000
mprotect(0x7fb3186e9000, 2093056, PROT_NONE) = 0
mmap(0x7fb3188e8000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1bf000) = 0x7fb3188e8000
mmap(0x7fb3188ee000, 14848, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7fb3188ee000
close(3)                                = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb318b0e000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb318b0d000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb318b0c000
arch_prctl(ARCH_SET_FS, 0x7fb318b0d700) = 0
mprotect(0x7fb3188e8000, 16384, PROT_READ) = 0
mprotect(0x601000, 4096, PROT_READ)     = 0
mprotect(0x7fb318b17000, 4096, PROT_READ) = 0
munmap(0x7fb318b0f000, 23771)           = 0
prctl(PR_SET_NO_NEW_PRIVS, 1, 0x7ffea2134838, 0, 0x400e80) = -1 EINVAL (Invalid argument)
rt_sigaction(SIGSYS, {0x400bf2, [], SA_RESTORER|SA_SIGINFO, 0x7fb31855e4a0}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [SYS], NULL, 8) = 0
prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)  = 0
prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, {len = 23, filter = 0x7ffea2134670}) = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
brk(NULL)                               = 0x706000
brk(0x727000)                           = 0x727000
write(1, "Hello World!\n", 13Hello World!
)          = 13
execve("./test", NULL, [/* 23 vars */]) = 0
brk(NULL)                               = 0x21c7000
syscall_18446744073709551615(0x7f42edeb7b8a, 0, 0x7f42ee0bde28, 0x10, 0x7fff191037f8, 0x7fff19103040) = 0x15
--- SIGSYS {si_signo=SIGSYS, si_code=SYS_SECCOMP, si_call_addr=0x7f42edeb31b7, si_syscall=__NR_access, si_arch=AUDIT_ARCH_X86_64} ---
+++ killed by SIGSYS (core dumped) +++
Bad system call (core dumped)

使用strace ./test得到以下信息

execve("./test", ["./test"], [/* 23 vars */]) = 0
brk(NULL)                               = 0x115f000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4b1a556000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=23771, ...}) = 0
mmap(NULL, 23771, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4b1a550000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
open("/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P\t\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=1864888, ...}) = 0
mmap(NULL, 3967488, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4b19f6a000
mprotect(0x7f4b1a12a000, 2093056, PROT_NONE) = 0
mmap(0x7f4b1a329000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1bf000) = 0x7f4b1a329000
mmap(0x7f4b1a32f000, 14848, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f4b1a32f000
close(3)                                = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4b1a54f000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4b1a54e000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4b1a54d000
arch_prctl(ARCH_SET_FS, 0x7f4b1a54e700) = 0
mprotect(0x7f4b1a329000, 16384, PROT_READ) = 0
mprotect(0x600000, 4096, PROT_READ)     = 0
mprotect(0x7f4b1a558000, 4096, PROT_READ) = 0
munmap(0x7f4b1a550000, 23771)           = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
brk(NULL)                               = 0x115f000
brk(0x1180000)                          = 0x1180000
write(1, "Hello World!!!\n", 15Hello World!!!
)        = 15
exit_group(0)                           = ?
阅读 11.9k
1 个回答

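我知道了。你允许的系统调用太少,根本不够执行你的 test 程序。(seccomp filter 是会被 execve 继承的,所以 test 启动时动态链接器发出的 access、mmap、open、mprotect、arch_prctl、munmap 等调用同样要经过过滤;上面 strace 输出中的 si_syscall=__NR_access 正是第一个被拦下的调用。)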

你可以 strace ./test 看看执行 test 的过程中都用了哪些系统调用。

撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
推荐问题