KRF框架原理深入剖析

KRF框架原理深入剖析

1.KRF故障注入实战演练

需要的依赖:

1
sudo apt install gcc make libelf-dev ruby linux-headers-$(uname -r)

构建步骤:

1
2
3
4
git clone https://github.com/trailofbits/krf && cd krf
make -j$(nproc)
sudo make install # Installs module to /lib/modules and utils to /usr/local/bin
sudo make insmod # Loads module

实战演练:

image-20240212150148360

2.KRF总体架构设计

image-20240212150157274

3.KRF框架构建过程

3.1根目录Makefile

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Top-level Makefile: delegates every real build step to a sub-make.
# CFLAGS and PLATFORM are exported so the sub-makes invoked below inherit them.
export CFLAGS := -std=gnu99 -Wall -Werror -pedantic
# Lower-cased `uname -s` output; selects the src/module/<platform> directory.
export PLATFORM := $(shell uname -s | tr '[:upper:]' '[:lower:]')

CLANG_FORMAT := clang-format
# Every .c/.h file under the tree; consumed only by the `fmt` target.
ALL_SRCS := $(shell find . -type f \( -name '*.c' -o -name '*.h' \))
PREFIX = /usr/local

# Default target: build the kernel module, the three utilities and the example.
all: module krfexec krfctl krfmesg example

.PHONY: module
module:
$(MAKE) -C src/module/$(PLATFORM) module

.PHONY: krfexec
krfexec:
$(MAKE) -C src/krfexec

.PHONY: krfctl
krfctl:
$(MAKE) -C src/krfctl

.PHONY: krfmesg
krfmesg:
$(MAKE) -C src/krfmesg

# Load / unload the module via the platform-specific Makefile.
.PHONY: insmod
insmod:
$(MAKE) -C src/module/$(PLATFORM) insmod

.PHONY: rmmod
rmmod:
$(MAKE) -C src/module/$(PLATFORM) rmmod

.PHONY: example
example:
$(MAKE) -C example

# NOTE(review): src/krfmesg is built by the `krfmesg` target but is not cleaned
# here - confirm whether that omission is intentional.
.PHONY: clean
clean:
$(MAKE) -C src/module/$(PLATFORM) clean
$(MAKE) -C src/krfexec clean
$(MAKE) -C src/krfctl clean
$(MAKE) -C example clean

# Reformat all sources in place using the repository's clang-format style file.
.PHONY: fmt
fmt:
$(CLANG_FORMAT) -i -style=file $(ALL_SRCS)

.PHONY: install-module
install-module: module
$(MAKE) -C src/module/$(PLATFORM) install

# Install the userspace utilities under $(DESTDIR)$(PREFIX)/bin.
.PHONY: install-utils
install-utils: krfexec krfctl krfmesg
install -d $(DESTDIR)$(PREFIX)/bin
install src/krfexec/krfexec $(DESTDIR)$(PREFIX)/bin
install src/krfctl/krfctl $(DESTDIR)$(PREFIX)/bin
install src/krfmesg/krfmesg $(DESTDIR)$(PREFIX)/bin

.PHONY: install
install: install-module install-utils

3.2krfx核心模块构建

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Name of the core krfx kernel module.
MOD := krfx

# Ignore this insanity: we need to do some pathname rewriting
# thanks to the subprocess make that actually does the building.
# ($M is the module directory passed as M=... to the kernel build below.)
KRF_SYSCALL_SRCS_FAKE := $(notdir $(wildcard $M/syscalls/*.c))
KRF_SYSCALL_OBJS_FAKE := $(KRF_SYSCALL_SRCS_FAKE:.c=.o)
KRF_SYSCALL_OBJS = $(foreach obj,$(KRF_SYSCALL_OBJS_FAKE),syscalls/$(obj))

# YAML specs, one per syscall; they are prerequisites of the codegen stamp below.
KRF_SYSCALL_YMLS = $(wildcard ../codegen/linux/*.yml)
ccflags-y := -DKRF_CODEGEN=1 -DLINUX -std=gnu99 -Wno-declaration-after-statement
obj-m += $(MOD).o
$(MOD)-objs := krf.o syscalls.o netlink.o ../krf.o ../config.o $(KRF_SYSCALL_OBJS)

.PHONY: all
all: module

# Build through the running kernel's build tree once codegen is up to date.
.PHONY: module
module: ../codegen/linux/.linux.mk
$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules

.PHONY: codegen
codegen: ../codegen/linux/.linux.mk

# Run the Ruby code generator over the syscall specs (e.g. read.yml) to produce
# the matching wrapper functions (e.g. krf_sys_internal_read). .linux.mk is a
# stamp file touched after a successful run so codegen only re-runs when the
# generator or a spec changes.
../codegen/linux/.linux.mk: ../codegen/linux/codegen $(KRF_SYSCALL_YMLS)
ruby ../codegen/linux/codegen $(FAULTS)
@touch ../codegen/linux/.linux.mk

clean:
$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) clean
rm -f ../*.o *.ur-safe ../*.ur-safe # some garbage not cleaned by the kernel's clean target
rm -f *.gen.x *.gen.h */*.gen.h */*.gen.c ../codegen/linux/.linux.mk # codegen files

.PHONY: install
install: $(MOD).ko
sudo $(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(shell pwd) modules_install
sudo depmod -a

.PHONY: insmod
insmod: $(MOD).ko
sudo insmod $(MOD).ko

.PHONY: rmmod
rmmod:
sudo rmmod $(MOD)

image-20240212150549412

image-20240212150836239

image-20240212151337254

3.3krfctl子模块构建

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Builds krfctl from its hand-written source, two generated tables and any
# platform-specific sources under ./$(PLATFORM)/.
PROG := krfctl
SRCS := $(PROG).c table.gen.c profiles.gen.c $(wildcard ./$(PLATFORM)/*.c)
OBJS := $(SRCS:.c=.o)
# The module's syscall specs; profiles.gen.c is regenerated when any of them changes.
YMLS = $(wildcard ../module/codegen/$(PLATFORM)/*.yml)

.PHONY: all
all: $(PROG)

# Generate the syscall-name -> syscall-number lookup table.
table.gen.c: gentable
ruby gentable

# Generate the profile-name -> syscall-list table.
profiles.gen.c: genprofiles $(YMLS)
ruby genprofiles

# No recipes here: compilation and linking rely on make's built-in rules.
$(OBJS): $(SRCS)

$(PROG): $(OBJS)

.PHONY: clean
clean:
rm -f $(PROG) $(OBJS)
rm -f *.gen.c # gentable/genprofiles files

image-20240212151934265

image-20240212152054112

3.4其他子模块构建

image-20240212152130110

4.KRF框架源码剖析

4.1krfx内核模块源码剖析

首先,看一看头文件定义的信息,其中定义了故障注入目录名称krf及其具体的配置文件名称,如rng_state、probability、control、log_faults、targeting(后面会利用file_operations将这些配置项文件的读写操作绑定到内核模块的属性上);故障注入的筛选模式包括personality、pid、uid、gid、inode等。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
/* Strings used to generate procfs filenames and sysctl strings */
#define KRF_PROC_DIR "krf"
#define KRF_RNG_STATE_FILENAME "rng_state"
#define KRF_PROBABILITY_FILENAME "probability"
#define KRF_CONTROL_FILENAME "control"
#define KRF_LOG_FAULTS_FILENAME "log_faults"
#define KRF_TARGETING_FILENAME "targeting"

/* Targeting modes.
 * Each enumerator doubles as a bit position in krf_target_options_t.mode_mask
 * and as an index into krf_target_options_t.target_data.
 */
typedef enum {
  KRF_T_MODE_PERSONALITY = 0,
  KRF_T_MODE_PID,
  KRF_T_MODE_UID,
  KRF_T_MODE_GID,
  KRF_T_MODE_INODE,
  /* Insert new modes here */
  /* Count of defined modes (and exclusive upper bound of valid mode values). */
  KRF_T_NUM_MODES
} krf_target_mode_t;

/* Netlink Defines */
/* Protocol family, consistent in both kernel prog and user prog. */
#define NETLINK_KRF 28
/* Multicast group, consistent in both kernel prog and user prog. */
#define NETLINK_MYGROUP 28

/* Capacity (excluding the trailing NUL) of the scratch buffers used to format
 * and parse option strings. A single unsigned option needs far less, but the
 * same size is used for every option file.
 */
#define KRF_PROCFS_MAX_SIZE 255

/* Upper bound on the number of targeting modes: mode_mask is a 32-bit mask and
 * the top bit is kept as a boundary, leaving bits 0..30 for real modes.
 */
#define KRF_T_MODE_MAX 31
/* Unsigned literal: shifting a signed 1 into bit 31 overflows int (undefined). */
#define KRF_T_MODE_MAX_MASK (1U << KRF_T_MODE_MAX)

/* Worst case: bits 0..30 are real modes and bit 31 corresponds to KRF_T_NUM_MODES. */
_Static_assert(((KRF_T_NUM_MODES) <= (KRF_T_MODE_MAX)), "Too many modes");

/* Targeting configuration: which modes are enabled and the value each one matches. */
typedef struct {
  unsigned int mode_mask;                   /* bit N set => mode N is enabled */
  unsigned int target_data[KRF_T_MODE_MAX]; /* value to match for mode N */
} krf_target_options_t;

unsigned int krf_rng_state = 0;
unsigned int krf_probability = 1000;
unsigned int krf_targeted_uid = 1002;
unsigned int krf_log_faults = 0;

// 默认情况下无有效筛选模式,不进行故障注入,执行原系统调用函数
krf_target_options_t krf_target_options = {0};

// 故障系统调用表
unsigned long *krf_faultable_table[KRF_NR_SYSCALLS] = {};
// 系统调用备份表
unsigned long *krf_sys_call_table[KRF_NR_SYSCALLS] = {};
// 内核系统调用表
unsigned long **linux_sys_call_table = NULL;

// 可选的系统调用数目
#define KRF_NR_SYSCALLS (NR_syscalls)

在SMP环境下如何安全地修改系统调用表的信息?preempt_disable()是操作系统内核开发中(尤其是Linux内核中)常见的函数。它的主要作用是防止当前正在运行的CPU核心被其他任务(通常是高优先级的任务)抢占(preempt)。这是通过增加抢占计数器的值来实现的:当抢占计数器的值大于0时,表示抢占被禁止。

在多任务操作系统中,任务(或线程)切换是常见的,这通常由调度器根据优先级、时间片等因素决定。但在某些情况下,当前执行的任务在完成前不能被中断,例如操作关键资源或执行必须连续完成的代码段时。preempt_disable()就是用于这样的情况,确保当前任务能够连续运行,不被其他任务抢占CPU。

使用preempt_disable()时需要谨慎,因为如果抢占被禁用时间过长,它可能会影响系统的响应性和实时性能。因此,一旦不再需要禁用抢占,就应该立即调用preempt_enable()来重新允许抢占,这通常是成对出现的。preempt_enable()会减少抢占计数器的值,如果计数器的值降至0,则抢占重新被允许。

image-20240212161337069

Netlink是Linux内核与用户空间进程之间进行双向通信的一种机制,常用于各种系统管理任务,如路由、防火墙配置和进程间通信等。在内核程序和用户程序之间建立一个通信通道,并且这两端的程序都应该使用相同的协议族编号(NETLINK_KRF)和多播组编号(NETLINK_MYGROUP),以确保它们能够正确地相互通信。重要的是要注意选择的数值不要与现有的协议族和多播组编号冲突,以避免通信错误。

image-20240212161424886

下面是操作control、targeting这两个故障注入配置文件的底层函数。后面看到文件操作绑定时就会发现,针对暴露给用户的故障注入配置目录及其配置项文件,其读取和写入操作最终都是调用类似下面这些函数,从而动态修改krfx内核模块的属性。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
// 恢复内核的系统调用表
void krf_flush_table(void) {
int nr;
for (nr = 0; nr < KRF_NR_SYSCALLS; nr++) {
if (krf_sys_call_table[nr]) {
KRF_SAFE_WRITE({ KRF_EXTRACT_SYSCALL(KRF_SYSCALL_TABLE[nr]) = krf_sys_call_table[nr]; });
}
}
}

/* Enable fault injection for one syscall, or flush all of them.
 *
 * sys_num: syscall number to fault. Any value at or beyond KRF_NR_SYSCALLS is
 *          treated as a request to restore the whole table.
 * Returns 0 on success, -EOPNOTSUPP when KRF has no faulty handler for the slot.
 */
int control_file_handler(unsigned int sys_num) {
  /* Out-of-range number: flush request. */
  if (sys_num >= KRF_NR_SYSCALLS) {
    KRF_LOG("krf: flushing all faulty syscalls\n");
    krf_flush_table();
    return 0;
  }

  /* Valid syscall, but not supported by KRF */
  if (krf_faultable_table[sys_num] == NULL) {
    KRF_LOG("krf: user requested faulting of unsupported slot %u\n", sys_num);
    return -EOPNOTSUPP;
  }

  /* Point the syscall table slot at the faulty replacement. */
  KRF_SAFE_WRITE(
      { KRF_EXTRACT_SYSCALL(KRF_SYSCALL_TABLE[sys_num]) = krf_faultable_table[sys_num]; });
  return 0;
}

/* Format the enabled targeting modes into buf, one "<mode> <value>\n" line per
 * enabled mode, in ascending mode order. Nothing is written when no mode is
 * enabled. buf must be large enough for the formatted lines.
 */
void targeting_file_read_handler(char *buf) {
  size_t written = 0;
  unsigned int mode = 0;

  while (mode < KRF_T_NUM_MODES) {
    const int enabled = (krf_target_options.mode_mask & (1 << mode)) != 0;

    if (enabled && written < KRF_PROCFS_MAX_SIZE) {
      written += sprintf(buf + written, "%u %u\n", mode, krf_target_options.target_data[mode]);
    }
    mode++;
  }
}

/* Enable one targeting mode, or clear all of them.
 *
 * mode/data: the mode to enable and the value it should match. The pair (0, 0)
 *            is reserved: it removes all targeting.
 * Returns 0 on success, -EINVAL when mode is not a defined targeting mode.
 */
int targeting_file_write_handler(unsigned int mode, unsigned int data) {
  const int is_reset = (mode == 0) && (data == 0);

  if (is_reset) {
    krf_target_options.mode_mask = 0;
    KRF_LOG("krf: flushing all targeting options\n");
    return 0;
  }

  if (mode >= KRF_T_NUM_MODES) {
    return -EINVAL;
  }

  /* Mark the mode as enabled, then record the value it matches against. */
  krf_target_options.mode_mask |= (1 << mode);
  krf_target_options.target_data[mode] = data;
  return 0;
}

最后,我们看一下krfx内核模块的代码。这段代码是在Linux内核模块或驱动程序中定义文件操作的一种常见方式:它使用struct file_operations结构体来指定与特定文件相关的操作函数。这个结构体是内核提供的标准方式,用于定义文件系统操作的回调函数。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
// krfx kernel module: metadata, handler prototypes and procfs operation tables.
MODULE_LICENSE("GPL");
MODULE_AUTHOR("William Woodruff <william@yossarian.net>");
MODULE_DESCRIPTION("A Kernelspace Randomized Faulter");

static int krf_init(void);
static void krf_teardown(void);
static ssize_t rng_state_file_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t rng_state_file_write(struct file *, const char __user *, size_t, loff_t *);
static ssize_t probability_file_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t probability_file_write(struct file *, const char __user *, size_t, loff_t *);
static ssize_t control_file_write(struct file *, const char __user *, size_t, loff_t *);
static ssize_t log_faults_file_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t log_faults_file_write(struct file *, const char __user *, size_t, loff_t *);
static ssize_t targeting_file_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t targeting_file_write(struct file *, const char __user *, size_t, loff_t *);
// Handle of the /proc/krf directory; set by krf_init().
static struct proc_dir_entry *krf_dir;

// One file_operations table per option file: each binds that file's read
// and/or write to the handlers declared above.
static const struct file_operations rng_state_file_ops = {
.owner = THIS_MODULE,
.read = rng_state_file_read,
.write = rng_state_file_write,
};

static const struct file_operations probability_file_ops = {
.owner = THIS_MODULE,
.read = probability_file_read,
.write = probability_file_write,
};

// Note: the control file is write-only; no .read handler is registered for it.
static const struct file_operations control_file_ops = {
.owner = THIS_MODULE,
.write = control_file_write,
};

static const struct file_operations log_faults_file_ops = {
.owner = THIS_MODULE,
.read = log_faults_file_read,
.write = log_faults_file_write,
};

static const struct file_operations targeting_file_ops = {
.owner = THIS_MODULE,
.read = targeting_file_read,
.write = targeting_file_write,
};

// Module load entry point. Runs krf_init() and propagates its error code on
// failure; on success returns 0 after logging the version.
int init_module(void) {
int ret;
if ((ret = krf_init()) != 0) {
printk(KERN_ERR "krf_init failed with %d\n", ret);
return ret;
}
// When built with KRF_CODEGEN, the generated krf.gen.x is textually included
// here, so its statements run inside this function after krf_init() succeeds.
// NOTE(review): presumably it fills krf_faultable_table - confirm against the
// generated file.
#ifdef KRF_CODEGEN
#include "krf.gen.x"
#endif
printk(KERN_INFO "krf " KRF_VERSION " loaded\n");
return 0;
}

// Module unload entry point: tears everything down, then logs the version.
void cleanup_module(void) {
krf_teardown();
printk(KERN_INFO "krf " KRF_VERSION " unloaded\n");
}

/* Set up everything the module needs: the netlink socket, a backup of the
 * kernel syscall table, and the /proc/krf directory with its option files.
 *
 * Returns 0 on success. On failure returns -1 (netlink), -2 (syscall table
 * lookup or /proc directory) or -3 (/proc entries), after releasing whatever
 * was already set up, so a failed load leaves nothing behind.
 */
static int krf_init(void) {
  int ret;

  /* Kernel-side netlink socket. */
  if (setup_netlink_socket() < 0) {
    return -1;
  }

  /* Locate the kernel's syscall table. */
  linux_sys_call_table = (void *)kallsyms_lookup_name("sys_call_table");
  if (linux_sys_call_table == NULL) {
    printk(KERN_ERR "krf couldn't load the syscall table\n");
    ret = -2;
    goto err_netlink;
  }

  /* Back up the original entries so they can be restored later. */
  memcpy(krf_sys_call_table, linux_sys_call_table, KRF_NR_SYSCALLS * sizeof(unsigned long *));

  /* Create the KRF configuration directory under /proc. */
  krf_dir = proc_mkdir(KRF_PROC_DIR, NULL);
  if (krf_dir == NULL) {
    printk(KERN_ERR "krf couldn't create /proc/" KRF_PROC_DIR "\n");
    ret = -2;
    goto err_netlink;
  }

  /* Create the option files. The mode must be octal: decimal 644 is 01204. */
  if (proc_create(KRF_RNG_STATE_FILENAME, 0644, krf_dir, &rng_state_file_ops) == NULL ||
      proc_create(KRF_PROBABILITY_FILENAME, 0644, krf_dir, &probability_file_ops) == NULL ||
      proc_create(KRF_CONTROL_FILENAME, 0644, krf_dir, &control_file_ops) == NULL ||
      proc_create(KRF_LOG_FAULTS_FILENAME, 0644, krf_dir, &log_faults_file_ops) == NULL ||
      proc_create(KRF_TARGETING_FILENAME, 0644, krf_dir, &targeting_file_ops) == NULL) {
    printk(KERN_ERR "krf couldn't create /proc entries\n");
    ret = -3;
    goto err_procfs;
  }

  return 0;

err_procfs:
  /* Removes the directory together with any entries that were created. */
  remove_proc_subtree(KRF_PROC_DIR, NULL);
err_netlink:
  destroy_netlink_socket();
  return ret;
}

// Undo krf_init() and any active faulting, in a fixed order.
static void krf_teardown(void) {
// First restore the kernel's original syscall table entries.
krf_flush_table();
// Then remove the /proc/krf directory and everything under it.
remove_proc_subtree(KRF_PROC_DIR, NULL);
// Finally destroy the kernel netlink socket.
destroy_netlink_socket();
}

/* read() handler for /proc/krf/rng_state. The file has no stored content: the
 * handler formats the module's krf_rng_state as "<value>\n".
 *
 * Returns the number of bytes copied, 0 when the offset is already past the
 * start or the caller's buffer is too small, or -EFAULT if the copy fails.
 */
static ssize_t rng_state_file_read(struct file *f, char __user *ubuf, size_t size, loff_t *off) {
  char text[KRF_PROCFS_MAX_SIZE + 1] = {0};
  size_t text_len;

  sprintf(text, "%u\n", krf_rng_state);
  text_len = strnlen(text, KRF_PROCFS_MAX_SIZE);

  if (*off > 0) {
    return 0;
  }
  if (size < text_len) {
    return 0;
  }
  if (copy_to_user(ubuf, text, text_len) != 0) {
    return -EFAULT;
  }

  *off = text_len;
  return text_len;
}

/* write() handler for /proc/krf/rng_state. Nothing is stored in the file: the
 * written text is parsed as an unsigned integer into krf_rng_state.
 *
 * Input longer than KRF_PROCFS_MAX_SIZE is truncated. Returns the length of
 * the parsed string, -EFAULT if the copy fails, -EINVAL if it is not a number.
 */
static ssize_t rng_state_file_write(struct file *f, const char __user *ubuf, size_t size,
                                    loff_t *off) {
  char text[KRF_PROCFS_MAX_SIZE + 1] = {0};
  const size_t copy_len = (size > KRF_PROCFS_MAX_SIZE) ? KRF_PROCFS_MAX_SIZE : size;
  size_t consumed;

  if (copy_from_user(text, ubuf, copy_len) != 0) {
    return -EFAULT;
  }
  if (kstrtouint(text, 0, &krf_rng_state) < 0) {
    return -EINVAL;
  }

  consumed = strnlen(text, KRF_PROCFS_MAX_SIZE);
  *off = consumed;
  return consumed;
}

/* write() handler for /proc/krf/control. The written text is a syscall number
 * (krfctl maps syscall names to numbers via its generated table.gen.c).
 * Nothing is stored in the file: the number is handed to
 * control_file_handler(), which rewrites the kernel syscall table.
 *
 * Returns the length of the parsed string, -EFAULT if the copy fails, -EINVAL
 * if the text is not a number, -EOPNOTSUPP if the handler rejects the syscall.
 */
static ssize_t control_file_write(struct file *f, const char __user *ubuf, size_t size,
                                  loff_t *off) {
  char text[KRF_PROCFS_MAX_SIZE + 1] = {0};
  const size_t copy_len = (size > KRF_PROCFS_MAX_SIZE) ? KRF_PROCFS_MAX_SIZE : size;
  unsigned int sys_num = KRF_NR_SYSCALLS;
  size_t consumed;

  if (copy_from_user(text, ubuf, copy_len) != 0) {
    return -EFAULT;
  }
  if (kstrtouint(text, 0, &sys_num) < 0) {
    return -EINVAL;
  }
  if (control_file_handler(sys_num) < 0) {
    return -EOPNOTSUPP;
  }

  consumed = strnlen(text, KRF_PROCFS_MAX_SIZE);
  *off = consumed;
  return consumed;
}

/* read() handler for /proc/krf/log_faults. The file has no stored content: the
 * handler formats the module's krf_log_faults flag as "<value>\n".
 *
 * Returns the number of bytes copied, 0 when the offset is already past the
 * start or the caller's buffer is too small, or -EFAULT if the copy fails.
 */
static ssize_t log_faults_file_read(struct file *f, char __user *ubuf, size_t size, loff_t *off) {
  char text[KRF_PROCFS_MAX_SIZE + 1] = {0};
  size_t text_len;

  sprintf(text, "%u\n", krf_log_faults);
  text_len = strnlen(text, KRF_PROCFS_MAX_SIZE);

  if (*off > 0) {
    return 0;
  }
  if (size < text_len) {
    return 0;
  }
  if (copy_to_user(ubuf, text, text_len) != 0) {
    return -EFAULT;
  }

  *off = text_len;
  return text_len;
}

/* write() handler for /proc/krf/log_faults. Nothing is stored in the file: the
 * written text is parsed into krf_log_faults and normalised to 0 or 1.
 *
 * Returns the length of the parsed string, -EFAULT if the copy fails, -EINVAL
 * if the text is not a number.
 */
static ssize_t log_faults_file_write(struct file *f, const char __user *ubuf, size_t size,
                                     loff_t *off) {
  char text[KRF_PROCFS_MAX_SIZE + 1] = {0};
  const size_t copy_len = (size > KRF_PROCFS_MAX_SIZE) ? KRF_PROCFS_MAX_SIZE : size;
  size_t consumed;

  if (copy_from_user(text, ubuf, copy_len) != 0) {
    return -EFAULT;
  }
  if (kstrtouint(text, 0, &krf_log_faults) < 0) {
    return -EINVAL;
  }

  /* krfctl only ever writes 0 or 1; this matters when a user writes the proc
   * file directly with some other non-zero value, which is collapsed to 1.
   */
  krf_log_faults = (krf_log_faults != 0);

  consumed = strnlen(text, KRF_PROCFS_MAX_SIZE);
  *off = consumed;
  return consumed;
}

/* read() handler for /proc/krf/targeting. The file has no stored content: the
 * text is produced by targeting_file_read_handler() from krf_target_options.
 *
 * Returns the number of bytes copied, 0 when the offset is already past the
 * start or the caller's buffer is smaller than the whole text, or -EFAULT if
 * the copy fails.
 */
static ssize_t targeting_file_read(struct file *f, char __user *ubuf, size_t size, loff_t *off) {
  char text[KRF_PROCFS_MAX_SIZE + 1] = {0};
  size_t text_len;

  targeting_file_read_handler(text);
  text_len = strnlen(text, KRF_PROCFS_MAX_SIZE);

  if (*off > 0) {
    return 0;
  }
  if (size < text_len) {
    return 0;
  }
  if (copy_to_user(ubuf, text, text_len) != 0) {
    return -EFAULT;
  }

  *off = text_len;
  return text_len;
}

/* write() handler for /proc/krf/targeting. Nothing is stored in the file: the
 * written text must be "<mode> <value>" (two unsigned integers) and is passed
 * to targeting_file_write_handler(), which updates krf_target_options.
 *
 * Returns the length of the parsed string, -EFAULT if the copy fails, -EINVAL
 * if the text does not contain two numbers or the handler rejects the mode.
 */
static ssize_t targeting_file_write(struct file *f, const char __user *ubuf, size_t size,
                                    loff_t *off) {
  char buf[KRF_PROCFS_MAX_SIZE + 1] = {0};
  size_t buflen = 0;
  /* Parsed as plain unsigned ints: "%u" requires an unsigned int object, and
   * the handler validates the mode range itself.
   */
  unsigned int mode = 0;
  unsigned int data = 0;

  if (size > KRF_PROCFS_MAX_SIZE) {
    size = KRF_PROCFS_MAX_SIZE;
  }
  if (copy_from_user(buf, ubuf, size)) {
    return -EFAULT;
  }
  if (sscanf(buf, "%u %u", &mode, &data) != 2) {
    return -EINVAL;
  }
  if (targeting_file_write_handler(mode, data) < 0) {
    return -EINVAL;
  }

  buflen = strnlen(buf, KRF_PROCFS_MAX_SIZE);
  *off = buflen;
  return buflen;
}

下面是判断故障注入是否触发的函数,需要针对所有故障注入配置项逐一匹配比较,如果全部符合才允许故障注入发生:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
/* Decide whether the given context is targeted for fault injection.
 *
 * Every enabled targeting mode must match. Returns a non-zero value when at
 * least one mode is enabled and all enabled modes match, and 0 otherwise -
 * including when no mode is enabled, which is the default configuration.
 */
static __always_inline int krf_targeted(krf_ctx_t *context) {
  /* Starts at 1 and gains 1 per matching mode; reset to 0 on the first miss. */
  int targeted = 1;
  size_t mode;

  /* Stop as soon as one enabled mode fails to match. */
  for (mode = 0; mode < KRF_T_NUM_MODES && targeted != 0; mode++) {
    unsigned int wanted;
    bool hit;

    /* Skip modes whose bit is not set in the mask. */
    if (!(krf_target_options.mode_mask & (1 << mode))) {
      continue;
    }

    wanted = krf_target_options.target_data[mode];
    switch (mode) {
    case KRF_T_MODE_PERSONALITY:
      hit = krf_personality(wanted, context);
      break;
    case KRF_T_MODE_PID:
      hit = krf_pid(wanted, context);
      break;
    case KRF_T_MODE_UID:
      hit = krf_uid(wanted, context);
      break;
    case KRF_T_MODE_GID:
      hit = krf_gid(wanted, context);
      break;
    case KRF_T_MODE_INODE:
      hit = krf_inode(wanted, context);
      break;
    default:
      /* No other mode values exist below KRF_T_NUM_MODES. */
      continue;
    }

    targeted = hit ? targeted + 1 : 0;
  }

  /* Clear bit 0 so the initial 1 (no mode matched) collapses to 0. */
  return (targeted & (~1));
}

/* True when the context's personality shares at least one set bit with target. */
static __always_inline bool krf_personality(unsigned int target, krf_ctx_t *context) {
  const bool matches = (context->personality & target) != 0;
  return matches;
}
/* True when the context's pid equals target. */
static __always_inline bool krf_pid(unsigned int target, krf_ctx_t *context) {
  const bool matches = (context->pid == target);
  return matches;
}
/* True when the uid in the context's credentials equals target. */
static __always_inline bool krf_uid(unsigned int target, krf_ctx_t *context) {
  const bool matches = (context->cred->uid.val == target);
  return matches;
}
/* True when the gid in the context's credentials equals target. */
static __always_inline bool krf_gid(unsigned int target, krf_ctx_t *context) {
  const bool matches = (context->cred->gid.val == target);
  return matches;
}
/* True when any file open in the context refers to inode number target.
 *
 * The whole descriptor table is scanned, bounded by its max_fds, and empty
 * slots are skipped. Stopping at the first NULL slot would miss every file
 * above a closed descriptor and would run off the end of a full table.
 * NOTE(review): the table is read without any locking here - confirm that
 * callers make this safe.
 */
static __always_inline bool krf_inode(unsigned int target, krf_ctx_t *context) {
  unsigned int fd;

  for (fd = 0; fd < context->files->fdt->max_fds; fd++) {
    if (context->files->fdt->fd[fd] == NULL) {
      continue; /* unused descriptor slot */
    }
    if (target == context->files->fdt->fd[fd]->f_inode->i_ino) {
      return true;
    }
  }
  return false;
}

4.2krfctl程序源码剖析

首先,还是先看一下头文件中的内容:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
/* Names of the procfs directory and option files that krfctl drives. */
#define KRF_PROC_DIR "krf"
#define KRF_RNG_STATE_FILENAME "rng_state"
#define KRF_PROBABILITY_FILENAME "probability"
#define KRF_CONTROL_FILENAME "control"
#define KRF_LOG_FAULTS_FILENAME "log_faults"
#define KRF_TARGETING_FILENAME "targeting"

/* control will interpret any number larger than its syscall table
 * as a command to clear all current masks.
 * it's a good bet that linux will never have 65535 syscalls.
 */
#define CLEAR_MAGIC "65535"
/* Full paths of the option files, assembled from the names above. */
#define CONTROL_FILE "/proc/" KRF_PROC_DIR "/" KRF_CONTROL_FILENAME
#define RNG_STATE_FILE "/proc/" KRF_PROC_DIR "/" KRF_RNG_STATE_FILENAME
#define PROBABILITY_FILE "/proc/" KRF_PROC_DIR "/" KRF_PROBABILITY_FILENAME
#define LOG_FAULTS_FILE "/proc/" KRF_PROC_DIR "/" KRF_LOG_FAULTS_FILENAME
#define TARGETING_FILE "/proc/" KRF_PROC_DIR "/" KRF_TARGETING_FILENAME

/* One syscall-name -> syscall-number mapping; the table ends with a NULL name. */
typedef struct syscall_lookup_t {
const char *sys_name;
/* no point in storing it as an int if we're just going to convert it */
const char *sys_num;
} syscall_lookup_t;

/*
Sample of the generated table.gen.c:
#include <stdlib.h>
#include "krfctl.h"
syscall_lookup_t syscall_lookup_table[] = {
{ "read", "0" },
{ "write", "1" },
{ "open", "2" },
{ "close", "3" },
{ "stat", "4" },
{ "fstat", "5" },
{ "lstat", "6" },
{ "poll", "7" },
{ "lseek", "8" },
{ "mmap", "9" },
...
{ NULL, 0 },
};
*/

/* One named group of syscalls; the table ends with a NULL profile name and
 * each syscalls[] list ends with NULL.
 */
typedef struct fault_profile_t {
const char *profile;
const char *description;
/* GCC doesn't like flexible array initialization within
 * structures, so just give ourselves enough room for
 * sensibly sized profiles.
 */
const char *syscalls[256];
} fault_profile_t;

/*
Sample of the generated profiles.gen.c:
#include <stdlib.h>
#include "krfctl.h"
fault_profile_t fault_profile_table[] = {
{ "net", "socket and network syscalls", { "shutdown", "getsockopt", "setsockopt", "listen", "accept", "bind", "recvmsg", "getpeername", "getsockname", "recvfrom", "socket", "sendto", "socketpair", "connect", "sendmsg", NULL } },
...
{ NULL, NULL, { NULL } },
};
*/

/* Both tables are defined in the generated sources sampled above. */
extern syscall_lookup_t syscall_lookup_table[];
extern fault_profile_t fault_profile_table[];

然后,看一下krfctl应用程序的入口函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
/* Look a syscall name up in syscall_lookup_table.
 * Returns its number as a string, or NULL when the name is not in the table.
 */
const char *lookup_syscall_number(const char *sys_name) {
  size_t idx = 0;

  /* The table is terminated by an entry whose name is NULL. */
  while (syscall_lookup_table[idx].sys_name != NULL) {
    if (strcmp(sys_name, syscall_lookup_table[idx].sys_name) == 0) {
      return syscall_lookup_table[idx].sys_num;
    }
    idx++;
  }
  return NULL;
}

/* Look a profile name up in fault_profile_table.
 * Returns its NULL-terminated syscall list, or NULL when no profile matches.
 */
static const char **lookup_syscall_profile(const char *profile) {
  size_t idx = 0;

  /* The table is terminated by an entry whose profile name is NULL. */
  while (fault_profile_table[idx].profile != NULL) {
    if (strcmp(profile, fault_profile_table[idx].profile) == 0) {
      return fault_profile_table[idx].syscalls;
    }
    idx++;
  }
  return NULL;
}

/* Fault every syscall named in s, a list separated by commas and/or spaces.
 *
 * s is not modified: tokenising happens on a private copy because strtok()
 * writes into its input. Exits with an error if the copy cannot be allocated.
 */
static void fault_syscall_spec(const char *s) {
  char *spec = strdup(s);
  if (spec == NULL) {
    /* Without this check strtok(NULL, ...) would resume some earlier string. */
    err(1, "strdup");
  }

  for (const char *sys_name = strtok(spec, ", "); sys_name != NULL;
       sys_name = strtok(NULL, ", ")) {
    fault_syscall(sys_name);
  }

  /* Release the copy once every token has been handled. */
  free(spec);
}

/* Fault every syscall that belongs to the named profile.
 * Exits with status 1 when the profile does not exist.
 */
static void fault_syscall_profile(const char *profile) {
  const char **cursor = lookup_syscall_profile(profile);

  if (cursor == NULL) {
    errx(1, "couldn't find fault profile: %s", profile);
  }

  /* The list is NULL-terminated; fault each entry in order. */
  while (*cursor != NULL) {
    fault_syscall(*cursor);
    cursor++;
  }
}

/* Ask the module to fault one syscall by writing its number to the control file.
 *
 * Returns 0 on success and 1 when the name is unknown (a warning is printed).
 * Failing to open or write the control file terminates the program.
 */
int fault_syscall(const char *sys_name) {
  /* check for wait4 and select */
  const int risky = (strcmp(sys_name, "wait4") == 0) || (strcmp(sys_name, "select") == 0);
  if (risky) {
    fprintf(stderr,
            "Warning: faulting syscall %s can potentially cause kernel oops on module unload\n",
            sys_name);
  }

  /* TODO(ww): Opening the control file once per syscall is
   * pretty nasty, but I don't like passing a fd around.
   * Maybe a static variable that we test-and-set?
   */
  const int fd = open(CONTROL_FILE, O_WRONLY);
  if (fd < 0) {
    err(errno, "open " CONTROL_FILE);
  }

  const char *sys_num = lookup_syscall_number(sys_name);
  if (sys_num == NULL) {
    warnx("WARNING: couldn't find syscall: %s", sys_name);
    close(fd);
    return 1;
  }

  if (write(fd, sys_num, strlen(sys_num)) < 0) {
    if (errno != EOPNOTSUPP) {
      err(errno, "write " CONTROL_FILE);
    }
    /* friendly error message on unsupported syscall */
    errx(errno, "faulting for %s unimplemented", sys_name);
  }

  close(fd);
  return 0;
}

/* Restore all syscalls by writing CLEAR_MAGIC, an out-of-range syscall number,
 * to the control file. Terminates the program if the file cannot be opened or
 * written.
 */
void clear_faulty_calls(void) {
  const int fd = open(CONTROL_FILE, O_WRONLY);
  if (fd < 0) {
    err(errno, "open " CONTROL_FILE);
  }

  const ssize_t rc = write(fd, CLEAR_MAGIC, strlen(CLEAR_MAGIC));
  if (rc < 0) {
    err(errno, "write " CONTROL_FILE);
  }

  close(fd);
}

/* Write state verbatim to the RNG state file. Terminates the program if the
 * file cannot be opened or written.
 */
void set_rng_state(const char *state) {
  const int fd = open(RNG_STATE_FILE, O_WRONLY);
  if (fd < 0) {
    err(errno, "open " RNG_STATE_FILE);
  }

  const ssize_t rc = write(fd, state, strlen(state));
  if (rc < 0) {
    err(errno, "write " RNG_STATE_FILE);
  }

  close(fd);
}

/* Write state verbatim to the fault probability file. Terminates the program
 * if the file cannot be opened or written.
 */
void set_prob_state(const char *state) {
  const int fd = open(PROBABILITY_FILE, O_WRONLY);
  if (fd < 0) {
    err(errno, "open " PROBABILITY_FILE);
  }

  const ssize_t rc = write(fd, state, strlen(state));
  if (rc < 0) {
    err(errno, "write " PROBABILITY_FILE);
  }

  close(fd);
}

/* Flip the fault-logging switch: read the current value from the log_faults
 * file, invert it, and write it back through the same descriptor. Terminates
 * the program on any I/O error or if the current value is not a number.
 */
void toggle_fault_logging(void) {
  char text[32] = {0};
  unsigned int enabled;

  const int fd = open(LOG_FAULTS_FILE, O_RDWR);
  if (fd < 0) {
    err(errno, "open " LOG_FAULTS_FILE);
  }

  /* Leave room for the terminating NUL that sscanf relies on. */
  if (read(fd, text, sizeof(text) - 1) < 0) {
    err(errno, "read " LOG_FAULTS_FILE);
  }
  if (sscanf(text, "%u", &enabled) != 1) {
    errx(1, "weird logging state: %s", text);
  }

  enabled = (enabled == 0);

  memset(text, 0, sizeof(text));
  snprintf(text, sizeof(text), "%u", enabled);
  if (write(fd, text, strlen(text)) < 0) {
    err(errno, "write " LOG_FAULTS_FILE);
  }

  close(fd);
}

/* Enable a targeting mode by writing "<mode> <data>" to the targeting file.
 *
 * mode: numeric targeting mode.
 * data: the value to match, as text.
 * Terminates the program on I/O errors, or when the formatted request does not
 * fit the buffer; sending a truncated request would silently target a
 * different value than the one asked for.
 */
void set_targeting(unsigned int mode, const char *data) {
  int fd;
  int len;
  char buf[32] = {0};

  if ((fd = open(TARGETING_FILE, O_WRONLY)) < 0) {
    err(errno, "open " TARGETING_FILE);
  }

  len = snprintf(buf, sizeof(buf), "%u %s", mode, data);
  if (len < 0) {
    err(errno, "snprintf");
  }
  /* snprintf returns the length it wanted to write; >= size means truncation. */
  if ((size_t)len >= sizeof(buf)) {
    errx(1, "targeting value too long: %s", data);
  }

  if (write(fd, buf, (size_t)len) < 0) {
    err(errno, "write " TARGETING_FILE);
  }
  close(fd);
}

// Targeting option names accepted by -T, passed to getsubopt() as its token
// list. Each name sits at the index of its krf_target_mode_t value, so the
// index getsubopt() returns is used directly as the mode; the entry at
// KRF_T_NUM_MODES is the NULL terminator.
char *const targeting_opts[] = {[KRF_T_MODE_PERSONALITY] = "personality",
[KRF_T_MODE_PID] = "PID",
[KRF_T_MODE_UID] = "UID",
[KRF_T_MODE_GID] = "GID",
[KRF_T_MODE_INODE] = "INODE",
[KRF_T_NUM_MODES] = NULL};

/* krfctl entry point. Each command-line option is applied immediately, in the
 * order given, by writing to the matching /proc/krf/<file>.
 *
 * Exit status: 0 on success, 1 after printing usage (-h or an unknown option),
 * 2 when a -T option has no value, 3 when a -T option name is not recognised.
 */
int main(int argc, char *argv[]) {
  char *subopts, *value;
  int c;

  while ((c = getopt(argc, argv, "F:P:cr:p:LT:Ch")) != -1) {
    switch (c) {
    case 'F': {
      fault_syscall_spec(optarg);
      break;
    }
    case 'P': {
      fault_syscall_profile(optarg);
      break;
    }
    case 'c': {
      clear_faulty_calls();
      break;
    }
    case 'r': {
      set_rng_state(optarg);
      break;
    }
    case 'p': {
      set_prob_state(optarg);
      break;
    }
    case 'L': {
      toggle_fault_logging();
      break;
    }
    case 'T': {
      subopts = optarg;
      int ca;
      while (*subopts != '\0') {
        value = NULL;
        ca = getsubopt(&subopts, targeting_opts, &value);
        /* getsubopt() returns -1 for a name that is not in targeting_opts;
         * that must be rejected here rather than forwarded as a huge mode.
         */
        if (ca < 0 || ca >= KRF_T_NUM_MODES) {
          printf("error: unknown targeting option %s\n", value != NULL ? value : "?");
          return 3;
        }
        if (value == NULL) {
          printf("error: there must be a value input for the targeting option\n");
          return 2;
        }
        set_targeting(ca, value);
      }
      break;
    }
    case 'C': {
      /* Mode 0 with value 0 is the module's "clear all targeting" request. */
      set_targeting(0, "0");
      break;
    }
    case 'h':
    default: {
      printf("usage: krfctl <options>\n"
             "options:\n"
             " -h display this help message\n"
             " -F <syscall> [syscall...] fault the given syscalls\n"
             " -P <profile> fault the given syscall profile\n"
             " -c clear the syscall table of faulty calls\n"
             " -r <state> set the RNG state\n"
             " -p <prob> set the fault probability\n"
             " -L toggle faulty call logging\n"
             " -T <variable>=<value> enable targeting option <variable> with value <value>\n"
             " -C clear the targeting options\n"
             "targeting options:\n"
             " personality, PID, UID, GID, and INODE\n"
             "available profiles (for -P flag):\n"
             " ");
      /* List every profile from the generated table. */
      fault_profile_t *elem = fault_profile_table;
      while (elem->profile != NULL) {
        printf("\t%s\t%s\n", elem->profile, elem->description);
        elem++;
      }
      return 1;
    }
    }
  }

  return 0;
}

4.3krfexec程序源码剖析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
/* Personality value krfexec applies to the process it launches; it is also the
 * only personality targeting value krfexec accepts as configured.
 */
#define KRF_PERSONALITY 28
/* krfexec entry point: prepare this process for personality targeting, then
 * replace it with the requested command. Prints usage and returns 1 when no
 * command is given or the first argument is -h.
 */
int main(int argc, char *argv[]) {
  const int wants_help = (argc >= 2) && (strcmp(argv[1], "-h") == 0);

  if (argc < 2 || wants_help) {
    printf("usage: krfexec <command or file> [args]\n");
    return 1;
  }

  /* Verify that personality targeting is configured and set our personality. */
  krfexec_prep();

  /* Replace the process image with the target program (e.g. ls). */
  if (execvp(argv[1], argv + 1) < 0) {
    err(errno, "exec %s", argv[1]);
  }

  return 0; /* noreturn */
}

/* Check that the module is targeting KRF_PERSONALITY, then give this process
 * that personality (with address-space randomisation disabled).
 *
 * The targeting file holds one "<mode> <value>" line per enabled mode. Exits
 * with an error when personality targeting is disabled, is set to a value
 * other than KRF_PERSONALITY, or on any I/O failure.
 */
void krfexec_prep(void) {
  int fd;
  /* The module's read handler returns no data at all when the caller's buffer
   * is smaller than the full text, so leave room for every mode line.
   */
  char buf[256] = {0};
  int set = 0;

  if ((fd = open(TARGETING_FILE, O_RDONLY)) < 0) {
    err(errno, "open " TARGETING_FILE);
  }
  if (read(fd, buf, sizeof(buf) - 1) < 0) {
    err(errno, "read " TARGETING_FILE);
  }
  close(fd);

  /* Walk the text one line at a time. Re-parsing the start of the buffer on
   * each pass would never terminate if the first line were not the
   * personality mode.
   */
  for (const char *line = buf; line != NULL && *line != '\0';) {
    unsigned mode, data;

    if (sscanf(line, "%u %u", &mode, &data) != 2) {
      break;
    }
    if (mode == KRF_T_MODE_PERSONALITY) {
      if (data != KRF_PERSONALITY) {
        errx(1, "Personality set to a value that krfexec does not recognize. Use `krfctl -T "
                "personality=28` to properly set.");
      }
      set = 1;
      break;
    }

    /* Advance past the end of this line, if there is another one. */
    line = strchr(line, '\n');
    if (line != NULL) {
      line++;
    }
  }

  if (!set) {
    errx(1, "Personality targeting disabled. Run `krfctl -T personality=28` to enable.");
  }

  /* Set this process's personality; it is kept across the upcoming exec. */
  if (personality(KRF_PERSONALITY | ADDR_NO_RANDOMIZE) < 0) {
    err(errno, "personality");
  }
}

5.总结思考

KRF框架的设计思想其实有些类似于AOP,只不过针对的是内核系统调用级别的切面代理:通过重写系统调用表,用预生成的代理系统调用函数替换原始表项,因此系统调用可以定制具体逻辑,如返回指定的错误码。同时,代理系统调用会根据特定的条件判断决定是走原始系统调用还是定制逻辑,这里说的特定条件就是故障注入触发条件,如概率、进程号、用户ID、用户组ID、进程personality等。