2021-06-05 18:40:14

by Jarmo Tiitto

[permalink] [raw]
Subject: [PATCH v3 1/1] pgo: Fix sleep in atomic section in prf_open() v3

In prf_open() the required buffer size can be so large that
vzalloc() may sleep thus triggering bug:

======
BUG: sleeping function called from invalid context at include/linux/sched/mm.h:201
in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 337, name: cat
CPU: 1 PID: 337 Comm: cat Not tainted 5.13.0-rc2-24-hack+ #154
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
Call Trace:
dump_stack+0xc7/0x134
___might_sleep+0x177/0x190
__might_sleep+0x5a/0x90
kmem_cache_alloc_node_trace+0x6b/0x3a0
? __get_vm_area_node+0xcd/0x1b0
? dput+0x283/0x300
__get_vm_area_node+0xcd/0x1b0
__vmalloc_node_range+0x7b/0x420
? prf_open+0x1da/0x580
? prf_open+0x32/0x580
? __llvm_profile_instrument_memop+0x36/0x50
vzalloc+0x54/0x60
? prf_open+0x1da/0x580
prf_open+0x1da/0x580
full_proxy_open+0x211/0x370
....
======

Since we can't vzalloc while holding pgo_lock,
split the code into steps:
* First get initial buffer size via prf_buffer_size()
and release the lock.

* Round up to the page size and allocate the buffer.

* Finally re-acquire the pgo_lock and call prf_serialize().
prf_serialize() will now check if the buffer is large enough
and returns -EAGAIN if it is not.

Note that prf_buffer_size() walks linked lists that
are modified by __llvm_profile_instrument_target(),
so we have to "guess" the buffer size ahead of time.
prf_serialize() will then return the actual data length.

Signed-off-by: Jarmo Tiitto <[email protected]>
---
v3: Go back the loop solution.
Explained why prf_buffer_size() need pgo_lock.
Cleanup the code a bit.
v2: Loopless attempt.
---
kernel/pgo/fs.c | 62 +++++++++++++++++++++++++++++++++++++------------
1 file changed, 47 insertions(+), 15 deletions(-)

diff --git a/kernel/pgo/fs.c b/kernel/pgo/fs.c
index ef985159dad3..0ce0dc9caf7a 100644
--- a/kernel/pgo/fs.c
+++ b/kernel/pgo/fs.c
@@ -24,13 +24,14 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include "pgo.h"

static struct dentry *directory;

struct prf_private_data {
void *buffer;
- unsigned long size;
+ size_t size;
};

/*
@@ -213,6 +214,7 @@ static inline unsigned long prf_get_padding(unsigned long size)
return 7 & (sizeof(u64) - size % sizeof(u64));
}

+/* Note: caller *must* hold pgo_lock */
static unsigned long prf_buffer_size(void)
{
return sizeof(struct llvm_prf_header) +
@@ -225,18 +227,22 @@ static unsigned long prf_buffer_size(void)

/*
* Serialize the profiling data into a format LLVM's tools can understand.
+ * Returns actual buffer size in p->size.
+ * Note: p->buffer must point into vzalloc()'d
+ * area of at least prf_buffer_size() in size.
* Note: caller *must* hold pgo_lock.
*/
-static int prf_serialize(struct prf_private_data *p)
+static int prf_serialize(struct prf_private_data *p, size_t buf_size)
{
int err = 0;
void *buffer;

+ /* get buffer size, again. */
p->size = prf_buffer_size();
- p->buffer = vzalloc(p->size);

- if (!p->buffer) {
- err = -ENOMEM;
+ /* check for unlikely overflow. */
+ if (p->size > buf_size) {
+ err = -EAGAIN;
goto out;
}

@@ -259,27 +265,53 @@ static int prf_open(struct inode *inode, struct file *file)
{
struct prf_private_data *data;
unsigned long flags;
- int err;
+ size_t buf_size;
+ int err = 0;

data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data) {
err = -ENOMEM;
- goto out;
+ goto out_free;
}

+ /* get initial buffer size */
flags = prf_lock();
+ data->size = prf_buffer_size();
+ prf_unlock(flags);

- err = prf_serialize(data);
- if (unlikely(err)) {
- kfree(data);
- goto out_unlock;
- }
+ do {
+ if (data->buffer)
+ vfree(data->buffer);
+
+ /* allocate, round up to page size. */
+ buf_size = PAGE_ALIGN(data->size);
+ data->buffer = vzalloc(buf_size);
+
+ if (!data->buffer) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ /*
+ * try serialize and get actual
+ * data length in data->size
+ */
+ flags = prf_lock();
+ err = prf_serialize(data, buf_size);
+ prf_unlock(flags);
+ /* in unlikely case, try again. */
+ } while (err == -EAGAIN);
+
+ if (err)
+ goto out_free;

file->private_data = data;
+ return 0;

-out_unlock:
- prf_unlock(flags);
-out:
+out_free:
+ if (data)
+ vfree(data->buffer);
+ kfree(data);
return err;
}


base-commit: 46773f32ddf1d49a84eca5f19126d6dfaf08e8d9
--
2.31.1


2021-06-07 23:07:03

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] pgo: Fix sleep in atomic section in prf_open() v3

On Sat, 5 Jun 2021 21:31:29 +0300, Jarmo Tiitto wrote:
> In prf_open() the required buffer size can be so large that
> vzalloc() may sleep thus triggering bug:
>
> ======
> BUG: sleeping function called from invalid context at include/linux/sched/mm.h:201
> in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 337, name: cat
> CPU: 1 PID: 337 Comm: cat Not tainted 5.13.0-rc2-24-hack+ #154
> Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
> Call Trace:
> dump_stack+0xc7/0x134
> ___might_sleep+0x177/0x190
> __might_sleep+0x5a/0x90
> kmem_cache_alloc_node_trace+0x6b/0x3a0
> ? __get_vm_area_node+0xcd/0x1b0
> ? dput+0x283/0x300
> __get_vm_area_node+0xcd/0x1b0
> __vmalloc_node_range+0x7b/0x420
> ? prf_open+0x1da/0x580
> ? prf_open+0x32/0x580
> ? __llvm_profile_instrument_memop+0x36/0x50
> vzalloc+0x54/0x60
> ? prf_open+0x1da/0x580
> prf_open+0x1da/0x580
> full_proxy_open+0x211/0x370
> ....
> ======
>
> [...]

Applied to for-next/clang/features, thanks! I made some additional tweaks
on top of this, in a separate patch I just sent out.

[1/1] pgo: Fix sleep in atomic section in prf_open() v3
https://git.kernel.org/kees/c/d13b0485a7bb

--
Kees Cook

2021-06-09 01:37:10

by Jarmo Tiitto

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] pgo: Fix sleep in atomic section in prf_open() v3

Kees Cook wrote tiistaina 8. kesäkuuta 2021 2.02.51 EEST:
> On Sat, 5 Jun 2021 21:31:29 +0300, Jarmo Tiitto wrote:
> > In prf_open() the required buffer size can be so large that
> > vzalloc() may sleep thus triggering bug:
> >
> > ======
> >
> > BUG: sleeping function called from invalid context at
> > include/linux/sched/mm.h:201 in_atomic(): 1, irqs_disabled(): 1,
> > non_block: 0, pid: 337, name: cat CPU: 1 PID: 337 Comm: cat Not tainted
> > 5.13.0-rc2-24-hack+ #154
> > Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
> >
> > Call Trace:
> > dump_stack+0xc7/0x134
> > ___might_sleep+0x177/0x190
> > __might_sleep+0x5a/0x90
> > kmem_cache_alloc_node_trace+0x6b/0x3a0
> > ? __get_vm_area_node+0xcd/0x1b0
> > ? dput+0x283/0x300
> > __get_vm_area_node+0xcd/0x1b0
> > __vmalloc_node_range+0x7b/0x420
> > ? prf_open+0x1da/0x580
> > ? prf_open+0x32/0x580
> > ? __llvm_profile_instrument_memop+0x36/0x50
> > vzalloc+0x54/0x60
> > ? prf_open+0x1da/0x580
> > prf_open+0x1da/0x580
> > full_proxy_open+0x211/0x370
> > ....
> >
> > ======
> >
> > [...]
>
> Applied to for-next/clang/features, thanks! I made some additional tweaks
> on top of this, in a separate patch I just sent out.
>
> [1/1] pgo: Fix sleep in atomic section in prf_open() v3
> https://git.kernel.org/kees/c/d13b0485a7bb
>
> --
> Kees Cook

Oof. Looking now at your fixes, I'm sorry for the extra work needed. :-|
I should have cleaned up the error paths better, since that was mentioned
earlier.
My eyes were apparently pointed on the v2 module profiling code I have been
brewing the past weekend.

Thanks!
--
Jarmo