If we allocate a perf ring buffer with the size of a single page,
we get memory corruption when releasing it. It's caused
by the rb_free_work function (CONFIG_PERF_USE_VMALLOC option).
For a single page sized ring buffer the page_order is -1 (because
nr_pages is 0). This needs to be recognized in the rb_free_work
function to release the proper number of pages.
Introduce a page_nr function (CONFIG_PERF_USE_VMALLOC only)
that returns the number of allocated pages, and use it in the
rb_free_work and perf_mmap_to_page functions.
Also set rb->nr_pages to 0 in case only the user page is
allocated, which makes perf_output_begin fail and prevents
sample storage.
v2 changes:
- fixed the perf_output_begin handling of single page buffer
Reported-by: Jan Stancek <[email protected]>
Signed-off-by: Jiri Olsa <[email protected]>
Cc: Corey Ashford <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
---
kernel/events/ring_buffer.c | 40 +++++++++++++++++++++++++++++++++-------
1 file changed, 33 insertions(+), 7 deletions(-)
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 23cb34f..a802151 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -154,7 +154,8 @@ int perf_output_begin(struct perf_output_handle *handle,
if (head - local_read(&rb->wakeup) > rb->watermark)
local_add(rb->watermark, &rb->wakeup);
- handle->page = offset >> (PAGE_SHIFT + page_order(rb));
+ /* page is always 0 for the CONFIG_PERF_USE_VMALLOC option */
+ handle->page = offset >> PAGE_SHIFT;
handle->page &= rb->nr_pages - 1;
handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1);
handle->addr = rb->data_pages[handle->page];
@@ -312,11 +313,21 @@ void rb_free(struct ring_buffer *rb)
}
#else
+/*
+ * Returns the total number of pages allocated
+ * by the ring buffer including the user page.
+ */
+static int page_nr(struct ring_buffer *rb)
+{
+ return page_order(rb) == -1 ?
+ 1 : /* no data, just user page */
+ 1 + (1 << page_order(rb)); /* user page + data pages */
+}
struct page *
perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
{
- if (pgoff > (1UL << page_order(rb)))
+ if (pgoff > page_nr(rb))
return NULL;
return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
@@ -336,10 +347,10 @@ static void rb_free_work(struct work_struct *work)
int i, nr;
rb = container_of(work, struct ring_buffer, work);
- nr = 1 << page_order(rb);
+ nr = page_nr(rb);
base = rb->user_page;
- for (i = 0; i < nr + 1; i++)
+ for (i = 0; i < nr; i++)
perf_mmap_unmark_page(base + (i * PAGE_SIZE));
vfree(base);
@@ -371,9 +382,24 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
goto fail_all_buf;
rb->user_page = all_buf;
- rb->data_pages[0] = all_buf + PAGE_SIZE;
- rb->page_order = ilog2(nr_pages);
- rb->nr_pages = 1;
+
+ /*
+ * For special case nr_pages == 0 we have
+ * only the user page mmaped plus:
+ *
+ * rb->data_pages[0] = NULL
+ * rb->nr_pages = 0
+ * rb->page_order = -1
+ *
+ * The perf_output_begin function is guarded
+ * by (rb->nr_pages > 0) condition, so no
+ * output code touches above setup if we
+ * have only user page allocated.
+ */
+
+ rb->data_pages[0] = nr_pages ? all_buf + PAGE_SIZE : NULL;
+ rb->nr_pages = nr_pages ? 1 : 0;
+ rb->page_order = ilog2(nr_pages);
ring_buffer_init(rb, watermark, flags);
--
1.7.11.7
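To make the failure mode concrete before the review discussion below, here
is a minimal user-space sketch of the page accounting (an editorial
illustration, not part of the patch; it assumes the kernel convention
that page_order ends up as -1 when nr_pages is 0):

#include <stdio.h>

/* mirrors the page_nr() helper introduced by the patch above */
static int page_nr(int page_order)
{
	return page_order == -1 ?
		1 :			/* no data, just the user page */
		1 + (1 << page_order);	/* user page + data pages */
}

int main(void)
{
	int page_order = -1;	/* single-page buffer: nr_pages == 0 */

	/*
	 * The old rb_free_work did 'nr = 1 << page_order' and then
	 * freed nr + 1 pages; shifting by -1 is undefined in C, so
	 * the loop count is garbage and the free walks past the one
	 * page that was actually allocated.
	 */
	printf("pages to free: %d\n", page_nr(page_order));	/* 1 */
	return 0;
}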
Commit-ID: 12bc915ee149ac31d17c513edc7303660d024239
Gitweb: http://git.kernel.org/tip/12bc915ee149ac31d17c513edc7303660d024239
Author: Jiri Olsa <[email protected]>
AuthorDate: Fri, 1 Mar 2013 17:34:49 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 6 Mar 2013 11:14:01 +0100
perf: Fix vmalloc ring buffer free function
If we allocate a perf ring buffer with the size of a single page,
we get memory corruption when releasing it. It's caused
by the rb_free_work function (CONFIG_PERF_USE_VMALLOC option).
For a single page sized ring buffer the page_order is -1 (because
nr_pages is 0). This needs to be recognized in the rb_free_work
function to release the proper number of pages.
Introduce a page_nr function (CONFIG_PERF_USE_VMALLOC only)
that returns the number of allocated pages, and use it in the
rb_free_work and perf_mmap_to_page functions.
Also set rb->nr_pages to 0 in case only the user page is
allocated, which makes perf_output_begin fail and prevents
sample storage.
Reported-by: Jan Stancek <[email protected]>
Signed-off-by: Jiri Olsa <[email protected]>
Cc: Corey Ashford <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: "David S. Miller" <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Andrew Morton <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/events/ring_buffer.c | 40 +++++++++++++++++++++++++++++++++-------
1 file changed, 33 insertions(+), 7 deletions(-)
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 23cb34f..a802151 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -154,7 +154,8 @@ int perf_output_begin(struct perf_output_handle *handle,
if (head - local_read(&rb->wakeup) > rb->watermark)
local_add(rb->watermark, &rb->wakeup);
- handle->page = offset >> (PAGE_SHIFT + page_order(rb));
+ /* page is always 0 for the CONFIG_PERF_USE_VMALLOC option */
+ handle->page = offset >> PAGE_SHIFT;
handle->page &= rb->nr_pages - 1;
handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1);
handle->addr = rb->data_pages[handle->page];
@@ -312,11 +313,21 @@ void rb_free(struct ring_buffer *rb)
}
#else
+/*
+ * Returns the total number of pages allocated
+ * by the ring buffer including the user page.
+ */
+static int page_nr(struct ring_buffer *rb)
+{
+ return page_order(rb) == -1 ?
+ 1 : /* no data, just user page */
+ 1 + (1 << page_order(rb)); /* user page + data pages */
+}
struct page *
perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
{
- if (pgoff > (1UL << page_order(rb)))
+ if (pgoff > page_nr(rb))
return NULL;
return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
@@ -336,10 +347,10 @@ static void rb_free_work(struct work_struct *work)
int i, nr;
rb = container_of(work, struct ring_buffer, work);
- nr = 1 << page_order(rb);
+ nr = page_nr(rb);
base = rb->user_page;
- for (i = 0; i < nr + 1; i++)
+ for (i = 0; i < nr; i++)
perf_mmap_unmark_page(base + (i * PAGE_SIZE));
vfree(base);
@@ -371,9 +382,24 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
goto fail_all_buf;
rb->user_page = all_buf;
- rb->data_pages[0] = all_buf + PAGE_SIZE;
- rb->page_order = ilog2(nr_pages);
- rb->nr_pages = 1;
+
+ /*
+ * For special case nr_pages == 0 we have
+ * only the user page mmaped plus:
+ *
+ * rb->data_pages[0] = NULL
+ * rb->nr_pages = 0
+ * rb->page_order = -1
+ *
+ * The perf_output_begin function is guarded
+ * by (rb->nr_pages > 0) condition, so no
+ * output code touches above setup if we
+ * have only user page allocated.
+ */
+
+ rb->data_pages[0] = nr_pages ? all_buf + PAGE_SIZE : NULL;
+ rb->nr_pages = nr_pages ? 1 : 0;
+ rb->page_order = ilog2(nr_pages);
ring_buffer_init(rb, watermark, flags);
2013/3/1 Jiri Olsa <[email protected]>:
> If we allocate a perf ring buffer with the size of a single page,
> we get memory corruption when releasing it. It's caused
> by the rb_free_work function (CONFIG_PERF_USE_VMALLOC option).
>
> For a single page sized ring buffer the page_order is -1 (because
> nr_pages is 0). This needs to be recognized in the rb_free_work
> function to release the proper number of pages.
>
> Introduce a page_nr function (CONFIG_PERF_USE_VMALLOC only)
> that returns the number of allocated pages, and use it in the
> rb_free_work and perf_mmap_to_page functions.
>
> Also set rb->nr_pages to 0 in case only the user page is
> allocated, which makes perf_output_begin fail and prevents
> sample storage.
>
> v2 changes:
> - fixed the perf_output_begin handling of single page buffer
>
> Reported-by: Jan Stancek <[email protected]>
> Signed-off-by: Jiri Olsa <[email protected]>
> Cc: Corey Ashford <[email protected]>
> Cc: Frederic Weisbecker <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Namhyung Kim <[email protected]>
> Cc: Paul Mackerras <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Arnaldo Carvalho de Melo <[email protected]>
> ---
> kernel/events/ring_buffer.c | 40 +++++++++++++++++++++++++++++++++-------
> 1 file changed, 33 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
> index 23cb34f..a802151 100644
> --- a/kernel/events/ring_buffer.c
> +++ b/kernel/events/ring_buffer.c
> @@ -154,7 +154,8 @@ int perf_output_begin(struct perf_output_handle *handle,
> if (head - local_read(&rb->wakeup) > rb->watermark)
> local_add(rb->watermark, &rb->wakeup);
>
> - handle->page = offset >> (PAGE_SHIFT + page_order(rb));
> + /* page is always 0 for the CONFIG_PERF_USE_VMALLOC option */
> + handle->page = offset >> PAGE_SHIFT;
> handle->page &= rb->nr_pages - 1;
> handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1);
> handle->addr = rb->data_pages[handle->page];
> @@ -312,11 +313,21 @@ void rb_free(struct ring_buffer *rb)
> }
>
> #else
> +/*
> + * Returns the total number of pages allocated
> + * by the ring buffer including the user page.
> + */
> +static int page_nr(struct ring_buffer *rb)
> +{
> + return page_order(rb) == -1 ?
> + 1 : /* no data, just user page */
> + 1 + (1 << page_order(rb)); /* user page + data pages */
> +}
Could it be simply rb->nr_pages + 1?
Patch looks good in any case. Thanks.
On Wed, Mar 06, 2013 at 04:20:15PM +0100, Frederic Weisbecker wrote:
> 2013/3/1 Jiri Olsa <[email protected]>:
> > If we allocate a perf ring buffer with the size of a single page,
> > we get memory corruption when releasing it. It's caused
> > by the rb_free_work function (CONFIG_PERF_USE_VMALLOC option).
> >
> > For a single page sized ring buffer the page_order is -1 (because
> > nr_pages is 0). This needs to be recognized in the rb_free_work
> > function to release the proper number of pages.
> >
> > Introduce a page_nr function (CONFIG_PERF_USE_VMALLOC only)
> > that returns the number of allocated pages, and use it in the
> > rb_free_work and perf_mmap_to_page functions.
> >
> > Also set rb->nr_pages to 0 in case only the user page is
> > allocated, which makes perf_output_begin fail and prevents
> > sample storage.
> >
> > v2 changes:
> > - fixed the perf_output_begin handling of single page buffer
> >
> > Reported-by: Jan Stancek <[email protected]>
> > Signed-off-by: Jiri Olsa <[email protected]>
> > Cc: Corey Ashford <[email protected]>
> > Cc: Frederic Weisbecker <[email protected]>
> > Cc: Ingo Molnar <[email protected]>
> > Cc: Namhyung Kim <[email protected]>
> > Cc: Paul Mackerras <[email protected]>
> > Cc: Peter Zijlstra <[email protected]>
> > Cc: Arnaldo Carvalho de Melo <[email protected]>
> > ---
> > kernel/events/ring_buffer.c | 40 +++++++++++++++++++++++++++++++++-------
> > 1 file changed, 33 insertions(+), 7 deletions(-)
> >
> > diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
> > index 23cb34f..a802151 100644
> > --- a/kernel/events/ring_buffer.c
> > +++ b/kernel/events/ring_buffer.c
> > @@ -154,7 +154,8 @@ int perf_output_begin(struct perf_output_handle *handle,
> > if (head - local_read(&rb->wakeup) > rb->watermark)
> > local_add(rb->watermark, &rb->wakeup);
> >
> > - handle->page = offset >> (PAGE_SHIFT + page_order(rb));
> > + /* page is always 0 for the CONFIG_PERF_USE_VMALLOC option */
> > + handle->page = offset >> PAGE_SHIFT;
> > handle->page &= rb->nr_pages - 1;
> > handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1);
> > handle->addr = rb->data_pages[handle->page];
> > @@ -312,11 +313,21 @@ void rb_free(struct ring_buffer *rb)
> > }
> >
> > #else
> > +/*
> > + * Returns the total number of pages allocated
> > + * by the ring buffer including the user page.
> > + */
> > +static int page_nr(struct ring_buffer *rb)
> > +{
> > + return page_order(rb) == -1 ?
> > + 1 : /* no data, just user page */
> > + 1 + (1 << page_order(rb)); /* user page + data pages */
> > +}
>
> Could it be simply rb->nr_pages + 1?
>
> Patch looks good in any case. Thanks.
nope, because the CONFIG_PERF_USE_VMALLOC rb uses only the 1st slot
of rb->data_pages[], so rb->nr_pages is either 0 or 1
the actual number of pages is counted via rb->page_order
(which is -1 for the case with no data pages)
thanks,
jirka
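For reference, the layout being described, as a sketch (my reading of
the discussion, not copied from kernel sources):

/*
 * CONFIG_PERF_USE_VMALLOC: one contiguous vmalloc area.
 *
 *   rb->user_page     -> page 0 (the user/control page)
 *   rb->data_pages[0] -> page 1, the only slot used; the data area
 *                        spans 2^page_order pages and is addressed
 *                        by offset, not by array slot
 *
 * Hence rb->nr_pages here is only 0 or 1, and the real page count
 * is derived from rb->page_order.
 */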
2013/3/6 Jiri Olsa <[email protected]>:
> nope, because the CONFIG_PERF_USE_VMALLOC rb uses only the 1st slot
> of rb->data_pages[], so rb->nr_pages is either 0 or 1
>
> the actual number of pages is counted via rb->page_order
> (which is -1 for the case with no data pages)
Ah right.
On Fri, 2013-03-01 at 17:34 +0100, Jiri Olsa wrote:
> If we allocate a perf ring buffer with the size of a single page,
> we get memory corruption when releasing it. It's caused
> by the rb_free_work function (CONFIG_PERF_USE_VMALLOC option).
>
> For a single page sized ring buffer the page_order is -1 (because
> nr_pages is 0). This needs to be recognized in the rb_free_work
> function to release the proper number of pages.
>
> Introduce a page_nr function (CONFIG_PERF_USE_VMALLOC only)
> that returns the number of allocated pages, and use it in the
> rb_free_work and perf_mmap_to_page functions.
>
> Also set rb->nr_pages to 0 in case only the user page is
> allocated, which makes perf_output_begin fail and prevents
> sample storage.
>
> v2 changes:
> - fixed the perf_output_begin handling of single page buffer
>
> Reported-by: Jan Stancek <[email protected]>
> Signed-off-by: Jiri Olsa <[email protected]>
> Cc: Corey Ashford <[email protected]>
> Cc: Frederic Weisbecker <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Namhyung Kim <[email protected]>
> Cc: Paul Mackerras <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Arnaldo Carvalho de Melo <[email protected]>
> ---
> kernel/events/ring_buffer.c | 40 +++++++++++++++++++++++++++++++++-------
> 1 file changed, 33 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
> index 23cb34f..a802151 100644
> --- a/kernel/events/ring_buffer.c
> +++ b/kernel/events/ring_buffer.c
> @@ -154,7 +154,8 @@ int perf_output_begin(struct perf_output_handle *handle,
> if (head - local_read(&rb->wakeup) > rb->watermark)
> local_add(rb->watermark, &rb->wakeup);
>
> - handle->page = offset >> (PAGE_SHIFT + page_order(rb));
> + /* page is allways 0 for CONFIG_PERF_USE_VMALLOC option */
> + handle->page = offset >> PAGE_SHIFT;
I don't get that comment.. also it makes the calculation for page
inconsistent with the below calculation for addr.
We basically want to split the offset into a page number and an offset
within that; this means we need:
pg_nr = offset >> page_shift;
pg_offset = offset & (1 << page_shift) - 1;
You just wrecked that.
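As a worked example (assuming PAGE_SHIFT == 12 and page_order == 2,
so page_shift == 14):

	offset    = 0x5123;
	pg_nr     = 0x5123 >> 14;		/* == 1      */
	pg_offset = 0x5123 & ((1 << 14) - 1);	/* == 0x1123 */

Dropping page_order from only one of the two computations breaks that
pairing.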
> handle->page &= rb->nr_pages - 1;
> handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1);
> handle->addr = rb->data_pages[handle->page];
> @@ -312,11 +313,21 @@ void rb_free(struct ring_buffer *rb)
> }
>
> #else
> +/*
> + * Returns the total number of pages allocated
> > + * by the ring buffer including the user page.
> + */
> +static int page_nr(struct ring_buffer *rb)
> +{
> + return page_order(rb) == -1 ?
> + 1 : /* no data, just user page */
> + 1 + (1 << page_order(rb)); /* user page + data pages */
> +}
I think a number of the bugs below are due to the conflation of data
pages vs total pages. It might be best to call this data_page_nr() and
leave the +1 for the sites where it's needed.
> struct page *
> perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
> {
> - if (pgoff > (1UL << page_order(rb)))
> + if (pgoff > page_nr(rb))
> return NULL;
This is just wrong.. you have page_nr() be 1+2^n, but the comparison is
'>' not '>=', which means we get a range of 2+2^n, not the desired 1+2^n.
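For instance (assuming page_order == 2, i.e. 4 data pages):

	/* page_nr(rb) == 1 + 4 == 5, so valid pgoff values are 0..4 */
	if (pgoff > page_nr(rb))	/* admits pgoff == 5: 6 pages, not 5 */
		return NULL;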
> return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
> @@ -336,10 +347,10 @@ static void rb_free_work(struct work_struct *work)
> int i, nr;
>
> rb = container_of(work, struct ring_buffer, work);
> - nr = 1 << page_order(rb);
> + nr = page_nr(rb);
>
> base = rb->user_page;
> - for (i = 0; i < nr + 1; i++)
> + for (i = 0; i < nr; i++)
> perf_mmap_unmark_page(base + (i * PAGE_SIZE));
>
> vfree(base);
> @@ -371,9 +382,24 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
> goto fail_all_buf;
>
> rb->user_page = all_buf;
> - rb->data_pages[0] = all_buf + PAGE_SIZE;
> - rb->page_order = ilog2(nr_pages);
> - rb->nr_pages = 1;
> +
> + /*
> + * For special case nr_pages == 0 we have
> + * only the user page mmaped plus:
> + *
> + * rb->data_pages[0] = NULL
> + * rb->nr_pages = 0
> + * rb->page_order = -1
> + *
> + * The perf_output_begin function is guarded
> + * by (rb->nr_pages > 0) condition, so no
> + * output code touches above setup if we
> + * have only user page allocated.
> + */
> +
> + rb->data_pages[0] = nr_pages ? all_buf + PAGE_SIZE : NULL;
> + rb->nr_pages = nr_pages ? 1 : 0;
> + rb->page_order = ilog2(nr_pages);
>
> ring_buffer_init(rb, watermark, flags);
>
heya ;)
great to hear from you again!
On Mon, Mar 11, 2013 at 10:40:43AM +0100, Peter Zijlstra wrote:
> On Fri, 2013-03-01 at 17:34 +0100, Jiri Olsa wrote:
> > If we allocate a perf ring buffer with the size of a single page,
SNIP
> > diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
> > index 23cb34f..a802151 100644
> > --- a/kernel/events/ring_buffer.c
> > +++ b/kernel/events/ring_buffer.c
> > @@ -154,7 +154,8 @@ int perf_output_begin(struct perf_output_handle *handle,
> > if (head - local_read(&rb->wakeup) > rb->watermark)
> > local_add(rb->watermark, &rb->wakeup);
> >
> > - handle->page = offset >> (PAGE_SHIFT + page_order(rb));
> > + /* page is always 0 for the CONFIG_PERF_USE_VMALLOC option */
> > + handle->page = offset >> PAGE_SHIFT;
>
> I don't get that comment.. also it makes the calculation for page
> inconsistent with the below calculation for addr.
>
> We basically want to split the offset into a page number and an offset
> within that; this means we need:
>
> pg_nr = offset >> page_shift;
> pg_offset = offset & (1 << page_shift) - 1;
>
> You just wrecked that.
>
> > handle->page &= rb->nr_pages - 1;
here's ^^^ where the handle->page becomes 0 due to (rb->nr_pages == 0)
for CONFIG_PERF_USE_VMALLOC we use only the first item in
the rb->data_pages[] array, which holds the whole data memory
and is accessed via 'offset' directly
> > handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1);
> > handle->addr = rb->data_pages[handle->page];
> > @@ -312,11 +313,21 @@ void rb_free(struct ring_buffer *rb)
> > }
> >
> > #else
> > +/*
> > + * Returns the total number of pages allocated
> > + * by the ring buffer including the user page.
> > + */
> > +static int page_nr(struct ring_buffer *rb)
> > +{
> > + return page_order(rb) == -1 ?
> > + 1 : /* no data, just user page */
> > + 1 + (1 << page_order(rb)); /* user page + data pages */
> > +}
>
> I think a number of the bugs below are due to the conflation of data
> pages vs total pages. It might be best to call this data_page_nr() and
> leave the +1 for the sites where it's needed.
both places using page_nr need the total page count,
maybe I can rename it to total_page_nr()
>
>
> > struct page *
> > perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
> > {
> > - if (pgoff > (1UL << page_order(rb)))
> > + if (pgoff > page_nr(rb))
> > return NULL;
>
> This is just wrong.. you have page_nr() be 1+2^n, but the comparison is
> '>' not '>=', which means we get a range of 2+2^n, not the desired 1+2^n.
ouch, missed that one
thanks,
jirka
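To spell out the addressing for the vmalloc case (my reading of the
v2 hunk, assuming PAGE_SHIFT == 12):

	/*
	 * handle->page  = offset >> PAGE_SHIFT;  then masked below
	 * handle->page &= rb->nr_pages - 1;      1 - 1 == 0, so always 0
	 * handle->addr  = rb->data_pages[0];     the whole data area
	 * handle->size  = offset & ((PAGE_SIZE << page_order(rb)) - 1);
	 */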
* Jiri Olsa <[email protected]> wrote:
> > > struct page *
> > > perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
> > > {
> > > - if (pgoff > (1UL << page_order(rb)))
> > > + if (pgoff > page_nr(rb))
> > > return NULL;
> >
> > This is just wrong.. you have page_nr() be 1+2^n, but the comparison is
> > '>' not '>=', which means we get a range of 2+2^n, not the desired 1+2^n.
>
> ouch, missed that one
So, because this is perf/urgent which I want to send Linuswards, I removed
the commit to not hold up other bits - please resend the whole patch once
everything is fixed.
Thanks,
Ingo
On Mon, 2013-03-11 at 12:21 +0100, Jiri Olsa wrote:
> > pg_nr = offset >> page_shift;
> > pg_offset = offset & (1 << page_shift) - 1;
> >
> > You just wrecked that.
> >
> > > handle->page &= rb->nr_pages - 1;
>
> here's ^^^ where the handle->page becomes 0 due to (rb->nr_pages == 0)
then that'll be &= -1, which is a nop, no?
Also, you wrecked it for anything that uses some intermediate page
order (we currently don't).
On Mon, Mar 11, 2013 at 05:26:33PM +0100, Peter Zijlstra wrote:
> On Mon, 2013-03-11 at 12:21 +0100, Jiri Olsa wrote:
> > > pg_nr = offset >> page_shift;
> > > pg_offset = offset & (1 << page_shift) - 1;
> > >
> > > You just wrecked that.
> > >
> > > > handle->page &= rb->nr_pages - 1;
> >
> > here's ^^^ where the handle->page becomes 0 due to (rb->nr_pages == 0)
>
> then that'll be &= -1, which is a nop, no?
ugh.. should have been: '..due to (rb->nr_pages == 1)' as from:
+ rb->nr_pages = nr_pages ? 1 : 0;
if it's 0, it never makes it through perf_output_begin
>
> Also, you wrecked it for anything that uses some intermediate page
> order (we currently don't).
I'll put that hunk back then:
- handle->page = offset >> (PAGE_SHIFT + page_order(rb));
+ /* page is always 0 for the CONFIG_PERF_USE_VMALLOC option */
+ handle->page = offset >> PAGE_SHIFT;
I did not know there were other plans for that, seemed useless
jirka
On Mon, 2013-03-11 at 17:43 +0100, Jiri Olsa wrote:
> I did not know there were other plans for that, seemed useless
I like to keep form, confuses me less :-)
Anyway, why are we getting to that part of perf_output_begin() if we
don't have any data pages to begin with? Shouldn't we bail before there
someplace? Like at the !rb->nr_pages check?
On Mon, Mar 11, 2013 at 06:44:45PM +0100, Peter Zijlstra wrote:
> On Mon, 2013-03-11 at 17:43 +0100, Jiri Olsa wrote:
>
> > I did not know there were other plans for that, seemed useless
>
> I like to keep form, confuses me less :-)
ook
>
> Anyway, why are we getting to that part of perf_output_begin() if we
> don't have any data pages to begin with? Shouldn't we bail before there
> someplace? Like at the !rb->nr_pages check?
>
perf_output_begin actually does the !rb->nr_pages check
looks like we could check it earlier in some cases, I'll
update my todo list ;-)
jirka
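For reference, the guard in question at the top of perf_output_begin
(paraphrased from the code of that era; the exact placement here is
from memory):

	rcu_read_lock();
	rb = rcu_dereference(event->rb);
	if (!rb)
		goto out;

	handle->rb = rb;
	/* ... */
	if (!rb->nr_pages)
		goto out;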
If we allocate a perf ring buffer with the size of a single page,
we get memory corruption when releasing it. It's caused
by the rb_free_work function (CONFIG_PERF_USE_VMALLOC option).
For a single page sized ring buffer the page_order is -1 (because
nr_pages is 0). This needs to be recognized in the rb_free_work
function to release the proper number of pages.
Introduce a page_nr function (CONFIG_PERF_USE_VMALLOC only)
that returns the number of allocated pages, and use it in the
rb_free_work and perf_mmap_to_page functions.
Also set rb->nr_pages to 0 in case only the user page is
allocated, which makes perf_output_begin fail and prevents
sample storage.
v3 changes:
- fixed perf_mmap_to_page check
- kept page_order in the handle->page computation
v2 changes:
- fixed the perf_output_begin handling of single page buffer
Reported-by: Jan Stancek <[email protected]>
Signed-off-by: Jiri Olsa <[email protected]>
Cc: Corey Ashford <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
---
kernel/events/ring_buffer.c | 37 +++++++++++++++++++++++++++++++------
1 file changed, 31 insertions(+), 6 deletions(-)
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 23cb34f..4219be6 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -312,11 +312,21 @@ void rb_free(struct ring_buffer *rb)
}
#else
+/*
+ * Returns the total number of pages allocated
+ * by the ring buffer including the user page.
+ */
+static int page_nr(struct ring_buffer *rb)
+{
+ return page_order(rb) == -1 ?
+ 1 : /* no data, just user page */
+ 1 + (1 << page_order(rb)); /* user page + data pages */
+}
struct page *
perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
{
- if (pgoff > (1UL << page_order(rb)))
+ if (pgoff >= page_nr(rb))
return NULL;
return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
@@ -336,10 +346,10 @@ static void rb_free_work(struct work_struct *work)
int i, nr;
rb = container_of(work, struct ring_buffer, work);
- nr = 1 << page_order(rb);
+ nr = page_nr(rb);
base = rb->user_page;
- for (i = 0; i < nr + 1; i++)
+ for (i = 0; i < nr; i++)
perf_mmap_unmark_page(base + (i * PAGE_SIZE));
vfree(base);
@@ -371,9 +381,24 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
goto fail_all_buf;
rb->user_page = all_buf;
- rb->data_pages[0] = all_buf + PAGE_SIZE;
- rb->page_order = ilog2(nr_pages);
- rb->nr_pages = 1;
+
+ /*
+ * For special case nr_pages == 0 we have
+ * only the user page mmaped plus:
+ *
+ * rb->data_pages[0] = NULL
+ * rb->nr_pages = 0
+ * rb->page_order = -1
+ *
+ * The perf_output_begin function is guarded
+ * by (rb->nr_pages > 0) condition, so no
+ * output code touches above setup if we
+ * have only user page allocated.
+ */
+
+ rb->data_pages[0] = nr_pages ? all_buf + PAGE_SIZE : NULL;
+ rb->nr_pages = nr_pages ? 1 : 0;
+ rb->page_order = ilog2(nr_pages);
ring_buffer_init(rb, watermark, flags);
--
1.7.11.7
On Tue, 2013-03-12 at 11:05 +0100, Jiri Olsa wrote:
> +/*
> + * Returns the total number of pages allocated
> + * by the ring buffer including the user page.
> + */
> +static int page_nr(struct ring_buffer *rb)
> +{
> + return page_order(rb) == -1 ?
> + 1 : /* no data, just user page */
> + 1 + (1 << page_order(rb)); /* user page + data pages */
> +}
> @@ -371,9 +381,24 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
> goto fail_all_buf;
>
> rb->user_page = all_buf;
> - rb->data_pages[0] = all_buf + PAGE_SIZE;
> - rb->page_order = ilog2(nr_pages);
> - rb->nr_pages = 1;
> +
> + rb->data_pages[0] = nr_pages ? all_buf + PAGE_SIZE : NULL;
> + rb->nr_pages = nr_pages ? 1 : 0;
> + rb->page_order = ilog2(nr_pages);
>
> ring_buffer_init(rb, watermark, flags);
Still somewhat confused by this.. wouldn't something 'simple' like the
below suffice?
Note how the !vmalloc branch also sets rb->nr_pages = nr_pages.
---
kernel/events/ring_buffer.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 23cb34f..4f48d02 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -316,7 +316,7 @@ void rb_free(struct ring_buffer *rb)
struct page *
perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
{
- if (pgoff > (1UL << page_order(rb)))
+ if (pgoff > rb->nr_pages)
return NULL;
return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
@@ -336,7 +336,7 @@ static void rb_free_work(struct work_struct *work)
int i, nr;
rb = container_of(work, struct ring_buffer, work);
- nr = 1 << page_order(rb);
+ nr = rb->nr_pages;
base = rb->user_page;
for (i = 0; i < nr + 1; i++)
@@ -373,7 +373,7 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
rb->user_page = all_buf;
rb->data_pages[0] = all_buf + PAGE_SIZE;
rb->page_order = ilog2(nr_pages);
- rb->nr_pages = 1;
+ rb->nr_pages = nr_pages;
ring_buffer_init(rb, watermark, flags);
On Tue, Mar 12, 2013 at 11:27:10AM +0100, Peter Zijlstra wrote:
> On Tue, 2013-03-12 at 11:05 +0100, Jiri Olsa wrote:
> > +/*
> > + * Returns the total number of pages allocated
> > + * by the ring buffer including the user page.
> > + */
> > +static int page_nr(struct ring_buffer *rb)
> > +{
> > + return page_order(rb) == -1 ?
> > + 1 : /* no data, just user page */
> > + 1 + (1 << page_order(rb)); /* user page + data pages */
> > +}
>
> > @@ -371,9 +381,24 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
> > goto fail_all_buf;
> >
> > rb->user_page = all_buf;
> > - rb->data_pages[0] = all_buf + PAGE_SIZE;
> > - rb->page_order = ilog2(nr_pages);
> > - rb->nr_pages = 1;
>
> > +
> > + rb->data_pages[0] = nr_pages ? all_buf + PAGE_SIZE : NULL;
> > + rb->nr_pages = nr_pages ? 1 : 0;
> > + rb->page_order = ilog2(nr_pages);
> >
> > ring_buffer_init(rb, watermark, flags);
>
> Still somewhat confused by this.. wouldn't something 'simple' like the
> below suffice?
>
> Note how the !vmalloc branch also sets rb->nr_pages = nr_pages.
yep it would ;-) though I find it a little confusing for:
>
> ---
> kernel/events/ring_buffer.c | 6 +++---
> 1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
> index 23cb34f..4f48d02 100644
> --- a/kernel/events/ring_buffer.c
> +++ b/kernel/events/ring_buffer.c
> @@ -316,7 +316,7 @@ void rb_free(struct ring_buffer *rb)
> struct page *
> perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
> {
> - if (pgoff > (1UL << page_order(rb)))
> + if (pgoff > rb->nr_pages)
> return NULL;
here it's the '>' that masks that there are actually (rb->nr_pages + 1) pages
>
> return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
> @@ -336,7 +336,7 @@ static void rb_free_work(struct work_struct *work)
> int i, nr;
>
> rb = container_of(work, struct ring_buffer, work);
> - nr = 1 << page_order(rb);
> + nr = rb->nr_pages;
>
> base = rb->user_page;
> for (i = 0; i < nr + 1; i++)
and here the '+ 1' accounts for the user page
> @@ -373,7 +373,7 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
> rb->user_page = all_buf;
> rb->data_pages[0] = all_buf + PAGE_SIZE;
> rb->page_order = ilog2(nr_pages);
> - rb->nr_pages = 1;
> + rb->nr_pages = nr_pages;
>
> ring_buffer_init(rb, watermark, flags);
but I'll get used to it np.. ;)
jirka
On Tue, 2013-03-12 at 11:53 +0100, Jiri Olsa wrote:
> > @@ -316,7 +316,7 @@ void rb_free(struct ring_buffer *rb)
> > struct page *
> > perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
> > {
> > - if (pgoff > (1UL << page_order(rb)))
> > + if (pgoff > rb->nr_pages)
> > return NULL;
>
> here it's the '>' that masks that there are actually (rb->nr_pages + 1) pages
>
> >
> > return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
> > @@ -336,7 +336,7 @@ static void rb_free_work(struct work_struct *work)
> > int i, nr;
> >
> > rb = container_of(work, struct ring_buffer, work);
> > - nr = 1 << page_order(rb);
> > + nr = rb->nr_pages;
> >
> > base = rb->user_page;
> > for (i = 0; i < nr + 1; i++)
>
> and here the '+ 1' accounts for the user page
Right, we could make that either '< nr' and '<= nr' or '<= nr+1' and '<
nr+1'. No real preference either way, but you're right in that being
consistent here makes sense.
On Tue, Mar 12, 2013 at 11:27:10AM +0100, Peter Zijlstra wrote:
> On Tue, 2013-03-12 at 11:05 +0100, Jiri Olsa wrote:
SNIP
> ---
> kernel/events/ring_buffer.c | 6 +++---
> 1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
> index 23cb34f..4f48d02 100644
> --- a/kernel/events/ring_buffer.c
> +++ b/kernel/events/ring_buffer.c
> @@ -316,7 +316,7 @@ void rb_free(struct ring_buffer *rb)
> struct page *
> perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
> {
> - if (pgoff > (1UL << page_order(rb)))
> + if (pgoff > rb->nr_pages)
> return NULL;
>
> return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
> @@ -336,7 +336,7 @@ static void rb_free_work(struct work_struct *work)
> int i, nr;
>
> rb = container_of(work, struct ring_buffer, work);
> - nr = 1 << page_order(rb);
> + nr = rb->nr_pages;
>
> base = rb->user_page;
> for (i = 0; i < nr + 1; i++)
> @@ -373,7 +373,7 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
> rb->user_page = all_buf;
> rb->data_pages[0] = all_buf + PAGE_SIZE;
> rb->page_order = ilog2(nr_pages);
> - rb->nr_pages = 1;
> + rb->nr_pages = nr_pages;
>
hum, and this ^^^ breaks perf_data_size(rb) ;)
static inline unsigned long perf_data_size(struct ring_buffer *rb)
{
return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
}
we could make it ifdef-ed for CONFIG_PERF_USE_VMALLOC
jirka
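Concretely (assuming PAGE_SHIFT == 12): a vmalloc buffer with 8 data
pages has page_order == ilog2(8) == 3, so:

	old: rb->nr_pages = 1        -> 1 << (12 + 3) == 32768   (correct)
	new: rb->nr_pages = nr_pages -> 8 << (12 + 3) == 262144  (8x too big)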
On Tue, 2013-03-12 at 14:52 +0100, Jiri Olsa wrote:
> > @@ -373,7 +373,7 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
> > rb->user_page = all_buf;
> > rb->data_pages[0] = all_buf + PAGE_SIZE;
> > rb->page_order = ilog2(nr_pages);
> > - rb->nr_pages = 1;
> > + rb->nr_pages = nr_pages;
> >
>
> hum, and this ^^^ breaks perf_data_size(rb) ;)
>
> static inline unsigned long perf_data_size(struct ring_buffer *rb)
> {
> return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
> }
How so? 0 << n keeps being 0, right?
On Tue, Mar 12, 2013 at 04:26:12PM +0100, Peter Zijlstra wrote:
> On Tue, 2013-03-12 at 14:52 +0100, Jiri Olsa wrote:
> > > @@ -373,7 +373,7 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
> > > rb->user_page = all_buf;
> > > rb->data_pages[0] = all_buf + PAGE_SIZE;
> > > rb->page_order = ilog2(nr_pages);
> > > - rb->nr_pages = 1;
> > > + rb->nr_pages = nr_pages;
> > >
> >
> > hum, and this ^^^ breaks perf_data_size(rb) ;)
> >
> > static inline unsigned long perf_data_size(struct ring_buffer *rb)
> > {
> > return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
> > }
>
> How so? 0 << n keeps being 0, right?
>
you do 'rb->nr_pages = nr_pages', which was 1 before..
that will bump up the return value:
it should do '1 << x' but does 'nr_pages << x' instead
jirka
Right you are..
How about something like the below; using that 0 << -1 is still 0.
---
kernel/events/ring_buffer.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 23cb34f..e72ca70 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -316,7 +316,7 @@ void rb_free(struct ring_buffer *rb)
struct page *
perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
{
- if (pgoff > (1UL << page_order(rb)))
+ if (pgoff > (rb->nr_pages << page_order(rb)))
return NULL;
return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
@@ -336,10 +336,10 @@ static void rb_free_work(struct work_struct *work)
int i, nr;
rb = container_of(work, struct ring_buffer, work);
- nr = 1 << page_order(rb);
+ nr = rb->nr_pages << page_order(rb);
base = rb->user_page;
- for (i = 0; i < nr + 1; i++)
+ for (i = 0; i <= nr; i++)
perf_mmap_unmark_page(base + (i * PAGE_SIZE));
vfree(base);
@@ -373,7 +373,7 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
rb->user_page = all_buf;
rb->data_pages[0] = all_buf + PAGE_SIZE;
rb->page_order = ilog2(nr_pages);
- rb->nr_pages = 1;
+ rb->nr_pages = !!nr_pages;
ring_buffer_init(rb, watermark, flags);
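Walking through both cases of this version (relying, as said above, on
0 << -1 evaluating to 0):

	single page : nr_pages = !!0 == 0, page_order == -1
	              nr = 0 << -1 == 0  -> 'i <= 0' unmarks 1 page (user)
	8 data pages: nr_pages = !!8 == 1, page_order == 3
	              nr = 1 << 3  == 8  -> 'i <= 8' unmarks 9 pages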
On Tue, Mar 12, 2013 at 05:24:50PM +0100, Peter Zijlstra wrote:
> Right you are..
>
> How about something like the below; using that 0 << -1 is still 0.
hum.. we are obviously not on the same page wrt 'confusing' ;-)
looks ok, I'll test it
jirka
>
> ---
> kernel/events/ring_buffer.c | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
> index 23cb34f..e72ca70 100644
> --- a/kernel/events/ring_buffer.c
> +++ b/kernel/events/ring_buffer.c
> @@ -316,7 +316,7 @@ void rb_free(struct ring_buffer *rb)
> struct page *
> perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
> {
> - if (pgoff > (1UL << page_order(rb)))
> + if (pgoff > (rb->nr_pages << page_order(rb)))
> return NULL;
>
> return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
> @@ -336,10 +336,10 @@ static void rb_free_work(struct work_struct *work)
> int i, nr;
>
> rb = container_of(work, struct ring_buffer, work);
> - nr = 1 << page_order(rb);
> + nr = rb->nr_pages << page_order(rb);
>
> base = rb->user_page;
> - for (i = 0; i < nr + 1; i++)
> + for (i = 0; i <= nr; i++)
> perf_mmap_unmark_page(base + (i * PAGE_SIZE));
>
> vfree(base);
> @@ -373,7 +373,7 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
> rb->user_page = all_buf;
> rb->data_pages[0] = all_buf + PAGE_SIZE;
> rb->page_order = ilog2(nr_pages);
> - rb->nr_pages = 1;
> + rb->nr_pages = !!nr_pages;
>
> ring_buffer_init(rb, watermark, flags);
>
>
>
On Tue, Mar 12, 2013 at 06:04:16PM +0100, Jiri Olsa wrote:
> On Tue, Mar 12, 2013 at 05:24:50PM +0100, Peter Zijlstra wrote:
> > Right you are..
> >
> > How about something like the below; using that 0 << -1 is still 0.
>
> hum.. we are obviously not on the same page wrt 'confusing' ;-)
>
> looks ok, I'll test it
hi, it works fine..
Tested-by: Jiri Olsa <[email protected]>
>
> jirka
>
> >
> > ---
> > kernel/events/ring_buffer.c | 8 ++++----
> > 1 file changed, 4 insertions(+), 4 deletions(-)
> >
> > diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
> > index 23cb34f..e72ca70 100644
> > --- a/kernel/events/ring_buffer.c
> > +++ b/kernel/events/ring_buffer.c
> > @@ -316,7 +316,7 @@ void rb_free(struct ring_buffer *rb)
> > struct page *
> > perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
> > {
> > - if (pgoff > (1UL << page_order(rb)))
> > + if (pgoff > (rb->nr_pages << page_order(rb)))
> > return NULL;
> >
> > return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
> > @@ -336,10 +336,10 @@ static void rb_free_work(struct work_struct *work)
> > int i, nr;
> >
> > rb = container_of(work, struct ring_buffer, work);
> > - nr = 1 << page_order(rb);
> > + nr = rb->nr_pages << page_order(rb);
> >
> > base = rb->user_page;
> > - for (i = 0; i < nr + 1; i++)
> > + for (i = 0; i <= nr; i++)
> > perf_mmap_unmark_page(base + (i * PAGE_SIZE));
> >
> > vfree(base);
> > @@ -373,7 +373,7 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
> > rb->user_page = all_buf;
> > rb->data_pages[0] = all_buf + PAGE_SIZE;
just a nitpick.. in the case of having just the user page,
data_pages[0] holds a wrong pointer, but it 'should'
never be touched in this case
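That is, with nr_pages == 0 the hunk above still does

	rb->data_pages[0] = all_buf + PAGE_SIZE;

which stores a pointer one page past the single-page allocation; it
stays harmless only because rb->nr_pages == 0 keeps the output path
from ever indexing data_pages[].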
On Wed, Mar 13, 2013 at 12:15:57PM +0100, Jiri Olsa wrote:
> On Tue, Mar 12, 2013 at 06:04:16PM +0100, Jiri Olsa wrote:
> > On Tue, Mar 12, 2013 at 05:24:50PM +0100, Peter Zijlstra wrote:
> > > Right you are..
> > >
> > > How about something like the below; using that 0 << -1 is still 0.
> >
> > hum.. we are obviously not on the same page wrt 'confusing' ;-)
> >
> > looks ok, I'll test it
>
> hi, it works fine..
>
> Tested-by: Jiri Olsa <[email protected]>
are you going to include that, or should I repost it?
thanks,
jirka
> are you going to include that, or should I repost it?
Ah, please repost (and prettify) it, I'm still very limited in the
amount of work that I can do :/
On Tue, Mar 19, 2013 at 12:46:58PM +0100, Peter Zijlstra wrote:
>
> > are you going to include that, or should I repost it?
>
> Ah, please repost (and prettify) it, I'm still very limited in the
> amount of work that I can do :/
>
ok, attached
jirka
---
If we allocate a perf ring buffer with the size of a single (user)
page, we get memory corruption when releasing it in the rb_free_work
function (for the CONFIG_PERF_USE_VMALLOC option).
For a single page sized ring buffer the page_order is -1 (because
nr_pages is 0). This needs to be recognized in the rb_free_work
function to release the proper number of pages.
Add a data_page_nr function that returns the number of allocated
data pages, and adapt the rest of the code to use it.
Reported-by: Jan Stancek <[email protected]>
Original-patch-by: Peter Zijlstra <[email protected]>
Cc: Corey Ashford <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Signed-off-by: Jiri Olsa <[email protected]>
---
kernel/events/ring_buffer.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 23cb34f..27a1af4 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -312,11 +312,16 @@ void rb_free(struct ring_buffer *rb)
}
#else
+static int data_page_nr(struct ring_buffer *rb)
+{
+ return rb->nr_pages << page_order(rb);
+}
struct page *
perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
{
- if (pgoff > (1UL << page_order(rb)))
+ /* The '>' counts in the user page. */
+ if (pgoff > data_page_nr(rb))
return NULL;
return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
@@ -336,10 +341,11 @@ static void rb_free_work(struct work_struct *work)
int i, nr;
rb = container_of(work, struct ring_buffer, work);
- nr = 1 << page_order(rb);
+ nr = data_page_nr(rb);
base = rb->user_page;
- for (i = 0; i < nr + 1; i++)
+ /* The '<=' counts in the user page. */
+ for (i = 0; i <= nr; i++)
perf_mmap_unmark_page(base + (i * PAGE_SIZE));
vfree(base);
@@ -373,7 +379,7 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
rb->user_page = all_buf;
rb->data_pages[0] = all_buf + PAGE_SIZE;
rb->page_order = ilog2(nr_pages);
- rb->nr_pages = 1;
+ rb->nr_pages = !!nr_pages;
ring_buffer_init(rb, watermark, flags);
--
1.7.11.7
On Tue, Mar 19, 2013 at 03:35:09PM +0100, Jiri Olsa wrote:
> On Tue, Mar 19, 2013 at 12:46:58PM +0100, Peter Zijlstra wrote:
> >
> > > are you going to include that, or should I repost it?
> >
> > Ah, please repost (and prettify) it, I'm still very limited in the
> > amount of work that I can do :/
> >
>
> ok, attached
Ingo, Peter,
could this be pulled in?
thanks,
jirka
>
> jirka
>
> ---
> If we allocate a perf ring buffer with the size of a single (user)
> page, we get memory corruption when releasing it in the rb_free_work
> function (for the CONFIG_PERF_USE_VMALLOC option).
>
> For a single page sized ring buffer the page_order is -1 (because
> nr_pages is 0). This needs to be recognized in the rb_free_work
> function to release the proper number of pages.
>
> Add a data_page_nr function that returns the number of allocated
> data pages, and adapt the rest of the code to use it.
>
> Reported-by: Jan Stancek <[email protected]>
> Original-patch-by: Peter Zijlstra <[email protected]>
> Cc: Corey Ashford <[email protected]>
> Cc: Frederic Weisbecker <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Namhyung Kim <[email protected]>
> Cc: Paul Mackerras <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Arnaldo Carvalho de Melo <[email protected]>
> Signed-off-by: Jiri Olsa <[email protected]>
> ---
> kernel/events/ring_buffer.c | 14 ++++++++++----
> 1 file changed, 10 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
> index 23cb34f..27a1af4 100644
> --- a/kernel/events/ring_buffer.c
> +++ b/kernel/events/ring_buffer.c
> @@ -312,11 +312,16 @@ void rb_free(struct ring_buffer *rb)
> }
>
> #else
> +static int data_page_nr(struct ring_buffer *rb)
> +{
> + return rb->nr_pages << page_order(rb);
> +}
>
> struct page *
> perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
> {
> - if (pgoff > (1UL << page_order(rb)))
> + /* The '>' counts in the user page. */
> + if (pgoff > data_page_nr(rb))
> return NULL;
>
> return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
> @@ -336,10 +341,11 @@ static void rb_free_work(struct work_struct *work)
> int i, nr;
>
> rb = container_of(work, struct ring_buffer, work);
> - nr = 1 << page_order(rb);
> + nr = data_page_nr(rb);
>
> base = rb->user_page;
> - for (i = 0; i < nr + 1; i++)
> + /* The '<=' counts in the user page. */
> + for (i = 0; i <= nr; i++)
> perf_mmap_unmark_page(base + (i * PAGE_SIZE));
>
> vfree(base);
> @@ -373,7 +379,7 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
> rb->user_page = all_buf;
> rb->data_pages[0] = all_buf + PAGE_SIZE;
> rb->page_order = ilog2(nr_pages);
> - rb->nr_pages = 1;
> + rb->nr_pages = !!nr_pages;
>
> ring_buffer_init(rb, watermark, flags);
>
> --
> 1.7.11.7
>
* Jiri Olsa <[email protected]> wrote:
> On Tue, Mar 19, 2013 at 03:35:09PM +0100, Jiri Olsa wrote:
> > On Tue, Mar 19, 2013 at 12:46:58PM +0100, Peter Zijlstra wrote:
> > >
> > > > are you going to include that, or should I repost it?
> > >
> > > Ah, please repost (and prettify) it, I'm still very limited in the
> > > amount of work that I can do :/
> > >
> >
> > ok, attached
>
> Ingo, Peter,
>
> could this be pulled in?
Yeah, Peter acked it - I will pick it up.
Thanks,
Ingo
Commit-ID: 5919b30933d7c4fac1f214c59f26c5e990044f09
Gitweb: http://git.kernel.org/tip/5919b30933d7c4fac1f214c59f26c5e990044f09
Author: Jiri Olsa <[email protected]>
AuthorDate: Tue, 19 Mar 2013 15:35:09 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 1 May 2013 12:34:46 +0200
perf: Fix vmalloc ring buffer pages handling
If we allocate a perf ring buffer with the size of a single (user)
page, we get memory corruption when releasing it in the
rb_free_work function (for the CONFIG_PERF_USE_VMALLOC option).
For a single page sized ring buffer the page_order is -1 (because
nr_pages is 0). This needs to be recognized in the rb_free_work
function to release the proper number of pages.
Add a data_page_nr function that returns the number of allocated
data pages, and adapt the rest of the code to use it.
Reported-by: Jan Stancek <[email protected]>
Original-patch-by: Peter Zijlstra <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
Cc: Corey Ashford <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Signed-off-by: Jiri Olsa <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/events/ring_buffer.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 97fddb0..cd55144 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -326,11 +326,16 @@ void rb_free(struct ring_buffer *rb)
}
#else
+static int data_page_nr(struct ring_buffer *rb)
+{
+ return rb->nr_pages << page_order(rb);
+}
struct page *
perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
{
- if (pgoff > (1UL << page_order(rb)))
+ /* The '>' counts in the user page. */
+ if (pgoff > data_page_nr(rb))
return NULL;
return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
@@ -350,10 +355,11 @@ static void rb_free_work(struct work_struct *work)
int i, nr;
rb = container_of(work, struct ring_buffer, work);
- nr = 1 << page_order(rb);
+ nr = data_page_nr(rb);
base = rb->user_page;
- for (i = 0; i < nr + 1; i++)
+ /* The '<=' counts in the user page. */
+ for (i = 0; i <= nr; i++)
perf_mmap_unmark_page(base + (i * PAGE_SIZE));
vfree(base);
@@ -387,7 +393,7 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
rb->user_page = all_buf;
rb->data_pages[0] = all_buf + PAGE_SIZE;
rb->page_order = ilog2(nr_pages);
- rb->nr_pages = 1;
+ rb->nr_pages = !!nr_pages;
ring_buffer_init(rb, watermark, flags);