2009-06-22 17:26:17

by Jerome Glisse

[permalink] [raw]
Subject: [PATCH] radeon: use vmalloc instead of kmalloc

We don't need to allocated contiguous pages in cs codepath
so use vmalloc instead.

Signed-off-by: Jerome Glisse <[email protected]>
---
drivers/gpu/drm/radeon/radeon_cs.c | 20 ++++++++++----------
1 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index b843f9b..54a9740 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -45,11 +45,11 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
chunk = &p->chunks[p->chunk_relocs_idx];
/* FIXME: we assume that each relocs use 4 dwords */
p->nrelocs = chunk->length_dw / 4;
- p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
+ p->relocs_ptr = vmalloc(p->nrelocs * sizeof(void *));
if (p->relocs_ptr == NULL) {
return -ENOMEM;
}
- p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
+ p->relocs = vmalloc(p->nrelocs * sizeof(struct radeon_cs_reloc));
if (p->relocs == NULL) {
return -ENOMEM;
}
@@ -103,7 +103,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
p->idx = 0;
p->chunk_ib_idx = -1;
p->chunk_relocs_idx = -1;
- p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
+ p->chunks_array = vmalloc(cs->num_chunks * sizeof(uint64_t));
if (p->chunks_array == NULL) {
return -ENOMEM;
}
@@ -113,7 +113,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
return -EFAULT;
}
p->nchunks = cs->num_chunks;
- p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
+ p->chunks = vmalloc(p->nchunks * sizeof(struct radeon_cs_chunk));
if (p->chunks == NULL) {
return -ENOMEM;
}
@@ -139,7 +139,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)

p->chunks[i].kdata = NULL;
size = p->chunks[i].length_dw * sizeof(uint32_t);
- p->chunks[i].kdata = kzalloc(size, GFP_KERNEL);
+ p->chunks[i].kdata = vmalloc(size);
if (p->chunks[i].kdata == NULL) {
return -ENOMEM;
}
@@ -179,13 +179,13 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
mutex_unlock(&parser->rdev->ddev->struct_mutex);
}
}
- kfree(parser->relocs);
- kfree(parser->relocs_ptr);
+ vfree(parser->relocs);
+ vfree(parser->relocs_ptr);
for (i = 0; i < parser->nchunks; i++) {
- kfree(parser->chunks[i].kdata);
+ vfree(parser->chunks[i].kdata);
}
- kfree(parser->chunks);
- kfree(parser->chunks_array);
+ vfree(parser->chunks);
+ vfree(parser->chunks_array);
radeon_ib_free(parser->rdev, &parser->ib);
}

--
1.6.2.2


2009-06-23 11:35:51

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] radeon: use vmalloc instead of kmalloc

On Mon, 2009-06-22 at 19:26 +0200, Jerome Glisse wrote:
> We don't need to allocated contiguous pages in cs codepath
> so use vmalloc instead.

Best would be to not require >PAGE_SIZE allocations at all of course.
But barring that, it would be great to have something like:

ptr = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
if (!ptr)
ptr = vmalloc(size);

Also, how long do these allocations live? vmalloc space can be quite
limited (i386) and it can fragment too.

> Signed-off-by: Jerome Glisse <[email protected]>
> ---
> drivers/gpu/drm/radeon/radeon_cs.c | 20 ++++++++++----------
> 1 files changed, 10 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
> index b843f9b..54a9740 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -45,11 +45,11 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
> chunk = &p->chunks[p->chunk_relocs_idx];
> /* FIXME: we assume that each relocs use 4 dwords */
> p->nrelocs = chunk->length_dw / 4;
> - p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
> + p->relocs_ptr = vmalloc(p->nrelocs * sizeof(void *));
> if (p->relocs_ptr == NULL) {
> return -ENOMEM;
> }
> - p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
> + p->relocs = vmalloc(p->nrelocs * sizeof(struct radeon_cs_reloc));
> if (p->relocs == NULL) {
> return -ENOMEM;
> }
> @@ -103,7 +103,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
> p->idx = 0;
> p->chunk_ib_idx = -1;
> p->chunk_relocs_idx = -1;
> - p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
> + p->chunks_array = vmalloc(cs->num_chunks * sizeof(uint64_t));
> if (p->chunks_array == NULL) {
> return -ENOMEM;
> }
> @@ -113,7 +113,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
> return -EFAULT;
> }
> p->nchunks = cs->num_chunks;
> - p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
> + p->chunks = vmalloc(p->nchunks * sizeof(struct radeon_cs_chunk));
> if (p->chunks == NULL) {
> return -ENOMEM;
> }
> @@ -139,7 +139,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
>
> p->chunks[i].kdata = NULL;
> size = p->chunks[i].length_dw * sizeof(uint32_t);
> - p->chunks[i].kdata = kzalloc(size, GFP_KERNEL);
> + p->chunks[i].kdata = vmalloc(size);
> if (p->chunks[i].kdata == NULL) {
> return -ENOMEM;
> }
> @@ -179,13 +179,13 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
> mutex_unlock(&parser->rdev->ddev->struct_mutex);
> }
> }
> - kfree(parser->relocs);
> - kfree(parser->relocs_ptr);
> + vfree(parser->relocs);
> + vfree(parser->relocs_ptr);
> for (i = 0; i < parser->nchunks; i++) {
> - kfree(parser->chunks[i].kdata);
> + vfree(parser->chunks[i].kdata);
> }
> - kfree(parser->chunks);
> - kfree(parser->chunks_array);
> + vfree(parser->chunks);
> + vfree(parser->chunks_array);
> radeon_ib_free(parser->rdev, &parser->ib);
> }
>

2009-06-23 11:42:49

by Dave Airlie

[permalink] [raw]
Subject: Re: [PATCH] radeon: use vmalloc instead of kmalloc

On Tue, Jun 23, 2009 at 9:35 PM, Peter Zijlstra<[email protected]> wrote:
> On Mon, 2009-06-22 at 19:26 +0200, Jerome Glisse wrote:
>> We don't need to allocated contiguous pages in cs codepath
>> so use vmalloc instead.
>
> Best would be to not require >PAGE_SIZE allocations at all of course.

It gets messy when you have copy from user and spinlocks, it would be nice
to just parse the userspace PAGE_SIZE at a time, but it would mean dropping
a lock we probably need to hold.

> But barring that, it would be great to have something like:
>
> ?ptr = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
> ?if (!ptr)
> ? ?ptr = vmalloc(size);

we have a drm_calloc_large workaround already for the Intel driver which also
need this.
>
> Also, how long do these allocations live? vmalloc space can be quite
> limited (i386) and it can fragment too.

Only an ioctl lifetime so they aren't that bad. We however do have some vmaps
that might be quite large, mainly the fbcon framebuffer (up to 8MB in
some cases)

Dave.

2009-06-23 12:24:39

by Jerome Glisse

[permalink] [raw]
Subject: Re: [PATCH] radeon: use vmalloc instead of kmalloc

On Tue, 2009-06-23 at 21:42 +1000, Dave Airlie wrote:
> On Tue, Jun 23, 2009 at 9:35 PM, Peter Zijlstra<[email protected]> wrote:
> > On Mon, 2009-06-22 at 19:26 +0200, Jerome Glisse wrote:
> >> We don't need to allocated contiguous pages in cs codepath
> >> so use vmalloc instead.
> >
> > Best would be to not require >PAGE_SIZE allocations at all of course.
>
> It gets messy when you have copy from user and spinlocks, it would be nice
> to just parse the userspace PAGE_SIZE at a time, but it would mean dropping
> a lock we probably need to hold.
>
> > But barring that, it would be great to have something like:
> >
> > ptr = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
> > if (!ptr)
> > ptr = vmalloc(size);
>
> we have a drm_calloc_large workaround already for the Intel driver which also
> need this.
> >
> > Also, how long do these allocations live? vmalloc space can be quite
> > limited (i386) and it can fragment too.
>
> Only an ioctl lifetime so they aren't that bad. We however do have some vmaps
> that might be quite large, mainly the fbcon framebuffer (up to 8MB in
> some cases)
>
> Dave.


I will rework cs parsing a bit see with what i can come up with. Forget
this patch, i will get back with a new one.

Jerome

2009-06-23 12:34:52

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] radeon: use vmalloc instead of kmalloc

On Tue, 2009-06-23 at 21:42 +1000, Dave Airlie wrote:
> On Tue, Jun 23, 2009 at 9:35 PM, Peter Zijlstra<[email protected]> wrote:
> > On Mon, 2009-06-22 at 19:26 +0200, Jerome Glisse wrote:
> >> We don't need to allocated contiguous pages in cs codepath
> >> so use vmalloc instead.
> >
> > Best would be to not require >PAGE_SIZE allocations at all of course.
>
> It gets messy when you have copy from user and spinlocks, it would be nice
> to just parse the userspace PAGE_SIZE at a time, but it would mean dropping
> a lock we probably need to hold.

Can't you play tricks with refcounts?

2009-06-23 15:14:37

by Thomas Hellstrom

[permalink] [raw]
Subject: Re: [PATCH] radeon: use vmalloc instead of kmalloc

Dave Airlie skrev:
> On Tue, Jun 23, 2009 at 9:35 PM, Peter Zijlstra<[email protected]> wrote:
>
>> On Mon, 2009-06-22 at 19:26 +0200, Jerome Glisse wrote:
>>
>>> We don't need to allocated contiguous pages in cs codepath
>>> so use vmalloc instead.
>>>
>> Best would be to not require >PAGE_SIZE allocations at all of course.
>>
>
> It gets messy when you have copy from user and spinlocks, it would be nice
> to just parse the userspace PAGE_SIZE at a time, but it would mean dropping
> a lock we probably need to hold.
>
>
>> But barring that, it would be great to have something like:
>>
>> ptr = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
>> if (!ptr)
>> ptr = vmalloc(size);
>>
>
> we have a drm_calloc_large workaround already for the Intel driver which also
> need this.
>
One problem with multiple vmallocs per command submission is
performance. Judging from previous work, drivers that are doing this
tend to get very cpu-hungry. Since Radeon is only allowing a single
process into the command submission path at once, what about
pre-allocating a single larger vmalloced buffer at first command
submission and take care to flush user-space before the submitted
command stream gets too big.

>> Also, how long do these allocations live? vmalloc space can be quite
>> limited (i386) and it can fragment too.
>>
>
> Only an ioctl lifetime so they aren't that bad. We however do have some vmaps
> that might be quite large, mainly the fbcon framebuffer (up to 8MB in
> some cases)
>
That one would be ioremapped, not vmapped right? Not that it matters
because it's using vmalloc space anyway, but it wouldn't be worse than a
traditionally ioremapped framebuffer.

Thomas

> Dave.
>
> ------------------------------------------------------------------------------
> Are you an open source citizen? Join us for the Open Source Bridge conference!
> Portland, OR, June 17-19. Two days of sessions, one day of unconference: $250.
> Need another reason to go? 24-hour hacker lounge. Register today!
> http://ad.doubleclick.net/clk;215844324;13503038;v?http://opensourcebridge.org
> --
> _______________________________________________
> Dri-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/dri-devel
>