LinuxLists.cc - [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do

2007-01-11 19:12:24

Subject: [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap()

Hello Roland and Christoph H.!
This is a patch for ehca_cq.c. It removes all direct calls of do_mmap()/munmap()
when creating and destroying a completion queue respectively.
Thanks
Nam

Signed-off-by Hoang-Nam Nguyen <[email protected]>
---

ehca_cq.c | 65 +++++++++++++++-----------------------------------------------
1 files changed, 16 insertions(+), 49 deletions(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 93995b6..e86585a 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -267,7 +267,6 @@ struct ib_cq *ehca_create_cq(struct ib_d
if (context) {
struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
struct ehca_create_cq_resp resp;
- struct vm_area_struct *vma;
memset(&resp, 0, sizeof(resp));
resp.cq_number = my_cq->cq_number;
resp.token = my_cq->token;
@@ -276,40 +275,14 @@ struct ib_cq *ehca_create_cq(struct ib_d
resp.ipz_queue.queue_length = ipz_queue->queue_length;
resp.ipz_queue.pagesize = ipz_queue->pagesize;
resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
- ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x12000000,
- ipz_queue->queue_length,
- (void**)&resp.ipz_queue.queue,
- &vma);
- if (ret) {
- ehca_err(device, "Could not mmap queue pages");
- cq = ERR_PTR(ret);
- goto create_cq_exit4;
- }
- my_cq->uspace_queue = resp.ipz_queue.queue;
- resp.galpas = my_cq->galpas;
- ret = ehca_mmap_register(my_cq->galpas.user.fw_handle,
- (void**)&resp.galpas.kernel.fw_handle,
- &vma);
- if (ret) {
- ehca_err(device, "Could not mmap fw_handle");
- cq = ERR_PTR(ret);
- goto create_cq_exit5;
- }
- my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
ehca_err(device, "Copy to udata failed.");
- goto create_cq_exit6;
+ goto create_cq_exit4;
}
}

return cq;

-create_cq_exit6:
- ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
-
-create_cq_exit5:
- ehca_munmap(my_cq->uspace_queue, my_cq->ipz_queue.queue_length);
-
create_cq_exit4:
ipz_queue_dtor(&my_cq->ipz_queue);

@@ -333,7 +306,6 @@ create_cq_exit1:
int ehca_destroy_cq(struct ib_cq *cq)
{
u64 h_ret;
- int ret;
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
int cq_num = my_cq->cq_number;
struct ib_device *device = cq->device;
@@ -343,6 +315,20 @@ int ehca_destroy_cq(struct ib_cq *cq)
u32 cur_pid = current->tgid;
unsigned long flags;

+ if (cq->uobject) {
+ if (my_cq->mm_count_galpa || my_cq->mm_count_queue) {
+ ehca_err(device, "Resources still referenced in "
+ "user space cq_num=%x", my_cq->cq_number);
+ return -EINVAL;
+ }
+ if (my_cq->ownpid != cur_pid) {
+ ehca_err(device, "Invalid caller pid=%x ownpid=%x "
+ "cq_num=%x",
+ cur_pid, my_cq->ownpid, my_cq->cq_number);
+ return -EINVAL;
+ }
+ }
+
spin_lock_irqsave(&ehca_cq_idr_lock, flags);
while (my_cq->nr_callbacks)
yield();
@@ -350,25 +336,6 @@ int ehca_destroy_cq(struct ib_cq *cq)
idr_remove(&ehca_cq_idr, my_cq->token);
spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);

- if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
- ehca_err(device, "Invalid caller pid=%x ownpid=%x",
- cur_pid, my_cq->ownpid);
- return -EINVAL;
- }
-
- /* un-mmap if vma alloc */
- if (my_cq->uspace_queue ) {
- ret = ehca_munmap(my_cq->uspace_queue,
- my_cq->ipz_queue.queue_length);
- if (ret)
- ehca_err(device, "Could not munmap queue ehca_cq=%p "
- "cq_num=%x", my_cq, cq_num);
- ret = ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
- if (ret)
- ehca_err(device, "Could not munmap fwh ehca_cq=%p "
- "cq_num=%x", my_cq, cq_num);
- }
-
h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
if (h_ret == H_R_STATE) {
/* cq in err: read err data and destroy it forcibly */
@@ -397,7 +364,7 @@ int ehca_resize_cq(struct ib_cq *cq, int
struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
u32 cur_pid = current->tgid;

- if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
+ if (cq->uobject && my_cq->ownpid != cur_pid) {
ehca_err(cq->device, "Invalid caller pid=%x ownpid=%x",
cur_pid, my_cq->ownpid);
return -EINVAL;

2007-01-11 19:21:01

by Christoph Hellwig

[permalink] [raw]

Subject: Re: [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap()

On Thu, Jan 11, 2007 at 08:08:36PM +0100, Hoang-Nam Nguyen wrote:
> Hello Roland and Christoph H.!
> This is a patch for ehca_cq.c. It removes all direct calls of do_mmap()/munmap()
> when creating and destroying a completion queue respectively.
> Thanks
> Nam

This patch looks good, but there are some issues with the surrounding code:

> + if (my_cq->ownpid != cur_pid) {
> + ehca_err(device, "Invalid caller pid=%x ownpid=%x "
> + "cq_num=%x",
> + cur_pid, my_cq->ownpid, my_cq->cq_number);
> + return -EINVAL;
> + }

(for other reviewers: this is not new code, just moved around)

Owner tracking by pid is really dangerous. File descriptors can be
passed around by unix sockets, a single process can have files open
more than once, etc..

It seems ehca wants to prevent threads other than the creating one
from performing most operations. Can you explain the reason for this?

> spin_lock_irqsave(&ehca_cq_idr_lock, flags);
> while (my_cq->nr_callbacks)
> yield();

Calling yield is a very bad idea in general. You should probably
add a waitqueue that gets woken when nr_callbacks reaches zero to
sleep effectively here.

2007-01-11 19:41:08

by Nathan Lynch

[permalink] [raw]

Subject: Re: [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap()

Christoph Hellwig wrote:
> On Thu, Jan 11, 2007 at 08:08:36PM +0100, Hoang-Nam Nguyen wrote:
>
> > spin_lock_irqsave(&ehca_cq_idr_lock, flags);
> > while (my_cq->nr_callbacks)
> > yield();
>
> Calling yield is a very bad idea in general. You should probably
> add a waitqueue that gets woken when nr_callbacks reaches zero to
> sleep effectively here.

Isn't that code outright buggy? Calling into the scheduler with a
spinlock held and local interrupts disabled...

2007-01-11 19:43:57

by Christoph Hellwig

[permalink] [raw]

Subject: Re: [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap()

On Thu, Jan 11, 2007 at 01:40:54PM -0600, Nathan Lynch wrote:
> Christoph Hellwig wrote:
> > On Thu, Jan 11, 2007 at 08:08:36PM +0100, Hoang-Nam Nguyen wrote:
> >
> > > spin_lock_irqsave(&ehca_cq_idr_lock, flags);
> > > while (my_cq->nr_callbacks)
> > > yield();
> >
> > Calling yield is a very bad idea in general. You should probably
> > add a waitqueue that gets woken when nr_callbacks reaches zero to
> > sleep effectively here.
>
> Isn't that code outright buggy? Calling into the scheduler with a
> spinlock held and local interrupts disabled...

Umm, yes - of course. I missed the spin_lock_irqsave line just above.

2007-01-11 19:56:25

by Roland Dreier

[permalink] [raw]

Subject: Re: [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap()

> > spin_lock_irqsave(&ehca_cq_idr_lock, flags);
> > while (my_cq->nr_callbacks)
> > yield();

> Isn't that code outright buggy? Calling into the scheduler with a
> spinlock held and local interrupts disabled...

Yes, absolutely -- if nr_callbacks is ever nonzero then this will
obviously crash instantly.

- R.

2007-01-12 15:27:01

by Hoang-Nam Nguyen

[permalink] [raw]

Subject: Re: [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap()

Hi Roland!
> > > spin_lock_irqsave(&ehca_cq_idr_lock, flags);
> > > while (my_cq->nr_callbacks)
> > > yield();
>
> > Isn't that code outright buggy? Calling into the scheduler with a
> > spinlock held and local interrupts disabled...
>
> Yes, absolutely -- if nr_callbacks is ever nonzero then this will
> obviously crash instantly.
As Christoph R. mentioned in another thread I'm sending you a patch
to fix this bug. Thanks to all for this hint!
Purpose of the while loop is to wait until all completion entries
have been processed by a running completion handler. First then
the function continue with destroying completion queue. Thus, we do
unlock and lock around yield(), ie yield() is now called from a normal
process context without active lock. Hope that this pattern is ok.
In addition of yield issue this patch also fixes an unproper use of
spin_unlock() in ehca_irq.c.
Thanks
Nam

Signed-off-by Hoang-Nam Nguyen <[email protected]>
---

ehca_cq.c | 5 ++++-
ehca_irq.c | 4 +++-
2 files changed, 7 insertions(+), 2 deletions(-)

diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_cq.c infiniband_work/drivers/infiniband/hw/ehca/ehca_cq.c
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_cq.c 2007-01-11 19:54:06.000000000 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_cq.c 2007-01-12 15:27:50.000000000 +0100
@@ -330,8 +330,11 @@ int ehca_destroy_cq(struct ib_cq *cq)
}

spin_lock_irqsave(&ehca_cq_idr_lock, flags);
- while (my_cq->nr_callbacks)
+ while (my_cq->nr_callbacks) {
+ spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
yield();
+ spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+ }

idr_remove(&ehca_cq_idr, my_cq->token);
spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
diff -Nurp infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c
--- infiniband_orig/drivers/infiniband/hw/ehca/ehca_irq.c 2007-01-11 19:53:33.000000000 +0100
+++ infiniband_work/drivers/infiniband/hw/ehca/ehca_irq.c 2007-01-12 15:27:50.000000000 +0100
@@ -440,7 +440,9 @@ void ehca_tasklet_eq(unsigned long data)
cq = idr_find(&ehca_cq_idr, token);

if (cq == NULL) {
- spin_unlock(&ehca_cq_idr_lock);
+ spin_unlock_irqrestore(
+ &ehca_cq_idr_lock,
+ flags);
break;
}

2007-01-12 15:40:01

by Hoang-Nam Nguyen

[permalink] [raw]

Subject: Re: [PATCH/RFC 2.6.21 3/5] ehca: completion queue: remove use of do_mmap()

Hi,
> > + if (my_cq->ownpid != cur_pid) {
> > + ehca_err(device, "Invalid caller pid=%x ownpid=%x "
> > + "cq_num=%x",
> > + cur_pid, my_cq->ownpid, my_cq->cq_number);
> > + return -EINVAL;
> > + }
>
> (for other reviewers: this is not new code, just moved around)
>
> Owner tracking by pid is really dangerous. File descriptors can be
> passed around by unix sockets, a single process can have files open
> more than once, etc..
>
> It seems ehca wants to prevent threads other than the creating one
> from performing most operations. Can you explain the reason for this?
you point to the right spot... This has a historic reason as we
have needed to support fork(), system("date") etc for kernel 2.6.9,
hence those vma flags manipulation and this pid checking as proactive
protection/restriction. For newer kernel, I guess >=2.6.12, this checking
were not necessary, but we would feel better after we had tested user
space stuff more thoroughly without this piece of code. Since this is
not new code, can we pls handle this later?
Regards
Nam