2006-02-07 20:00:28

by James Bottomley

[permalink] [raw]
Subject: [PATCH] add execute_in_process_context() API

SCSI needs this for our scheme to avoid executing generic device release
calls from interrupt context (SCSI patch using this to follow).

If no-one objects, I'd like to slide this into the scsi-rc-fixes-2.6
tree for 2.6.16.

James

--

[PATCH] add execute_in_process_context() API

We have several points in the SCSI stack (primarily for our device
functions) where we need to guarantee process context, but (given the
place where the last reference was released) we cannot guarantee this.

This API gets around the issue by executing the function directly if
the caller has process context, but scheduling a workqueue to execute
in process context if the caller doesn't have it.

Signed-off-by: James Bottomley <[email protected]>

Index: BUILD-2.6/include/linux/workqueue.h
===================================================================
--- BUILD-2.6.orig/include/linux/workqueue.h 2006-02-07 09:22:30.000000000 -0600
+++ BUILD-2.6/include/linux/workqueue.h 2006-02-07 10:22:29.000000000 -0600
@@ -74,6 +74,7 @@
void cancel_rearming_delayed_work(struct work_struct *work);
void cancel_rearming_delayed_workqueue(struct workqueue_struct *,
struct work_struct *);
+int execute_in_process_context(void (*fn)(void *), void *);

/*
* Kill off a pending schedule_delayed_work(). Note that the work callback
Index: BUILD-2.6/kernel/workqueue.c
===================================================================
--- BUILD-2.6.orig/kernel/workqueue.c 2006-02-07 09:22:30.000000000 -0600
+++ BUILD-2.6/kernel/workqueue.c 2006-02-07 11:07:47.000000000 -0600
@@ -27,6 +27,7 @@
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
+#include <linux/hardirq.h>

/*
* The per-CPU workqueue (if single thread, we always use the first
@@ -476,6 +477,63 @@
}
EXPORT_SYMBOL(cancel_rearming_delayed_work);

+struct work_queue_work {
+ struct work_struct work;
+ void (*fn)(void *);
+ void *data;
+};
+
+static void execute_in_process_context_work(void *data)
+{
+ void (*fn)(void *data);
+ struct work_queue_work *wqw = data;
+
+ fn = wqw->fn;
+ data = wqw->data;
+
+ kfree(wqw);
+
+ fn(data);
+}
+
+/**
+ * execute_in_process_context - reliably execute the routine with user context
+ * @fn: the function to execute
+ * @data: data to pass to the function
+ *
+ * Executes the function immediately if process context is available,
+ * otherwise schedules the function for delayed execution.
+ *
+ * Returns: 0 - function was executed
+ * 1 - function was scheduled for execution
+ * <0 - error
+ */
+int execute_in_process_context(void (*fn)(void *data), void *data)
+{
+ struct work_queue_work *wqw;
+
+ if (!in_interrupt()) {
+ fn(data);
+ return 0;
+ }
+
+ wqw = kmalloc(sizeof(struct work_queue_work), GFP_ATOMIC);
+
+ if (unlikely(!wqw)) {
+ printk(KERN_ERR "Failed to allocate memory\n");
+ WARN_ON(1);
+ return -ENOMEM;
+ }
+
+ INIT_WORK(&wqw->work, execute_in_process_context_work, wqw);
+ wqw->fn = fn;
+ wqw->data = data;
+ schedule_work(&wqw->work);
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(execute_in_process_context);
+
int keventd_up(void)
{
return keventd_wq != NULL;



2006-02-07 20:08:45

by James Bottomley

[permalink] [raw]
Subject: [SCSI] fix wrong context bugs in SCSI

This is the second half of the execute_process_context() API addition:
an actual user

James

--

[SCSI] fix wrong context bugs in SCSI

There's a bug in releasing scsi_device where the release function
actually frees the block queue. However, the block queue release
calls flush_work(), which requires process context (the scsi_device
structure may release from irq context). Update the release function
to invoke via the execute_in_process_context() API.

Also clean up the scsi_target structure releasing via this API.

Signed-off-by: James Bottomley <[email protected]>

Index: BUILD-2.6/drivers/scsi/scsi_scan.c
===================================================================
--- BUILD-2.6.orig/drivers/scsi/scsi_scan.c 2006-02-07 09:23:44.000000000 -0600
+++ BUILD-2.6/drivers/scsi/scsi_scan.c 2006-02-07 11:35:46.000000000 -0600
@@ -385,19 +385,12 @@
return found_target;
}

-struct work_queue_wrapper {
- struct work_struct work;
- struct scsi_target *starget;
-};
-
-static void scsi_target_reap_work(void *data) {
- struct work_queue_wrapper *wqw = (struct work_queue_wrapper *)data;
- struct scsi_target *starget = wqw->starget;
+static void scsi_target_reap_usercontext(void *data)
+{
+ struct scsi_target *starget = data;
struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
unsigned long flags;

- kfree(wqw);
-
spin_lock_irqsave(shost->host_lock, flags);

if (--starget->reap_ref == 0 && list_empty(&starget->devices)) {
@@ -426,18 +419,7 @@
*/
void scsi_target_reap(struct scsi_target *starget)
{
- struct work_queue_wrapper *wqw =
- kzalloc(sizeof(struct work_queue_wrapper), GFP_ATOMIC);
-
- if (!wqw) {
- starget_printk(KERN_ERR, starget,
- "Failed to allocate memory in scsi_reap_target()\n");
- return;
- }
-
- INIT_WORK(&wqw->work, scsi_target_reap_work, wqw);
- wqw->starget = starget;
- schedule_work(&wqw->work);
+ execute_in_process_context(scsi_target_reap_usercontext, starget);
}

/**
Index: BUILD-2.6/drivers/scsi/scsi_sysfs.c
===================================================================
--- BUILD-2.6.orig/drivers/scsi/scsi_sysfs.c 2006-02-07 09:23:44.000000000 -0600
+++ BUILD-2.6/drivers/scsi/scsi_sysfs.c 2006-02-07 11:35:08.000000000 -0600
@@ -217,8 +217,9 @@
put_device(&sdev->sdev_gendev);
}

-static void scsi_device_dev_release(struct device *dev)
+static void scsi_device_dev_release_usercontext(void *data)
{
+ struct device *dev = data;
struct scsi_device *sdev;
struct device *parent;
struct scsi_target *starget;
@@ -237,6 +238,7 @@

if (sdev->request_queue) {
sdev->request_queue->queuedata = NULL;
+ /* user context needed to free queue */
scsi_free_queue(sdev->request_queue);
/* temporary expedient, try to catch use of queue lock
* after free of sdev */
@@ -252,6 +254,11 @@
put_device(parent);
}

+static void scsi_device_dev_release(struct device *dev)
+{
+ execute_in_process_context(scsi_device_dev_release_usercontext, dev);
+}
+
static struct class sdev_class = {
.name = "scsi_device",
.release = scsi_device_cls_release,


2006-02-07 20:27:11

by Dave Jones

[permalink] [raw]
Subject: Re: [PATCH] add execute_in_process_context() API

On Tue, Feb 07, 2006 at 02:00:19PM -0600, James Bottomley wrote:
> +int execute_in_process_context(void (*fn)(void *data), void *data)
> +{
> + struct work_queue_work *wqw;
> +
> + if (!in_interrupt()) {
> + fn(data);
> + return 0;
> + }
> +
> + wqw = kmalloc(sizeof(struct work_queue_work), GFP_ATOMIC);
> +
> + if (unlikely(!wqw)) {
> + printk(KERN_ERR "Failed to allocate memory\n");
> + WARN_ON(1);
> + return -ENOMEM;
> + }
> +
> + INIT_WORK(&wqw->work, execute_in_process_context_work, wqw);
> + wqw->fn = fn;
> + wqw->data = data;
> + schedule_work(&wqw->work);
> +
> + return 1;
> +}

After the workqueue has run, what free's wqw ?

Dave

2006-02-07 20:35:51

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] add execute_in_process_context() API

Dave Jones <[email protected]> wrote:
>
> On Tue, Feb 07, 2006 at 02:00:19PM -0600, James Bottomley wrote:
> > +int execute_in_process_context(void (*fn)(void *data), void *data)
> > +{
> > + struct work_queue_work *wqw;
> > +
> > + if (!in_interrupt()) {
> > + fn(data);
> > + return 0;
> > + }
> > +
> > + wqw = kmalloc(sizeof(struct work_queue_work), GFP_ATOMIC);
> > +
> > + if (unlikely(!wqw)) {
> > + printk(KERN_ERR "Failed to allocate memory\n");
> > + WARN_ON(1);
> > + return -ENOMEM;
> > + }
> > +
> > + INIT_WORK(&wqw->work, execute_in_process_context_work, wqw);
> > + wqw->fn = fn;
> > + wqw->data = data;
> > + schedule_work(&wqw->work);
> > +
> > + return 1;
> > +}
>
> After the workqueue has run, what free's wqw ?
>

The callback (execute_in_process_context_work())

The trap with this patch is that the caller has to run
flush_scheduled_work() at the right time. But hopefully anyone who's using
it knows that by now.

2006-02-07 22:05:23

by Brian King

[permalink] [raw]
Subject: Re: [SCSI] fix wrong context bugs in SCSI

James Bottomley wrote:
> This is the second half of the execute_process_context() API addition:
> an actual user

I just tried this out on my ppc64 box. I recently noticed that
the target reaping via workqueue change that went in not too long ago
resulted in *really* slow user initiated wildcard scans for ipr
adapters - in the neighborhood of 6 minutes... Your patch brings
that back down to 8 seconds.


Brian


>
> James
>
> --
>
> [SCSI] fix wrong context bugs in SCSI
>
> There's a bug in releasing scsi_device where the release function
> actually frees the block queue. However, the block queue release
> calls flush_work(), which requires process context (the scsi_device
> structure may release from irq context). Update the release function
> to invoke via the execute_in_process_context() API.
>
> Also clean up the scsi_target structure releasing via this API.
>
> Signed-off-by: James Bottomley <[email protected]>
>
> Index: BUILD-2.6/drivers/scsi/scsi_scan.c
> ===================================================================
> --- BUILD-2.6.orig/drivers/scsi/scsi_scan.c 2006-02-07 09:23:44.000000000 -0600
> +++ BUILD-2.6/drivers/scsi/scsi_scan.c 2006-02-07 11:35:46.000000000 -0600
> @@ -385,19 +385,12 @@
> return found_target;
> }
>
> -struct work_queue_wrapper {
> - struct work_struct work;
> - struct scsi_target *starget;
> -};
> -
> -static void scsi_target_reap_work(void *data) {
> - struct work_queue_wrapper *wqw = (struct work_queue_wrapper *)data;
> - struct scsi_target *starget = wqw->starget;
> +static void scsi_target_reap_usercontext(void *data)
> +{
> + struct scsi_target *starget = data;
> struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
> unsigned long flags;
>
> - kfree(wqw);
> -
> spin_lock_irqsave(shost->host_lock, flags);
>
> if (--starget->reap_ref == 0 && list_empty(&starget->devices)) {
> @@ -426,18 +419,7 @@
> */
> void scsi_target_reap(struct scsi_target *starget)
> {
> - struct work_queue_wrapper *wqw =
> - kzalloc(sizeof(struct work_queue_wrapper), GFP_ATOMIC);
> -
> - if (!wqw) {
> - starget_printk(KERN_ERR, starget,
> - "Failed to allocate memory in scsi_reap_target()\n");
> - return;
> - }
> -
> - INIT_WORK(&wqw->work, scsi_target_reap_work, wqw);
> - wqw->starget = starget;
> - schedule_work(&wqw->work);
> + execute_in_process_context(scsi_target_reap_usercontext, starget);
> }
>
> /**
> Index: BUILD-2.6/drivers/scsi/scsi_sysfs.c
> ===================================================================
> --- BUILD-2.6.orig/drivers/scsi/scsi_sysfs.c 2006-02-07 09:23:44.000000000 -0600
> +++ BUILD-2.6/drivers/scsi/scsi_sysfs.c 2006-02-07 11:35:08.000000000 -0600
> @@ -217,8 +217,9 @@
> put_device(&sdev->sdev_gendev);
> }
>
> -static void scsi_device_dev_release(struct device *dev)
> +static void scsi_device_dev_release_usercontext(void *data)
> {
> + struct device *dev = data;
> struct scsi_device *sdev;
> struct device *parent;
> struct scsi_target *starget;
> @@ -237,6 +238,7 @@
>
> if (sdev->request_queue) {
> sdev->request_queue->queuedata = NULL;
> + /* user context needed to free queue */
> scsi_free_queue(sdev->request_queue);
> /* temporary expedient, try to catch use of queue lock
> * after free of sdev */
> @@ -252,6 +254,11 @@
> put_device(parent);
> }
>
> +static void scsi_device_dev_release(struct device *dev)
> +{
> + execute_in_process_context(scsi_device_dev_release_usercontext, dev);
> +}
> +
> static struct class sdev_class = {
> .name = "scsi_device",
> .release = scsi_device_cls_release,
>
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>


--
Brian King
eServer Storage I/O
IBM Linux Technology Center

2006-02-07 23:26:51

by James Bottomley

[permalink] [raw]
Subject: Re: [SCSI] fix wrong context bugs in SCSI

On Tue, 2006-02-07 at 16:05 -0600, Brian King wrote:
> I just tried this out on my ppc64 box. I recently noticed that
> the target reaping via workqueue change that went in not too long ago
> resulted in *really* slow user initiated wildcard scans for ipr
> adapters - in the neighborhood of 6 minutes... Your patch brings
> that back down to 8 seconds.

Yes, that's because the original fix used a workqueue for everything
(whether it needed it or not). The new API checks the context first
before invoking a workqueue. By and large, for SCSI, we actually have
user context most of the time, so the old fix was rather heavy handed.

James


2006-02-08 08:07:00

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] add execute_in_process_context() API

James Bottomley <[email protected]> writes:

In general this seems like a lot of code for a simple problem.
It might be simpler to just put the work structure into the parent
object and do the workqueue unconditionally


> + if (unlikely(!wqw)) {
> + printk(KERN_ERR "Failed to allocate memory\n");
> + WARN_ON(1);

WARN_ON for GFP_ATOMIC failure is bad. It is not really a bug.

-Andi

2006-02-08 08:19:04

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] add execute_in_process_context() API

Andi Kleen <[email protected]> wrote:
>
> James Bottomley <[email protected]> writes:
>
> In general this seems like a lot of code for a simple problem.
> It might be simpler to just put the work structure into the parent
> object and do the workqueue unconditionally
>

That apparently would have really bad performance problems. If we're
!in_interrupt() we want to do the work synchronously.

But yes, if we can embed the work_struct inside the structure which the
callback will operate on

> > + if (unlikely(!wqw)) {
> > + printk(KERN_ERR "Failed to allocate memory\n");
> > + WARN_ON(1);
>
> WARN_ON for GFP_ATOMIC failure is bad. It is not really a bug.

it will solve this problem. (And I think is is a problem: if the
allocation fails, we leak things?)

2006-02-08 08:50:38

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] add execute_in_process_context() API

On Wednesday 08 February 2006 09:18, Andrew Morton wrote:
> Andi Kleen <[email protected]> wrote:
> >
> > James Bottomley <[email protected]> writes:
> >
> > In general this seems like a lot of code for a simple problem.
> > It might be simpler to just put the work structure into the parent
> > object and do the workqueue unconditionally
> >
>
> That apparently would have really bad performance problems. If we're
> !in_interrupt() we want to do the work synchronously.

It depends if it's common or not. If it's uncommon then simpler code
is better.


-Andi

2006-02-08 08:54:07

by Jens Axboe

[permalink] [raw]
Subject: Re: [SCSI] fix wrong context bugs in SCSI

On Tue, Feb 07 2006, James Bottomley wrote:
> +static void scsi_device_dev_release(struct device *dev)
> +{
> + execute_in_process_context(scsi_device_dev_release_usercontext, dev);
> +}
> +

Hmm, this (and further up) could fail, yet you don't check.

I don't think this API is very nice to be honest, there's no good way to
handle failures - you can't just sleep and loop retry the execute if you
are in_interrupt(). I'd prefer passing in a work_queue_work (with a
better name :-) that has been allocated at a reliable time during
initialization.


--
Jens Axboe

2006-02-08 12:46:55

by Stefan Richter

[permalink] [raw]
Subject: Re: [PATCH] add execute_in_process_context() API

James Bottomley wrote:
> SCSI needs this for our scheme to avoid executing generic device release
> calls from interrupt context (SCSI patch using this to follow).

Shouldn't we rather fix the SCSI low level drivers or SCSI transport
layers to trigger device releases only from process context? (Instead of
SCSI core transparently falling back to a workqueue, that is.)

I know only one of these drivers, hence I don't know which are actually
affected and how much work that would be.
--
Stefan Richter
-=====-=-==- --=- -=---
http://arcgraph.de/sr/

2006-02-08 15:16:12

by James Bottomley

[permalink] [raw]
Subject: Re: [PATCH] add execute_in_process_context() API

On Wed, 2006-02-08 at 09:06 +0100, Andi Kleen wrote:
> In general this seems like a lot of code for a simple problem.
> It might be simpler to just put the work structure into the parent
> object and do the workqueue unconditionally

We can't do this. For the target release, there may be multiple calls
to the reap function ... if we embed in the structure we have no room
for more than one.

> > + if (unlikely(!wqw)) {
> > + printk(KERN_ERR "Failed to allocate memory\n");
> > + WARN_ON(1);
>
> WARN_ON for GFP_ATOMIC failure is bad. It is not really a bug.

Here, it means that the requested work wasn't executed. In SCSI that
would mean an object is now in place permanently. The problem is that
there's no real way to cope with failure in this case.

James


2006-02-08 15:18:39

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] add execute_in_process_context() API

On Wednesday 08 February 2006 16:15, James Bottomley wrote:
> The problem is that
> there's no real way to cope with failure in this case.

Then you can't use GFP_ATOMIC. You have to redesign.

-Andi

2006-02-08 15:30:13

by Tejun Heo

[permalink] [raw]
Subject: Re: [PATCH] add execute_in_process_context() API

James Bottomley wrote:
> On Wed, 2006-02-08 at 09:06 +0100, Andi Kleen wrote:
>
>>In general this seems like a lot of code for a simple problem.
>>It might be simpler to just put the work structure into the parent
>>object and do the workqueue unconditionally
>
>
> We can't do this. For the target release, there may be multiple calls
> to the reap function ... if we embed in the structure we have no room
> for more than one.
>
>
>>>+ if (unlikely(!wqw)) {
>>>+ printk(KERN_ERR "Failed to allocate memory\n");
>>>+ WARN_ON(1);
>>
>>WARN_ON for GFP_ATOMIC failure is bad. It is not really a bug.
>
>
> Here, it means that the requested work wasn't executed. In SCSI that
> would mean an object is now in place permanently. The problem is that
> there's no real way to cope with failure in this case.
>

Hi, James.

I haven't really looked at the code carefully, but I think one work
struct + atomic counter (say pending_reap_cnt) should do it.
queue_work() guarantees the work is run at least once after the call, so
bumping pending_reap_cnt and queueing the work in target reap and
reaping pending_reap_cnt times in the work should work.

--
tejun

2006-02-08 15:31:14

by James Bottomley

[permalink] [raw]
Subject: Re: [SCSI] fix wrong context bugs in SCSI

On Wed, 2006-02-08 at 09:56 +0100, Jens Axboe wrote:
> Hmm, this (and further up) could fail, yet you don't check.

By and large, you have process context, so this isn't going to be a
problem.

> I don't think this API is very nice to be honest, there's no good way to
> handle failures - you can't just sleep and loop retry the execute if you
> are in_interrupt(). I'd prefer passing in a work_queue_work (with a
> better name :-) that has been allocated at a reliable time during
> initialization.

Yes, I agree ... however, the failure is less prevalent in the new code
than the old. The problem is that we may need to execute multiple puts
for a single target from irq contex, so under this scheme you need a wqw
(potentially) for every get.

I could solve this by binding the API more tightly into the device
model, so the generic device contains the wqw and it is told that the
release function of the final put must be called in process context, but
that's an awful lot of code changes.

James


2006-02-08 15:36:39

by James Bottomley

[permalink] [raw]
Subject: Re: [PATCH] add execute_in_process_context() API

On Thu, 2006-02-09 at 00:30 +0900, Tejun wrote:
> I haven't really looked at the code carefully, but I think one work
> struct + atomic counter (say pending_reap_cnt) should do it.
> queue_work() guarantees the work is run at least once after the call, so
> bumping pending_reap_cnt and queueing the work in target reap and
> reaping pending_reap_cnt times in the work should work.

Actually, no, unfortunately that doesn't work (and I did actually try
it). The problem is that the target_reap actually removes the target
from the sysfs namespace. So, if the reap is still pending but you
cannot find the target, you'll allocate one, but then you may be racing
to add it to the namespace (i.e. a quick succession of scsi
remove-single-device followed by scsi add-single-device of the same
H/C/T/L shows this). If you lose the race, the add fails because the
namespace is already occupied. And, unfortunately, the device_del that
does the namespace removal requires user context ...

James


2006-02-08 15:50:22

by Jens Axboe

[permalink] [raw]
Subject: Re: [SCSI] fix wrong context bugs in SCSI

On Wed, Feb 08 2006, James Bottomley wrote:
> On Wed, 2006-02-08 at 09:56 +0100, Jens Axboe wrote:
> > Hmm, this (and further up) could fail, yet you don't check.
>
> By and large, you have process context, so this isn't going to be a
> problem.
>
> > I don't think this API is very nice to be honest, there's no good way to
> > handle failures - you can't just sleep and loop retry the execute if you
> > are in_interrupt(). I'd prefer passing in a work_queue_work (with a
> > better name :-) that has been allocated at a reliable time during
> > initialization.
>
> Yes, I agree ... however, the failure is less prevalent in the new code
> than the old. The problem is that we may need to execute multiple puts
> for a single target from irq contex, so under this scheme you need a wqw
> (potentially) for every get.
>
> I could solve this by binding the API more tightly into the device
> model, so the generic device contains the wqw and it is told that the
> release function of the final put must be called in process context, but
> that's an awful lot of code changes.

Yeah it does get a lot more complicated. I guess I'm fine with the
current change, but please just keep it in SCSI then. It's not the sort
of thing you'd want to advertise as an exported API.

--
Jens Axboe

2006-02-08 15:58:04

by James Bottomley

[permalink] [raw]
Subject: Re: [PATCH] add execute_in_process_context() API

On Wed, 2006-02-08 at 16:18 +0100, Andi Kleen wrote:
> On Wednesday 08 February 2006 16:15, James Bottomley wrote:
> > The problem is that
> > there's no real way to cope with failure in this case.
>
> Then you can't use GFP_ATOMIC. You have to redesign.

My mailer seems to have deleted the part of the email with your redesign
suggestion in it...

My initial point is that the current scsi_reap_target() infrastructure
is more broken than the new API, so it does represent an improvement. I
could also mitigate (but not solve) the problem by adding a wqw slab.
However, I really think the redesign would be along the lines I
suggested to Jens; do you agree?

James


2006-02-08 16:07:27

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] add execute_in_process_context() API

On Wednesday 08 February 2006 16:57, James Bottomley wrote:
> On Wed, 2006-02-08 at 16:18 +0100, Andi Kleen wrote:
> > On Wednesday 08 February 2006 16:15, James Bottomley wrote:
> > > The problem is that
> > > there's no real way to cope with failure in this case.
> >
> > Then you can't use GFP_ATOMIC. You have to redesign.
>
> My mailer seems to have deleted the part of the email with your redesign
> suggestion in it...
>
> My initial point is that the current scsi_reap_target() infrastructure
> is more broken than the new API, so it does represent an improvement.

An improvement that will randomly fail under high load.

-Andi

2006-02-14 16:42:22

by James Bottomley

[permalink] [raw]
Subject: Re: [SCSI] fix wrong context bugs in SCSI

On Wed, 2006-02-08 at 16:52 +0100, Jens Axboe wrote:
> Yeah it does get a lot more complicated. I guess I'm fine with the
> current change, but please just keep it in SCSI then. It's not the sort
> of thing you'd want to advertise as an exported API.

OK, this pulls the API into scsi for 2.6.16

James

---

[PATCH] add scsi_execute_in_process_context() API

We have several points in the SCSI stack (primarily for our device
functions) where we need to guarantee process context, but (given the
place where the last reference was released) we cannot guarantee this.

This API gets around the issue by executing the function directly if
the caller has process context, but scheduling a workqueue to execute
in process context if the caller doesn't have it. Unfortunately, it
requires memory allocation in interrupt context, but it's better than
what we have previously. The true solution will require a bit of
re-engineering, so isn't appropriate for 2.6.16.

Signed-off-by: James Bottomley <[email protected]>

Index: BUILD-2.6/drivers/scsi/scsi_lib.c
===================================================================
--- BUILD-2.6.orig/drivers/scsi/scsi_lib.c 2006-02-12 12:37:18.000000000 -0600
+++ BUILD-2.6/drivers/scsi/scsi_lib.c 2006-02-14 10:17:12.000000000 -0600
@@ -16,6 +16,7 @@
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/delay.h>
+#include <linux/hardirq.h>

#include <scsi/scsi.h>
#include <scsi/scsi_dbg.h>
@@ -2248,3 +2249,61 @@
device_for_each_child(dev, NULL, target_unblock);
}
EXPORT_SYMBOL_GPL(scsi_target_unblock);
+
+
+struct work_queue_work {
+ struct work_struct work;
+ void (*fn)(void *);
+ void *data;
+};
+
+static void execute_in_process_context_work(void *data)
+{
+ void (*fn)(void *data);
+ struct work_queue_work *wqw = data;
+
+ fn = wqw->fn;
+ data = wqw->data;
+
+ kfree(wqw);
+
+ fn(data);
+}
+
+/**
+ * scsi_execute_in_process_context - reliably execute the routine with user context
+ * @fn: the function to execute
+ * @data: data to pass to the function
+ *
+ * Executes the function immediately if process context is available,
+ * otherwise schedules the function for delayed execution.
+ *
+ * Returns: 0 - function was executed
+ * 1 - function was scheduled for execution
+ * <0 - error
+ */
+int scsi_execute_in_process_context(void (*fn)(void *data), void *data)
+{
+ struct work_queue_work *wqw;
+
+ if (!in_interrupt()) {
+ fn(data);
+ return 0;
+ }
+
+ wqw = kmalloc(sizeof(struct work_queue_work), GFP_ATOMIC);
+
+ if (unlikely(!wqw)) {
+ printk(KERN_ERR "Failed to allocate memory\n");
+ WARN_ON(1);
+ return -ENOMEM;
+ }
+
+ INIT_WORK(&wqw->work, execute_in_process_context_work, wqw);
+ wqw->fn = fn;
+ wqw->data = data;
+ schedule_work(&wqw->work);
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(scsi_execute_in_process_context);
Index: BUILD-2.6/include/scsi/scsi.h
===================================================================
--- BUILD-2.6.orig/include/scsi/scsi.h 2006-02-12 12:38:10.000000000 -0600
+++ BUILD-2.6/include/scsi/scsi.h 2006-02-14 09:07:54.000000000 -0600
@@ -433,4 +433,6 @@
/* Used to obtain the PCI location of a device */
#define SCSI_IOCTL_GET_PCI 0x5387

+int scsi_execute_in_process_context(void (*fn)(void *data), void *data);
+
#endif /* _SCSI_SCSI_H */


2006-02-14 16:49:01

by James Bottomley

[permalink] [raw]
Subject: Re: [SCSI] fix wrong context bugs in SCSI

And the second part

James

---

[SCSI] fix wrong context bugs in SCSI

There's a bug in releasing scsi_device where the release function
actually frees the block queue. However, the block queue release
calls flush_work(), which requires process context (the scsi_device
structure may release from irq context). Update the release function
to invoke via the execute_in_process_context() API.

Also clean up the scsi_target structure releasing via this API.

Signed-off-by: James Bottomley <[email protected]>

Index: BUILD-2.6/drivers/scsi/scsi_scan.c
===================================================================
--- BUILD-2.6.orig/drivers/scsi/scsi_scan.c 2006-02-12 12:37:18.000000000 -0600
+++ BUILD-2.6/drivers/scsi/scsi_scan.c 2006-02-14 09:08:43.000000000 -0600
@@ -387,19 +387,12 @@
return found_target;
}

-struct work_queue_wrapper {
- struct work_struct work;
- struct scsi_target *starget;
-};
-
-static void scsi_target_reap_work(void *data) {
- struct work_queue_wrapper *wqw = (struct work_queue_wrapper *)data;
- struct scsi_target *starget = wqw->starget;
+static void scsi_target_reap_usercontext(void *data)
+{
+ struct scsi_target *starget = data;
struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
unsigned long flags;

- kfree(wqw);
-
spin_lock_irqsave(shost->host_lock, flags);

if (--starget->reap_ref == 0 && list_empty(&starget->devices)) {
@@ -428,18 +421,7 @@
*/
void scsi_target_reap(struct scsi_target *starget)
{
- struct work_queue_wrapper *wqw =
- kzalloc(sizeof(struct work_queue_wrapper), GFP_ATOMIC);
-
- if (!wqw) {
- starget_printk(KERN_ERR, starget,
- "Failed to allocate memory in scsi_reap_target()\n");
- return;
- }
-
- INIT_WORK(&wqw->work, scsi_target_reap_work, wqw);
- wqw->starget = starget;
- schedule_work(&wqw->work);
+ scsi_execute_in_process_context(scsi_target_reap_usercontext, starget);
}

/**
Index: BUILD-2.6/drivers/scsi/scsi_sysfs.c
===================================================================
--- BUILD-2.6.orig/drivers/scsi/scsi_sysfs.c 2006-02-12 12:37:18.000000000 -0600
+++ BUILD-2.6/drivers/scsi/scsi_sysfs.c 2006-02-14 09:08:52.000000000 -0600
@@ -217,8 +217,9 @@
put_device(&sdev->sdev_gendev);
}

-static void scsi_device_dev_release(struct device *dev)
+static void scsi_device_dev_release_usercontext(void *data)
{
+ struct device *dev = data;
struct scsi_device *sdev;
struct device *parent;
struct scsi_target *starget;
@@ -237,6 +238,7 @@

if (sdev->request_queue) {
sdev->request_queue->queuedata = NULL;
+ /* user context needed to free queue */
scsi_free_queue(sdev->request_queue);
/* temporary expedient, try to catch use of queue lock
* after free of sdev */
@@ -252,6 +254,11 @@
put_device(parent);
}

+static void scsi_device_dev_release(struct device *dev)
+{
+ scsi_execute_in_process_context(scsi_device_dev_release_usercontext, dev);
+}
+
static struct class sdev_class = {
.name = "scsi_device",
.release = scsi_device_cls_release,