Guys,
this works dandy for us in RHL and RHEL, *especially* on bigger SMP boxes.
It does not fix all the bugs in that code, but I consider it a good start,
and unless someone speaks up, it goes to Marcelo after 2.4.26 is out.
Or maybe before, depending how long it takes him to release 2.4.26.
Please review, test, etc.
I am working on two new bugs in usb-storage right now, but I'd like
to have things going in gradually, in self-contained chunks, Viro style.
Those two are not fixed yet anyway.
-- Pete
diff -urN -X dontdiff linux-2.4.26-rc2/drivers/usb/storage/scsiglue.c linux-2.4.26-rc2-nip/drivers/usb/storage/scsiglue.c
--- linux-2.4.26-rc2/drivers/usb/storage/scsiglue.c 2004-04-09 17:28:30.000000000 -0700
+++ linux-2.4.26-rc2-nip/drivers/usb/storage/scsiglue.c 2004-04-09 17:31:44.000000000 -0700
@@ -218,7 +218,14 @@
US_DEBUGP("device_reset() called\n" );
spin_unlock_irq(&io_request_lock);
+ down(&(us->dev_semaphore));
+ if (!us->pusb_dev) {
+ up(&(us->dev_semaphore));
+ spin_lock_irq(&io_request_lock);
+ return SUCCESS;
+ }
rc = us->transport_reset(us);
+ up(&(us->dev_semaphore));
spin_lock_irq(&io_request_lock);
return rc;
}
@@ -235,27 +242,32 @@
/* we use the usb_reset_device() function to handle this for us */
US_DEBUGP("bus_reset() called\n");
+ spin_unlock_irq(&io_request_lock);
+
+ down(&(us->dev_semaphore));
+
/* if the device has been removed, this worked */
if (!us->pusb_dev) {
US_DEBUGP("-- device removed already\n");
+ up(&(us->dev_semaphore));
+ spin_lock_irq(&io_request_lock);
return SUCCESS;
}
- spin_unlock_irq(&io_request_lock);
-
/* release the IRQ, if we have one */
- down(&(us->irq_urb_sem));
if (us->irq_urb) {
US_DEBUGP("-- releasing irq URB\n");
result = usb_unlink_urb(us->irq_urb);
US_DEBUGP("-- usb_unlink_urb() returned %d\n", result);
}
- up(&(us->irq_urb_sem));
/* attempt to reset the port */
if (usb_reset_device(us->pusb_dev) < 0) {
- spin_lock_irq(&io_request_lock);
- return FAILED;
+ /*
+ * Do not return errors, or else the error handler might
+ * invoke host_reset, which is not implemented.
+ */
+ goto bail_out;
}
/* FIXME: This needs to lock out driver probing while it's working
@@ -286,28 +298,36 @@
up(&intf->driver->serialize);
}
+bail_out:
/* re-allocate the IRQ URB and submit it to restore connectivity
* for CBI devices
*/
if (us->protocol == US_PR_CBI) {
- down(&(us->irq_urb_sem));
us->irq_urb->dev = us->pusb_dev;
result = usb_submit_urb(us->irq_urb);
US_DEBUGP("usb_submit_urb() returns %d\n", result);
- up(&(us->irq_urb_sem));
}
-
+
+ up(&(us->dev_semaphore));
+
spin_lock_irq(&io_request_lock);
US_DEBUGP("bus_reset() complete\n");
return SUCCESS;
}
-/* FIXME: This doesn't do anything right now */
static int host_reset( Scsi_Cmnd *srb )
{
- printk(KERN_CRIT "usb-storage: host_reset() requested but not implemented\n" );
- return FAILED;
+ struct us_data *us = (struct us_data *)srb->host->hostdata[0];
+
+ spin_unlock_irq(&io_request_lock);
+ down(&(us->dev_semaphore));
+ printk(KERN_CRIT "usb-storage: host_reset() requested but hardly implemented\n" );
+ up(&(us->dev_semaphore));
+ spin_lock_irq(&io_request_lock);
+ US_DEBUGP("host_reset() complete\n");
+
+ return SUCCESS;
}
/***********************************************************************
diff -urN -X dontdiff linux-2.4.26-rc2/drivers/usb/storage/usb.c linux-2.4.26-rc2-nip/drivers/usb/storage/usb.c
--- linux-2.4.26-rc2/drivers/usb/storage/usb.c 2004-02-26 14:09:59.000000000 -0800
+++ linux-2.4.26-rc2-nip/drivers/usb/storage/usb.c 2004-04-09 17:31:45.000000000 -0700
@@ -501,6 +501,9 @@
* strucuture is current. This includes the ep_int field, which gives us
* the endpoint for the interrupt.
* Returns non-zero on failure, zero on success
+ *
+ * ss->dev_semaphore is expected taken, except for a newly minted,
+ * unregistered device.
*/
static int usb_stor_allocate_irq(struct us_data *ss)
{
@@ -510,13 +513,9 @@
US_DEBUGP("Allocating IRQ for CBI transport\n");
- /* lock access to the data structure */
- down(&(ss->irq_urb_sem));
-
/* allocate the URB */
ss->irq_urb = usb_alloc_urb(0);
if (!ss->irq_urb) {
- up(&(ss->irq_urb_sem));
US_DEBUGP("couldn't allocate interrupt URB");
return 1;
}
@@ -537,12 +536,9 @@
US_DEBUGP("usb_submit_urb() returns %d\n", result);
if (result) {
usb_free_urb(ss->irq_urb);
- up(&(ss->irq_urb_sem));
return 2;
}
- /* unlock the data structure and return success */
- up(&(ss->irq_urb_sem));
return 0;
}
@@ -772,7 +768,6 @@
init_completion(&(ss->notify));
init_MUTEX_LOCKED(&(ss->ip_waitq));
spin_lock_init(&(ss->queue_exclusion));
- init_MUTEX(&(ss->irq_urb_sem));
init_MUTEX(&(ss->current_urb_sem));
init_MUTEX(&(ss->dev_semaphore));
@@ -1063,7 +1058,6 @@
down(&(ss->dev_semaphore));
/* release the IRQ, if we have one */
- down(&(ss->irq_urb_sem));
if (ss->irq_urb) {
US_DEBUGP("-- releasing irq URB\n");
result = usb_unlink_urb(ss->irq_urb);
@@ -1071,7 +1065,6 @@
usb_free_urb(ss->irq_urb);
ss->irq_urb = NULL;
}
- up(&(ss->irq_urb_sem));
/* free up the main URB for this device */
US_DEBUGP("-- releasing main URB\n");
diff -urN -X dontdiff linux-2.4.26-rc2/drivers/usb/storage/usb.h linux-2.4.26-rc2-nip/drivers/usb/storage/usb.h
--- linux-2.4.26-rc2/drivers/usb/storage/usb.h 2003-11-29 19:23:15.000000000 -0800
+++ linux-2.4.26-rc2-nip/drivers/usb/storage/usb.h 2004-04-09 17:51:44.000000000 -0700
@@ -116,7 +116,7 @@
struct us_data *next; /* next device */
/* the device we're working with */
- struct semaphore dev_semaphore; /* protect pusb_dev */
+ struct semaphore dev_semaphore; /* protect many things */
struct usb_device *pusb_dev; /* this usb_device */
unsigned int flags; /* from filter initially */
@@ -162,7 +162,6 @@
atomic_t ip_wanted[1]; /* is an IRQ expected? */
/* interrupt communications data */
- struct semaphore irq_urb_sem; /* to protect irq_urb */
struct urb *irq_urb; /* for USB int requests */
unsigned char irqbuf[2]; /* buffer for USB IRQ */
unsigned char irqdata[2]; /* data from USB IRQ */
Off the cuff, this doesn't look bad... tho it does a lot.
The reordering of locking in device_reset and bus reset definately looks
good, especially for the corner-cases (yanked device, etc.).
I'm uncertain if your handling of the io_request_lock is right.... but
getting information on how to handle that has been like pulling teeth, so
I'm inclined to trust your wide-scale testing on this.
Was there a reason to add more do-nothing code to host_reset?
Is it really safe to remove the irq_urb_sem?
Also, please make sure everything is CC'ed to the usb-storage list (on this
message).
Matt
On Fri, Apr 09, 2004 at 07:59:43PM -0700, Pete Zaitcev wrote:
> Guys,
>
> this works dandy for us in RHL and RHEL, *especially* on bigger SMP boxes.
> It does not fix all the bugs in that code, but I consider it a good start,
> and unless someone speaks up, it goes to Marcelo after 2.4.26 is out.
> Or maybe before, depending how long it takes him to release 2.4.26.
> Please review, test, etc.
>
> I am working on two new bugs in usb-storage right now, but I'd like
> to have things going in gradually, in self-contained chunks, Viro style.
> Those two are not fixed yet anyway.
>
> -- Pete
>
> diff -urN -X dontdiff linux-2.4.26-rc2/drivers/usb/storage/scsiglue.c linux-2.4.26-rc2-nip/drivers/usb/storage/scsiglue.c
> --- linux-2.4.26-rc2/drivers/usb/storage/scsiglue.c 2004-04-09 17:28:30.000000000 -0700
> +++ linux-2.4.26-rc2-nip/drivers/usb/storage/scsiglue.c 2004-04-09 17:31:44.000000000 -0700
> @@ -218,7 +218,14 @@
> US_DEBUGP("device_reset() called\n" );
>
> spin_unlock_irq(&io_request_lock);
> + down(&(us->dev_semaphore));
> + if (!us->pusb_dev) {
> + up(&(us->dev_semaphore));
> + spin_lock_irq(&io_request_lock);
> + return SUCCESS;
> + }
> rc = us->transport_reset(us);
> + up(&(us->dev_semaphore));
> spin_lock_irq(&io_request_lock);
> return rc;
> }
> @@ -235,27 +242,32 @@
> /* we use the usb_reset_device() function to handle this for us */
> US_DEBUGP("bus_reset() called\n");
>
> + spin_unlock_irq(&io_request_lock);
> +
> + down(&(us->dev_semaphore));
> +
> /* if the device has been removed, this worked */
> if (!us->pusb_dev) {
> US_DEBUGP("-- device removed already\n");
> + up(&(us->dev_semaphore));
> + spin_lock_irq(&io_request_lock);
> return SUCCESS;
> }
>
> - spin_unlock_irq(&io_request_lock);
> -
> /* release the IRQ, if we have one */
> - down(&(us->irq_urb_sem));
> if (us->irq_urb) {
> US_DEBUGP("-- releasing irq URB\n");
> result = usb_unlink_urb(us->irq_urb);
> US_DEBUGP("-- usb_unlink_urb() returned %d\n", result);
> }
> - up(&(us->irq_urb_sem));
>
> /* attempt to reset the port */
> if (usb_reset_device(us->pusb_dev) < 0) {
> - spin_lock_irq(&io_request_lock);
> - return FAILED;
> + /*
> + * Do not return errors, or else the error handler might
> + * invoke host_reset, which is not implemented.
> + */
> + goto bail_out;
> }
>
> /* FIXME: This needs to lock out driver probing while it's working
> @@ -286,28 +298,36 @@
> up(&intf->driver->serialize);
> }
>
> +bail_out:
> /* re-allocate the IRQ URB and submit it to restore connectivity
> * for CBI devices
> */
> if (us->protocol == US_PR_CBI) {
> - down(&(us->irq_urb_sem));
> us->irq_urb->dev = us->pusb_dev;
> result = usb_submit_urb(us->irq_urb);
> US_DEBUGP("usb_submit_urb() returns %d\n", result);
> - up(&(us->irq_urb_sem));
> }
> -
> +
> + up(&(us->dev_semaphore));
> +
> spin_lock_irq(&io_request_lock);
>
> US_DEBUGP("bus_reset() complete\n");
> return SUCCESS;
> }
>
> -/* FIXME: This doesn't do anything right now */
> static int host_reset( Scsi_Cmnd *srb )
> {
> - printk(KERN_CRIT "usb-storage: host_reset() requested but not implemented\n" );
> - return FAILED;
> + struct us_data *us = (struct us_data *)srb->host->hostdata[0];
> +
> + spin_unlock_irq(&io_request_lock);
> + down(&(us->dev_semaphore));
> + printk(KERN_CRIT "usb-storage: host_reset() requested but hardly implemented\n" );
> + up(&(us->dev_semaphore));
> + spin_lock_irq(&io_request_lock);
> + US_DEBUGP("host_reset() complete\n");
> +
> + return SUCCESS;
> }
>
> /***********************************************************************
> diff -urN -X dontdiff linux-2.4.26-rc2/drivers/usb/storage/usb.c linux-2.4.26-rc2-nip/drivers/usb/storage/usb.c
> --- linux-2.4.26-rc2/drivers/usb/storage/usb.c 2004-02-26 14:09:59.000000000 -0800
> +++ linux-2.4.26-rc2-nip/drivers/usb/storage/usb.c 2004-04-09 17:31:45.000000000 -0700
> @@ -501,6 +501,9 @@
> * strucuture is current. This includes the ep_int field, which gives us
> * the endpoint for the interrupt.
> * Returns non-zero on failure, zero on success
> + *
> + * ss->dev_semaphore is expected taken, except for a newly minted,
> + * unregistered device.
> */
> static int usb_stor_allocate_irq(struct us_data *ss)
> {
> @@ -510,13 +513,9 @@
>
> US_DEBUGP("Allocating IRQ for CBI transport\n");
>
> - /* lock access to the data structure */
> - down(&(ss->irq_urb_sem));
> -
> /* allocate the URB */
> ss->irq_urb = usb_alloc_urb(0);
> if (!ss->irq_urb) {
> - up(&(ss->irq_urb_sem));
> US_DEBUGP("couldn't allocate interrupt URB");
> return 1;
> }
> @@ -537,12 +536,9 @@
> US_DEBUGP("usb_submit_urb() returns %d\n", result);
> if (result) {
> usb_free_urb(ss->irq_urb);
> - up(&(ss->irq_urb_sem));
> return 2;
> }
>
> - /* unlock the data structure and return success */
> - up(&(ss->irq_urb_sem));
> return 0;
> }
>
> @@ -772,7 +768,6 @@
> init_completion(&(ss->notify));
> init_MUTEX_LOCKED(&(ss->ip_waitq));
> spin_lock_init(&(ss->queue_exclusion));
> - init_MUTEX(&(ss->irq_urb_sem));
> init_MUTEX(&(ss->current_urb_sem));
> init_MUTEX(&(ss->dev_semaphore));
>
> @@ -1063,7 +1058,6 @@
> down(&(ss->dev_semaphore));
>
> /* release the IRQ, if we have one */
> - down(&(ss->irq_urb_sem));
> if (ss->irq_urb) {
> US_DEBUGP("-- releasing irq URB\n");
> result = usb_unlink_urb(ss->irq_urb);
> @@ -1071,7 +1065,6 @@
> usb_free_urb(ss->irq_urb);
> ss->irq_urb = NULL;
> }
> - up(&(ss->irq_urb_sem));
>
> /* free up the main URB for this device */
> US_DEBUGP("-- releasing main URB\n");
> diff -urN -X dontdiff linux-2.4.26-rc2/drivers/usb/storage/usb.h linux-2.4.26-rc2-nip/drivers/usb/storage/usb.h
> --- linux-2.4.26-rc2/drivers/usb/storage/usb.h 2003-11-29 19:23:15.000000000 -0800
> +++ linux-2.4.26-rc2-nip/drivers/usb/storage/usb.h 2004-04-09 17:51:44.000000000 -0700
> @@ -116,7 +116,7 @@
> struct us_data *next; /* next device */
>
> /* the device we're working with */
> - struct semaphore dev_semaphore; /* protect pusb_dev */
> + struct semaphore dev_semaphore; /* protect many things */
> struct usb_device *pusb_dev; /* this usb_device */
>
> unsigned int flags; /* from filter initially */
> @@ -162,7 +162,6 @@
> atomic_t ip_wanted[1]; /* is an IRQ expected? */
>
> /* interrupt communications data */
> - struct semaphore irq_urb_sem; /* to protect irq_urb */
> struct urb *irq_urb; /* for USB int requests */
> unsigned char irqbuf[2]; /* buffer for USB IRQ */
> unsigned char irqdata[2]; /* data from USB IRQ */
>
>
> -------------------------------------------------------
> This SF.Net email is sponsored by: IBM Linux Tutorials
> Free Linux tutorial presented by Daniel Robbins, President and CEO of
> GenToo technologies. Learn everything from fundamentals to system
> administration.http://ads.osdn.com/?ad_id=1470&alloc_id=3638&op=click
> _______________________________________________
> [email protected]
> To unsubscribe, use the last form field at:
> https://lists.sourceforge.net/lists/listinfo/linux-usb-devel
--
Matthew Dharm Home: [email protected]
Maintainer, Linux USB Mass Storage Driver
Department of Justice agent. I have come to purify the flock.
-- DOJ agent
User Friendly, 5/22/1998
On Sat, 10 Apr 2004 17:09:57 -0700
Matthew Dharm <[email protected]> wrote:
> I'm uncertain if your handling of the io_request_lock is right.... but
> getting information on how to handle that has been like pulling teeth, so
> I'm inclined to trust your wide-scale testing on this.
It's not wide-scale, unfortunately, that's why I need more review and
testing of it. Those "Enterprise" people are mostly interested in very
specific things, in particular Bladecenter and JS-20, Dell's OEMed CD-ROMs,
and Lexar memory key which Dell resells. Very likely not all transports
or protocols are tested, for instance UFI. But I think it's right to
call it "intensive" testing.
The main test is to put a CD and keyboard on a hub, and hub on a KVM, then
flip KVM several times quickly from one blade to another. All hell breaks
loose. IIRC, I had four different OOPS and lockup scenarios.
> Was there a reason to add more do-nothing code to host_reset?
Woopsie. I wanted to write it, but understood that if I return right
code from "bus" reset, it should never be called. Sorry about that...
I'll remove that part.
> Is it really safe to remove the irq_urb_sem?
The idea here is to have disconnect and resets locked against each other.
They happen on different threads, unfortunately (khubd and scsi_eh).
Initially I tried various orders, but then I thought, "Why am I making
it hard on myself?! Much better just to merge them". The dev_semaphore now
covers everything irq_urb_sem used to cover, except one path into
usb_stor_allocate_irq from initial probing.
Thanks for looking at it!
-- Pete
On Sat, Apr 10, 2004 at 06:36:38PM -0700, Pete Zaitcev wrote:
> The main test is to put a CD and keyboard on a hub, and hub on a KVM, then
> flip KVM several times quickly from one blade to another. All hell breaks
> loose. IIRC, I had four different OOPS and lockup scenarios.
Now that's interesting.... do you have to have the keyboard and hub on the
KVM?
Matt
--
Matthew Dharm Home: [email protected]
Maintainer, Linux USB Mass Storage Driver
I say, what are all those naked people doing?
-- Big client to Stef
User Friendly, 12/14/1997
On Sat, 10 Apr 2004, Pete Zaitcev wrote:
> On Sat, 10 Apr 2004 17:09:57 -0700
> Matthew Dharm <[email protected]> wrote:
>
> > Was there a reason to add more do-nothing code to host_reset?
>
> Woopsie. I wanted to write it, but understood that if I return right
> code from "bus" reset, it should never be called. Sorry about that...
> I'll remove that part.
I haven't had time to look at your patch in any detail yet. But I did
notice the extra stuff in the host-reset path. It looks like you're
worrying about nothing -- the easiest thing to do would be to remove the
host-reset pointer from the host template. Then the SCSI error handler
would know that host resets aren't implemented and would never try to
carry them out.
Alan Stern