2002-10-04 19:39:20

by Mike Anderson

[permalink] [raw]
Subject: [PATCH] scsi error update 1/3

This series of patches is an update to scsi error handling.

00_scsi-error-base-1.diff:
- Fix bug on incorrect check of scsi_eh_tur return value.
- Fix debug printk format problems.
- Removed ref to arch specific semaphore value in debug printk

01_scsi-error-enh-1.diff:
- Forward port of Russell King's retry scsi cmd restore.
- Adjustment of BUS_RESET_SETTLE_TIME from 5 seconds to 10 seconds
to provide increased time post bus_reset to allow door lock
command to succeed. This should be exported to driverfs so that
it can be adjusted if needed.
- Error Policy change: Error recovery command retry is now not
based on failed command retry value.
- Error Policy change: Failed command is not retried if retry
count is expired.

02_scsi-error-dr-lck-1.diff:
- Forward port of Russell King's door lock changes.

Testing:
- Current patches were tested on a SPI interconnect using both in
kernel and new versions of the aic driver. A Plextor SCSI cd-rom
was used for a door lock device. Cable pulls were done during dd's
to generate errors and verify recovery / door re-lock.

The full patch is available at:
http://www-124.ibm.com/storageio/patches/2.5/scsi-error

-andmike
--
Michael Anderson
[email protected]

scsi_error.c | 78 +++++++++++++++++++++++++++++------------------------------
1 files changed, 39 insertions(+), 39 deletions(-)
-----

diff -Nru a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
--- a/drivers/scsi/scsi_error.c Fri Oct 4 08:04:49 2002
+++ b/drivers/scsi/scsi_error.c Fri Oct 4 08:04:49 2002
@@ -91,9 +91,9 @@
scmd->eh_timeout.expires = jiffies + timeout;
scmd->eh_timeout.function = (void (*)(unsigned long)) complete;

- SCSI_LOG_ERROR_RECOVERY(5, printk("Adding timer for command %p at"
- "%d (%p)\n", scmd, timeout,
- complete));
+ SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:"
+ " %d, (%p)\n", __FUNCTION__,
+ scmd, timeout, complete));

add_timer(&scmd->eh_timeout);

@@ -116,8 +116,9 @@

rtn = del_timer(&scmd->eh_timeout);

- SCSI_LOG_ERROR_RECOVERY(5, printk("Clearing timer for command %p"
- " %d\n", scmd, rtn));
+ SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p,"
+ " rtn: %d\n", __FUNCTION__,
+ scmd, rtn));

scmd->eh_timeout.data = (unsigned long) NULL;
scmd->eh_timeout.function = NULL;
@@ -150,7 +151,7 @@
scsi_host_failed_inc_and_test(scmd->host);

SCSI_LOG_TIMEOUT(3, printk("Command timed out active=%d busy=%d "
- "failed=%d\n",
+ " failed=%d\n",
atomic_read(&scmd->host->host_active),
scmd->host->host_busy,
scmd->host->host_failed));
@@ -173,7 +174,7 @@

SCSI_SLEEP(&sdev->host->host_wait, sdev->host->in_recovery);

- SCSI_LOG_ERROR_RECOVERY(5, printk("Open returning %d\n",
+ SCSI_LOG_ERROR_RECOVERY(5, printk("%s: rtn: %d\n", __FUNCTION__,
sdev->online));

return sdev->online;
@@ -209,10 +210,10 @@

if (cmd_timed_out || cmd_failed) {
SCSI_LOG_ERROR_RECOVERY(3,
- printk("scsi_eh: %d:%d:%d:%d cmds failed: %d,"
- "timedout: %d\n",
- shost->host_no, sdev->channel,
- sdev->id, sdev->lun,
+ printk("%s: %d:%d:%d:%d cmds failed: %d,"
+ " timedout: %d\n",
+ __FUNCTION__, shost->host_no,
+ sdev->channel, sdev->id, sdev->lun,
cmd_failed, cmd_timed_out));
cmd_timed_out = 0;
cmd_failed = 0;
@@ -220,8 +221,8 @@
}
}

- SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d commands on %d "
- "devices require eh work\n",
+ SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d commands on %d"
+ " devices require eh work\n",
total_failures, devices_failed));
}
#endif
@@ -265,10 +266,10 @@
* queued and will be finished along the
* way.
*/
- SCSI_LOG_ERROR_RECOVERY(1, printk("Error hdlr "
- "prematurely woken "
- "cmds still active "
- "(%p %x %d)\n",
+ SCSI_LOG_ERROR_RECOVERY(1, printk("Error hdlr"
+ " prematurely woken"
+ " cmds still active"
+ " (%p %x %d)\n",
scmd, scmd->state,
scmd->target));
}
@@ -440,12 +441,13 @@
static void scsi_eh_times_out(Scsi_Cmnd *scmd)
{
scsi_eh_eflags_set(scmd, SCSI_EH_REC_TIMEOUT);
- SCSI_LOG_ERROR_RECOVERY(3, printk("in scsi_eh_times_out %p\n", scmd));
+ SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd:%p\n", __FUNCTION__,
+ scmd));

if (scmd->host->eh_action != NULL)
up(scmd->host->eh_action);
else
- printk("missing scsi error handler thread\n");
+ printk("%s: eh_action NULL\n", __FUNCTION__);
}

/**
@@ -471,8 +473,8 @@

scmd->owner = SCSI_OWNER_ERROR_HANDLER;

- SCSI_LOG_ERROR_RECOVERY(3, printk("in eh_done %p result:%x\n", scmd,
- scmd->result));
+ SCSI_LOG_ERROR_RECOVERY(3, printk("%s scmd: %p result: %x\n",
+ __FUNCTION__, scmd, scmd->result));

if (scmd->host->eh_action != NULL)
up(scmd->host->eh_action);
@@ -552,9 +554,8 @@

rtn = FAILED;
}
- SCSI_LOG_ERROR_RECOVERY(3, printk("%s: %p rtn:%x\n",
- __FUNCTION__, scmd,
- rtn));
+ SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd: %p, rtn:%x\n",
+ __FUNCTION__, scmd, rtn));
} else {
int temp;

@@ -622,7 +623,7 @@
? &scsi_result0[0] : kmalloc(512, GFP_ATOMIC | GFP_DMA);

if (scsi_result == NULL) {
- printk("cannot allocate scsi_result in scsi_request_sense.\n");
+ printk("%s: cannot allocate scsi_result.\n", __FUNCTION__);
return FAILED;
}
/*
@@ -758,14 +759,14 @@
continue;

SCSI_LOG_ERROR_RECOVERY(2, printk("%s: requesting sense"
- "for %d\n", __FUNCTION__,
- scmd->target));
+ " for tgt: %d\n",
+ __FUNCTION__, scmd->target));
rtn = scsi_request_sense(scmd);
if (rtn != SUCCESS)
continue;

SCSI_LOG_ERROR_RECOVERY(3, printk("sense requested for %p"
- "- result %x\n", scmd,
+ " result %x\n", scmd,
scmd->result));
SCSI_LOG_ERROR_RECOVERY(3, print_sense("bh", scmd));

@@ -929,7 +930,7 @@

rtn = scsi_try_to_abort_cmd(scmd);
if (rtn == SUCCESS) {
- if (scsi_eh_tur(scmd)) {
+ if (!scsi_eh_tur(scmd)) {
rtn = scsi_eh_retry_cmd(scmd);
if (rtn == SUCCESS)
scsi_eh_finish_cmd(scmd, shost);
@@ -999,7 +1000,7 @@
* a bus device reset to it.
*/
rtn = scsi_try_bus_device_reset(scmd);
- if ((rtn == SUCCESS) && (scsi_eh_tur(scmd)))
+ if ((rtn == SUCCESS) && (!scsi_eh_tur(scmd)))
for (scmd = sc_todo; scmd; scmd = scmd->bh_next)
if ((scmd->device == sdev) &&
scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR)) {
@@ -1141,7 +1142,7 @@
if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR)
|| channel != scmd->channel)
continue;
- if (scsi_eh_tur(scmd)) {
+ if (!scsi_eh_tur(scmd)) {
rtn = scsi_eh_retry_cmd(scmd);

if (rtn == SUCCESS)
@@ -1168,10 +1169,10 @@
if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CMD_ERR))
continue;

- printk(KERN_INFO "%s: Device set offline - not"
- "ready or command retry failed"
- "after error recovery: host"
- "%d channel %d id %d lun %d\n",
+ printk(KERN_INFO "%s: Device offlined - not"
+ " ready or command retry failed"
+ " after error recovery: host"
+ " %d channel %d id %d lun %d\n",
__FUNCTION__, shost->host_no,
scmd->device->channel,
scmd->device->id,
@@ -1243,7 +1244,7 @@
*/
if (scmd->device->online == FALSE) {
SCSI_LOG_ERROR_RECOVERY(5, printk("%s: device offline - report"
- "as SUCCESS\n",
+ " as SUCCESS\n",
__FUNCTION__));
return SUCCESS;
}
@@ -1362,7 +1363,7 @@
goto maybe_retry;

case RESERVATION_CONFLICT:
- printk("scsi%d (%d,%d,%d) : reservation conflict\n",
+ printk("scsi%d (%d,%d,%d) : reservation conflict\n",
scmd->host->host_no, scmd->channel,
scmd->device->id, scmd->device->lun);
return SUCCESS; /* causes immediate i/o error */
@@ -1558,8 +1559,7 @@
/*
* Wake up the thread that created us.
*/
- SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent %d\n",
- shost->eh_notify->count.counter));
+ SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent \n"));

up(shost->eh_notify);


2002-10-04 19:56:44

by Mike Anderson

[permalink] [raw]
Subject: [PATCH] scsi error update 3/3 (door lck)


This series of patches is an update to scsi error handling.

00_scsi-error-base-1.diff:
- Fix bug on incorrect check of scsi_eh_tur return value.
- Fix debug printk format problems.
- Removed ref to arch specific semaphore value in debug printk

01_scsi-error-enh-1.diff:
- Forward port of Russell King's retry scsi cmd restore.
- Adjustment of BUS_RESET_SETTLE_TIME from 5 seconds to 10 seconds
to provide increased time post bus_reset to allow door lock
command to succeed. This should be exported to driverfs so that
it can be adjusted if needed.
- Error Policy change: Error recovery command retry is now not
based on failed command retry value.
- Error Policy change: Failed command is not retried if retry
count is expired.

02_scsi-error-dr-lck-1.diff:
- Forward port of Russell King's door lock changes.

Testing:
- Current patches were tested on a SPI interconnect using both in
kernel and new versions of the aic driver. A Plextor SCSI cd-rom
was used for a door lock device. Cable pulls were done during dd's
to generate errors and verify recovery / door re-lock.

The full patch is available at:
http://www-124.ibm.com/storageio/patches/2.5/scsi-error

-andmike
--
Michael Anderson
[email protected]

drivers/scsi/scsi.h | 1
drivers/scsi/scsi_error.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++
drivers/scsi/scsi_ioctl.c | 42 ++++++++++++++----------
drivers/scsi/scsi_lib.c | 27 ---------------
drivers/scsi/scsi_syms.c | 1
drivers/scsi/sd.c | 4 +-
drivers/scsi/sr_ioctl.c | 4 +-
include/scsi/scsi_ioctl.h | 8 ++--
8 files changed, 115 insertions(+), 52 deletions(-)
------

diff -Nru a/drivers/scsi/scsi.h b/drivers/scsi/scsi.h
--- a/drivers/scsi/scsi.h Fri Oct 4 08:59:02 2002
+++ b/drivers/scsi/scsi.h Fri Oct 4 08:59:02 2002
@@ -597,6 +597,7 @@
unsigned changed:1; /* Data invalid due to media change */
unsigned busy:1; /* Used to prevent races */
unsigned lockable:1; /* Able to prevent media removal */
+ unsigned locked:1; /* Media removal disabled */
unsigned borken:1; /* Tell the Seagate driver to be
* painfully slow on this device */
unsigned tagged_supported:1; /* Supports SCSI-II tagged queuing */
diff -Nru a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
--- a/drivers/scsi/scsi_error.c Fri Oct 4 08:59:02 2002
+++ b/drivers/scsi/scsi_error.c Fri Oct 4 08:59:02 2002
@@ -39,6 +39,8 @@
#include "scsi.h"
#include "hosts.h"

+#include <scsi/scsi_ioctl.h> /* grr */
+
/*
* We must always allow SHUTDOWN_SIGS. Even if we are not a module,
* the host drivers that we are using may be loaded as modules, and
@@ -1361,6 +1363,75 @@
}

/**
+ * scsi_eh_lock_done - done function for eh door lock request
+ * @scmd: SCSI command block for the door lock request
+ *
+ * Notes:
+ * We completed the asynchronous door lock request, and it has either
+ * locked the door or failed. We must free the command structures
+ * associated with this request.
+ **/
+static void scsi_eh_lock_done(struct scsi_cmnd *scmd)
+{
+ struct scsi_request *sreq = scmd->sc_request;
+
+ scmd->sc_request = NULL;
+ sreq->sr_command = NULL;
+
+ scsi_release_command(scmd);
+ scsi_release_request(sreq);
+}
+
+
+/**
+ * scsi_eh_lock_door - Prevent medium removal for the specified device
+ * @sdev: SCSI device to prevent medium removal
+ *
+ * Locking:
+ * We must be called from process context; scsi_allocate_request()
+ * may sleep.
+ *
+ * Notes:
+ * We queue up an asynchronous "ALLOW MEDIUM REMOVAL" request on the
+ * head of the devices request queue, and continue.
+ *
+ * Bugs:
+ * scsi_allocate_request() may sleep waiting for existing requests to
+ * be processed. However, since we haven't kicked off any request
+ * processing for this host, this may deadlock.
+ *
+ * If scsi_allocate_request() fails for what ever reason, we
+ * completely forget to lock the door.
+ **/
+static void scsi_eh_lock_door(struct scsi_device *sdev)
+{
+ struct scsi_request *sreq = scsi_allocate_request(sdev);
+
+ if (sreq == NULL) {
+ printk(KERN_ERR "%s: request allocate failed,"
+ "prevent media removal cmd not sent", __FUNCTION__);
+ return;
+ }
+
+ sreq->sr_cmnd[0] = ALLOW_MEDIUM_REMOVAL;
+ sreq->sr_cmnd[1] = (sdev->scsi_level <= SCSI_2) ? (sdev->lun << 5) : 0;
+ sreq->sr_cmnd[2] = 0;
+ sreq->sr_cmnd[3] = 0;
+ sreq->sr_cmnd[4] = SCSI_REMOVAL_PREVENT;
+ sreq->sr_cmnd[5] = 0;
+ sreq->sr_data_direction = SCSI_DATA_NONE;
+ sreq->sr_bufflen = 0;
+ sreq->sr_buffer = NULL;
+ sreq->sr_allowed = 5;
+ sreq->sr_done = scsi_eh_lock_done;
+ sreq->sr_timeout_per_command = 10 * HZ;
+ sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]);
+
+ scsi_insert_special_req(sreq, 1);
+}
+
+
+/**
* scsi_restart_operations - restart io operations to the specified host.
* @shost: Host we are restarting.
*
@@ -1374,6 +1445,15 @@
unsigned long flags;

ASSERT_LOCK(shost->host_lock, 0);
+
+ /*
+ * If the door was locked, we need to insert a door lock request
+ * onto the head of the SCSI request queue for the device. There
+ * is no point trying to lock the door of an off-line device.
+ */
+ for (sdev = shost->host_queue; sdev; sdev = sdev->next)
+ if (sdev->online && sdev->locked)
+ scsi_eh_lock_door(sdev);

/*
* next free up anything directly waiting upon the host. this
diff -Nru a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
--- a/drivers/scsi/scsi_ioctl.c Fri Oct 4 08:59:02 2002
+++ b/drivers/scsi/scsi_ioctl.c Fri Oct 4 08:59:02 2002
@@ -151,6 +151,29 @@
return result;
}

+int scsi_set_medium_removal(Scsi_Device *dev, char state)
+{
+ char scsi_cmd[MAX_COMMAND_SIZE];
+ int ret;
+
+ if (!dev->removable || !dev->lockable)
+ return 0;
+
+ scsi_cmd[0] = ALLOW_MEDIUM_REMOVAL;
+ scsi_cmd[1] = (dev->scsi_level <= SCSI_2) ? (dev->lun << 5) : 0;
+ scsi_cmd[2] = 0;
+ scsi_cmd[3] = 0;
+ scsi_cmd[4] = state;
+ scsi_cmd[5] = 0;
+
+ ret = ioctl_internal_command(dev, scsi_cmd, IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES);
+
+ if (ret == 0)
+ dev->locked = state == SCSI_REMOVAL_PREVENT;
+
+ return ret;
+}
+
/*
* This interface is deprecated - users should use the scsi generic (sg)
* interface instead, as this is a more flexible approach to performing
@@ -448,24 +471,9 @@
return scsi_ioctl_send_command((Scsi_Device *) dev,
(Scsi_Ioctl_Command *) arg);
case SCSI_IOCTL_DOORLOCK:
- if (!dev->removable || !dev->lockable)
- return 0;
- scsi_cmd[0] = ALLOW_MEDIUM_REMOVAL;
- scsi_cmd[1] = cmd_byte1;
- scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
- scsi_cmd[4] = SCSI_REMOVAL_PREVENT;
- return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
- IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES);
- break;
+ return scsi_set_medium_removal(dev, SCSI_REMOVAL_PREVENT);
case SCSI_IOCTL_DOORUNLOCK:
- if (!dev->removable || !dev->lockable)
- return 0;
- scsi_cmd[0] = ALLOW_MEDIUM_REMOVAL;
- scsi_cmd[1] = cmd_byte1;
- scsi_cmd[2] = scsi_cmd[3] = scsi_cmd[5] = 0;
- scsi_cmd[4] = SCSI_REMOVAL_ALLOW;
- return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
- IOCTL_NORMAL_TIMEOUT, NORMAL_RETRIES);
+ return scsi_set_medium_removal(dev, SCSI_REMOVAL_ALLOW);
case SCSI_IOCTL_TEST_UNIT_READY:
scsi_cmd[0] = TEST_UNIT_READY;
scsi_cmd[1] = cmd_byte1;
diff -Nru a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
--- a/drivers/scsi/scsi_lib.c Fri Oct 4 08:59:02 2002
+++ b/drivers/scsi/scsi_lib.c Fri Oct 4 08:59:02 2002
@@ -804,33 +804,6 @@
SDpnt->starved = 0;
}

- /*
- * FIXME(eric)
- * I am not sure where the best place to do this is. We need
- * to hook in a place where we are likely to come if in user
- * space. Technically the error handling thread should be
- * doing this crap, but the error handler isn't used by
- * most hosts.
- */
- if (SDpnt->was_reset) {
- /*
- * We need to relock the door, but we might
- * be in an interrupt handler. Only do this
- * from user space, since we do not want to
- * sleep from an interrupt.
- *
- * FIXME(eric) - have the error handler thread do
- * this work.
- */
- SDpnt->was_reset = 0;
- if (SDpnt->removable && !in_interrupt()) {
- spin_unlock_irq(q->queue_lock);
- scsi_ioctl(SDpnt, SCSI_IOCTL_DOORLOCK, 0);
- spin_lock_irq(q->queue_lock);
- continue;
- }
- }
-
/*
* If we couldn't find a request that could be queued, then we
* can also quit.
diff -Nru a/drivers/scsi/scsi_syms.c b/drivers/scsi/scsi_syms.c
--- a/drivers/scsi/scsi_syms.c Fri Oct 4 08:59:02 2002
+++ b/drivers/scsi/scsi_syms.c Fri Oct 4 08:59:02 2002
@@ -54,6 +54,7 @@
EXPORT_SYMBOL(print_Scsi_Cmnd);
EXPORT_SYMBOL(scsi_block_when_processing_errors);
EXPORT_SYMBOL(scsi_ioctl_send_command);
+EXPORT_SYMBOL(scsi_set_medium_removal);
#if defined(CONFIG_SCSI_LOGGING) /* { */
EXPORT_SYMBOL(scsi_logging_level);
#endif
diff -Nru a/drivers/scsi/sd.c b/drivers/scsi/sd.c
--- a/drivers/scsi/sd.c Fri Oct 4 08:59:02 2002
+++ b/drivers/scsi/sd.c Fri Oct 4 08:59:02 2002
@@ -524,7 +524,7 @@
if (sdp->removable)
if (sdp->access_count==1)
if (scsi_block_when_processing_errors(sdp))
- scsi_ioctl(sdp, SCSI_IOCTL_DOORLOCK, NULL);
+ scsi_set_medium_removal(sdp, SCSI_REMOVAL_PREVENT);

return 0;

@@ -568,7 +568,7 @@
if (sdp->removable) {
if (!sdp->access_count)
if (scsi_block_when_processing_errors(sdp))
- scsi_ioctl(sdp, SCSI_IOCTL_DOORUNLOCK, NULL);
+ scsi_set_medium_removal(sdp, SCSI_REMOVAL_ALLOW);
}
if (sdp->host->hostt->module)
__MOD_DEC_USE_COUNT(sdp->host->hostt->module);
diff -Nru a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c
--- a/drivers/scsi/sr_ioctl.c Fri Oct 4 08:59:02 2002
+++ b/drivers/scsi/sr_ioctl.c Fri Oct 4 08:59:02 2002
@@ -218,8 +218,8 @@
{
Scsi_CD *cd = cdi->handle;

- return scsi_ioctl(cd->device, lock ? SCSI_IOCTL_DOORLOCK :
- SCSI_IOCTL_DOORUNLOCK, 0);
+ return scsi_set_medium_removal(cd->device, lock ?
+ SCSI_REMOVAL_PREVENT : SCSI_REMOVAL_ALLOW);
}

int sr_drive_status(struct cdrom_device_info *cdi, int slot)
diff -Nru a/include/scsi/scsi_ioctl.h b/include/scsi/scsi_ioctl.h
--- a/include/scsi/scsi_ioctl.h Fri Oct 4 08:59:02 2002
+++ b/include/scsi/scsi_ioctl.h Fri Oct 4 08:59:02 2002
@@ -39,10 +39,10 @@
unsigned char host_wwn[8]; // include NULL term.
} Scsi_FCTargAddress;

-extern int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg);
-extern int kernel_scsi_ioctl (Scsi_Device *dev, int cmd, void *arg);
-extern int scsi_ioctl_send_command(Scsi_Device *dev,
- Scsi_Ioctl_Command *arg);
+extern int scsi_ioctl (Scsi_Device *, int , void *);
+extern int kernel_scsi_ioctl (Scsi_Device *, int, void *);
+extern int scsi_ioctl_send_command(Scsi_Device *, Scsi_Ioctl_Command *);
+extern int scsi_set_medium_removal(Scsi_Device *, char);

#endif

2002-10-04 19:51:02

by Mike Anderson

[permalink] [raw]
Subject: [PATCH] scsi error update 2/3 (enh)


This series of patches is an update to scsi error handling.

00_scsi-error-base-1.diff:
- Fix bug on incorrect check of scsi_eh_tur return value.
- Fix debug printk format problems.
- Removed ref to arch specific semaphore value in debug printk

01_scsi-error-enh-1.diff:
- Forward port of Russell King's retry scsi cmd restore.
- Adjustment of BUS_RESET_SETTLE_TIME from 5 seconds to 10 seconds
to provide increased time post bus_reset to allow door lock
command to succeed. This should be exported to driverfs so that
it can be adjusted if needed.
- Error Policy change: Error recovery command retry is now not
based on failed command retry value.
- Error Policy change: Failed command is not retried if retry
count is expired.

02_scsi-error-dr-lck-1.diff:
- Forward port of Russell King's door lock changes.

Testing:
- Current patches were tested on a SPI interconnect using both in
kernel and new versions of the aic driver. A Plextor SCSI cd-rom
was used for a door lock device. Cable pulls were done during dd's
to generate errors and verify recovery / door re-lock.

The full patch is available at:
http://www-124.ibm.com/storageio/patches/2.5/scsi-error

-andmike
--
Michael Anderson
[email protected]

scsi.c | 12 ++----
scsi.h | 1
scsi_error.c | 117 +++++++++++++++++++++++------------------------------------
scsi_lib.c | 26 ++++++++++++-
4 files changed, 76 insertions(+), 80 deletions(-)
-----
diff -Nru a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
--- a/drivers/scsi/scsi.c Fri Oct 4 08:37:58 2002
+++ b/drivers/scsi/scsi.c Fri Oct 4 08:37:58 2002
@@ -1345,14 +1345,10 @@
*/
int scsi_retry_command(Scsi_Cmnd * SCpnt)
{
- memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
- sizeof(SCpnt->data_cmnd));
- SCpnt->request_buffer = SCpnt->buffer;
- SCpnt->request_bufflen = SCpnt->bufflen;
- SCpnt->use_sg = SCpnt->old_use_sg;
- SCpnt->cmd_len = SCpnt->old_cmd_len;
- SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
- SCpnt->underflow = SCpnt->old_underflow;
+ /*
+ * Restore the SCSI command state.
+ */
+ scsi_setup_cmd_retry(SCpnt);

/*
* Zero the sense information from the last time we tried
diff -Nru a/drivers/scsi/scsi.h b/drivers/scsi/scsi.h
--- a/drivers/scsi/scsi.h Fri Oct 4 08:37:58 2002
+++ b/drivers/scsi/scsi.h Fri Oct 4 08:37:58 2002
@@ -467,6 +467,7 @@
int sectors);
extern struct Scsi_Device_Template *scsi_get_request_dev(struct request *);
extern int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt);
+extern void scsi_setup_cmd_retry(Scsi_Cmnd *SCpnt);
extern int scsi_insert_special_cmd(Scsi_Cmnd * SCpnt, int);
extern void scsi_io_completion(Scsi_Cmnd * SCpnt, int good_sectors,
int block_sectors);
diff -Nru a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
--- a/drivers/scsi/scsi_error.c Fri Oct 4 08:37:58 2002
+++ b/drivers/scsi/scsi_error.c Fri Oct 4 08:37:58 2002
@@ -8,6 +8,10 @@
*
* Restructured scsi_unjam_host and associated functions.
* September 04, 2002 Mike Anderson ([email protected])
+ *
+ * Forward port of Russell King's ([email protected]) changes and
+ * minor cleanups.
+ * September 30, 2002 Mike Anderson ([email protected])
*/

#include <linux/module.h>
@@ -59,7 +63,7 @@
* These should *probably* be handled by the host itself.
* Since it is allowed to sleep, it probably should.
*/
-#define BUS_RESET_SETTLE_TIME 5*HZ
+#define BUS_RESET_SETTLE_TIME 10*HZ
#define HOST_RESET_SETTLE_TIME 10*HZ

/**
@@ -279,12 +283,17 @@

SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(*sc_list, shost));

- BUG_ON(shost->host_failed != found);
+ if (shost->host_failed != found)
+ printk(KERN_ERR "%s: host_failed: %d != found: %d\n",
+ __FUNCTION__, shost->host_failed, found);
}

/**
* scsi_check_sense - Examine scsi cmd sense
* @scmd: Cmd to have sense checked.
+ *
+ * Return value:
+ * SUCCESS or FAILED or NEEDS_RETRY
**/
static int scsi_check_sense(Scsi_Cmnd *scmd)
{
@@ -354,7 +363,6 @@
**/
static int scsi_eh_completed_normally(Scsi_Cmnd *scmd)
{
- int rtn;

/*
* first check the host byte, to see if there is anything in there
@@ -370,7 +378,7 @@
* SUCCESS.
*/
scmd->flags &= ~IS_RESETTING;
- goto maybe_retry;
+ return NEEDS_RETRY;
}
/*
* rats. we are already in the error handler, so we now
@@ -378,10 +386,7 @@
* is valid, we have a pretty good idea of what to do.
* if not, we mark it as FAILED.
*/
- rtn = scsi_check_sense(scmd);
- if (rtn == NEEDS_RETRY)
- goto maybe_retry;
- return rtn;
+ return scsi_check_sense(scmd);
}
if (host_byte(scmd->result) != DID_OK) {
return FAILED;
@@ -401,10 +406,7 @@
case COMMAND_TERMINATED:
return SUCCESS;
case CHECK_CONDITION:
- rtn = scsi_check_sense(scmd);
- if (rtn == NEEDS_RETRY)
- goto maybe_retry;
- return rtn;
+ return scsi_check_sense(scmd);
case CONDITION_GOOD:
case INTERMEDIATE_GOOD:
case INTERMEDIATE_C_GOOD:
@@ -419,14 +421,6 @@
return FAILED;
}
return FAILED;
-
- maybe_retry:
- if ((++scmd->retries) < scmd->allowed) {
- return NEEDS_RETRY;
- } else {
- /* no more retries - report this one back to upper level */
- return SUCCESS;
- }
}

/**
@@ -490,7 +484,7 @@
* this case, and furthermore, there is a different completion handler
* vs scsi_dispatch_cmd.
* Return value:
- * SUCCESS/FAILED
+ * SUCCESS or FAILED or NEEDS_RETRY
**/
static int scsi_send_eh_cmnd(Scsi_Cmnd *scmd, int timeout)
{
@@ -500,7 +494,6 @@

ASSERT_LOCK(host->host_lock, 0);

-retry:
/*
* we will use a queued command if possible, otherwise we will
* emulate the queuing and calling of completion function ourselves.
@@ -577,16 +570,15 @@
* actually did complete normally.
*/
if (rtn == SUCCESS) {
- int ret = scsi_eh_completed_normally(scmd);
+ int rtn = scsi_eh_completed_normally(scmd);
SCSI_LOG_ERROR_RECOVERY(3,
printk("%s: scsi_eh_completed_normally %x\n",
- __FUNCTION__, ret));
- switch (ret) {
+ __FUNCTION__, rtn));
+ switch (rtn) {
case SUCCESS:
- break;
case NEEDS_RETRY:
- goto retry;
case FAILED:
+ break;
default:
rtn = FAILED;
break;
@@ -658,15 +650,8 @@
* when we eventually call scsi_finish, we really wish to complete
* the original request, so let's restore the original data. (db)
*/
- memcpy((void *) scmd->cmnd, (void *) scmd->data_cmnd,
- sizeof(scmd->data_cmnd));
+ scsi_setup_cmd_retry(scmd);
scmd->result = saved_result;
- scmd->request_buffer = scmd->buffer;
- scmd->request_bufflen = scmd->bufflen;
- scmd->use_sg = scmd->old_use_sg;
- scmd->cmd_len = scmd->old_cmd_len;
- scmd->sc_data_direction = scmd->sc_old_data_direction;
- scmd->underflow = scmd->old_underflow;

/*
* hey, we are done. let's look to see what happened.
@@ -684,16 +669,16 @@
**/
static int scsi_eh_retry_cmd(Scsi_Cmnd *scmd)
{
- memcpy((void *) scmd->cmnd, (void *) scmd->data_cmnd,
- sizeof(scmd->data_cmnd));
- scmd->request_buffer = scmd->buffer;
- scmd->request_bufflen = scmd->bufflen;
- scmd->use_sg = scmd->old_use_sg;
- scmd->cmd_len = scmd->old_cmd_len;
- scmd->sc_data_direction = scmd->sc_old_data_direction;
- scmd->underflow = scmd->old_underflow;
+ int rtn = SUCCESS;
+
+ for (; scmd->retries < scmd->allowed; scmd->retries++) {
+ scsi_setup_cmd_retry(scmd);
+ rtn = scsi_send_eh_cmnd(scmd, scmd->timeout_per_command);
+ if (rtn != NEEDS_RETRY)
+ break;
+ }

- return scsi_send_eh_cmnd(scmd, scmd->timeout_per_command);
+ return rtn;
}

/**
@@ -718,9 +703,7 @@
* set this back so that the upper level can correctly free up
* things.
*/
- scmd->use_sg = scmd->old_use_sg;
- scmd->sc_data_direction = scmd->sc_old_data_direction;
- scmd->underflow = scmd->old_underflow;
+ scsi_setup_cmd_retry(scmd);
}

/**
@@ -848,7 +831,9 @@
static unsigned char tur_command[6] =
{TEST_UNIT_READY, 0, 0, 0, 0, 0};
int rtn;
+ int retry_cnt = 1;

+retry_tur:
memcpy((void *) scmd->cmnd, (void *) tur_command,
sizeof(tur_command));

@@ -874,32 +859,18 @@
* when we eventually call scsi_finish, we really wish to complete
* the original request, so let's restore the original data. (db)
*/
- memcpy((void *) scmd->cmnd, (void *) scmd->data_cmnd,
- sizeof(scmd->data_cmnd));
- scmd->request_buffer = scmd->buffer;
- scmd->request_bufflen = scmd->bufflen;
- scmd->use_sg = scmd->old_use_sg;
- scmd->cmd_len = scmd->old_cmd_len;
- scmd->sc_data_direction = scmd->sc_old_data_direction;
- scmd->underflow = scmd->old_underflow;
+ scsi_setup_cmd_retry(scmd);

/*
* hey, we are done. let's look to see what happened.
*/
- SCSI_LOG_ERROR_RECOVERY(3,
- printk("%s: scmd %p rtn %x\n",
+ SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n",
__FUNCTION__, scmd, rtn));
- if ((rtn == SUCCESS) && scmd->result) {
- if (((driver_byte(scmd->result) & DRIVER_SENSE) ||
- (status_byte(scmd->result) & CHECK_CONDITION)) &&
- (SCSI_SENSE_VALID(scmd))) {
- if (((scmd->sense_buffer[2] & 0xf) != NOT_READY) &&
- ((scmd->sense_buffer[2] & 0xf) != UNIT_ATTENTION) &&
- ((scmd->sense_buffer[2] & 0xf) != ILLEGAL_REQUEST)) {
- return 0;
- }
- }
- }
+ if (rtn == SUCCESS)
+ return 0;
+ else if (rtn == NEEDS_RETRY)
+ if (retry_cnt--)
+ goto retry_tur;
return 1;
}

@@ -964,6 +935,11 @@
rtn = scmd->host->hostt->eh_device_reset_handler(scmd);
spin_unlock_irqrestore(scmd->host->host_lock, flags);

+ if (rtn == SUCCESS) {
+ scmd->device->was_reset = 1;
+ scmd->device->expecting_cc_ua = 1;
+ }
+
return rtn;
}

@@ -1422,8 +1398,7 @@
if ((shost->can_queue > 0 &&
(shost->host_busy >= shost->can_queue))
|| (shost->host_blocked)
- || (shost->host_self_blocked)
- || (sdev->device_blocked)) {
+ || (shost->host_self_blocked)) {
break;
}

@@ -1471,7 +1446,7 @@
if (scsi_eh_get_sense(sc_todo, shost))
if (scsi_eh_abort_cmd(sc_todo, shost))
if (scsi_eh_bus_device_reset(sc_todo, shost))
- if(scsi_eh_bus_host_reset(sc_todo, shost))
+ if (scsi_eh_bus_host_reset(sc_todo, shost))
scsi_eh_offline_sdevs(sc_todo, shost);

BUG_ON(shost->host_failed);
diff -Nru a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
--- a/drivers/scsi/scsi_lib.c Fri Oct 4 08:37:58 2002
+++ b/drivers/scsi/scsi_lib.c Fri Oct 4 08:37:58 2002
@@ -160,6 +160,30 @@
}

/*
+ * Function: scsi_setup_cmd_retry()
+ *
+ * Purpose: Restore the command state for a retry
+ *
+ * Arguments: SCpnt - command to be restored
+ *
+ * Returns: Nothing
+ *
+ * Notes: Immediately prior to retrying a command, we need
+ * to restore certain fields that we saved above.
+ */
+void scsi_setup_cmd_retry(Scsi_Cmnd *SCpnt)
+{
+ memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
+ sizeof(SCpnt->data_cmnd));
+ SCpnt->request_buffer = SCpnt->buffer;
+ SCpnt->request_bufflen = SCpnt->bufflen;
+ SCpnt->use_sg = SCpnt->old_use_sg;
+ SCpnt->cmd_len = SCpnt->old_cmd_len;
+ SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
+ SCpnt->underflow = SCpnt->old_underflow;
+}
+
+/*
* Function: scsi_queue_next_request()
*
* Purpose: Handle post-processing of completed commands.
@@ -614,7 +638,7 @@
printk("scsi%d: ERROR on channel %d, id %d, lun %d, CDB: ",
SCpnt->host->host_no, (int) SCpnt->channel,
(int) SCpnt->target, (int) SCpnt->lun);
- print_command(SCpnt->cmnd);
+ print_command(SCpnt->data_cmnd);
print_sense("sd", SCpnt);
SCpnt = scsi_end_request(SCpnt, 0, block_sectors);
return;