2021-06-04 17:23:25

by Michael Kelley (LINUX)

[permalink] [raw]
Subject: [PATCH 3/3] scsi: storvsc: Correctly handle multiple flags in srb_status

Hyper-V is observed to sometimes set multiple flags in the
srb_status, such as ABORTED and ERROR. Current code in
storvsc_handle_error() handles only a single flag being set,
and does nothing when multiple flags are set. Fix this by
changing the case statement into a series of "if" statements
testing individual flags. The functionality for handling each
flag is unchanged.

Signed-off-by: Michael Kelley <[email protected]>
---
drivers/scsi/storvsc_drv.c | 61 +++++++++++++++++++++++++---------------------
1 file changed, 33 insertions(+), 28 deletions(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index fff9441..e96d2aa 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1009,17 +1009,40 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
struct storvsc_scan_work *wrk;
void (*process_err_fn)(struct work_struct *work);
struct hv_host_device *host_dev = shost_priv(host);
- bool do_work = false;

- switch (SRB_STATUS(vm_srb->srb_status)) {
- case SRB_STATUS_ERROR:
+ /*
+ * In some situations, Hyper-V sets multiple bits in the
+ * srb_status, such as ABORTED and ERROR. So process them
+ * individually, with the most specific bits first.
+ */
+
+ if (vm_srb->srb_status & SRB_STATUS_INVALID_LUN) {
+ set_host_byte(scmnd, DID_NO_CONNECT);
+ process_err_fn = storvsc_remove_lun;
+ goto do_work;
+ }
+
+ if (vm_srb->srb_status & SRB_STATUS_ABORTED) {
+ if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID &&
+ /* Capacity data has changed */
+ (asc == 0x2a) && (ascq == 0x9)) {
+ process_err_fn = storvsc_device_scan;
+ /*
+ * Retry the I/O that triggered this.
+ */
+ set_host_byte(scmnd, DID_REQUEUE);
+ goto do_work;
+ }
+ }
+
+ if (vm_srb->srb_status & SRB_STATUS_ERROR) {
/*
* Let upper layer deal with error when
* sense message is present.
*/
-
if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)
- break;
+ return;
+
/*
* If there is an error; offline the device since all
* error recovery strategies would have already been
@@ -1032,37 +1055,19 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
set_host_byte(scmnd, DID_PASSTHROUGH);
break;
/*
- * On Some Windows hosts TEST_UNIT_READY command can return
- * SRB_STATUS_ERROR, let the upper level code deal with it
- * based on the sense information.
+ * On some Hyper-V hosts TEST_UNIT_READY command can
+ * return SRB_STATUS_ERROR. Let the upper level code
+ * deal with it based on the sense information.
*/
case TEST_UNIT_READY:
break;
default:
set_host_byte(scmnd, DID_ERROR);
}
- break;
- case SRB_STATUS_INVALID_LUN:
- set_host_byte(scmnd, DID_NO_CONNECT);
- do_work = true;
- process_err_fn = storvsc_remove_lun;
- break;
- case SRB_STATUS_ABORTED:
- if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID &&
- (asc == 0x2a) && (ascq == 0x9)) {
- do_work = true;
- process_err_fn = storvsc_device_scan;
- /*
- * Retry the I/O that triggered this.
- */
- set_host_byte(scmnd, DID_REQUEUE);
- }
- break;
}
+ return;

- if (!do_work)
- return;
-
+do_work:
/*
* We need to schedule work to process this error; schedule it.
*/
--
1.8.3.1


2021-06-07 22:26:46

by Long Li

[permalink] [raw]
Subject: RE: [PATCH 3/3] scsi: storvsc: Correctly handle multiple flags in srb_status

> Subject: [PATCH 3/3] scsi: storvsc: Correctly handle multiple flags in
> srb_status
>
> Hyper-V is observed to sometimes set multiple flags in the srb_status, such
> as ABORTED and ERROR. Current code in
> storvsc_handle_error() handles only a single flag being set, and does nothing
> when multiple flags are set. Fix this by changing the case statement into a
> series of "if" statements testing individual flags. The functionality for handling
> each flag is unchanged.
>
> Signed-off-by: Michael Kelley <[email protected]>
> ---
> drivers/scsi/storvsc_drv.c | 61 +++++++++++++++++++++++++----------------
> -----
> 1 file changed, 33 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index
> fff9441..e96d2aa 100644
> --- a/drivers/scsi/storvsc_drv.c
> +++ b/drivers/scsi/storvsc_drv.c
> @@ -1009,17 +1009,40 @@ static void storvsc_handle_error(struct
> vmscsi_request *vm_srb,
> struct storvsc_scan_work *wrk;
> void (*process_err_fn)(struct work_struct *work);
> struct hv_host_device *host_dev = shost_priv(host);
> - bool do_work = false;
>
> - switch (SRB_STATUS(vm_srb->srb_status)) {
> - case SRB_STATUS_ERROR:
> + /*
> + * In some situations, Hyper-V sets multiple bits in the
> + * srb_status, such as ABORTED and ERROR. So process them
> + * individually, with the most specific bits first.
> + */
> +
> + if (vm_srb->srb_status & SRB_STATUS_INVALID_LUN) {
> + set_host_byte(scmnd, DID_NO_CONNECT);
> + process_err_fn = storvsc_remove_lun;
> + goto do_work;
> + }
> +
> + if (vm_srb->srb_status & SRB_STATUS_ABORTED) {
> + if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID
> &&
> + /* Capacity data has changed */
> + (asc == 0x2a) && (ascq == 0x9)) {
> + process_err_fn = storvsc_device_scan;
> + /*
> + * Retry the I/O that triggered this.
> + */
> + set_host_byte(scmnd, DID_REQUEUE);
> + goto do_work;
> + }
> + }
> +
> + if (vm_srb->srb_status & SRB_STATUS_ERROR) {

I'm curious on SRB_STATUS_INVALID_LUN and SRB_STATUS_ABORTED, the actions on those two codes are different.

It doesn't look like we can get those two in the same status code.

> /*
> * Let upper layer deal with error when
> * sense message is present.
> */
> -
> if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)
> - break;
> + return;
> +
> /*
> * If there is an error; offline the device since all
> * error recovery strategies would have already been @@ -
> 1032,37 +1055,19 @@ static void storvsc_handle_error(struct vmscsi_request
> *vm_srb,
> set_host_byte(scmnd, DID_PASSTHROUGH);
> break;
> /*
> - * On Some Windows hosts TEST_UNIT_READY command can
> return
> - * SRB_STATUS_ERROR, let the upper level code deal with it
> - * based on the sense information.
> + * On some Hyper-V hosts TEST_UNIT_READY command can
> + * return SRB_STATUS_ERROR. Let the upper level code
> + * deal with it based on the sense information.
> */
> case TEST_UNIT_READY:
> break;
> default:
> set_host_byte(scmnd, DID_ERROR);
> }
> - break;
> - case SRB_STATUS_INVALID_LUN:
> - set_host_byte(scmnd, DID_NO_CONNECT);
> - do_work = true;
> - process_err_fn = storvsc_remove_lun;
> - break;
> - case SRB_STATUS_ABORTED:
> - if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID
> &&
> - (asc == 0x2a) && (ascq == 0x9)) {
> - do_work = true;
> - process_err_fn = storvsc_device_scan;
> - /*
> - * Retry the I/O that triggered this.
> - */
> - set_host_byte(scmnd, DID_REQUEUE);
> - }
> - break;
> }
> + return;
>
> - if (!do_work)
> - return;
> -
> +do_work:
> /*
> * We need to schedule work to process this error; schedule it.
> */
> --
> 1.8.3.1

2021-06-09 03:16:03

by Michael Kelley (LINUX)

[permalink] [raw]
Subject: RE: [PATCH 3/3] scsi: storvsc: Correctly handle multiple flags in srb_status

From: Long Li <[email protected]> Sent: Monday, June 7, 2021 3:25 PM
> >
> > Hyper-V is observed to sometimes set multiple flags in the srb_status, such
> > as ABORTED and ERROR. Current code in
> > storvsc_handle_error() handles only a single flag being set, and does nothing
> > when multiple flags are set. Fix this by changing the case statement into a
> > series of "if" statements testing individual flags. The functionality for handling
> > each flag is unchanged.
> >
> > Signed-off-by: Michael Kelley <[email protected]>
> > ---
> > drivers/scsi/storvsc_drv.c | 61 +++++++++++++++++++++++++----------------
> > -----
> > 1 file changed, 33 insertions(+), 28 deletions(-)
> >
> > diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index
> > fff9441..e96d2aa 100644
> > --- a/drivers/scsi/storvsc_drv.c
> > +++ b/drivers/scsi/storvsc_drv.c
> > @@ -1009,17 +1009,40 @@ static void storvsc_handle_error(struct
> > vmscsi_request *vm_srb,
> > struct storvsc_scan_work *wrk;
> > void (*process_err_fn)(struct work_struct *work);
> > struct hv_host_device *host_dev = shost_priv(host);
> > - bool do_work = false;
> >
> > - switch (SRB_STATUS(vm_srb->srb_status)) {
> > - case SRB_STATUS_ERROR:
> > + /*
> > + * In some situations, Hyper-V sets multiple bits in the
> > + * srb_status, such as ABORTED and ERROR. So process them
> > + * individually, with the most specific bits first.
> > + */
> > +
> > + if (vm_srb->srb_status & SRB_STATUS_INVALID_LUN) {
> > + set_host_byte(scmnd, DID_NO_CONNECT);
> > + process_err_fn = storvsc_remove_lun;
> > + goto do_work;
> > + }
> > +
> > + if (vm_srb->srb_status & SRB_STATUS_ABORTED) {
> > + if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID &&
> > + /* Capacity data has changed */
> > + (asc == 0x2a) && (ascq == 0x9)) {
> > + process_err_fn = storvsc_device_scan;
> > + /*
> > + * Retry the I/O that triggered this.
> > + */
> > + set_host_byte(scmnd, DID_REQUEUE);
> > + goto do_work;
> > + }
> > + }
> > +
> > + if (vm_srb->srb_status & SRB_STATUS_ERROR) {
>
> I'm curious on SRB_STATUS_INVALID_LUN and SRB_STATUS_ABORTED, the actions on
> those two codes are different.
>
> It doesn't look like we can get those two in the same status code.

Agreed. I've only observed having ERROR and ABORTED in the same status code.
That happens when a bad PFN is passed on a I/O request, which can be easily
tested. With the old code, having ERROR and ABORTED together resulted in doing
nothing.

The behavior of INVALID_LUN is unchanged by this patch. But with this patch,
if INVALID_LUN and ABORTED should ever occur together, we would process the
INVALID_LUN error, and ignore ABORTED, which I think makes sense.
INVALID_LUN is the most serious error, so we process it first. Then ABORTED is
next in priority for the specific case of "Capacity data has changed". Finally we
handle ERROR.

Michael

>
> > /*
> > * Let upper layer deal with error when
> > * sense message is present.
> > */
> > -
> > if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)
> > - break;
> > + return;
> > +
> > /*
> > * If there is an error; offline the device since all
> > * error recovery strategies would have already been @@ -
> > 1032,37 +1055,19 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
> > set_host_byte(scmnd, DID_PASSTHROUGH);
> > break;
> > /*
> > - * On Some Windows hosts TEST_UNIT_READY command can
> > return
> > - * SRB_STATUS_ERROR, let the upper level code deal with it
> > - * based on the sense information.
> > + * On some Hyper-V hosts TEST_UNIT_READY command can
> > + * return SRB_STATUS_ERROR. Let the upper level code
> > + * deal with it based on the sense information.
> > */
> > case TEST_UNIT_READY:
> > break;
> > default:
> > set_host_byte(scmnd, DID_ERROR);
> > }
> > - break;
> > - case SRB_STATUS_INVALID_LUN:
> > - set_host_byte(scmnd, DID_NO_CONNECT);
> > - do_work = true;
> > - process_err_fn = storvsc_remove_lun;
> > - break;
> > - case SRB_STATUS_ABORTED:
> > - if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID &&
> > - (asc == 0x2a) && (ascq == 0x9)) {
> > - do_work = true;
> > - process_err_fn = storvsc_device_scan;
> > - /*
> > - * Retry the I/O that triggered this.
> > - */
> > - set_host_byte(scmnd, DID_REQUEUE);
> > - }
> > - break;
> > }
> > + return;
> >
> > - if (!do_work)
> > - return;
> > -
> > +do_work:
> > /*
> > * We need to schedule work to process this error; schedule it.
> > */
> > --
> > 1.8.3.1