2021-02-25 21:45:44

by Tyrel Datwyler

[permalink] [raw]
Subject: [PATCH v3 0/5] ibmvfc: hard reset fixes

This series contains a minor simplification of ibmvfc_init_sub_crqs() followed
by a couple of fixes for sub-CRQ handling which affect hard reset of the
client/host adapter CRQ pair.

changes in v3:
* Patch 1 & 5: moved ibmvfc_init_sub_crqs out of the locked path

changes in v2:
* added Reviewed-by tags for patches 1-3
* Patch 4: use rtas_busy_delay to test rc and delay correct amount of time
* Patch 5: (new) similar fix for LPM case where CRQ pair needs re-enablement

Tyrel Datwyler (5):
powerpc/pseries: extract host bridge from pci_bus prior to bus removal
ibmvfc: simplify handling of sub-CRQ initialization
ibmvfc: fix invalid sub-CRQ handles after hard reset
ibmvfc: treat H_CLOSED as success during sub-CRQ registration
ibmvfc: store return code of H_FREE_SUB_CRQ during cleanup

arch/powerpc/platforms/pseries/pci_dlpar.c | 4 +-
drivers/scsi/ibmvscsi/ibmvfc.c | 49 ++++++++++------------
2 files changed, 26 insertions(+), 27 deletions(-)

--
2.27.0


2021-02-25 21:46:02

by Tyrel Datwyler

[permalink] [raw]
Subject: [PATCH v3 2/5] ibmvfc: fix invalid sub-CRQ handles after hard reset

A hard reset results in a complete transport disconnect such that the
CRQ connection with the partner VIOS is broken. This has the side effect
of also invalidating the associated sub-CRQs. The current code assumes
that the sub-CRQs are preserved, resulting in a protocol violation after
trying to reconnect them with the VIOS. This introduces an infinite loop
such that the VIOS forces a disconnect after each subsequent attempt to
re-register with invalid handles.

Avoid the aforementioned issue by releasing the sub-CRQs prior to CRQ
disconnect, and driving a reinitialization of the sub-CRQs once a new
CRQ is registered with the hypervisor.

Fixes: faacf8c5f1d5 ("ibmvfc: add alloc/dealloc routines for SCSI Sub-CRQ Channels")
Signed-off-by: Tyrel Datwyler <[email protected]>
Reviewed-by: Brian King <[email protected]>
---
drivers/scsi/ibmvscsi/ibmvfc.c | 21 +++++++++------------
1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 384960036f8b..2cca55f2e464 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -158,6 +158,9 @@ static void ibmvfc_npiv_logout(struct ibmvfc_host *);
static void ibmvfc_tgt_implicit_logout_and_del(struct ibmvfc_target *);
static void ibmvfc_tgt_move_login(struct ibmvfc_target *);

+static void ibmvfc_release_sub_crqs(struct ibmvfc_host *);
+static void ibmvfc_init_sub_crqs(struct ibmvfc_host *);
+
static const char *unknown_error = "unknown error";

static long h_reg_sub_crq(unsigned long unit_address, unsigned long ioba,
@@ -926,8 +929,8 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
unsigned long flags;
struct vio_dev *vdev = to_vio_dev(vhost->dev);
struct ibmvfc_queue *crq = &vhost->crq;
- struct ibmvfc_queue *scrq;
- int i;
+
+ ibmvfc_release_sub_crqs(vhost);

/* Close the CRQ */
do {
@@ -936,6 +939,8 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));

+ ibmvfc_init_sub_crqs(vhost);
+
spin_lock_irqsave(vhost->host->host_lock, flags);
spin_lock(vhost->crq.q_lock);
vhost->state = IBMVFC_NO_CRQ;
@@ -947,16 +952,6 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
memset(crq->msgs.crq, 0, PAGE_SIZE);
crq->cur = 0;

- if (vhost->scsi_scrqs.scrqs) {
- for (i = 0; i < nr_scsi_hw_queues; i++) {
- scrq = &vhost->scsi_scrqs.scrqs[i];
- spin_lock(scrq->q_lock);
- memset(scrq->msgs.scrq, 0, PAGE_SIZE);
- scrq->cur = 0;
- spin_unlock(scrq->q_lock);
- }
- }
-
/* And re-open it again */
rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address,
crq->msg_token, PAGE_SIZE);
@@ -966,6 +961,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
dev_warn(vhost->dev, "Partner adapter not ready\n");
else if (rc != 0)
dev_warn(vhost->dev, "Couldn't register crq (rc=%d)\n", rc);
+
spin_unlock(vhost->crq.q_lock);
spin_unlock_irqrestore(vhost->host->host_lock, flags);

@@ -5692,6 +5688,7 @@ static void ibmvfc_deregister_scsi_channel(struct ibmvfc_host *vhost, int index)

free_irq(scrq->irq, scrq);
irq_dispose_mapping(scrq->irq);
+ scrq->irq = 0;

do {
rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address,
--
2.27.0

2021-02-25 21:46:20

by Tyrel Datwyler

[permalink] [raw]
Subject: [PATCH v3 3/5] ibmvfc: treat H_CLOSED as success during sub-CRQ registration

A non-zero return code for H_REG_SUB_CRQ is currently treated as a
failure, aborting sub-CRQ setup. The case of H_CLOSED should not be
treated as a failure. This return code translates to a successful
sub-CRQ registration by the hypervisor, and is meant to communicate back
that there is currently no partner VIOS CRQ connection established
yet. This is a common occurrence during a disconnect where the client
adapter can possibly come back up prior to the partner adapter.

For a non-zero return code from H_REG_SUB_CRQ, treat H_CLOSED as success
so that the sub-CRQs are successfully set up.

Fixes: faacf8c5f1d5 ("ibmvfc: add alloc/dealloc routines for SCSI Sub-CRQ Channels")
Signed-off-by: Tyrel Datwyler <[email protected]>
Reviewed-by: Brian King <[email protected]>
---
drivers/scsi/ibmvscsi/ibmvfc.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 2cca55f2e464..274c5a1fac9c 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -5636,7 +5636,8 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost,
rc = h_reg_sub_crq(vdev->unit_address, scrq->msg_token, PAGE_SIZE,
&scrq->cookie, &scrq->hw_irq);

- if (rc) {
+ /* H_CLOSED indicates successful register, but no CRQ partner */
+ if (rc && rc != H_CLOSED) {
dev_warn(dev, "Error registering sub-crq: %d\n", rc);
if (rc == H_PARAMETER)
dev_warn_once(dev, "Firmware may not support MQ\n");
--
2.27.0

2021-02-25 21:46:59

by Tyrel Datwyler

[permalink] [raw]
Subject: [PATCH v3 4/5] ibmvfc: store return code of H_FREE_SUB_CRQ during cleanup

The H_FREE_SUB_CRQ hypercall can return a retry delay return code that
indicates the call needs to be retried after a specific amount of time.
The error path to free a sub-CRQ in case of a failure during channel
registration fails to capture the return code of H_FREE_SUB_CRQ, which
results in the delay loop being skipped whenever a retry delay return
code is received.

Store the return code result of the H_FREE_SUB_CRQ call such that the
return code check in the delay loop evaluates a meaningful value. Also,
use rtas_busy_delay() to check the rc value and delay for the
appropriate amount of time.

Fixes: 9288d35d70b5 ("ibmvfc: map/request irq and register Sub-CRQ interrupt handler")
Signed-off-by: Tyrel Datwyler <[email protected]>
Reviewed-by: Brian King <[email protected]>
---
drivers/scsi/ibmvscsi/ibmvfc.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 274c5a1fac9c..1bb08e5f3674 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -21,6 +21,7 @@
#include <linux/bsg-lib.h>
#include <asm/firmware.h>
#include <asm/irq.h>
+#include <asm/rtas.h>
#include <asm/vio.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
@@ -5670,8 +5671,8 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost,

irq_failed:
do {
- plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, scrq->cookie);
- } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
+ rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, scrq->cookie);
+ } while (rtas_busy_delay(rc));
reg_failed:
ibmvfc_free_queue(vhost, scrq);
LEAVE;
--
2.27.0

2021-02-25 21:49:08

by Tyrel Datwyler

[permalink] [raw]
Subject: [PATCH v3 5/5] ibmvfc: reinitialize sub-CRQs and perform channel enquiry after LPM

A live partition migration (LPM) results in a CRQ disconnect similar to
a hard reset. In the LPM case the hypervisor mostly preserves the CRQ
transport such that it simply needs to be re-enabled. However, the
capabilities may have changed, such as fewer channels, or no channels at
all. Further, it is possible that there may be sub-CRQ support, but no
channel support. The CRQ re-enable path currently doesn't take any of
this into consideration.

For simplicity, release and reinitialize the sub-CRQs during re-enable,
and set do_enquiry and using_channels to the appropriate values to
trigger channel renegotiation.

Signed-off-by: Tyrel Datwyler <[email protected]>
---
drivers/scsi/ibmvscsi/ibmvfc.c | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 1bb08e5f3674..6bbc2697ad5a 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -903,6 +903,9 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost)
{
int rc = 0;
struct vio_dev *vdev = to_vio_dev(vhost->dev);
+ unsigned long flags;
+
+ ibmvfc_release_sub_crqs(vhost);

/* Re-enable the CRQ */
do {
@@ -914,6 +917,15 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost)
if (rc)
dev_err(vhost->dev, "Error enabling adapter (rc=%d)\n", rc);

+ ibmvfc_init_sub_crqs(vhost);
+
+ spin_lock_irqsave(vhost->host->host_lock, flags);
+ spin_lock(vhost->crq.q_lock);
+ vhost->do_enquiry = 1;
+ vhost->using_channels = 0;
+ spin_unlock(vhost->crq.q_lock);
+ spin_unlock_irqrestore(vhost->host->host_lock, flags);
+
return rc;
}

--
2.27.0

2021-02-25 22:14:05

by Tyrel Datwyler

[permalink] [raw]
Subject: Re: [PATCH v3 5/5] ibmvfc: reinitialize sub-CRQs and perform channel enquiry after LPM

On 2/25/21 1:42 PM, Tyrel Datwyler wrote:
> A live partition migration (LPM) results in a CRQ disconnect similar to
> a hard reset. In the LPM case the hypervisor mostly preserves the CRQ
> transport such that it simply needs to be re-enabled. However, the
> capabilities may have changed, such as fewer channels, or no channels at
> all. Further, it is possible that there may be sub-CRQ support, but no
> channel support. The CRQ re-enable path currently doesn't take any of
> this into consideration.
>
> For simplicity, release and reinitialize the sub-CRQs during re-enable,
> and set do_enquiry and using_channels to the appropriate values to
> trigger channel renegotiation.
>
> Signed-off-by: Tyrel Datwyler <[email protected]>
> ---
> drivers/scsi/ibmvscsi/ibmvfc.c | 12 ++++++++++++
> 1 file changed, 12 insertions(+)
>
> diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
> index 1bb08e5f3674..6bbc2697ad5a 100644
> --- a/drivers/scsi/ibmvscsi/ibmvfc.c
> +++ b/drivers/scsi/ibmvscsi/ibmvfc.c
> @@ -903,6 +903,9 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost)
> {
> int rc = 0;
> struct vio_dev *vdev = to_vio_dev(vhost->dev);
> + unsigned long flags;
> +
> + ibmvfc_release_sub_crqs(vhost);
>
> /* Re-enable the CRQ */
> do {
> @@ -914,6 +917,15 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost)
> if (rc)
> dev_err(vhost->dev, "Error enabling adapter (rc=%d)\n", rc);
>
> + ibmvfc_init_sub_crqs(vhost);

Realized that if this fails it sets the do_enquiry flag to zero, which the
locked region below will then flip back to one. Need to move the sub-CRQ init
to after the locked region.
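
Roughly what I have in mind for v4 (untested sketch, unchanged parts of
ibmvfc_reenable_crq_queue() elided):

static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost)
{
	int rc = 0;
	struct vio_dev *vdev = to_vio_dev(vhost->dev);
	unsigned long flags;

	ibmvfc_release_sub_crqs(vhost);

	/* ... existing re-enable retry loop and error reporting ... */

	spin_lock_irqsave(vhost->host->host_lock, flags);
	spin_lock(vhost->crq.q_lock);
	vhost->do_enquiry = 1;
	vhost->using_channels = 0;
	spin_unlock(vhost->crq.q_lock);
	spin_unlock_irqrestore(vhost->host->host_lock, flags);

	/*
	 * Initialize sub-CRQs after the locked region so that a failure in
	 * ibmvfc_init_sub_crqs() can clear do_enquiry without the assignments
	 * above flipping it back to one.
	 */
	ibmvfc_init_sub_crqs(vhost);

	return rc;
}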

-T

> +
> + spin_lock_irqsave(vhost->host->host_lock, flags);
> + spin_lock(vhost->crq.q_lock);
> + vhost->do_enquiry = 1;
> + vhost->using_channels = 0;
> + spin_unlock(vhost->crq.q_lock);
> + spin_unlock_irqrestore(vhost->host->host_lock, flags);
> +
> return rc;
> }
>
>

2021-02-25 22:19:21

by Tyrel Datwyler

[permalink] [raw]
Subject: Re: [PATCH v3 2/5] ibmvfc: fix invalid sub-CRQ handles after hard reset

On 2/25/21 1:42 PM, Tyrel Datwyler wrote:
> A hard reset results in a complete transport disconnect such that the
> CRQ connection with the partner VIOS is broken. This has the side effect
> of also invalidating the associated sub-CRQs. The current code assumes
> that the sub-CRQs are preserved, resulting in a protocol violation after
> trying to reconnect them with the VIOS. This introduces an infinite loop
> such that the VIOS forces a disconnect after each subsequent attempt to
> re-register with invalid handles.
>
> Avoid the aforementioned issue by releasing the sub-CRQs prior to CRQ
> disconnect, and driving a reinitialization of the sub-CRQs once a new
> CRQ is registered with the hypervisor.
>
> Fixes: faacf8c5f1d5 ("ibmvfc: add alloc/dealloc routines for SCSI Sub-CRQ Channels")
> Signed-off-by: Tyrel Datwyler <[email protected]>
> Reviewed-by: Brian King <[email protected]>
> ---
> drivers/scsi/ibmvscsi/ibmvfc.c | 21 +++++++++------------
> 1 file changed, 9 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
> index 384960036f8b..2cca55f2e464 100644
> --- a/drivers/scsi/ibmvscsi/ibmvfc.c
> +++ b/drivers/scsi/ibmvscsi/ibmvfc.c
> @@ -158,6 +158,9 @@ static void ibmvfc_npiv_logout(struct ibmvfc_host *);
> static void ibmvfc_tgt_implicit_logout_and_del(struct ibmvfc_target *);
> static void ibmvfc_tgt_move_login(struct ibmvfc_target *);
>
> +static void ibmvfc_release_sub_crqs(struct ibmvfc_host *);
> +static void ibmvfc_init_sub_crqs(struct ibmvfc_host *);
> +
> static const char *unknown_error = "unknown error";
>
> static long h_reg_sub_crq(unsigned long unit_address, unsigned long ioba,
> @@ -926,8 +929,8 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
> unsigned long flags;
> struct vio_dev *vdev = to_vio_dev(vhost->dev);
> struct ibmvfc_queue *crq = &vhost->crq;
> - struct ibmvfc_queue *scrq;
> - int i;
> +
> + ibmvfc_release_sub_crqs(vhost);
>
> /* Close the CRQ */
> do {
> @@ -936,6 +939,8 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
> rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address);
> } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
>
> + ibmvfc_init_sub_crqs(vhost);

This has the same issue as patch 5 in that if we fail to set up the sub-CRQs,
do_enquiry will be set to zero, but the locked code region below will then flip
it back to one, which we don't want.
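
Same fix as noted on patch 5, roughly (sketch of the tail of
ibmvfc_reset_crq(), unchanged parts elided):

	/* ... H_REG_CRQ re-registration and warnings as before ... */

	spin_unlock(vhost->crq.q_lock);
	spin_unlock_irqrestore(vhost->host->host_lock, flags);

	/*
	 * Sub-CRQ init moved below the locked region so a failure can leave
	 * do_enquiry cleared instead of having it flipped back to one.
	 */
	ibmvfc_init_sub_crqs(vhost);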

-T

> +
> spin_lock_irqsave(vhost->host->host_lock, flags);
> spin_lock(vhost->crq.q_lock);
> vhost->state = IBMVFC_NO_CRQ;
> @@ -947,16 +952,6 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
> memset(crq->msgs.crq, 0, PAGE_SIZE);
> crq->cur = 0;
>
> - if (vhost->scsi_scrqs.scrqs) {
> - for (i = 0; i < nr_scsi_hw_queues; i++) {
> - scrq = &vhost->scsi_scrqs.scrqs[i];
> - spin_lock(scrq->q_lock);
> - memset(scrq->msgs.scrq, 0, PAGE_SIZE);
> - scrq->cur = 0;
> - spin_unlock(scrq->q_lock);
> - }
> - }
> -
> /* And re-open it again */
> rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address,
> crq->msg_token, PAGE_SIZE);
> @@ -966,6 +961,7 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
> dev_warn(vhost->dev, "Partner adapter not ready\n");
> else if (rc != 0)
> dev_warn(vhost->dev, "Couldn't register crq (rc=%d)\n", rc);
> +
> spin_unlock(vhost->crq.q_lock);
> spin_unlock_irqrestore(vhost->host->host_lock, flags);
>
> @@ -5692,6 +5688,7 @@ static void ibmvfc_deregister_scsi_channel(struct ibmvfc_host *vhost, int index)
>
> free_irq(scrq->irq, scrq);
> irq_dispose_mapping(scrq->irq);
> + scrq->irq = 0;
>
> do {
> rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address,
>