2013-07-19 23:38:39

by Jörn Engel

[permalink] [raw]
Subject: [PATCH] mpt2sas: don't handle broadcast primitives

The handling of broadcast primitives involves
_scsih_block_io_all_device(), which does what the name implies. I have
observed cases with >60s of blocking io on all devices, caused by a
single bad device. The downsides of this code are obvious, while the
upsides are more elusive.

Signed-off-by: Joern Engel <[email protected]>
---
drivers/scsi/mpt2sas/mpt2sas_base.c | 4 -
drivers/scsi/mpt2sas/mpt2sas_scsih.c | 238 ----------------------------------
2 files changed, 242 deletions(-)

diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c
index 21c0a27..bba2209 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.c
@@ -586,9 +586,6 @@ _base_display_event_data(struct MPT2SAS_ADAPTER *ioc,
printk("\n");
return;
}
- case MPI2_EVENT_SAS_BROADCAST_PRIMITIVE:
- desc = "SAS Broadcast Primitive";
- break;
case MPI2_EVENT_SAS_INIT_DEVICE_STATUS_CHANGE:
desc = "SAS Init Device Status Change";
break;
@@ -4370,7 +4367,6 @@ mpt2sas_base_attach(struct MPT2SAS_ADAPTER *ioc)

/* here we enable the events we care about */
_base_unmask_events(ioc, MPI2_EVENT_SAS_DISCOVERY);
- _base_unmask_events(ioc, MPI2_EVENT_SAS_BROADCAST_PRIMITIVE);
_base_unmask_events(ioc, MPI2_EVENT_SAS_TOPOLOGY_CHANGE_LIST);
_base_unmask_events(ioc, MPI2_EVENT_SAS_DEVICE_STATUS_CHANGE);
_base_unmask_events(ioc, MPI2_EVENT_SAS_ENCL_DEVICE_STATUS_CHANGE);
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index 3fffb95..856a502 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -2915,31 +2915,6 @@ _scsih_fw_event_cleanup_queue(struct MPT2SAS_ADAPTER *ioc)
}

/**
- * _scsih_ublock_io_all_device - unblock every device
- * @ioc: per adapter object
- *
- * change the device state from block to running
- */
-static void
-_scsih_ublock_io_all_device(struct MPT2SAS_ADAPTER *ioc)
-{
- struct MPT2SAS_DEVICE *sas_device_priv_data;
- struct scsi_device *sdev;
-
- shost_for_each_device(sdev, ioc->shost) {
- sas_device_priv_data = sdev->hostdata;
- if (!sas_device_priv_data)
- continue;
- if (!sas_device_priv_data->block)
- continue;
- sas_device_priv_data->block = 0;
- dewtprintk(ioc, sdev_printk(KERN_INFO, sdev, "device_running, "
- "handle(0x%04x)\n",
- sas_device_priv_data->sas_target->handle));
- scsi_internal_device_unblock(sdev, SDEV_RUNNING);
- }
-}
-/**
* _scsih_ublock_io_device - set the device state to SDEV_RUNNING
* @ioc: per adapter object
* @handle: device handle
@@ -2971,34 +2946,6 @@ _scsih_ublock_io_device(struct MPT2SAS_ADAPTER *ioc, u64 sas_address)
}

/**
- * _scsih_block_io_all_device - set the device state to SDEV_BLOCK
- * @ioc: per adapter object
- * @handle: device handle
- *
- * During device pull we need to appropiately set the sdev state.
- */
-static void
-_scsih_block_io_all_device(struct MPT2SAS_ADAPTER *ioc)
-{
- struct MPT2SAS_DEVICE *sas_device_priv_data;
- struct scsi_device *sdev;
-
- shost_for_each_device(sdev, ioc->shost) {
- sas_device_priv_data = sdev->hostdata;
- if (!sas_device_priv_data)
- continue;
- if (sas_device_priv_data->block)
- continue;
- sas_device_priv_data->block = 1;
- dewtprintk(ioc, sdev_printk(KERN_INFO, sdev, "device_blocked, "
- "handle(0x%04x)\n",
- sas_device_priv_data->sas_target->handle));
- scsi_internal_device_block(sdev);
- }
-}
-
-
-/**
* _scsih_block_io_device - set the device state to SDEV_BLOCK
* @ioc: per adapter object
* @handle: device handle
@@ -5829,166 +5776,6 @@ _scsih_sas_enclosure_dev_status_change_event(struct MPT2SAS_ADAPTER *ioc,
}

/**
- * _scsih_sas_broadcast_primitive_event - handle broadcast events
- * @ioc: per adapter object
- * @fw_event: The fw_event_work object
- * Context: user.
- *
- * Return nothing.
- */
-static void
-_scsih_sas_broadcast_primitive_event(struct MPT2SAS_ADAPTER *ioc,
- struct fw_event_work *fw_event)
-{
- struct scsi_cmnd *scmd;
- unsigned long ser_no;
- struct scsi_device *sdev;
- u16 smid, handle;
- u32 lun;
- struct MPT2SAS_DEVICE *sas_device_priv_data;
- u32 termination_count;
- u32 query_count;
- Mpi2SCSITaskManagementReply_t *mpi_reply;
- Mpi2EventDataSasBroadcastPrimitive_t *event_data = fw_event->event_data;
- u16 ioc_status;
- unsigned long flags;
- int r;
- u8 max_retries = 0;
- u8 task_abort_retries;
-
- mutex_lock(&ioc->tm_cmds.mutex);
- dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: enter: phy number(%d), "
- "width(%d)\n", ioc->name, __func__, event_data->PhyNum,
- event_data->PortWidth));
-
- _scsih_block_io_all_device(ioc);
-
- spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
- mpi_reply = ioc->tm_cmds.reply;
-broadcast_aen_retry:
-
- /* sanity checks for retrying this loop */
- if (max_retries++ == 5) {
- dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: giving up\n",
- ioc->name, __func__));
- goto out;
- } else if (max_retries > 1)
- dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: %d retry\n",
- ioc->name, __func__, max_retries - 1));
-
- termination_count = 0;
- query_count = 0;
- for (smid = 1; smid <= ioc->scsiio_depth; smid++) {
- if (ioc->shost_recovery)
- goto out;
- scmd = _scsih_scsi_lookup_get(ioc, smid);
- if (!scmd)
- continue;
- sdev = scmd->device;
- ser_no = scmd->serial_number;
- sas_device_priv_data = sdev->hostdata;
- if (!sas_device_priv_data || !sas_device_priv_data->sas_target)
- continue;
- /* skip hidden raid components */
- if (sas_device_priv_data->sas_target->flags &
- MPT_TARGET_FLAGS_RAID_COMPONENT)
- continue;
- /* skip volumes */
- if (sas_device_priv_data->sas_target->flags &
- MPT_TARGET_FLAGS_VOLUME)
- continue;
-
- handle = sas_device_priv_data->sas_target->handle;
- lun = sas_device_priv_data->lun;
- query_count++;
-
- if (ioc->shost_recovery)
- goto out;
-
- spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
- r = mpt2sas_scsih_issue_tm(ioc, handle, 0, 0, lun,
- MPI2_SCSITASKMGMT_TASKTYPE_QUERY_TASK, smid, 30, 0,
- TM_MUTEX_OFF);
- if (r == FAILED) {
- sdev_printk(KERN_WARNING, sdev,
- "mpt2sas_scsih_issue_tm: FAILED when sending "
- "QUERY_TASK: scmd(%p)\n", scmd);
- spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
- goto broadcast_aen_retry;
- }
- ioc_status = le16_to_cpu(mpi_reply->IOCStatus)
- & MPI2_IOCSTATUS_MASK;
- if (ioc_status != MPI2_IOCSTATUS_SUCCESS) {
- sdev_printk(KERN_WARNING, sdev, "query task: FAILED "
- "with IOCSTATUS(0x%04x), scmd(%p)\n", ioc_status,
- scmd);
- spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
- goto broadcast_aen_retry;
- }
-
- /* see if IO is still owned by IOC and target */
- if (mpi_reply->ResponseCode ==
- MPI2_SCSITASKMGMT_RSP_TM_SUCCEEDED ||
- mpi_reply->ResponseCode ==
- MPI2_SCSITASKMGMT_RSP_IO_QUEUED_ON_IOC) {
- spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
- continue;
- }
- task_abort_retries = 0;
- tm_retry:
- if (task_abort_retries++ == 60) {
- dewtprintk(ioc, printk(MPT2SAS_INFO_FMT
- "%s: ABORT_TASK: giving up\n", ioc->name,
- __func__));
- spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
- goto broadcast_aen_retry;
- }
-
- if (ioc->shost_recovery)
- goto out_no_lock;
-
- r = mpt2sas_scsih_issue_tm(ioc, handle, sdev->channel, sdev->id,
- sdev->lun, MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK, smid, 30,
- ser_no, TM_MUTEX_OFF);
- if (r == FAILED) {
- sdev_printk(KERN_WARNING, sdev,
- "mpt2sas_scsih_issue_tm: ABORT_TASK: FAILED : "
- "scmd(%p)\n", scmd);
- goto tm_retry;
- }
-
- if (task_abort_retries > 1)
- sdev_printk(KERN_WARNING, sdev,
- "mpt2sas_scsih_issue_tm: ABORT_TASK: RETRIES (%d):"
- " scmd(%p)\n",
- task_abort_retries - 1, scmd);
-
- termination_count += le32_to_cpu(mpi_reply->TerminationCount);
- spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
- }
-
- if (ioc->broadcast_aen_pending) {
- dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: loop back due to"
- " pending AEN\n", ioc->name, __func__));
- ioc->broadcast_aen_pending = 0;
- goto broadcast_aen_retry;
- }
-
- out:
- spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
- out_no_lock:
-
- dewtprintk(ioc, printk(MPT2SAS_INFO_FMT
- "%s - exit, query_count = %d termination_count = %d\n",
- ioc->name, __func__, query_count, termination_count));
-
- ioc->broadcast_aen_busy = 0;
- if (!ioc->shost_recovery)
- _scsih_ublock_io_all_device(ioc);
- mutex_unlock(&ioc->tm_cmds.mutex);
-}
-
-/**
* _scsih_sas_discovery_event - handle discovery events
* @ioc: per adapter object
* @fw_event: The fw_event_work object
@@ -7081,8 +6868,6 @@ _scsih_remove_unresponding_sas_devices(struct MPT2SAS_ADAPTER *ioc)
}
printk(MPT2SAS_INFO_FMT "removing unresponding devices: complete\n",
ioc->name);
- /* unblock devices */
- _scsih_ublock_io_all_device(ioc);
}

static void
@@ -7358,10 +7143,6 @@ _firmware_event_work(struct work_struct *work)
_scsih_sas_discovery_event(ioc,
fw_event);
break;
- case MPI2_EVENT_SAS_BROADCAST_PRIMITIVE:
- _scsih_sas_broadcast_primitive_event(ioc,
- fw_event);
- break;
case MPI2_EVENT_SAS_ENCL_DEVICE_STATUS_CHANGE:
_scsih_sas_enclosure_dev_status_change_event(ioc,
fw_event);
@@ -7419,25 +7200,6 @@ mpt2sas_scsih_event_callback(struct MPT2SAS_ADAPTER *ioc, u8 msix_index,
event = le16_to_cpu(mpi_reply->Event);

switch (event) {
- /* handle these */
- case MPI2_EVENT_SAS_BROADCAST_PRIMITIVE:
- {
- Mpi2EventDataSasBroadcastPrimitive_t *baen_data =
- (Mpi2EventDataSasBroadcastPrimitive_t *)
- mpi_reply->EventData;
-
- if (baen_data->Primitive !=
- MPI2_EVENT_PRIMITIVE_ASYNCHRONOUS_EVENT)
- return 1;
-
- if (ioc->broadcast_aen_busy) {
- ioc->broadcast_aen_pending++;
- return 1;
- } else
- ioc->broadcast_aen_busy = 1;
- break;
- }
-
case MPI2_EVENT_SAS_TOPOLOGY_CHANGE_LIST:
_scsih_check_topo_delete_events(ioc,
(Mpi2EventDataSasTopologyChangeList_t *)
--
1.7.10.4


2013-07-19 23:43:22

by Jörn Engel

[permalink] [raw]
Subject: Re: [PATCH] mpt2sas: don't handle broadcast primitives

On Fri, 19 July 2013 18:06:59 -0400, Jörn Engel wrote:
>
> The handling of broadcast primitives involves
> _scsih_block_io_all_device(), which does what the name implies. I have
> observed cases with >60s of blocking io on all devices, caused by a
> single bad device. The downsides of this code are obvious, while the
> upsides are more elusive.

And since this patch looks more like an April fools joke: I have
gathered a few machine-months of testing, including tortures that
specifically stress the removed codepaths. This is a serious
submission and unless someone can show me a _very_ good reason for
keeping the deleted code, I would like to get it merged.

Jörn

--
Computer system analysis is like child-rearing; you can do grievous damage,
but you cannot ensure success."
-- Tom DeMarco

2013-07-24 20:42:46

by Baruch Even

[permalink] [raw]
Subject: Re: [PATCH] mpt2sas: don't handle broadcast primitives

On Sat, Jul 20, 2013 at 1:11 AM, Jörn Engel <[email protected]> wrote:
> On Fri, 19 July 2013 18:06:59 -0400, Jörn Engel wrote:
>>
>> The handling of broadcast primitives involves
>> _scsih_block_io_all_device(), which does what the name implies. I have
>> observed cases with >60s of blocking io on all devices, caused by a
>> single bad device. The downsides of this code are obvious, while the
>> upsides are more elusive.
>
> And since this patch looks more like an April fools joke: I have
> gathered a few machine-months of testing, including tortures that
> specifically stress the removed codepaths. This is a serious
> submission and unless someone can show me a _very_ good reason for
> keeping the deleted code, I would like to get it merged.

This would seem to cause an IO pause through the host whenever there
is a disk removal/insertion or SES (SAS expander) change which seems
like a bad proposition indeed. The part of the work that this code
seems to handle is that when such a change happens something needs to
detect the dead IOs (f.ex. surprise disk removal) but I believe that
the SAS HBA firmware will do that internally already so I do think
this code is needless.

The only thing I'd like not to lose is the actual notification and
ability to log the fact that there was a broadcast notification on the
SAS network.

Baruch

2013-07-24 20:55:56

by Jörn Engel

[permalink] [raw]
Subject: Re: [PATCH] mpt2sas: don't handle broadcast primitives

On Wed, 24 July 2013 23:42:22 +0300, Baruch Even wrote:
> On Sat, Jul 20, 2013 at 1:11 AM, Jörn Engel <[email protected]> wrote:
> > On Fri, 19 July 2013 18:06:59 -0400, Jörn Engel wrote:
> >>
> >> The handling of broadcast primitives involves
> >> _scsih_block_io_all_device(), which does what the name implies. I have
> >> observed cases with >60s of blocking io on all devices, caused by a
> >> single bad device. The downsides of this code are obvious, while the
> >> upsides are more elusive.
> >
> > And since this patch looks more like an April fools joke: I have
> > gathered a few machine-months of testing, including tortures that
> > specifically stress the removed codepaths. This is a serious
> > submission and unless someone can show me a _very_ good reason for
> > keeping the deleted code, I would like to get it merged.
>
> This would seem to cause an IO pause through the host whenever there
> is a disk removal/insertion or SES (SAS expander) change which seems
> like a bad proposition indeed. The part of the work that this code
> seems to handle is that when such a change happens something needs to
> detect the dead IOs (f.ex. surprise disk removal) but I believe that
> the SAS HBA firmware will do that internally already so I do think
> this code is needless.
>
> The only thing I'd like not to lose is the actual notification and
> ability to log the fact that there was a broadcast notification on the
> SAS network.

I agree logging would be nice. However my attempts to keep logging and
remove the IO pause were unsuccessful. Apparently something inside
_scsih_sas_broadcast_primitive_event() is required to get future
events. If someone from LSI with data sheets and understanding of the
firmware can do a better patch, I would be happy.

Jörn

--
The story so far:
In the beginning the Universe was created. This has made a lot
of people very angry and been widely regarded as a bad move.
-- Douglas Adams