This patchset is based Johannes's patch
"scsi: sas: scsi_queue_work can fail, so make callers aware"
Now the libsas hotplug has some issues, Dan Williams report
a similar bug here before
https://www.mail-archive.com/[email protected]/msg39187.html
The issues we have found
1. if LLDD burst reports lots of phy-up/phy-down sas events, some events
may lost because a same sas events is pending now, finally libsas topo
may different the hardware.
2. receive a phy down sas event, libsas call sas_deform_port to remove
devices, it would first delete the sas port, then put a destruction
discovery event in a new work, and queue it at the tail of workqueue,
once the sas port be deleted, its children device will be deleted too,
when the destruction work start, it will found the target device has
been removed, and report a sysfs warnning.
3. since a hotplug process will be devided into several works, if a phy up
sas event insert into phydown works, like
destruction work ---> PORTE_BYTES_DMAED (sas_form_port) ---->PHYE_LOSS_OF_SIGNAL
the hot remove flow would broken by PORTE_BYTES_DMAED event, it's not
we expected, and issues would occur.
The first patch fix the sas events lost, and the second one introudce wait-complete
to fix the hotplug order issues.
v2->v3: some code improvements suggested by Johannes and John,
split v2 patch 2 into several small pathes.
v1->v2: some code improvements suggested by John Garry
Yijing Wang (7):
libsas: Use static sas event pool to appease sas event lost
libsas: remove unused port_gone_completion
libsas: Use new workqueue to run sas event
libsas: add sas event wait-complete support
libsas: add a new workqueue to run probe/destruct discovery event
libsas: add wait-complete support to sync discovery event
libsas: release disco mutex during waiting in sas_ex_discover_end_dev
drivers/scsi/libsas/sas_discover.c | 58 +++++++---
drivers/scsi/libsas/sas_event.c | 212 ++++++++++++++++++++++++++++++++-----
drivers/scsi/libsas/sas_expander.c | 22 +++-
drivers/scsi/libsas/sas_init.c | 21 ++--
drivers/scsi/libsas/sas_internal.h | 64 +++++++++++
drivers/scsi/libsas/sas_phy.c | 48 +++------
drivers/scsi/libsas/sas_port.c | 22 ++--
include/scsi/libsas.h | 27 +++--
8 files changed, 373 insertions(+), 101 deletions(-)
--
2.5.0
Introduce wait-complete for libsas sas event processing,
execute sas port create/destruct in sync.
Signed-off-by: Yijing Wang <[email protected]>
CC: John Garry <[email protected]>
CC: Johannes Thumshirn <[email protected]>
CC: Ewan Milne <[email protected]>
CC: Christoph Hellwig <[email protected]>
CC: Tomas Henzl <[email protected]>
CC: Dan Williams <[email protected]>
---
drivers/scsi/libsas/sas_discover.c | 41 ++++++++++++++++++++++++++++----------
drivers/scsi/libsas/sas_internal.h | 34 +++++++++++++++++++++++++++++++
drivers/scsi/libsas/sas_port.c | 4 ++++
include/scsi/libsas.h | 5 ++++-
4 files changed, 72 insertions(+), 12 deletions(-)
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index 60de662..5d4a3a8 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -525,16 +525,43 @@ static void sas_revalidate_domain(struct work_struct *work)
mutex_unlock(&ha->disco_mutex);
}
+static const work_func_t sas_event_fns[DISC_NUM_EVENTS] = {
+ [DISCE_DISCOVER_DOMAIN] = sas_discover_domain,
+ [DISCE_REVALIDATE_DOMAIN] = sas_revalidate_domain,
+ [DISCE_PROBE] = sas_probe_devices,
+ [DISCE_SUSPEND] = sas_suspend_devices,
+ [DISCE_RESUME] = sas_resume_devices,
+ [DISCE_DESTRUCT] = sas_destruct_devices,
+};
+
+/* a simple wrapper for sas discover event funtions */
+static void sas_discover_common_fn(struct work_struct *work)
+{
+ struct sas_discovery_event *ev = to_sas_discovery_event(work);
+ struct asd_sas_port *port = ev->port;
+
+ sas_event_fns[ev->type](work);
+ sas_port_put(port);
+}
+
+
/* ---------- Events ---------- */
static void sas_chain_work(struct sas_ha_struct *ha, struct sas_work *sw)
{
+ int ret;
+ struct sas_discovery_event *ev = to_sas_discovery_event(&sw->work);
+ struct asd_sas_port *port = ev->port;
+
/* chained work is not subject to SA_HA_DRAINING or
* SAS_HA_REGISTERED, because it is either submitted in the
* workqueue, or known to be submitted from a context that is
* not racing against draining
*/
- scsi_queue_work(ha->core.shost, &sw->work);
+ sas_port_get(port);
+ ret = scsi_queue_work(ha->core.shost, &sw->work);
+ if (ret != 1)
+ sas_port_put(port);
}
static void sas_chain_event(int event, unsigned long *pending,
@@ -575,18 +602,10 @@ void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *port)
{
int i;
- static const work_func_t sas_event_fns[DISC_NUM_EVENTS] = {
- [DISCE_DISCOVER_DOMAIN] = sas_discover_domain,
- [DISCE_REVALIDATE_DOMAIN] = sas_revalidate_domain,
- [DISCE_PROBE] = sas_probe_devices,
- [DISCE_SUSPEND] = sas_suspend_devices,
- [DISCE_RESUME] = sas_resume_devices,
- [DISCE_DESTRUCT] = sas_destruct_devices,
- };
-
disc->pending = 0;
for (i = 0; i < DISC_NUM_EVENTS; i++) {
- INIT_SAS_WORK(&disc->disc_work[i].work, sas_event_fns[i]);
+ INIT_SAS_WORK(&disc->disc_work[i].work, sas_discover_common_fn);
disc->disc_work[i].port = port;
+ disc->disc_work[i].type = i;
}
}
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index f03ce64..890b5d26 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -100,6 +100,40 @@ void sas_free_device(struct kref *kref);
extern const work_func_t sas_phy_event_fns[PHY_NUM_EVENTS];
extern const work_func_t sas_port_event_fns[PORT_NUM_EVENTS];
+static void sas_complete_event(struct kref *kref)
+{
+ struct asd_sas_port *port = container_of(kref, struct asd_sas_port, ref);
+
+ complete_all(&port->completion);
+}
+
+static inline void sas_port_put(struct asd_sas_port *port)
+{
+ if (port->is_sync)
+ kref_put(&port->ref, sas_complete_event);
+}
+
+static inline void sas_port_wait_init(struct asd_sas_port *port)
+{
+ init_completion(&port->completion);
+ kref_init(&port->ref);
+ port->is_sync = true;
+}
+
+static inline void sas_port_wait_completion(
+ struct asd_sas_port *port)
+{
+ sas_port_put(port);
+ wait_for_completion(&port->completion);
+ port->is_sync = false;
+}
+
+static inline void sas_port_get(struct asd_sas_port *port)
+{
+ if (port && port->is_sync)
+ kref_get(&port->ref);
+}
+
#ifdef CONFIG_SCSI_SAS_HOST_SMP
extern int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
struct request *rsp);
diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
index 9326628..d589adb 100644
--- a/drivers/scsi/libsas/sas_port.c
+++ b/drivers/scsi/libsas/sas_port.c
@@ -191,7 +191,9 @@ static void sas_form_port(struct asd_sas_phy *phy)
if (si->dft->lldd_port_formed)
si->dft->lldd_port_formed(phy);
+ sas_port_wait_init(port);
sas_discover_event(phy->port, DISCE_DISCOVER_DOMAIN);
+ sas_port_wait_completion(port);
}
/**
@@ -218,7 +220,9 @@ void sas_deform_port(struct asd_sas_phy *phy, int gone)
dev->pathways--;
if (port->num_phys == 1) {
+ sas_port_wait_init(port);
sas_unregister_domain_devices(port, gone);
+ sas_port_wait_completion(port);
sas_port_delete(port->port);
port->port = NULL;
} else {
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index a01ca42..c2ef05e 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -242,6 +242,7 @@ static inline void INIT_SAS_WORK(struct sas_work *sw, void (*fn)(struct work_str
struct sas_discovery_event {
struct sas_work work;
struct asd_sas_port *port;
+ enum discover_event type;
};
static inline struct sas_discovery_event *to_sas_discovery_event(struct work_struct *work)
@@ -273,7 +274,9 @@ struct asd_sas_port {
struct sas_work work;
int suspended;
-
+ struct kref ref;
+ struct completion completion;
+ bool is_sync;
/* public: */
int id;
--
2.5.0
Introduce a sync flag to tag discovery event whether need to
sync execute, per-event wait-complete ensure sync.
Signed-off-by: Yijing Wang <[email protected]>
CC: John Garry <[email protected]>
CC: Johannes Thumshirn <[email protected]>
CC: Ewan Milne <[email protected]>
CC: Christoph Hellwig <[email protected]>
CC: Tomas Henzl <[email protected]>
CC: Dan Williams <[email protected]>
---
drivers/scsi/libsas/sas_discover.c | 8 ++++++--
drivers/scsi/libsas/sas_expander.c | 12 +++++++++++-
drivers/scsi/libsas/sas_internal.h | 27 +++++++++++++++++++++++++++
include/scsi/libsas.h | 2 ++
4 files changed, 46 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index a25d648..d68f8dd 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -378,6 +378,7 @@ void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *dev)
list_del_init(&dev->disco_list_node);
sas_rphy_free(dev->rphy);
sas_unregister_common_dev(port, dev);
+ sas_disc_cancel_sync(&port->disc.disc_work[DISCE_DESTRUCT]);
return;
}
@@ -541,6 +542,7 @@ static void sas_discover_common_fn(struct work_struct *work)
struct asd_sas_port *port = ev->port;
sas_event_fns[ev->type](work);
+ sas_disc_wakeup(ev);
sas_port_put(port);
}
@@ -571,8 +573,10 @@ static void sas_chain_work(struct sas_ha_struct *ha, struct sas_work *sw)
else
ret = scsi_queue_work(ha->core.shost, &sw->work);
- if (ret != 1)
+ if (ret != 1) {
sas_port_put(port);
+ sas_disc_cancel_sync(ev);
+ }
}
static void sas_chain_event(int event, unsigned long *pending,
@@ -592,9 +596,9 @@ int sas_discover_event(struct asd_sas_port *port, enum discover_event ev)
{
struct sas_discovery *disc;
+ disc = &port->disc;
if (!port)
return 0;
- disc = &port->disc;
BUG_ON(ev >= DISC_NUM_EVENTS);
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 570b2cb..9d26c28 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -822,14 +822,18 @@ static struct domain_device *sas_ex_discover_end_dev(
list_add_tail(&child->disco_list_node, &parent->port->disco_list);
+ sas_disc_wait_init(child->port, DISCE_PROBE);
res = sas_discover_sata(child);
if (res) {
+ sas_disc_cancel_sync(&child->port->disc.disc_work[DISCE_PROBE]);
SAS_DPRINTK("sas_discover_sata() for device %16llx at "
"%016llx:0x%x returned 0x%x\n",
SAS_ADDR(child->sas_addr),
SAS_ADDR(parent->sas_addr), phy_id, res);
goto out_list_del;
}
+ sas_disc_wait_completion(child->port, DISCE_PROBE);
+
} else
#endif
if (phy->attached_tproto & SAS_PROTOCOL_SSP) {
@@ -847,14 +851,17 @@ static struct domain_device *sas_ex_discover_end_dev(
list_add_tail(&child->disco_list_node, &parent->port->disco_list);
+ sas_disc_wait_init(child->port, DISCE_PROBE);
res = sas_discover_end_dev(child);
if (res) {
+ sas_disc_cancel_sync(&child->port->disc.disc_work[DISCE_PROBE]);
SAS_DPRINTK("sas_discover_end_dev() for device %16llx "
"at %016llx:0x%x returned 0x%x\n",
SAS_ADDR(child->sas_addr),
SAS_ADDR(parent->sas_addr), phy_id, res);
goto out_list_del;
}
+ sas_disc_wait_completion(child->port, DISCE_PROBE);
} else {
SAS_DPRINTK("target proto 0x%x at %016llx:0x%x not handled\n",
phy->attached_tproto, SAS_ADDR(parent->sas_addr),
@@ -1890,8 +1897,11 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent,
if (child->dev_type == SAS_EDGE_EXPANDER_DEVICE ||
child->dev_type == SAS_FANOUT_EXPANDER_DEVICE)
sas_unregister_ex_tree(parent->port, child);
- else
+ else {
+ sas_disc_wait_init(parent->port, DISCE_DESTRUCT);
sas_unregister_dev(parent->port, child);
+ sas_disc_wait_completion(parent->port, DISCE_DESTRUCT);
+ }
found = child;
break;
}
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index 890b5d26..09a9b10 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -134,6 +134,33 @@ static inline void sas_port_get(struct asd_sas_port *port)
kref_get(&port->ref);
}
+static inline void sas_disc_cancel_sync(struct sas_discovery_event *event)
+{
+ event->is_sync = false;
+}
+
+static inline void sas_disc_wakeup(struct sas_discovery_event *event)
+{
+ if (event->is_sync)
+ complete(&event->completion);
+}
+
+static inline void sas_disc_wait_init(struct asd_sas_port *port,
+ enum discover_event event)
+{
+ port->disc.disc_work[event].is_sync = true;
+ init_completion(&port->disc.disc_work[event].completion);
+}
+
+static inline void sas_disc_wait_completion(struct asd_sas_port *port,
+ enum discover_event event)
+{
+ if (port->disc.disc_work[event].is_sync) {
+ wait_for_completion(&port->disc.disc_work[event].completion);
+ port->disc.disc_work[event].is_sync = false;
+ }
+}
+
#ifdef CONFIG_SCSI_SAS_HOST_SMP
extern int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
struct request *rsp);
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 4bcb9fe..21e9fb140 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -243,6 +243,8 @@ struct sas_discovery_event {
struct sas_work work;
struct asd_sas_port *port;
enum discover_event type;
+ bool is_sync;
+ struct completion completion;
};
static inline struct sas_discovery_event *to_sas_discovery_event(struct work_struct *work)
--
2.5.0
Disco mutex was introudced to prevent domain rediscovery competing
with ata error handling(87c8331). If we have already hold the lock
in sas_revalidate_domain and sync executing probe, deadlock caused,
because, sas_probe_sata() also need hold disco_mutex. Since disco mutex
use to prevent revalidata domain happen during ata error handler,
it should be safe to release disco mutex when sync probe, because
no new revalidate domain event would be process until the sync return,
and the current sas revalidate domain finish.
Signed-off-by: Yijing Wang <[email protected]>
CC: John Garry <[email protected]>
CC: Johannes Thumshirn <[email protected]>
CC: Ewan Milne <[email protected]>
CC: Christoph Hellwig <[email protected]>
CC: Tomas Henzl <[email protected]>
CC: Dan Williams <[email protected]>
---
drivers/scsi/libsas/sas_expander.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 9d26c28..077024e 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -776,6 +776,7 @@ static struct domain_device *sas_ex_discover_end_dev(
struct ex_phy *phy = &parent_ex->ex_phy[phy_id];
struct domain_device *child = NULL;
struct sas_rphy *rphy;
+ bool prev_lock;
int res;
if (phy->attached_sata_host || phy->attached_sata_ps)
@@ -803,6 +804,7 @@ static struct domain_device *sas_ex_discover_end_dev(
sas_ex_get_linkrate(parent, child, phy);
sas_device_set_phy(child, phy->port);
+ prev_lock = mutex_is_locked(&child->port->ha->disco_mutex);
#ifdef CONFIG_SCSI_SAS_ATA
if ((phy->attached_tproto & SAS_PROTOCOL_STP) || phy->attached_sata_dev) {
res = sas_get_ata_info(child, phy);
@@ -832,7 +834,11 @@ static struct domain_device *sas_ex_discover_end_dev(
SAS_ADDR(parent->sas_addr), phy_id, res);
goto out_list_del;
}
+ if (prev_lock)
+ mutex_unlock(&child->port->ha->disco_mutex);
sas_disc_wait_completion(child->port, DISCE_PROBE);
+ if (prev_lock)
+ mutex_lock(&child->port->ha->disco_mutex);
} else
#endif
@@ -861,7 +867,11 @@ static struct domain_device *sas_ex_discover_end_dev(
SAS_ADDR(parent->sas_addr), phy_id, res);
goto out_list_del;
}
+ if (prev_lock)
+ mutex_unlock(&child->port->ha->disco_mutex);
sas_disc_wait_completion(child->port, DISCE_PROBE);
+ if (prev_lock)
+ mutex_lock(&child->port->ha->disco_mutex);
} else {
SAS_DPRINTK("target proto 0x%x at %016llx:0x%x not handled\n",
phy->attached_tproto, SAS_ADDR(parent->sas_addr),
--
2.5.0
Sometimes, we want sync libsas probe or destruct in sas discovery work,
like when libsas revalidate domain. We need to split probe and destruct
work from the scsi host workqueue.
Signed-off-by: Yijing Wang <[email protected]>
CC: John Garry <[email protected]>
CC: Johannes Thumshirn <[email protected]>
CC: Ewan Milne <[email protected]>
CC: Christoph Hellwig <[email protected]>
CC: Tomas Henzl <[email protected]>
CC: Dan Williams <[email protected]>
---
drivers/scsi/libsas/sas_discover.c | 13 ++++++++++++-
drivers/scsi/libsas/sas_init.c | 8 ++++++++
include/scsi/libsas.h | 1 +
3 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index 5d4a3a8..a25d648 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -559,7 +559,18 @@ static void sas_chain_work(struct sas_ha_struct *ha, struct sas_work *sw)
* not racing against draining
*/
sas_port_get(port);
- ret = scsi_queue_work(ha->core.shost, &sw->work);
+ /*
+ * discovery event probe and destruct would be called in other
+ * discovery event like discover domain and revalidate domain
+ * events, in some cases, we need to sync execute probe and destruct
+ * events, so run probe and destruct discover events except in a new
+ * workqueue.
+ */
+ if (ev->type == DISCE_PROBE || ev->type == DISCE_DESTRUCT)
+ ret = queue_work(ha->disc_q, &sw->work);
+ else
+ ret = scsi_queue_work(ha->core.shost, &sw->work);
+
if (ret != 1)
sas_port_put(port);
}
diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
index 2f3b736..df1d78b 100644
--- a/drivers/scsi/libsas/sas_init.c
+++ b/drivers/scsi/libsas/sas_init.c
@@ -152,6 +152,13 @@ int sas_register_ha(struct sas_ha_struct *sas_ha)
if (!sas_ha->event_q)
goto Undo_ports;
+ snprintf(name, 64, "%s_disc_q", dev_name(sas_ha->dev));
+ sas_ha->disc_q = create_singlethread_workqueue(name);
+ if(!sas_ha->disc_q) {
+ destroy_workqueue(sas_ha->event_q);
+ goto Undo_ports;
+ }
+
INIT_LIST_HEAD(&sas_ha->eh_done_q);
INIT_LIST_HEAD(&sas_ha->eh_ata_q);
@@ -187,6 +194,7 @@ int sas_unregister_ha(struct sas_ha_struct *sas_ha)
__sas_drain_work(sas_ha);
mutex_unlock(&sas_ha->drain_mutex);
destroy_workqueue(sas_ha->event_q);
+ destroy_workqueue(sas_ha->disc_q);
return 0;
}
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index c2ef05e..4bcb9fe 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -406,6 +406,7 @@ struct sas_ha_struct {
struct device *dev; /* should be set */
struct module *lldd_module; /* should be set */
struct workqueue_struct *event_q;
+ struct workqueue_struct *disc_q;
u8 *sas_addr; /* must be set */
u8 hashed_sas_addr[HASHED_SAS_ADDR_SIZE];
--
2.5.0
Now all libsas works are queued to scsi host workqueue,
include sas event work post by LLDD and sas discovery
work, and a sas hotplug flow may be divided into several
works, e.g libsas receive a PORTE_BYTES_DMAED event,
now we process it as following steps:
sas_form_port --- run in work in shot workq
sas_discover_domain --- run in another work in shost workq
...
sas_probe_devices --- run in new work in shost workq
We found during hot-add a device, libsas may need run several
works in same workqueue to add device in system, the process is
not atomic, it may interrupt by other sas event works, like
PHYE_LOSS_OF_SIGNAL. Finally, we would found lots unexpected
errors. This patch is preparation of execute libsas sas event
in sync.
Signed-off-by: Yijing Wang <[email protected]>
CC: John Garry <[email protected]>
CC: Johannes Thumshirn <[email protected]>
CC: Ewan Milne <[email protected]>
CC: Christoph Hellwig <[email protected]>
CC: Tomas Henzl <[email protected]>
CC: Dan Williams <[email protected]>
---
drivers/scsi/libsas/sas_event.c | 4 ++--
drivers/scsi/libsas/sas_init.c | 7 +++++++
include/scsi/libsas.h | 1 +
3 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c
index a1370bd..a72a089 100644
--- a/drivers/scsi/libsas/sas_event.c
+++ b/drivers/scsi/libsas/sas_event.c
@@ -46,7 +46,7 @@ int sas_queue_work(struct sas_ha_struct *ha, struct sas_work *sw)
if (list_empty(&sw->drain_node))
list_add(&sw->drain_node, &ha->defer_q);
} else
- rc = scsi_queue_work(ha->core.shost, &sw->work);
+ rc = queue_work(ha->event_q, &sw->work);
return rc;
}
@@ -69,7 +69,7 @@ void __sas_drain_work(struct sas_ha_struct *ha)
{
int ret;
unsigned long flags;
- struct workqueue_struct *wq = ha->core.shost->work_q;
+ struct workqueue_struct *wq = ha->event_q;
struct sas_work *sw, *_sw;
set_bit(SAS_HA_DRAINING, &ha->state);
diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
index c227a8b..2f3b736 100644
--- a/drivers/scsi/libsas/sas_init.c
+++ b/drivers/scsi/libsas/sas_init.c
@@ -115,6 +115,7 @@ void sas_hae_reset(struct work_struct *work)
int sas_register_ha(struct sas_ha_struct *sas_ha)
{
+ char name[64];
int error = 0;
mutex_init(&sas_ha->disco_mutex);
@@ -146,6 +147,11 @@ int sas_register_ha(struct sas_ha_struct *sas_ha)
goto Undo_ports;
}
+ snprintf(name, 64, "%s_event_q", dev_name(sas_ha->dev));
+ sas_ha->event_q = create_singlethread_workqueue(name);
+ if (!sas_ha->event_q)
+ goto Undo_ports;
+
INIT_LIST_HEAD(&sas_ha->eh_done_q);
INIT_LIST_HEAD(&sas_ha->eh_ata_q);
@@ -180,6 +186,7 @@ int sas_unregister_ha(struct sas_ha_struct *sas_ha)
mutex_lock(&sas_ha->drain_mutex);
__sas_drain_work(sas_ha);
mutex_unlock(&sas_ha->drain_mutex);
+ destroy_workqueue(sas_ha->event_q);
return 0;
}
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 628f48b..a01ca42 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -402,6 +402,7 @@ struct sas_ha_struct {
char *sas_ha_name;
struct device *dev; /* should be set */
struct module *lldd_module; /* should be set */
+ struct workqueue_struct *event_q;
u8 *sas_addr; /* must be set */
u8 hashed_sas_addr[HASHED_SAS_ADDR_SIZE];
--
2.5.0
No one uses the port_gone_completion in struct asd_sas_port,
clean it out.
Signed-off-by: Yijing Wang <[email protected]>
---
include/scsi/libsas.h | 2 --
1 file changed, 2 deletions(-)
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index c41328d..628f48b 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -263,8 +263,6 @@ struct sas_discovery {
/* The port struct is Class:RW, driver:RO */
struct asd_sas_port {
/* private: */
- struct completion port_gone_completion;
-
struct sas_discovery disc;
struct domain_device *port_dev;
spinlock_t dev_list_lock;
--
2.5.0
Now libsas hotplug work is static, every sas event type has its own
static work, LLDD driver queue the hotplug work into shost->work_q.
If LLDD driver burst post lots hotplug events to libsas, the hotplug
events may pending in the workqueue like
shost->work_q
new work[PORTE_BYTES_DMAED] --> |[PHYE_LOSS_OF_SIGNAL][PORTE_BYTES_DMAED] -> processing
|<-------wait worker to process-------->|
In this case, a new PORTE_BYTES_DMAED event coming, libsas try to queue it
to shost->work_q, but this work is already pending, so it would be lost.
Finally, libsas delete the related sas port and sas devices, but LLDD driver
expect libsas add the sas port and devices(last sas event).
This patch and use static sas event work pool to appease this issue, since
it's static work pool, it won't make memory exhaust.
Signed-off-by: Yijing Wang <[email protected]>
CC: John Garry <[email protected]>
CC: Johannes Thumshirn <[email protected]>
CC: Ewan Milne <[email protected]>
CC: Christoph Hellwig <[email protected]>
CC: Tomas Henzl <[email protected]>
CC: Dan Williams <[email protected]>
---
drivers/scsi/libsas/sas_event.c | 208 ++++++++++++++++++++++++++++++++-----
drivers/scsi/libsas/sas_init.c | 6 --
drivers/scsi/libsas/sas_internal.h | 3 +
drivers/scsi/libsas/sas_phy.c | 48 +++------
drivers/scsi/libsas/sas_port.c | 18 ++--
include/scsi/libsas.h | 16 +--
6 files changed, 216 insertions(+), 83 deletions(-)
diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c
index c0d0d97..a1370bd 100644
--- a/drivers/scsi/libsas/sas_event.c
+++ b/drivers/scsi/libsas/sas_event.c
@@ -27,13 +27,20 @@
#include "sas_internal.h"
#include "sas_dump.h"
+static DEFINE_SPINLOCK(sas_event_lock);
+
+static const work_func_t sas_ha_event_fns[HA_NUM_EVENTS] = {
+ [HAE_RESET] = sas_hae_reset,
+};
+
int sas_queue_work(struct sas_ha_struct *ha, struct sas_work *sw)
{
int rc = 0;
if (!test_bit(SAS_HA_REGISTERED, &ha->state))
- return 0;
+ return rc;
+ rc = 1;
if (test_bit(SAS_HA_DRAINING, &ha->state)) {
/* add it to the defer list, if not already pending */
if (list_empty(&sw->drain_node))
@@ -44,19 +51,15 @@ int sas_queue_work(struct sas_ha_struct *ha, struct sas_work *sw)
return rc;
}
-static int sas_queue_event(int event, unsigned long *pending,
- struct sas_work *work,
+static int sas_queue_event(int event, struct sas_work *work,
struct sas_ha_struct *ha)
{
int rc = 0;
+ unsigned long flags;
- if (!test_and_set_bit(event, pending)) {
- unsigned long flags;
-
- spin_lock_irqsave(&ha->lock, flags);
- rc = sas_queue_work(ha, work);
- spin_unlock_irqrestore(&ha->lock, flags);
- }
+ spin_lock_irqsave(&ha->lock, flags);
+ rc = sas_queue_work(ha, work);
+ spin_unlock_irqrestore(&ha->lock, flags);
return rc;
}
@@ -64,6 +67,8 @@ static int sas_queue_event(int event, unsigned long *pending,
void __sas_drain_work(struct sas_ha_struct *ha)
{
+ int ret;
+ unsigned long flags;
struct workqueue_struct *wq = ha->core.shost->work_q;
struct sas_work *sw, *_sw;
@@ -78,7 +83,12 @@ void __sas_drain_work(struct sas_ha_struct *ha)
clear_bit(SAS_HA_DRAINING, &ha->state);
list_for_each_entry_safe(sw, _sw, &ha->defer_q, drain_node) {
list_del_init(&sw->drain_node);
- sas_queue_work(ha, sw);
+ ret = sas_queue_work(ha, sw);
+ if (ret != 1) {
+ spin_lock_irqsave(&sas_event_lock, flags);
+ sw->used = false;
+ spin_unlock_irqrestore(&sas_event_lock, flags);
+ }
}
spin_unlock_irq(&ha->lock);
}
@@ -119,51 +129,197 @@ void sas_enable_revalidation(struct sas_ha_struct *ha)
if (!test_and_clear_bit(ev, &d->pending))
continue;
- sas_queue_event(ev, &d->pending, &d->disc_work[ev].work, ha);
+ sas_queue_event(ev, &d->disc_work[ev].work, ha);
}
mutex_unlock(&ha->disco_mutex);
}
+static void sas_free_ha_event(struct sas_ha_event *event)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&sas_event_lock, flags);
+ event->work.used = false;
+ spin_unlock_irqrestore(&sas_event_lock, flags);
+}
+
+static void sas_free_port_event(struct asd_sas_event *event)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&sas_event_lock, flags);
+ event->work.used = false;
+ spin_unlock_irqrestore(&sas_event_lock, flags);
+}
+
+static void sas_free_phy_event(struct asd_sas_event *event)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&sas_event_lock, flags);
+ event->work.used = false;
+ spin_unlock_irqrestore(&sas_event_lock, flags);
+}
+
+static void sas_ha_event_worker(struct work_struct *work)
+{
+ struct sas_ha_event *ev = to_sas_ha_event(work);
+
+ sas_ha_event_fns[ev->type](work);
+ sas_free_ha_event(ev);
+}
+
+static void sas_port_event_worker(struct work_struct *work)
+{
+ struct asd_sas_event *ev = to_asd_sas_event(work);
+
+ sas_port_event_fns[ev->type](work);
+ sas_free_port_event(ev);
+}
+
+static void sas_phy_event_worker(struct work_struct *work)
+{
+ struct asd_sas_event *ev = to_asd_sas_event(work);
+
+ sas_phy_event_fns[ev->type](work);
+ sas_free_phy_event(ev);
+}
+
+static struct sas_ha_event *sas_alloc_ha_event(struct sas_ha_struct *sas_ha)
+{
+ int i;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sas_event_lock, flags);
+ for (i = 0; i < HA_NUM_EVENTS; i++)
+ if (!sas_ha->ha_events[i].work.used)
+ break;
+
+ if (i == HA_NUM_EVENTS) {
+ spin_unlock_irqrestore(&sas_event_lock, flags);
+ return NULL;
+ }
+
+ sas_ha->ha_events[i].work.used = true;
+ spin_unlock_irqrestore(&sas_event_lock, flags);
+ return &sas_ha->ha_events[i];
+}
+
static int notify_ha_event(struct sas_ha_struct *sas_ha, enum ha_event event)
{
+ int ret;
+ struct sas_ha_event *ev;
+
BUG_ON(event >= HA_NUM_EVENTS);
- return sas_queue_event(event, &sas_ha->pending,
- &sas_ha->ha_events[event].work, sas_ha);
+ ev = sas_alloc_ha_event(sas_ha);
+ if (!ev) {
+ pr_err("%s: alloc sas ha event fail!\n", __func__);
+ return 0;
+ }
+
+ INIT_SAS_WORK(&ev->work, sas_ha_event_worker);
+ ev->ha = sas_ha;
+ ev->type = event;
+ ret = sas_queue_event(event, &ev->work, sas_ha);
+ if (ret != 1)
+ sas_free_ha_event(ev);
+
+ return ret;
+}
+
+struct asd_sas_event *sas_alloc_port_event(struct asd_sas_phy *phy)
+{
+ int i;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sas_event_lock, flags);
+ for (i = 0; i < PORT_POOL_SIZE; i++)
+ {
+ if (!phy->port_events[i].work.used)
+ break;
+ }
+
+ if (i == PORT_POOL_SIZE) {
+ spin_unlock_irqrestore(&sas_event_lock, flags);
+ return NULL;
+ }
+
+ phy->port_events[i].work.used = true;
+ spin_unlock_irqrestore(&sas_event_lock, flags);
+ return &phy->port_events[i];
}
static int notify_port_event(struct asd_sas_phy *phy, enum port_event event)
{
+ int ret;
+ struct asd_sas_event *ev;
struct sas_ha_struct *ha = phy->ha;
BUG_ON(event >= PORT_NUM_EVENTS);
- return sas_queue_event(event, &phy->port_events_pending,
- &phy->port_events[event].work, ha);
+ ev = sas_alloc_port_event(phy);
+ if (!ev) {
+ pr_err("%s: alloc sas port event fail!\n", __func__);
+ return 0;
+ }
+
+ INIT_SAS_WORK(&ev->work, sas_port_event_worker);
+ ev->phy = phy;
+ ev->type = event;
+ ret = sas_queue_event(event, &ev->work, ha);
+ if (ret != 1)
+ sas_free_port_event(ev);
+
+ return ret;
}
+struct asd_sas_event *sas_alloc_phy_event(struct asd_sas_phy *phy)
+{
+ int i;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sas_event_lock, flags);
+ for (i = 0; i < PHY_POOL_SIZE; i++)
+ if (!phy->phy_events[i].work.used)
+ break;
+
+ if (i == PHY_POOL_SIZE) {
+ spin_unlock_irqrestore(&sas_event_lock, flags);
+ return NULL;
+ }
+
+ phy->phy_events[i].work.used = true;
+ spin_unlock_irqrestore(&sas_event_lock, flags);
+ return &phy->phy_events[i];
+}
int sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event)
{
+ int ret;
+ struct asd_sas_event *ev;
struct sas_ha_struct *ha = phy->ha;
BUG_ON(event >= PHY_NUM_EVENTS);
- return sas_queue_event(event, &phy->phy_events_pending,
- &phy->phy_events[event].work, ha);
+ ev = sas_alloc_phy_event(phy);
+ if (!ev) {
+ pr_err("%s: alloc sas phy event fail!\n", __func__);
+ return 0;
+ }
+
+ INIT_SAS_WORK(&ev->work, sas_phy_event_worker);
+ ev->phy = phy;
+ ev->type = event;
+ ret = sas_queue_event(event, &ev->work, ha);
+ if (ret != 1)
+ sas_free_phy_event(ev);
+
+ return ret;
}
int sas_init_events(struct sas_ha_struct *sas_ha)
{
- static const work_func_t sas_ha_event_fns[HA_NUM_EVENTS] = {
- [HAE_RESET] = sas_hae_reset,
- };
-
int i;
- for (i = 0; i < HA_NUM_EVENTS; i++) {
- INIT_SAS_WORK(&sas_ha->ha_events[i].work, sas_ha_event_fns[i]);
- sas_ha->ha_events[i].ha = sas_ha;
- }
+ for (i = 0; i < HA_NUM_EVENTS; i++)
+ sas_ha->ha_events[i].work.used = false;
sas_ha->notify_ha_event = notify_ha_event;
sas_ha->notify_port_event = notify_port_event;
diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
index 64e9cdd..c227a8b 100644
--- a/drivers/scsi/libsas/sas_init.c
+++ b/drivers/scsi/libsas/sas_init.c
@@ -111,10 +111,6 @@ void sas_hash_addr(u8 *hashed, const u8 *sas_addr)
void sas_hae_reset(struct work_struct *work)
{
- struct sas_ha_event *ev = to_sas_ha_event(work);
- struct sas_ha_struct *ha = ev->ha;
-
- clear_bit(HAE_RESET, &ha->pending);
}
int sas_register_ha(struct sas_ha_struct *sas_ha)
@@ -375,8 +371,6 @@ void sas_prep_resume_ha(struct sas_ha_struct *ha)
struct asd_sas_phy *phy = ha->sas_phy[i];
memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE);
- phy->port_events_pending = 0;
- phy->phy_events_pending = 0;
phy->frame_rcvd_size = 0;
}
}
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index a216c95..f03ce64 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -97,6 +97,9 @@ void sas_hae_reset(struct work_struct *work);
void sas_free_device(struct kref *kref);
+extern const work_func_t sas_phy_event_fns[PHY_NUM_EVENTS];
+extern const work_func_t sas_port_event_fns[PORT_NUM_EVENTS];
+
#ifdef CONFIG_SCSI_SAS_HOST_SMP
extern int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
struct request *rsp);
diff --git a/drivers/scsi/libsas/sas_phy.c b/drivers/scsi/libsas/sas_phy.c
index cdee446c..07766ad 100644
--- a/drivers/scsi/libsas/sas_phy.c
+++ b/drivers/scsi/libsas/sas_phy.c
@@ -35,7 +35,6 @@ static void sas_phye_loss_of_signal(struct work_struct *work)
struct asd_sas_event *ev = to_asd_sas_event(work);
struct asd_sas_phy *phy = ev->phy;
- clear_bit(PHYE_LOSS_OF_SIGNAL, &phy->phy_events_pending);
phy->error = 0;
sas_deform_port(phy, 1);
}
@@ -45,7 +44,6 @@ static void sas_phye_oob_done(struct work_struct *work)
struct asd_sas_event *ev = to_asd_sas_event(work);
struct asd_sas_phy *phy = ev->phy;
- clear_bit(PHYE_OOB_DONE, &phy->phy_events_pending);
phy->error = 0;
}
@@ -58,8 +56,6 @@ static void sas_phye_oob_error(struct work_struct *work)
struct sas_internal *i =
to_sas_internal(sas_ha->core.shost->transportt);
- clear_bit(PHYE_OOB_ERROR, &phy->phy_events_pending);
-
sas_deform_port(phy, 1);
if (!port && phy->enabled && i->dft->lldd_control_phy) {
@@ -88,8 +84,6 @@ static void sas_phye_spinup_hold(struct work_struct *work)
struct sas_internal *i =
to_sas_internal(sas_ha->core.shost->transportt);
- clear_bit(PHYE_SPINUP_HOLD, &phy->phy_events_pending);
-
phy->error = 0;
i->dft->lldd_control_phy(phy, PHY_FUNC_RELEASE_SPINUP_HOLD, NULL);
}
@@ -99,8 +93,6 @@ static void sas_phye_resume_timeout(struct work_struct *work)
struct asd_sas_event *ev = to_asd_sas_event(work);
struct asd_sas_phy *phy = ev->phy;
- clear_bit(PHYE_RESUME_TIMEOUT, &phy->phy_events_pending);
-
/* phew, lldd got the phy back in the nick of time */
if (!phy->suspended) {
dev_info(&phy->phy->dev, "resume timeout cancelled\n");
@@ -112,30 +104,12 @@ static void sas_phye_resume_timeout(struct work_struct *work)
sas_deform_port(phy, 1);
}
-
/* ---------- Phy class registration ---------- */
int sas_register_phys(struct sas_ha_struct *sas_ha)
{
int i;
- static const work_func_t sas_phy_event_fns[PHY_NUM_EVENTS] = {
- [PHYE_LOSS_OF_SIGNAL] = sas_phye_loss_of_signal,
- [PHYE_OOB_DONE] = sas_phye_oob_done,
- [PHYE_OOB_ERROR] = sas_phye_oob_error,
- [PHYE_SPINUP_HOLD] = sas_phye_spinup_hold,
- [PHYE_RESUME_TIMEOUT] = sas_phye_resume_timeout,
-
- };
-
- static const work_func_t sas_port_event_fns[PORT_NUM_EVENTS] = {
- [PORTE_BYTES_DMAED] = sas_porte_bytes_dmaed,
- [PORTE_BROADCAST_RCVD] = sas_porte_broadcast_rcvd,
- [PORTE_LINK_RESET_ERR] = sas_porte_link_reset_err,
- [PORTE_TIMER_EVENT] = sas_porte_timer_event,
- [PORTE_HARD_RESET] = sas_porte_hard_reset,
- };
-
/* Now register the phys. */
for (i = 0; i < sas_ha->num_phys; i++) {
int k;
@@ -143,15 +117,12 @@ int sas_register_phys(struct sas_ha_struct *sas_ha)
phy->error = 0;
INIT_LIST_HEAD(&phy->port_phy_el);
- for (k = 0; k < PORT_NUM_EVENTS; k++) {
- INIT_SAS_WORK(&phy->port_events[k].work, sas_port_event_fns[k]);
- phy->port_events[k].phy = phy;
- }
- for (k = 0; k < PHY_NUM_EVENTS; k++) {
- INIT_SAS_WORK(&phy->phy_events[k].work, sas_phy_event_fns[k]);
- phy->phy_events[k].phy = phy;
- }
+ for (k = 0; k < PORT_POOL_SIZE; k++)
+ phy->port_events[k].work.used = false;
+
+ for (k = 0; k < PHY_POOL_SIZE; k++)
+ phy->phy_events[k].work.used = false;
phy->port = NULL;
phy->ha = sas_ha;
@@ -179,3 +150,12 @@ int sas_register_phys(struct sas_ha_struct *sas_ha)
return 0;
}
+
+const work_func_t sas_phy_event_fns[PHY_NUM_EVENTS] = {
+ [PHYE_LOSS_OF_SIGNAL] = sas_phye_loss_of_signal,
+ [PHYE_OOB_DONE] = sas_phye_oob_done,
+ [PHYE_OOB_ERROR] = sas_phye_oob_error,
+ [PHYE_SPINUP_HOLD] = sas_phye_spinup_hold,
+ [PHYE_RESUME_TIMEOUT] = sas_phye_resume_timeout,
+
+};
diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
index d3c5297..9326628 100644
--- a/drivers/scsi/libsas/sas_port.c
+++ b/drivers/scsi/libsas/sas_port.c
@@ -261,8 +261,6 @@ void sas_porte_bytes_dmaed(struct work_struct *work)
struct asd_sas_event *ev = to_asd_sas_event(work);
struct asd_sas_phy *phy = ev->phy;
- clear_bit(PORTE_BYTES_DMAED, &phy->port_events_pending);
-
sas_form_port(phy);
}
@@ -273,8 +271,6 @@ void sas_porte_broadcast_rcvd(struct work_struct *work)
unsigned long flags;
u32 prim;
- clear_bit(PORTE_BROADCAST_RCVD, &phy->port_events_pending);
-
spin_lock_irqsave(&phy->sas_prim_lock, flags);
prim = phy->sas_prim;
spin_unlock_irqrestore(&phy->sas_prim_lock, flags);
@@ -288,8 +284,6 @@ void sas_porte_link_reset_err(struct work_struct *work)
struct asd_sas_event *ev = to_asd_sas_event(work);
struct asd_sas_phy *phy = ev->phy;
- clear_bit(PORTE_LINK_RESET_ERR, &phy->port_events_pending);
-
sas_deform_port(phy, 1);
}
@@ -298,8 +292,6 @@ void sas_porte_timer_event(struct work_struct *work)
struct asd_sas_event *ev = to_asd_sas_event(work);
struct asd_sas_phy *phy = ev->phy;
- clear_bit(PORTE_TIMER_EVENT, &phy->port_events_pending);
-
sas_deform_port(phy, 1);
}
@@ -308,8 +300,6 @@ void sas_porte_hard_reset(struct work_struct *work)
struct asd_sas_event *ev = to_asd_sas_event(work);
struct asd_sas_phy *phy = ev->phy;
- clear_bit(PORTE_HARD_RESET, &phy->port_events_pending);
-
sas_deform_port(phy, 1);
}
@@ -353,3 +343,11 @@ void sas_unregister_ports(struct sas_ha_struct *sas_ha)
sas_deform_port(sas_ha->sas_phy[i], 0);
}
+
+const work_func_t sas_port_event_fns[PORT_NUM_EVENTS] = {
+ [PORTE_BYTES_DMAED] = sas_porte_bytes_dmaed,
+ [PORTE_BROADCAST_RCVD] = sas_porte_broadcast_rcvd,
+ [PORTE_LINK_RESET_ERR] = sas_porte_link_reset_err,
+ [PORTE_TIMER_EVENT] = sas_porte_timer_event,
+ [PORTE_HARD_RESET] = sas_porte_hard_reset,
+};
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index cfaeed2..c41328d 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -229,6 +229,8 @@ struct domain_device {
struct sas_work {
struct list_head drain_node;
struct work_struct work;
+ struct list_head list;
+ bool used;
};
static inline void INIT_SAS_WORK(struct sas_work *sw, void (*fn)(struct work_struct *))
@@ -300,6 +302,7 @@ struct asd_sas_port {
struct asd_sas_event {
struct sas_work work;
struct asd_sas_phy *phy;
+ int type;
};
static inline struct asd_sas_event *to_asd_sas_event(struct work_struct *work)
@@ -309,16 +312,16 @@ static inline struct asd_sas_event *to_asd_sas_event(struct work_struct *work)
return ev;
}
+#define PORT_POOL_SIZE (PORT_NUM_EVENTS * 5)
+#define PHY_POOL_SIZE (PHY_NUM_EVENTS * 5)
+
/* The phy pretty much is controlled by the LLDD.
* The class only reads those fields.
*/
struct asd_sas_phy {
/* private: */
- struct asd_sas_event port_events[PORT_NUM_EVENTS];
- struct asd_sas_event phy_events[PHY_NUM_EVENTS];
-
- unsigned long port_events_pending;
- unsigned long phy_events_pending;
+ struct asd_sas_event port_events[PORT_POOL_SIZE];
+ struct asd_sas_event phy_events[PHY_POOL_SIZE];
int error;
int suspended;
@@ -365,6 +368,7 @@ struct scsi_core {
struct sas_ha_event {
struct sas_work work;
struct sas_ha_struct *ha;
+ int type;
};
static inline struct sas_ha_event *to_sas_ha_event(struct work_struct *work)
@@ -384,8 +388,6 @@ enum sas_ha_state {
struct sas_ha_struct {
/* private: */
struct sas_ha_event ha_events[HA_NUM_EVENTS];
- unsigned long pending;
-
struct list_head defer_q; /* work queued while draining */
struct mutex drain_mutex;
unsigned long state;
--
2.5.0
On 10/07/2017 08:06, Yijing Wang wrote:
> Now libsas hotplug work is static, every sas event type has its own
> static work, LLDD driver queue the hotplug work into shost->work_q.
> If LLDD driver burst post lots hotplug events to libsas, the hotplug
> events may pending in the workqueue like
>
> shost->work_q
> new work[PORTE_BYTES_DMAED] --> |[PHYE_LOSS_OF_SIGNAL][PORTE_BYTES_DMAED] -> processing
> |<-------wait worker to process-------->|
> In this case, a new PORTE_BYTES_DMAED event coming, libsas try to queue it
> to shost->work_q, but this work is already pending, so it would be lost.
> Finally, libsas delete the related sas port and sas devices, but LLDD driver
> expect libsas add the sas port and devices(last sas event).
>
> This patch and use static sas event work pool to appease this issue, since
> it's static work pool, it won't make memory exhaust.
>
[ ... ]
>
> +#define PORT_POOL_SIZE (PORT_NUM_EVENTS * 5)
> +#define PHY_POOL_SIZE (PHY_NUM_EVENTS * 5)
> +
> /* The phy pretty much is controlled by the LLDD.
> * The class only reads those fields.
> */
> struct asd_sas_phy {
> /* private: */
> - struct asd_sas_event port_events[PORT_NUM_EVENTS];
> - struct asd_sas_event phy_events[PHY_NUM_EVENTS];
> -
> - unsigned long port_events_pending;
> - unsigned long phy_events_pending;
> + struct asd_sas_event port_events[PORT_POOL_SIZE];
> + struct asd_sas_event phy_events[PHY_POOL_SIZE];
>
> int error;
Hi Yijing,
So now we are creating a static pool of events per PHY/port, instead of
having 1 static work struct per event per PHY/port. So, for sure, this
avoids the dynamic event issue of system memory exhaustion which we
discussed in v1+v2 series. And it seems to possibly remove issue of
losing SAS events.
But how did you determine the pool size for a PHY/port? It would seem to
be 5 * #phy events or #port events (which is also 5, I figure by
coincidence). How does this deal with flutter of >25 events?
Thanks,
John
On 10/07/2017 08:06, Yijing Wang wrote:
> No one uses the port_gone_completion in struct asd_sas_port,
> clean it out.
This seems like a reasonable tidy-up patch which could be taken in
isolation, having no dependency on the rest of the series.
>
> Signed-off-by: Yijing Wang <[email protected]>
> ---
> include/scsi/libsas.h | 2 --
> 1 file changed, 2 deletions(-)
>
> diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
> index c41328d..628f48b 100644
> --- a/include/scsi/libsas.h
> +++ b/include/scsi/libsas.h
> @@ -263,8 +263,6 @@ struct sas_discovery {
> /* The port struct is Class:RW, driver:RO */
> struct asd_sas_port {
> /* private: */
> - struct completion port_gone_completion;
> -
> struct sas_discovery disc;
> struct domain_device *port_dev;
> spinlock_t dev_list_lock;
>
>> - unsigned long port_events_pending;
>> - unsigned long phy_events_pending;
>> + struct asd_sas_event port_events[PORT_POOL_SIZE];
>> + struct asd_sas_event phy_events[PHY_POOL_SIZE];
>>
>> int error;
>
> Hi Yijing,
>
> So now we are creating a static pool of events per PHY/port, instead of having 1 static work struct per event per PHY/port. So, for sure, this avoids the dynamic event issue of system memory exhaustion which we discussed in v1+v2 series. And it seems to possibly remove issue of losing SAS events.
>
> But how did you determine the pool size for a PHY/port? It would seem to be 5 * #phy events or #port events (which is also 5, I figure by coincidence). How does this deal with flutter of >25 events?
There is no special meaning for the pool size, if flutter of > 25 events, notify sas events will return error, and the further step work is depending on LLDD drivers.
I hope libsas could do more work in this case, but now it seems a little difficult, this patch may be a interim fix, until we find a perfect solution.
Thanks!
Yijing.
>
> Thanks,
> John
>
>
> .
>
在 2017/7/11 23:54, John Garry 写道:
> On 10/07/2017 08:06, Yijing Wang wrote:
>> No one uses the port_gone_completion in struct asd_sas_port,
>> clean it out.
>
> This seems like a reasonable tidy-up patch which could be taken in isolation, having no dependency on the rest of the series.
Yes.
>
>>
>> Signed-off-by: Yijing Wang <[email protected]>
>> ---
>> include/scsi/libsas.h | 2 --
>> 1 file changed, 2 deletions(-)
>>
>> diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
>> index c41328d..628f48b 100644
>> --- a/include/scsi/libsas.h
>> +++ b/include/scsi/libsas.h
>> @@ -263,8 +263,6 @@ struct sas_discovery {
>> /* The port struct is Class:RW, driver:RO */
>> struct asd_sas_port {
>> /* private: */
>> - struct completion port_gone_completion;
>> -
>> struct sas_discovery disc;
>> struct domain_device *port_dev;
>> spinlock_t dev_list_lock;
>>
>
>
>
> .
>
On 12/07/2017 03:06, wangyijing wrote:
>>> - unsigned long port_events_pending;
>>> - unsigned long phy_events_pending;
>>> + struct asd_sas_event port_events[PORT_POOL_SIZE];
>>> + struct asd_sas_event phy_events[PHY_POOL_SIZE];
>>>
>>> int error;
>>
>> Hi Yijing,
>>
>> So now we are creating a static pool of events per PHY/port, instead of having 1 static work struct per event per PHY/port. So, for sure, this avoids the dynamic event issue of system memory exhaustion which we discussed in v1+v2 series. And it seems to possibly remove issue of losing SAS events.
>>
>> But how did you determine the pool size for a PHY/port? It would seem to be 5 * #phy events or #port events (which is also 5, I figure by coincidence). How does this deal with flutter of >25 events?
>
> There is no special meaning for the pool size, if flutter of > 25 events, notify sas events will return error, and the further step work is depending on LLDD drivers.
> I hope libsas could do more work in this case, but now it seems a little difficult, this patch may be a interim fix, until we find a perfect solution.
The principal of having a fixed-sized pool is ok, even though the pool
size needs more consideration.
However my issue is how to handle pool exhaustion. For a start, relaying
info to the LLDD that the event notification failed is probably not the
way to go. I only now noticed "scsi: sas: scsi_queue_work can fail, so
make callers aware" made it into the kernel; as I mentioned in response
to this patch, the LLDD does not know how to handle this (and no LLDDs
do actually handle this).
I would say it is better to shut down the PHY from libsas (As Dan
mentioned in the v1 series) when the pool exhausts, under the assumption
that the PHY has gone into some erroneous state. The user can later
re-enable the PHY from sysfs, if required.
Much appreciated,
John
>
> Thanks!
> Yijing.
>
>>
>> Thanks,
>> John
>>
>>
>> .
>>
>
>
> .
>
在 2017/7/12 16:17, John Garry 写道:
> On 12/07/2017 03:06, wangyijing wrote:
>>>> - unsigned long port_events_pending;
>>>> - unsigned long phy_events_pending;
>>>> + struct asd_sas_event port_events[PORT_POOL_SIZE];
>>>> + struct asd_sas_event phy_events[PHY_POOL_SIZE];
>>>>
>>>> int error;
>>>
>>> Hi Yijing,
>>>
>>> So now we are creating a static pool of events per PHY/port, instead of having 1 static work struct per event per PHY/port. So, for sure, this avoids the dynamic event issue of system memory exhaustion which we discussed in v1+v2 series. And it seems to possibly remove issue of losing SAS events.
>>>
>>> But how did you determine the pool size for a PHY/port? It would seem to be 5 * #phy events or #port events (which is also 5, I figure by coincidence). How does this deal with flutter of >25 events?
>>
>> There is no special meaning for the pool size, if flutter of > 25 events, notify sas events will return error, and the further step work is depending on LLDD drivers.
>> I hope libsas could do more work in this case, but now it seems a little difficult, this patch may be a interim fix, until we find a perfect solution.
>
> The principal of having a fixed-sized pool is ok, even though the pool size needs more consideration.
>
> However my issue is how to handle pool exhaustion. For a start, relaying info to the LLDD that the event notification failed is probably not the way to go. I only now noticed "scsi: sas: scsi_queue_work can fail, so make callers aware" made it into the kernel; as I mentioned in response to this patch, the LLDD does not know how to handle this (and no LLDDs do actually handle this).
>
> I would say it is better to shut down the PHY from libsas (As Dan mentioned in the v1 series) when the pool exhausts, under the assumption that the PHY has gone into some erroneous state. The user can later re-enable the PHY from sysfs, if required.
I considered this suggestion, and what I am worried about are, first if we disable phy once the sas event pool exhausts, it may hurt the pending sas event process which has been queued,
second, if phy was disabled, and no one trigger the reenable by sysfs, the LLDD has no way to post new sas phy events.
Thanks!
Yijing.
>
> Much appreciated,
> John
>
>>
>> Thanks!
>> Yijing.
>>
>>>
>>> Thanks,
>>> John
>>>
>>>
>>> .
>>>
>>
>>
>> .
>>
>
>
>
> .
>
On 10/07/2017 08:06, Yijing Wang wrote:
> This patchset is based Johannes's patch
> "scsi: sas: scsi_queue_work can fail, so make callers aware"
>
> Now the libsas hotplug has some issues, Dan Williams report
> a similar bug here before
> https://www.mail-archive.com/[email protected]/msg39187.html
>
> The issues we have found
> 1. if LLDD burst reports lots of phy-up/phy-down sas events, some events
> may lost because a same sas events is pending now, finally libsas topo
> may different the hardware.
> 2. receive a phy down sas event, libsas call sas_deform_port to remove
> devices, it would first delete the sas port, then put a destruction
> discovery event in a new work, and queue it at the tail of workqueue,
> once the sas port be deleted, its children device will be deleted too,
> when the destruction work start, it will found the target device has
> been removed, and report a sysfs warnning.
> 3. since a hotplug process will be devided into several works, if a phy up
> sas event insert into phydown works, like
> destruction work ---> PORTE_BYTES_DMAED (sas_form_port) ---->PHYE_LOSS_OF_SIGNAL
> the hot remove flow would broken by PORTE_BYTES_DMAED event, it's not
> we expected, and issues would occur.
>
> The first patch fix the sas events lost, and the second one introudce wait-complete
> to fix the hotplug order issues.
>
I quickly tested this for basic hotplug.
Before:
root@(none)$ echo 0 > ./phy-0:6/sas_phy/phy-0:6/enable
root@(none)$ echo 0 > ./phy-0:5/sas_phy/phy-0:5/enable
root@(none)$ echo 0 > ./phy-0:4/sas_phy/phy-0:4/enable
root@(none)$ echo 0 > ./phy-0:3/sas_phy/phy-0:3/enable
root@(none)$ echo 0 > ./phy-0:3/sas_phy/phy-0:2/enable
root@(none)$ echo 0 > ./phy-0:2/sas_phy/phy-0:2/enable
root@(none)$ echo 0 > ./phy-0:1/sas_phy/phy-0:1/enable
root@(none)$ echo 0 > ./phy-0:0/sas_phy/phy-0:0/enable
root@(none)$ echo 0 > ./phy-0:7/sas_phy/phy-0:7/enable
root@(none)$ [ 102.570694] sysfs group 'power' not found for kobject
'0:0:7:0'
[ 102.577250] ------------[ cut here ]------------
[ 102.581861] WARNING: CPU: 3 PID: 1740 at fs/sysfs/group.c:237
sysfs_remove_group+0x8c/0x94
[ 102.590110] Modules linked in:
[ 102.593154] CPU: 3 PID: 1740 Comm: kworker/u128:2 Not tainted
4.12.0-rc1-00032-g3ab81fc #1907
[ 102.601664] Hardware name: Huawei Taishan 2280 /D05, BIOS Hisilicon
D05 UEFI Nemo 1.7 RC3 06/23/2017
[ 102.610784] Workqueue: scsi_wq_0 sas_destruct_devices
[ 102.615822] task: ffff8017d4793400 task.stack: ffff8017b7e70000
[ 102.621728] PC is at sysfs_remove_group+0x8c/0x94
[ 102.626419] LR is at sysfs_remove_group+0x8c/0x94
[ 102.631109] pc : [<ffff000008267c44>] lr : [<ffff000008267c44>]
pstate: 60000045
[ 102.638490] sp : ffff8017b7e73b80
[ 102.641791] x29: ffff8017b7e73b80 x28: ffff8017db010800
[ 102.647091] x27: ffff000008e27000 x26: ffff8017d43e6600
[ 102.652390] x25: ffff8017b8280000 x24: 0000000000000003
[ 102.657689] x23: ffff8017b78864b0 x22: ffff8017b784c988
[ 102.662988] x21: ffff8017b7886410 x20: ffff000008ee9dd0
[ 102.668288] x19: 0000000000000000 x18: ffff000008a1b678
[ 102.673587] x17: 000000000000000e x16: 0000000000000007
[ 102.678886] x15: 0000000000000000 x14: 00000000000000a3
[ 102.684185] x13: 0000000000000033 x12: 0000000000000028
[ 102.689484] x11: ffff000008f3be58 x10: 0000000000000000
[ 102.694783] x9 : 000000000000043c x8 : 6f6b20726f662064
[ 102.700082] x7 : ffff000008e29e08 x6 : ffff8017fbe34c50
[ 102.705382] x5 : 0000000000000000 x4 : 0000000000000000
[ 102.710681] x3 : ffffffffffffffff x2 : ffff000008e427e0
[ 102.715980] x1 : 0000000000000000 x0 : 0000000000000033
[ 102.721279] ---[ end trace c216cc1451d5f7ec ]---
[ 102.725882] Call trace:
[ 102.728316] Exception stack(0xffff8017b7e739b0 to 0xffff8017b7e73ae0)
[ 102.734742] 39a0: 0000000000000000
0001000000000000
[ 102.742557] 39c0: ffff8017b7e73b80 ffff000008267c44 ffff000008bfa050
0000000000000000
[ 102.750372] 39e0: ffff8017b78864b0 0000000000000003 ffff8017b8280000
ffff8017d43e6600
[ 102.758188] 3a00: ffff000008e27000 ffff8017db010800 ffff8017d4793400
0000000000000000
[ 102.766003] 3a20: ffff8017b7e73b80 ffff8017b7e73b80 ffff8017b7e73b40
00000000ffffffc8
[ 102.773818] 3a40: ffff8017b7e73a70 ffff00000810c12c 0000000000000033
0000000000000000
[ 102.781633] 3a60: ffff000008e427e0 ffffffffffffffff 0000000000000000
0000000000000000
[ 102.789449] 3a80: ffff8017fbe34c50 ffff000008e29e08 6f6b20726f662064
000000000000043c
[ 102.797264] 3aa0: 0000000000000000 ffff000008f3be58 0000000000000028
0000000000000033
[ 102.805079] 3ac0: 00000000000000a3 0000000000000000 0000000000000007
000000000000000e
[ 102.812895] [<ffff000008267c44>] sysfs_remove_group+0x8c/0x94
[ 102.818628] [<ffff00000855b14c>] dpm_sysfs_remove+0x58/0x68
[ 102.824188] [<ffff00000854e0e8>] device_del+0xf8/0x2d0
[ 102.829312] [<ffff00000854e2d4>] device_unregister+0x14/0x2c
[ 102.834959] [<ffff00000837e6e0>] bsg_unregister_queue+0x60/0x98
[ 102.840866] [<ffff000008593cd4>] __scsi_remove_device+0xa0/0xbc
<snip>
[ 151.331854] 3bc0: ffff0000081f21ac 0000ffff803370c0
[ 151.336718] [<ffff000008267c44>] sysfs_remove_group+0x8c/0x94
[ 151.342449] [<ffff00000855b14c>] dpm_sysfs_remove+0x58/0x68
[ 151.348008] [<ffff00000854e0e8>] device_del+0xf8/0x2d0
[ 151.353133] [<ffff000008597278>] sas_rphy_remove+0x54/0x80
[ 151.358604] [<ffff0000085972b8>] sas_rphy_delete+0x14/0x28
[ 151.364076] [<ffff00000859b304>] sas_destruct_devices+0x64/0x98
[ 151.369982] [<ffff0000080d8194>] process_one_work+0x12c/0x28c
[ 151.375714] [<ffff0000080d834c>] worker_thread+0x58/0x3b8
[ 151.381100] [<ffff0000080ddee4>] kthread+0x100/0x12c
[ 151.386050] [<ffff0000080836c0>] ret_from_fork+0x10/0x50
[ 151.391360] hisi_sas_v2_hw HISI0162:01: found dev[0:2] is gone
root@(none)$
So the console locks for ~50 seconds with WARN garbage.
After:
...
root@(none)$ echo 0 > ./phy-0:7/sas_phy/phy-0:7/enable
root@(none)$ [ 446.193336] hisi_sas_v2_hw HISI0162:01: found dev[8:1]
is gone
[ 446.249205] hisi_sas_v2_hw HISI0162:01: found dev[7:1] is gone
[ 446.325201] hisi_sas_v2_hw HISI0162:01: found dev[6:1] is gone
[ 446.373189] hisi_sas_v2_hw HISI0162:01: found dev[5:1] is gone
[ 446.421187] hisi_sas_v2_hw HISI0162:01: found dev[4:1] is gone
[ 446.457232] hisi_sas_v2_hw HISI0162:01: found dev[3:1] is gone
[ 446.477151] sd 0:0:1:0: [sdb] Synchronizing SCSI cache
[ 446.482373] sd 0:0:1:0: [sdb] Synchronize Cache(10) failed: Result:
hostbyte=0x04 driverbyte=0x00
[ 446.491238] sd 0:0:1:0: [sdb] Stopping disk
[ 446.495419] sd 0:0:1:0: [sdb] Start/Stop Unit failed: Result:
hostbyte=0x04 driverbyte=0x00
[ 446.525227] hisi_sas_v2_hw HISI0162:01: found dev[2:5] is gone
[ 446.569249] hisi_sas_v2_hw HISI0162:01: found dev[1:1] is gone
[ 446.576872] hisi_sas_v2_hw HISI0162:01: found dev[0:2] is gone
root@(none)$
So much nicer. BTW, /dev/sdb is a SATA disk, the rest are SAS.
John
> v2->v3: some code improvements suggested by Johannes and John,
> split v2 patch 2 into several small pathes.
> v1->v2: some code improvements suggested by John Garry
>
> Yijing Wang (7):
> libsas: Use static sas event pool to appease sas event lost
> libsas: remove unused port_gone_completion
> libsas: Use new workqueue to run sas event
> libsas: add sas event wait-complete support
> libsas: add a new workqueue to run probe/destruct discovery event
> libsas: add wait-complete support to sync discovery event
> libsas: release disco mutex during waiting in sas_ex_discover_end_dev
>
> drivers/scsi/libsas/sas_discover.c | 58 +++++++---
> drivers/scsi/libsas/sas_event.c | 212 ++++++++++++++++++++++++++++++++-----
> drivers/scsi/libsas/sas_expander.c | 22 +++-
> drivers/scsi/libsas/sas_init.c | 21 ++--
> drivers/scsi/libsas/sas_internal.h | 64 +++++++++++
> drivers/scsi/libsas/sas_phy.c | 48 +++------
> drivers/scsi/libsas/sas_port.c | 22 ++--
> include/scsi/libsas.h | 27 +++--
> 8 files changed, 373 insertions(+), 101 deletions(-)
>
On 12/07/2017 09:47, wangyijing wrote:
>
>
> 在 2017/7/12 16:17, John Garry 写道:
>> On 12/07/2017 03:06, wangyijing wrote:
>>>>> - unsigned long port_events_pending;
>>>>> - unsigned long phy_events_pending;
>>>>> + struct asd_sas_event port_events[PORT_POOL_SIZE];
>>>>> + struct asd_sas_event phy_events[PHY_POOL_SIZE];
>>>>>
>>>>> int error;
>>>>
>>>> Hi Yijing,
>>>>
>>>> So now we are creating a static pool of events per PHY/port, instead of having 1 static work struct per event per PHY/port. So, for sure, this avoids the dynamic event issue of system memory exhaustion which we discussed in v1+v2 series. And it seems to possibly remove issue of losing SAS events.
>>>>
>>>> But how did you determine the pool size for a PHY/port? It would seem to be 5 * #phy events or #port events (which is also 5, I figure by coincidence). How does this deal with flutter of >25 events?
>>>
>>> There is no special meaning for the pool size, if flutter of > 25 events, notify sas events will return error, and the further step work is depending on LLDD drivers.
>>> I hope libsas could do more work in this case, but now it seems a little difficult, this patch may be a interim fix, until we find a perfect solution.
>>
>> The principal of having a fixed-sized pool is ok, even though the pool size needs more consideration.
>>
>> However my issue is how to handle pool exhaustion. For a start, relaying info to the LLDD that the event notification failed is probably not the way to go. I only now noticed "scsi: sas: scsi_queue_work can fail, so make callers aware" made it into the kernel; as I mentioned in response to this patch, the LLDD does not know how to handle this (and no LLDDs do actually handle this).
>>
>> I would say it is better to shut down the PHY from libsas (As Dan mentioned in the v1 series) when the pool exhausts, under the assumption that the PHY has gone into some erroneous state. The user can later re-enable the PHY from sysfs, if required.
>
> I considered this suggestion, and what I am worried about are, first if we disable phy once the sas event pool exhausts, it may hurt the pending sas event process which has been queued,
I don't see how it affects currently queued events - they should just be
processed normally. As for LLDD reporting events when the pool is
exhausted, they are just lost.
> second, if phy was disabled, and no one trigger the reenable by sysfs, the LLDD has no way to post new sas phy events.
For the extreme scenario of pool becoming exhausted and PHY being
disabled, it should remain disabled until user takes some action to fix
originating problem.
>
> Thanks!
> Yijing.
>
>>
>> Much appreciated,
>> John
>>
>>>
>>> Thanks!
>>> Yijing.
>>>
>>>>
>>>> Thanks,
>>>> John
>>>>
>>>>
>>>> .
>>>>
>>>
>>>
>>> .
>>>
>>
>>
>>
>> .
>>
>
>
> .
>
On Wed, Jul 12, 2017 at 10:59:27AM +0100, John Garry wrote:
> After:
> ...
> root@(none)$ echo 0 > ./phy-0:7/sas_phy/phy-0:7/enable
> root@(none)$ [ 446.193336] hisi_sas_v2_hw HISI0162:01: found dev[8:1] is
> gone
> [ 446.249205] hisi_sas_v2_hw HISI0162:01: found dev[7:1] is gone
> [ 446.325201] hisi_sas_v2_hw HISI0162:01: found dev[6:1] is gone
> [ 446.373189] hisi_sas_v2_hw HISI0162:01: found dev[5:1] is gone
> [ 446.421187] hisi_sas_v2_hw HISI0162:01: found dev[4:1] is gone
> [ 446.457232] hisi_sas_v2_hw HISI0162:01: found dev[3:1] is gone
> [ 446.477151] sd 0:0:1:0: [sdb] Synchronizing SCSI cache
> [ 446.482373] sd 0:0:1:0: [sdb] Synchronize Cache(10) failed: Result:
> hostbyte=0x04 driverbyte=0x00
> [ 446.491238] sd 0:0:1:0: [sdb] Stopping disk
> [ 446.495419] sd 0:0:1:0: [sdb] Start/Stop Unit failed: Result:
> hostbyte=0x04 driverbyte=0x00
> [ 446.525227] hisi_sas_v2_hw HISI0162:01: found dev[2:5] is gone
> [ 446.569249] hisi_sas_v2_hw HISI0162:01: found dev[1:1] is gone
> [ 446.576872] hisi_sas_v2_hw HISI0162:01: found dev[0:2] is gone
>
> root@(none)$
>
> So much nicer. BTW, /dev/sdb is a SATA disk, the rest are SAS.
This is awesome. I hope I have some time reviewing the patches themselfes
soon.
Johannes
--
Johannes Thumshirn Storage
[email protected] +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 N?rnberg
GF: Felix Imend?rffer, Jane Smithard, Graham Norton
HRB 21284 (AG N?rnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850
On 10/07/2017 08:06, Yijing Wang wrote:
>
> static void sas_chain_event(int event, unsigned long *pending,
> @@ -592,9 +596,9 @@ int sas_discover_event(struct asd_sas_port *port, enum discover_event ev)
> {
> struct sas_discovery *disc;
>
> + disc = &port->disc;
> if (!port)
> return 0;
> - disc = &port->disc;
>
> BUG_ON(ev >= DISC_NUM_EVENTS);
>
> diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
> index 570b2cb..9d26c28 1
I was just looking through the code and I noticed this, above. Is there
a specific reason to move the NULL check, or was it modified accidentally?
I mean, if port is NULL I don't think we would get as far as checking it
as we would have already de-referenced it.
On 10/07/2017 08:06, Yijing Wang wrote:
> Sometimes, we want sync libsas probe or destruct in sas discovery work,
> like when libsas revalidate domain. We need to split probe and destruct
> work from the scsi host workqueue.
>
> Signed-off-by: Yijing Wang <[email protected]>
> CC: John Garry <[email protected]>
> CC: Johannes Thumshirn <[email protected]>
> CC: Ewan Milne <[email protected]>
> CC: Christoph Hellwig <[email protected]>
> CC: Tomas Henzl <[email protected]>
> CC: Dan Williams <[email protected]>
> ---
> drivers/scsi/libsas/sas_discover.c | 13 ++++++++++++-
> drivers/scsi/libsas/sas_init.c | 8 ++++++++
> include/scsi/libsas.h | 1 +
> 3 files changed, 21 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
> index 5d4a3a8..a25d648 100644
> --- a/drivers/scsi/libsas/sas_discover.c
> +++ b/drivers/scsi/libsas/sas_discover.c
> @@ -559,7 +559,18 @@ static void sas_chain_work(struct sas_ha_struct *ha, struct sas_work *sw)
> * not racing against draining
> */
> sas_port_get(port);
> - ret = scsi_queue_work(ha->core.shost, &sw->work);
> + /*
> + * discovery event probe and destruct would be called in other
> + * discovery event like discover domain and revalidate domain
> + * events, in some cases, we need to sync execute probe and destruct
> + * events, so run probe and destruct discover events except in a new
> + * workqueue.
Can we just use ha->disc_q for all discovery events for simplicity?
Would this solve the disco mutex you try to solve in patch 7/7?
> + */
> + if (ev->type == DISCE_PROBE || ev->type == DISCE_DESTRUCT)
> + ret = queue_work(ha->disc_q, &sw->work);
> + else
> + ret = scsi_queue_work(ha->core.shost, &sw->work);
> +
> if (ret != 1)
Uhh, we are mixing bool and int here... but we're forced to by
scsi_queue_work()
> sas_port_put(port);
> }
> diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
> index 2f3b736..df1d78b 100644
> --- a/drivers/scsi/libsas/sas_init.c
> +++ b/drivers/scsi/libsas/sas_init.c
> @@ -152,6 +152,13 @@ int sas_register_ha(struct sas_ha_struct *sas_ha)
> if (!sas_ha->event_q)
> goto Undo_ports;
>
> + snprintf(name, 64, "%s_disc_q", dev_name(sas_ha->dev));
> + sas_ha->disc_q = create_singlethread_workqueue(name);
> + if(!sas_ha->disc_q) {
> + destroy_workqueue(sas_ha->event_q);
> + goto Undo_ports;
> + }
> +
> INIT_LIST_HEAD(&sas_ha->eh_done_q);
> INIT_LIST_HEAD(&sas_ha->eh_ata_q);
>
> @@ -187,6 +194,7 @@ int sas_unregister_ha(struct sas_ha_struct *sas_ha)
> __sas_drain_work(sas_ha);
> mutex_unlock(&sas_ha->drain_mutex);
> destroy_workqueue(sas_ha->event_q);
> + destroy_workqueue(sas_ha->disc_q);
>
> return 0;
> }
> diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
> index c2ef05e..4bcb9fe 100644
> --- a/include/scsi/libsas.h
> +++ b/include/scsi/libsas.h
> @@ -406,6 +406,7 @@ struct sas_ha_struct {
> struct device *dev; /* should be set */
> struct module *lldd_module; /* should be set */
> struct workqueue_struct *event_q;
> + struct workqueue_struct *disc_q;
>
> u8 *sas_addr; /* must be set */
> u8 hashed_sas_addr[HASHED_SAS_ADDR_SIZE];
>
在 2017/7/12 17:59, John Garry 写道:
> On 10/07/2017 08:06, Yijing Wang wrote:
>> This patchset is based Johannes's patch
>> "scsi: sas: scsi_queue_work can fail, so make callers aware"
>>
>> Now the libsas hotplug has some issues, Dan Williams report
>> a similar bug here before
>> https://www.mail-archive.com/[email protected]/msg39187.html
>>
>> The issues we have found
>> 1. if LLDD burst reports lots of phy-up/phy-down sas events, some events
>> may lost because a same sas events is pending now, finally libsas topo
>> may different the hardware.
>> 2. receive a phy down sas event, libsas call sas_deform_port to remove
>> devices, it would first delete the sas port, then put a destruction
>> discovery event in a new work, and queue it at the tail of workqueue,
>> once the sas port be deleted, its children device will be deleted too,
>> when the destruction work start, it will found the target device has
>> been removed, and report a sysfs warnning.
>> 3. since a hotplug process will be devided into several works, if a phy up
>> sas event insert into phydown works, like
>> destruction work ---> PORTE_BYTES_DMAED (sas_form_port) ---->PHYE_LOSS_OF_SIGNAL
>> the hot remove flow would broken by PORTE_BYTES_DMAED event, it's not
>> we expected, and issues would occur.
>>
>> The first patch fix the sas events lost, and the second one introudce wait-complete
>> to fix the hotplug order issues.
>>
>
> I quickly tested this for basic hotplug.
>
> Before:
> root@(none)$ echo 0 > ./phy-0:6/sas_phy/phy-0:6/enable
> root@(none)$ echo 0 > ./phy-0:5/sas_phy/phy-0:5/enable
> root@(none)$ echo 0 > ./phy-0:4/sas_phy/phy-0:4/enable
> root@(none)$ echo 0 > ./phy-0:3/sas_phy/phy-0:3/enable
> root@(none)$ echo 0 > ./phy-0:3/sas_phy/phy-0:2/enable
> root@(none)$ echo 0 > ./phy-0:2/sas_phy/phy-0:2/enable
> root@(none)$ echo 0 > ./phy-0:1/sas_phy/phy-0:1/enable
> root@(none)$ echo 0 > ./phy-0:0/sas_phy/phy-0:0/enable
> root@(none)$ echo 0 > ./phy-0:7/sas_phy/phy-0:7/enable
> root@(none)$ [ 102.570694] sysfs group 'power' not found for kobject '0:0:7:0'
> [ 102.577250] ------------[ cut here ]------------
> [ 102.581861] WARNING: CPU: 3 PID: 1740 at fs/sysfs/group.c:237 sysfs_remove_group+0x8c/0x94
> [ 102.590110] Modules linked in:
> [ 102.593154] CPU: 3 PID: 1740 Comm: kworker/u128:2 Not tainted 4.12.0-rc1-00032-g3ab81fc #1907
> [ 102.601664] Hardware name: Huawei Taishan 2280 /D05, BIOS Hisilicon D05 UEFI Nemo 1.7 RC3 06/23/2017
> [ 102.610784] Workqueue: scsi_wq_0 sas_destruct_devices
> [ 102.615822] task: ffff8017d4793400 task.stack: ffff8017b7e70000
> [ 102.621728] PC is at sysfs_remove_group+0x8c/0x94
> [ 102.626419] LR is at sysfs_remove_group+0x8c/0x94
> [ 102.631109] pc : [<ffff000008267c44>] lr : [<ffff000008267c44>] pstate: 60000045
> [ 102.638490] sp : ffff8017b7e73b80
> [ 102.641791] x29: ffff8017b7e73b80 x28: ffff8017db010800
> [ 102.647091] x27: ffff000008e27000 x26: ffff8017d43e6600
> [ 102.652390] x25: ffff8017b8280000 x24: 0000000000000003
> [ 102.657689] x23: ffff8017b78864b0 x22: ffff8017b784c988
> [ 102.662988] x21: ffff8017b7886410 x20: ffff000008ee9dd0
> [ 102.668288] x19: 0000000000000000 x18: ffff000008a1b678
> [ 102.673587] x17: 000000000000000e x16: 0000000000000007
> [ 102.678886] x15: 0000000000000000 x14: 00000000000000a3
> [ 102.684185] x13: 0000000000000033 x12: 0000000000000028
> [ 102.689484] x11: ffff000008f3be58 x10: 0000000000000000
> [ 102.694783] x9 : 000000000000043c x8 : 6f6b20726f662064
> [ 102.700082] x7 : ffff000008e29e08 x6 : ffff8017fbe34c50
> [ 102.705382] x5 : 0000000000000000 x4 : 0000000000000000
> [ 102.710681] x3 : ffffffffffffffff x2 : ffff000008e427e0
> [ 102.715980] x1 : 0000000000000000 x0 : 0000000000000033
> [ 102.721279] ---[ end trace c216cc1451d5f7ec ]---
> [ 102.725882] Call trace:
> [ 102.728316] Exception stack(0xffff8017b7e739b0 to 0xffff8017b7e73ae0)
> [ 102.734742] 39a0: 0000000000000000 0001000000000000
> [ 102.742557] 39c0: ffff8017b7e73b80 ffff000008267c44 ffff000008bfa050 0000000000000000
> [ 102.750372] 39e0: ffff8017b78864b0 0000000000000003 ffff8017b8280000 ffff8017d43e6600
> [ 102.758188] 3a00: ffff000008e27000 ffff8017db010800 ffff8017d4793400 0000000000000000
> [ 102.766003] 3a20: ffff8017b7e73b80 ffff8017b7e73b80 ffff8017b7e73b40 00000000ffffffc8
> [ 102.773818] 3a40: ffff8017b7e73a70 ffff00000810c12c 0000000000000033 0000000000000000
> [ 102.781633] 3a60: ffff000008e427e0 ffffffffffffffff 0000000000000000 0000000000000000
> [ 102.789449] 3a80: ffff8017fbe34c50 ffff000008e29e08 6f6b20726f662064 000000000000043c
> [ 102.797264] 3aa0: 0000000000000000 ffff000008f3be58 0000000000000028 0000000000000033
> [ 102.805079] 3ac0: 00000000000000a3 0000000000000000 0000000000000007 000000000000000e
> [ 102.812895] [<ffff000008267c44>] sysfs_remove_group+0x8c/0x94
> [ 102.818628] [<ffff00000855b14c>] dpm_sysfs_remove+0x58/0x68
> [ 102.824188] [<ffff00000854e0e8>] device_del+0xf8/0x2d0
> [ 102.829312] [<ffff00000854e2d4>] device_unregister+0x14/0x2c
> [ 102.834959] [<ffff00000837e6e0>] bsg_unregister_queue+0x60/0x98
> [ 102.840866] [<ffff000008593cd4>] __scsi_remove_device+0xa0/0xbc
>
> <snip>
>
> [ 151.331854] 3bc0: ffff0000081f21ac 0000ffff803370c0
> [ 151.336718] [<ffff000008267c44>] sysfs_remove_group+0x8c/0x94
> [ 151.342449] [<ffff00000855b14c>] dpm_sysfs_remove+0x58/0x68
> [ 151.348008] [<ffff00000854e0e8>] device_del+0xf8/0x2d0
> [ 151.353133] [<ffff000008597278>] sas_rphy_remove+0x54/0x80
> [ 151.358604] [<ffff0000085972b8>] sas_rphy_delete+0x14/0x28
> [ 151.364076] [<ffff00000859b304>] sas_destruct_devices+0x64/0x98
> [ 151.369982] [<ffff0000080d8194>] process_one_work+0x12c/0x28c
> [ 151.375714] [<ffff0000080d834c>] worker_thread+0x58/0x3b8
> [ 151.381100] [<ffff0000080ddee4>] kthread+0x100/0x12c
> [ 151.386050] [<ffff0000080836c0>] ret_from_fork+0x10/0x50
> [ 151.391360] hisi_sas_v2_hw HISI0162:01: found dev[0:2] is gone
>
> root@(none)$
>
> So the console locks for ~50 seconds with WARN garbage.
>
> After:
> ...
> root@(none)$ echo 0 > ./phy-0:7/sas_phy/phy-0:7/enable
> root@(none)$ [ 446.193336] hisi_sas_v2_hw HISI0162:01: found dev[8:1] is gone
> [ 446.249205] hisi_sas_v2_hw HISI0162:01: found dev[7:1] is gone
> [ 446.325201] hisi_sas_v2_hw HISI0162:01: found dev[6:1] is gone
> [ 446.373189] hisi_sas_v2_hw HISI0162:01: found dev[5:1] is gone
> [ 446.421187] hisi_sas_v2_hw HISI0162:01: found dev[4:1] is gone
> [ 446.457232] hisi_sas_v2_hw HISI0162:01: found dev[3:1] is gone
> [ 446.477151] sd 0:0:1:0: [sdb] Synchronizing SCSI cache
> [ 446.482373] sd 0:0:1:0: [sdb] Synchronize Cache(10) failed: Result: hostbyte=0x04 driverbyte=0x00
> [ 446.491238] sd 0:0:1:0: [sdb] Stopping disk
> [ 446.495419] sd 0:0:1:0: [sdb] Start/Stop Unit failed: Result: hostbyte=0x04 driverbyte=0x00
> [ 446.525227] hisi_sas_v2_hw HISI0162:01: found dev[2:5] is gone
> [ 446.569249] hisi_sas_v2_hw HISI0162:01: found dev[1:1] is gone
> [ 446.576872] hisi_sas_v2_hw HISI0162:01: found dev[0:2] is gone
>
> root@(none)$
>
> So much nicer. BTW, /dev/sdb is a SATA disk, the rest are SAS.
I will check the calltrace, I tested in my local branch, the result is fine.
Thanks!
Yijing.
>
> John
>
>> v2->v3: some code improvements suggested by Johannes and John,
>> split v2 patch 2 into several small pathes.
>> v1->v2: some code improvements suggested by John Garry
>>
>> Yijing Wang (7):
>> libsas: Use static sas event pool to appease sas event lost
>> libsas: remove unused port_gone_completion
>> libsas: Use new workqueue to run sas event
>> libsas: add sas event wait-complete support
>> libsas: add a new workqueue to run probe/destruct discovery event
>> libsas: add wait-complete support to sync discovery event
>> libsas: release disco mutex during waiting in sas_ex_discover_end_dev
>>
>> drivers/scsi/libsas/sas_discover.c | 58 +++++++---
>> drivers/scsi/libsas/sas_event.c | 212 ++++++++++++++++++++++++++++++++-----
>> drivers/scsi/libsas/sas_expander.c | 22 +++-
>> drivers/scsi/libsas/sas_init.c | 21 ++--
>> drivers/scsi/libsas/sas_internal.h | 64 +++++++++++
>> drivers/scsi/libsas/sas_phy.c | 48 +++------
>> drivers/scsi/libsas/sas_port.c | 22 ++--
>> include/scsi/libsas.h | 27 +++--
>> 8 files changed, 373 insertions(+), 101 deletions(-)
>>
>
>
>
> .
>
在 2017/7/12 17:59, John Garry 写道:
> On 10/07/2017 08:06, Yijing Wang wrote:
>> This patchset is based Johannes's patch
>> "scsi: sas: scsi_queue_work can fail, so make callers aware"
>>
>> Now the libsas hotplug has some issues, Dan Williams report
>> a similar bug here before
>> https://www.mail-archive.com/[email protected]/msg39187.html
>>
>> The issues we have found
>> 1. if LLDD burst reports lots of phy-up/phy-down sas events, some events
>> may lost because a same sas events is pending now, finally libsas topo
>> may different the hardware.
>> 2. receive a phy down sas event, libsas call sas_deform_port to remove
>> devices, it would first delete the sas port, then put a destruction
>> discovery event in a new work, and queue it at the tail of workqueue,
>> once the sas port be deleted, its children device will be deleted too,
>> when the destruction work start, it will found the target device has
>> been removed, and report a sysfs warnning.
>> 3. since a hotplug process will be devided into several works, if a phy up
>> sas event insert into phydown works, like
>> destruction work ---> PORTE_BYTES_DMAED (sas_form_port) ---->PHYE_LOSS_OF_SIGNAL
>> the hot remove flow would broken by PORTE_BYTES_DMAED event, it's not
>> we expected, and issues would occur.
>>
>> The first patch fix the sas events lost, and the second one introudce wait-complete
>> to fix the hotplug order issues.
>>
>
> I quickly tested this for basic hotplug.
>
> Before:
> root@(none)$ echo 0 > ./phy-0:6/sas_phy/phy-0:6/enable
> root@(none)$ echo 0 > ./phy-0:5/sas_phy/phy-0:5/enable
> root@(none)$ echo 0 > ./phy-0:4/sas_phy/phy-0:4/enable
> root@(none)$ echo 0 > ./phy-0:3/sas_phy/phy-0:3/enable
> root@(none)$ echo 0 > ./phy-0:3/sas_phy/phy-0:2/enable
> root@(none)$ echo 0 > ./phy-0:2/sas_phy/phy-0:2/enable
> root@(none)$ echo 0 > ./phy-0:1/sas_phy/phy-0:1/enable
> root@(none)$ echo 0 > ./phy-0:0/sas_phy/phy-0:0/enable
> root@(none)$ echo 0 > ./phy-0:7/sas_phy/phy-0:7/enable
> root@(none)$ [ 102.570694] sysfs group 'power' not found for kobject '0:0:7:0'
> [ 102.577250] ------------[ cut here ]------------
> [ 102.581861] WARNING: CPU: 3 PID: 1740 at fs/sysfs/group.c:237 sysfs_remove_group+0x8c/0x94
> [ 102.590110] Modules linked in:
> [ 102.593154] CPU: 3 PID: 1740 Comm: kworker/u128:2 Not tainted 4.12.0-rc1-00032-g3ab81fc #1907
> [ 102.601664] Hardware name: Huawei Taishan 2280 /D05, BIOS Hisilicon D05 UEFI Nemo 1.7 RC3 06/23/2017
> [ 102.610784] Workqueue: scsi_wq_0 sas_destruct_devices
> [ 102.615822] task: ffff8017d4793400 task.stack: ffff8017b7e70000
> [ 102.621728] PC is at sysfs_remove_group+0x8c/0x94
> [ 102.626419] LR is at sysfs_remove_group+0x8c/0x94
> [ 102.631109] pc : [<ffff000008267c44>] lr : [<ffff000008267c44>] pstate: 60000045
> [ 102.638490] sp : ffff8017b7e73b80
> [ 102.641791] x29: ffff8017b7e73b80 x28: ffff8017db010800
> [ 102.647091] x27: ffff000008e27000 x26: ffff8017d43e6600
> [ 102.652390] x25: ffff8017b8280000 x24: 0000000000000003
> [ 102.657689] x23: ffff8017b78864b0 x22: ffff8017b784c988
> [ 102.662988] x21: ffff8017b7886410 x20: ffff000008ee9dd0
> [ 102.668288] x19: 0000000000000000 x18: ffff000008a1b678
> [ 102.673587] x17: 000000000000000e x16: 0000000000000007
> [ 102.678886] x15: 0000000000000000 x14: 00000000000000a3
> [ 102.684185] x13: 0000000000000033 x12: 0000000000000028
> [ 102.689484] x11: ffff000008f3be58 x10: 0000000000000000
> [ 102.694783] x9 : 000000000000043c x8 : 6f6b20726f662064
> [ 102.700082] x7 : ffff000008e29e08 x6 : ffff8017fbe34c50
> [ 102.705382] x5 : 0000000000000000 x4 : 0000000000000000
> [ 102.710681] x3 : ffffffffffffffff x2 : ffff000008e427e0
> [ 102.715980] x1 : 0000000000000000 x0 : 0000000000000033
> [ 102.721279] ---[ end trace c216cc1451d5f7ec ]---
> [ 102.725882] Call trace:
> [ 102.728316] Exception stack(0xffff8017b7e739b0 to 0xffff8017b7e73ae0)
> [ 102.734742] 39a0: 0000000000000000 0001000000000000
> [ 102.742557] 39c0: ffff8017b7e73b80 ffff000008267c44 ffff000008bfa050 0000000000000000
> [ 102.750372] 39e0: ffff8017b78864b0 0000000000000003 ffff8017b8280000 ffff8017d43e6600
> [ 102.758188] 3a00: ffff000008e27000 ffff8017db010800 ffff8017d4793400 0000000000000000
> [ 102.766003] 3a20: ffff8017b7e73b80 ffff8017b7e73b80 ffff8017b7e73b40 00000000ffffffc8
> [ 102.773818] 3a40: ffff8017b7e73a70 ffff00000810c12c 0000000000000033 0000000000000000
> [ 102.781633] 3a60: ffff000008e427e0 ffffffffffffffff 0000000000000000 0000000000000000
> [ 102.789449] 3a80: ffff8017fbe34c50 ffff000008e29e08 6f6b20726f662064 000000000000043c
> [ 102.797264] 3aa0: 0000000000000000 ffff000008f3be58 0000000000000028 0000000000000033
> [ 102.805079] 3ac0: 00000000000000a3 0000000000000000 0000000000000007 000000000000000e
> [ 102.812895] [<ffff000008267c44>] sysfs_remove_group+0x8c/0x94
> [ 102.818628] [<ffff00000855b14c>] dpm_sysfs_remove+0x58/0x68
> [ 102.824188] [<ffff00000854e0e8>] device_del+0xf8/0x2d0
> [ 102.829312] [<ffff00000854e2d4>] device_unregister+0x14/0x2c
> [ 102.834959] [<ffff00000837e6e0>] bsg_unregister_queue+0x60/0x98
> [ 102.840866] [<ffff000008593cd4>] __scsi_remove_device+0xa0/0xbc
>
> <snip>
>
> [ 151.331854] 3bc0: ffff0000081f21ac 0000ffff803370c0
> [ 151.336718] [<ffff000008267c44>] sysfs_remove_group+0x8c/0x94
> [ 151.342449] [<ffff00000855b14c>] dpm_sysfs_remove+0x58/0x68
> [ 151.348008] [<ffff00000854e0e8>] device_del+0xf8/0x2d0
> [ 151.353133] [<ffff000008597278>] sas_rphy_remove+0x54/0x80
> [ 151.358604] [<ffff0000085972b8>] sas_rphy_delete+0x14/0x28
> [ 151.364076] [<ffff00000859b304>] sas_destruct_devices+0x64/0x98
> [ 151.369982] [<ffff0000080d8194>] process_one_work+0x12c/0x28c
> [ 151.375714] [<ffff0000080d834c>] worker_thread+0x58/0x3b8
> [ 151.381100] [<ffff0000080ddee4>] kthread+0x100/0x12c
> [ 151.386050] [<ffff0000080836c0>] ret_from_fork+0x10/0x50
> [ 151.391360] hisi_sas_v2_hw HISI0162:01: found dev[0:2] is gone
>
> root@(none)$
>
> So the console locks for ~50 seconds with WARN garbage.
>
> After:
> ...
> root@(none)$ echo 0 > ./phy-0:7/sas_phy/phy-0:7/enable
> root@(none)$ [ 446.193336] hisi_sas_v2_hw HISI0162:01: found dev[8:1] is gone
> [ 446.249205] hisi_sas_v2_hw HISI0162:01: found dev[7:1] is gone
> [ 446.325201] hisi_sas_v2_hw HISI0162:01: found dev[6:1] is gone
> [ 446.373189] hisi_sas_v2_hw HISI0162:01: found dev[5:1] is gone
> [ 446.421187] hisi_sas_v2_hw HISI0162:01: found dev[4:1] is gone
> [ 446.457232] hisi_sas_v2_hw HISI0162:01: found dev[3:1] is gone
> [ 446.477151] sd 0:0:1:0: [sdb] Synchronizing SCSI cache
> [ 446.482373] sd 0:0:1:0: [sdb] Synchronize Cache(10) failed: Result: hostbyte=0x04 driverbyte=0x00
> [ 446.491238] sd 0:0:1:0: [sdb] Stopping disk
> [ 446.495419] sd 0:0:1:0: [sdb] Start/Stop Unit failed: Result: hostbyte=0x04 driverbyte=0x00
> [ 446.525227] hisi_sas_v2_hw HISI0162:01: found dev[2:5] is gone
> [ 446.569249] hisi_sas_v2_hw HISI0162:01: found dev[1:1] is gone
> [ 446.576872] hisi_sas_v2_hw HISI0162:01: found dev[0:2] is gone
>
> root@(none)$
>
> So much nicer. BTW, /dev/sdb is a SATA disk, the rest are SAS.
Oh, I take a mistake ? The result you tested the hotplug which applied this patchset is fine ?
Thanks!
Yijing.
>
> John
>
>> v2->v3: some code improvements suggested by Johannes and John,
>> split v2 patch 2 into several small pathes.
>> v1->v2: some code improvements suggested by John Garry
>>
>> Yijing Wang (7):
>> libsas: Use static sas event pool to appease sas event lost
>> libsas: remove unused port_gone_completion
>> libsas: Use new workqueue to run sas event
>> libsas: add sas event wait-complete support
>> libsas: add a new workqueue to run probe/destruct discovery event
>> libsas: add wait-complete support to sync discovery event
>> libsas: release disco mutex during waiting in sas_ex_discover_end_dev
>>
>> drivers/scsi/libsas/sas_discover.c | 58 +++++++---
>> drivers/scsi/libsas/sas_event.c | 212 ++++++++++++++++++++++++++++++++-----
>> drivers/scsi/libsas/sas_expander.c | 22 +++-
>> drivers/scsi/libsas/sas_init.c | 21 ++--
>> drivers/scsi/libsas/sas_internal.h | 64 +++++++++++
>> drivers/scsi/libsas/sas_phy.c | 48 +++------
>> drivers/scsi/libsas/sas_port.c | 22 ++--
>> include/scsi/libsas.h | 27 +++--
>> 8 files changed, 373 insertions(+), 101 deletions(-)
>>
>
>
>
> .
>
>>>> There is no special meaning for the pool size, if flutter of > 25 events, notify sas events will return error, and the further step work is depending on LLDD drivers.
>>>> I hope libsas could do more work in this case, but now it seems a little difficult, this patch may be a interim fix, until we find a perfect solution.
>>>
>>> The principal of having a fixed-sized pool is ok, even though the pool size needs more consideration.
>>>
>>> However my issue is how to handle pool exhaustion. For a start, relaying info to the LLDD that the event notification failed is probably not the way to go. I only now noticed "scsi: sas: scsi_queue_work can fail, so make callers aware" made it into the kernel; as I mentioned in response to this patch, the LLDD does not know how to handle this (and no LLDDs do actually handle this).
>>>
>>> I would say it is better to shut down the PHY from libsas (As Dan mentioned in the v1 series) when the pool exhausts, under the assumption that the PHY has gone into some erroneous state. The user can later re-enable the PHY from sysfs, if required.
>>
>> I considered this suggestion, and what I am worried about are, first if we disable phy once the sas event pool exhausts, it may hurt the pending sas event process which has been queued,
>
> I don't see how it affects currently queued events - they should just be processed normally. As for LLDD reporting events when the pool is exhausted, they are just lost.
So if we disable a phy, it's nothing affect to the already queued sas event process, which including access the phy to find target device ?
>
>> second, if phy was disabled, and no one trigger the reenable by sysfs, the LLDD has no way to post new sas phy events.
>
> For the extreme scenario of pool becoming exhausted and PHY being disabled, it should remain disabled until user takes some action to fix originating problem.
So we should print explicit message to tell user what's happen and how to fix it.
Thanks!
Yijing.
>
>>
>> Thanks!
>> Yijing.
>>
>>>
>>> Much appreciated,
>>> John
>>>
>>>>
>>>> Thanks!
>>>> Yijing.
>>>>
>>>>>
>>>>> Thanks,
>>>>> John
>>>>>
>>>>>
>>>>> .
>>>>>
>>>>
>>>>
>>>> .
>>>>
>>>
>>>
>>>
>>> .
>>>
>>
>>
>> .
>>
>
>
>
> .
>
在 2017/7/12 21:51, John Garry 写道:
> On 10/07/2017 08:06, Yijing Wang wrote:
>>
>> static void sas_chain_event(int event, unsigned long *pending,
>> @@ -592,9 +596,9 @@ int sas_discover_event(struct asd_sas_port *port, enum discover_event ev)
>> {
>> struct sas_discovery *disc;
>>
>> + disc = &port->disc;
>> if (!port)
>> return 0;
>> - disc = &port->disc;
>>
>> BUG_ON(ev >= DISC_NUM_EVENTS);
>>
>> diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
>> index 570b2cb..9d26c28 1
>
> I was just looking through the code and I noticed this, above. Is there a specific reason to move the NULL check, or was it modified accidentally?
>
> I mean, if port is NULL I don't think we would get as far as checking it as we would have already de-referenced it.
Oh, sorry, it's a accidental change, good catch, thanks!
>
>
>
> .
>
在 2017/7/13 0:50, John Garry 写道:
> On 10/07/2017 08:06, Yijing Wang wrote:
>> Sometimes, we want sync libsas probe or destruct in sas discovery work,
>> like when libsas revalidate domain. We need to split probe and destruct
>> work from the scsi host workqueue.
>>
>> Signed-off-by: Yijing Wang <[email protected]>
>> CC: John Garry <[email protected]>
>> CC: Johannes Thumshirn <[email protected]>
>> CC: Ewan Milne <[email protected]>
>> CC: Christoph Hellwig <[email protected]>
>> CC: Tomas Henzl <[email protected]>
>> CC: Dan Williams <[email protected]>
>> ---
>> drivers/scsi/libsas/sas_discover.c | 13 ++++++++++++-
>> drivers/scsi/libsas/sas_init.c | 8 ++++++++
>> include/scsi/libsas.h | 1 +
>> 3 files changed, 21 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
>> index 5d4a3a8..a25d648 100644
>> --- a/drivers/scsi/libsas/sas_discover.c
>> +++ b/drivers/scsi/libsas/sas_discover.c
>> @@ -559,7 +559,18 @@ static void sas_chain_work(struct sas_ha_struct *ha, struct sas_work *sw)
>> * not racing against draining
>> */
>> sas_port_get(port);
>> - ret = scsi_queue_work(ha->core.shost, &sw->work);
>> + /*
>> + * discovery event probe and destruct would be called in other
>> + * discovery event like discover domain and revalidate domain
>> + * events, in some cases, we need to sync execute probe and destruct
>> + * events, so run probe and destruct discover events except in a new
>> + * workqueue.
>
> Can we just use ha->disc_q for all discovery events for simplicity? Would this solve the disco mutex you try to solve in patch 7/7?
No, since we could queue sas discovery probe/destruct event in sas discovery work(like sas_revalidate_domain)
sas_revalidate_domain
sas_ex_revalidate_domain
sas_rediscover
sas_rediscover_dev
sas_unregister_devs_sas_addr
sas_unregister_dev
sas_discover_event(dev->port, DISCE_DESTRUCT)
sas_discover_new
sas_ex_discover_devices
sas_ex_discover_dev
sas_ex_discover_end_dev
sas_discover_end_dev
sas_discover_event(dev->port, DISCE_PROBE)
So we could not sync probe or destruct sas discovery event in sas_revalidate_domain if they are queued in a same workqueue,
or it would block for ever.
>
>> + */
>> + if (ev->type == DISCE_PROBE || ev->type == DISCE_DESTRUCT)
>> + ret = queue_work(ha->disc_q, &sw->work);
>> + else
>> + ret = scsi_queue_work(ha->core.shost, &sw->work);
>> +
>> if (ret != 1)
>
> Uhh, we are mixing bool and int here... but we're forced to by scsi_queue_work()
Eagle eye :), I could check the return value separately.
Thanks!
Yijing.
>
>> sas_port_put(port);
>> }
>> diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
>> index 2f3b736..df1d78b 100644
>> --- a/drivers/scsi/libsas/sas_init.c
>> +++ b/drivers/scsi/libsas/sas_init.c
>> @@ -152,6 +152,13 @@ int sas_register_ha(struct sas_ha_struct *sas_ha)
>> if (!sas_ha->event_q)
>> goto Undo_ports;
>>
>> + snprintf(name, 64, "%s_disc_q", dev_name(sas_ha->dev));
>> + sas_ha->disc_q = create_singlethread_workqueue(name);
>> + if(!sas_ha->disc_q) {
>> + destroy_workqueue(sas_ha->event_q);
>> + goto Undo_ports;
>> + }
>> +
>> INIT_LIST_HEAD(&sas_ha->eh_done_q);
>> INIT_LIST_HEAD(&sas_ha->eh_ata_q);
>>
>> @@ -187,6 +194,7 @@ int sas_unregister_ha(struct sas_ha_struct *sas_ha)
>> __sas_drain_work(sas_ha);
>> mutex_unlock(&sas_ha->drain_mutex);
>> destroy_workqueue(sas_ha->event_q);
>> + destroy_workqueue(sas_ha->disc_q);
>>
>> return 0;
>> }
>> diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
>> index c2ef05e..4bcb9fe 100644
>> --- a/include/scsi/libsas.h
>> +++ b/include/scsi/libsas.h
>> @@ -406,6 +406,7 @@ struct sas_ha_struct {
>> struct device *dev; /* should be set */
>> struct module *lldd_module; /* should be set */
>> struct workqueue_struct *event_q;
>> + struct workqueue_struct *disc_q;
>>
>> u8 *sas_addr; /* must be set */
>> u8 hashed_sas_addr[HASHED_SAS_ADDR_SIZE];
>>
>
>
>
> .
>
On 13/07/2017 02:37, wangyijing wrote:
>> > So much nicer. BTW, /dev/sdb is a SATA disk, the rest are SAS.
> Oh, I take a mistake ? The result you tested the hotplug which applied this patchset is fine ?
>
> Thanks!
> Yijing.
Well basic hotplug is fine, as below. I did not do any robust testing.
root@(none)$ echo 0 > ./phy-0:7/sas_phy/phy-0:7/enable
root@(none)$ [ 180.147676] hisi_sas_v2_hw HISI0162:01: found dev[8:1]
is gone
[ 180.216558] hisi_sas_v2_hw HISI0162:01: found dev[7:1] is gone
[ 180.280548] hisi_sas_v2_hw HISI0162:01: found dev[6:1] is gone
[ 180.352556] hisi_sas_v2_hw HISI0162:01: found dev[5:1] is gone
[ 180.432495] hisi_sas_v2_hw HISI0162:01: found dev[4:1] is gone
[ 180.508492] hisi_sas_v2_hw HISI0162:01: found dev[3:1] is gone
[ 180.527577] sd 0:0:1:0: [sdb] Synchronizing SCSI cache
[ 180.532728] sd 0:0:1:0: [sdb] Synchronize Cache(10) failed: Result:
hostbyte=0x04 driverbyte=0x00
[ 180.541591] sd 0:0:1:0: [sdb] Stopping disk
[ 180.545767] sd 0:0:1:0: [sdb] Start/Stop Unit failed: Result:
hostbyte=0x04 driverbyte=0x00
[ 180.612491] hisi_sas_v2_hw HISI0162:01: found dev[2:5] is gone
[ 180.696452] hisi_sas_v2_hw HISI0162:01: found dev[1:1] is gone
[ 180.703221] hisi_sas_v2_hw HISI0162:01: found dev[0:2] is gone
root@(none)$ echo 1 > ./phy-0:7/sas_phy/phy-0:7/enable
root@(none)$ [ 185.937831] hisi_sas_v2_hw HISI0162:01: phyup: phy7
link_rate=11
[ 185.996575] scsi 0:0:8:0: Direct-Access SanDisk LT0200MO
P404 PQ: 0 ANSI: 6
[ 187.059642] ata2.00: ATA-8: HGST HUS724040ALA640, MFAOA8B0, max UDMA/133
[ 187.066341] ata2.00: 7814037168 sectors, multi 0: LBA48 NCQ (depth 31/32)
[ 187.073278] ata2.00: ATA Identify Device Log not supported
[ 187.078755] ata2.00: Security Log not supported
[ 187.085239] ata2.00: ATA Identify Device Log not supported
[ 187.090715] ata2.00: Security Log not supported
[ 187.095236] ata2.00: configured for UDMA/133
[ 187.136917] scsi 0:0:9:0: Direct-Access ATA HGST HUS724040AL
A8B0 PQ: 0 ANSI: 5
[ 187.187612] sd 0:0:9:0: [sdb] 7814037168 512-byte logical blocks:
(4.00 TB/3.64 TiB)
[ 187.195365] sd 0:0:9:0: [sdb] Write Protect is off
[ 187.200161] sd 0:0:9:0: [sdb] Write cache: enabled, read cache:
enabled, doesn't support DPO or FUA
[ 187.223844] sd 0:0:9:0: [sdb] Attached SCSI disk
[ 187.225498] scsi 0:0:10:0: Direct-Access SanDisk LT0200MO
P404 PQ: 0 ANSI: 6
[ 187.243864] sd 0:0:8:0: [sda] 390721968 512-byte logical blocks: (200
GB/186 GiB)
[ 187.285879] sd 0:0:8:0: [sda] Write Protect is off
[ 187.367898] sd 0:0:8:0: [sda] Write cache: disabled, read cache:
disabled, supports DPO and FUA
[ 187.524043] scsi 0:0:11:0: Direct-Access SanDisk LT0200MO
P404 PQ: 0 ANSI: 6
[ 187.701505] sd 0:0:10:0: [sdc] 390721968 512-byte logical blocks:
(200 GB/186 GiB)
[ 187.743547] sd 0:0:10:0: [sdc] Write Protect is off
[ 187.822546] scsi 0:0:12:0: Direct-Access SanDisk LT0200MO
P404 PQ: 0 ANSI: 6
[ 187.825531] sd 0:0:10:0: [sdc] Write cache: disabled, read cache:
disabled, supports DPO and FUA
[ 188.000167] sd 0:0:11:0: [sdd] 390721968 512-byte logical blocks:
(200 GB/186 GiB)
[ 188.042205] sd 0:0:11:0: [sdd] Write Protect is off
[ 188.121527] scsi 0:0:13:0: Direct-Access SanDisk LT0200MO
P404 PQ: 0 ANSI: 6
[ 188.124274] sd 0:0:11:0: [sdd] Write cache: disabled, read cache:
disabled, supports DPO and FUA
[ 188.298942] sd 0:0:12:0: [sde] 390721968 512-byte logical blocks:
(200 GB/186 GiB)
[ 188.340960] sd 0:0:12:0: [sde] Write Protect is off
[ 188.420023] scsi 0:0:14:0: Direct-Access SanDisk LT0200MO
P404 PQ: 0 ANSI: 6
[ 188.422969] sd 0:0:12:0: [sde] Write cache: disabled, read cache:
disabled, supports DPO and FUA
[ 188.597501] sd 0:0:13:0: [sdf] 390721968 512-byte logical blocks:
(200 GB/186 GiB)
[ 188.605069] sd 0:0:8:0: [sda] Attached SCSI disk
[ 188.639520] sd 0:0:13:0: [sdf] Write Protect is off
[ 188.682445] scsi 0:0:15:0: Enclosure 12G SAS Expander
RevB PQ: 0 ANSI: 6
[ 188.721540] sd 0:0:13:0: [sdf] Write cache: disabled, read cache:
disabled, supports DPO and FUA
[ 188.896399] sd 0:0:14:0: [sdg] 390721968 512-byte logical blocks:
(200 GB/186 GiB)
[ 188.938445] sd 0:0:14:0: [sdg] Write Protect is off
[ 189.020444] sd 0:0:14:0: [sdg] Write cache: disabled, read cache:
disabled, supports DPO and FUA
[ 189.060608] sd 0:0:10:0: [sdc] Attached SCSI disk
[ 189.359073] sd 0:0:11:0: [sdd] Attached SCSI disk
[ 189.657643] sd 0:0:12:0: [sde] Attached SCSI disk
[ 189.956585] sd 0:0:13:0: [sdf] Attached SCSI disk
[ 190.255148] sd 0:0:14:0: [sdg] Attached SCSI disk
root@(none)$ echo 0 > ./phy-0:7/sas_phy/phy-0:7/enable
root@(none)$ [ 192.895718] hisi_sas_v2_hw HISI0162:01: found dev[8:1]
is gone
[ 192.964671] hisi_sas_v2_hw HISI0162:01: found dev[7:1] is gone
[ 193.032744] hisi_sas_v2_hw HISI0162:01: found dev[6:1] is gone
[ 193.096755] hisi_sas_v2_hw HISI0162:01: found dev[5:1] is gone
[ 193.157072] hisi_sas_v2_hw HISI0162:01: found dev[4:1] is gone
[ 193.221062] hisi_sas_v2_hw HISI0162:01: found dev[3:1] is gone
[ 193.247684] sd 0:0:9:0: [sdb] Synchronizing SCSI cache
[ 193.252834] sd 0:0:9:0: [sdb] Synchronize Cache(10) failed: Result:
hostbyte=0x04 driverbyte=0x00
[ 193.261701] sd 0:0:9:0: [sdb] Stopping disk
[ 193.265879] sd 0:0:9:0: [sdb] Start/Stop Unit failed: Result:
hostbyte=0x04 driverbyte=0x00
[ 193.325165] hisi_sas_v2_hw HISI0162:01: found dev[2:5] is gone
[ 193.381094] hisi_sas_v2_hw HISI0162:01: found dev[1:1] is gone
[ 193.388719] hisi_sas_v2_hw HISI0162:01: found dev[0:2] is gone
root@(none)$ echo 1 > ./phy-0:7/sas_phy/phy-0:7/enable
root@(none)$ [ 196.221879] hisi_sas_v2_hw HISI0162:01: phyup: phy7
link_rate=11
[ 196.281178] scsi 0:0:16:0: Direct-Access SanDisk LT0200MO
P404 PQ: 0 ANSI: 6
[ 196.973390] ata3.00: ATA-8: HGST HUS724040ALA640, MFAOA8B0, max UDMA/133
[ 196.980088] ata3.00: 7814037168 sectors, multi 0: LBA48 NCQ (depth 31/32)
[ 196.987022] ata3.00: ATA Identify Device Log not supported
[ 196.992499] ata3.00: Security Log not supported
[ 196.998953] ata3.00: ATA Identify Device Log not supported
[ 197.004434] ata3.00: Security Log not supported
[ 197.008954] ata3.00: configured for UDMA/133
[ 197.050428] scsi 0:0:17:0: Direct-Access ATA HGST
HUS724040AL A8B0 PQ: 0 ANSI: 5
[ 197.091593] sd 0:0:17:0: [sdb] 7814037168 512-byte logical blocks:
(4.00 TB/3.64 TiB)
[ 197.099441] sd 0:0:17:0: [sdb] Write Protect is off
[ 197.104326] sd 0:0:17:0: [sdb] Write cache: enabled, read cache:
enabled, doesn't support DPO or FUA
[ 197.122982] sd 0:0:17:0: [sdb] Attached SCSI disk
[ 197.129367] scsi 0:0:18:0: Direct-Access SanDisk LT0200MO
P404 PQ: 0 ANSI: 6
[ 197.157605] sd 0:0:16:0: [sda] 390721968 512-byte logical blocks:
(200 GB/186 GiB)
[ 197.199622] sd 0:0:16:0: [sda] Write Protect is off
[ 197.281604] sd 0:0:16:0: [sda] Write cache: disabled, read cache:
disabled, supports DPO and FUA
[ 197.427727] scsi 0:0:19:0: Direct-Access SanDisk LT0200MO
P404 PQ: 0 ANSI: 6
[ 197.605362] sd 0:0:18:0: [sdc] 390721968 512-byte logical blocks:
(200 GB/186 GiB)
[ 197.647409] sd 0:0:18:0: [sdc] Write Protect is off
[ 197.726403] scsi 0:0:20:0: Direct-Access SanDisk LT0200MO
P404 PQ: 0 ANSI: 6
[ 197.729389] sd 0:0:18:0: [sdc] Write cache: disabled, read cache:
disabled, supports DPO and FUA
[ 197.903664] sd 0:0:19:0: [sdd] 390721968 512-byte logical blocks:
(200 GB/186 GiB)
[ 197.945699] sd 0:0:19:0: [sdd] Write Protect is off
[ 198.024789] scsi 0:0:21:0: Direct-Access SanDisk LT0200MO
P404 PQ: 0 ANSI: 6
[ 198.027685] sd 0:0:19:0: [sdd] Write cache: disabled, read cache:
disabled, supports DPO and FUA
[ 198.202410] sd 0:0:20:0: [sde] 390721968 512-byte logical blocks:
(200 GB/186 GiB)
[ 198.244458] sd 0:0:20:0: [sde] Write Protect is off
[ 198.323137] scsi 0:0:22:0: Direct-Access SanDisk LT0200MO
P404 PQ: 0 ANSI: 6
[ 198.326491] sd 0:0:20:0: [sde] Write cache: disabled, read cache:
disabled, supports DPO and FUA
[ 198.500754] sd 0:0:21:0: [sdf] 390721968 512-byte logical blocks:
(200 GB/186 GiB)
[ 198.516373] sd 0:0:16:0: [sda] Attached SCSI disk
[ 198.542773] sd 0:0:21:0: [sdf] Write Protect is off
[ 198.585184] scsi 0:0:23:0: Enclosure 12G SAS Expander
RevB PQ: 0 ANSI: 6
[ 198.624911] sd 0:0:21:0: [sdf] Write cache: disabled, read cache:
disabled, supports DPO and FUA
[ 198.799446] sd 0:0:22:0: [sdg] 390721968 512-byte logical blocks:
(200 GB/186 GiB)
[ 198.841484] sd 0:0:22:0: [sdg] Write Protect is off
[ 198.923508] sd 0:0:22:0: [sdg] Write cache: disabled, read cache:
disabled, supports DPO and FUA
[ 198.964420] sd 0:0:18:0: [sdc] Attached SCSI disk
[ 199.262903] sd 0:0:19:0: [sdd] Attached SCSI disk
[ 199.561175] sd 0:0:20:0: [sde] Attached SCSI disk
[ 199.859533] sd 0:0:21:0: [sdf] Attached SCSI disk
root@(none)$ [ 200.158121] sd 0:0:22:0: [sdg] Attached SCSI disk
root@(none)$
>
>
>> >
>> > John
在 2017/7/13 16:08, John Garry 写道:
> On 13/07/2017 02:37, wangyijing wrote:
>>> > So much nicer. BTW, /dev/sdb is a SATA disk, the rest are SAS.
>> Oh, I take a mistake ? The result you tested the hotplug which applied this patchset is fine ?
>>
>> Thanks!
>> Yijing.
>
> Well basic hotplug is fine, as below. I did not do any robust testing.
>
OK, thanks,I tested with and without fio running, the results are both fine.
Thanks!
Yijing.
> root@(none)$ echo 0 > ./phy-0:7/sas_phy/phy-0:7/enable
> root@(none)$ [ 180.147676] hisi_sas_v2_hw HISI0162:01: found dev[8:1] is gone
> [ 180.216558] hisi_sas_v2_hw HISI0162:01: found dev[7:1] is gone
> [ 180.280548] hisi_sas_v2_hw HISI0162:01: found dev[6:1] is gone
> [ 180.352556] hisi_sas_v2_hw HISI0162:01: found dev[5:1] is gone
> [ 180.432495] hisi_sas_v2_hw HISI0162:01: found dev[4:1] is gone
> [ 180.508492] hisi_sas_v2_hw HISI0162:01: found dev[3:1] is gone
> [ 180.527577] sd 0:0:1:0: [sdb] Synchronizing SCSI cache
> [ 180.532728] sd 0:0:1:0: [sdb] Synchronize Cache(10) failed: Result: hostbyte=0x04 driverbyte=0x00
> [ 180.541591] sd 0:0:1:0: [sdb] Stopping disk
> [ 180.545767] sd 0:0:1:0: [sdb] Start/Stop Unit failed: Result: hostbyte=0x04 driverbyte=0x00
> [ 180.612491] hisi_sas_v2_hw HISI0162:01: found dev[2:5] is gone
> [ 180.696452] hisi_sas_v2_hw HISI0162:01: found dev[1:1] is gone
> [ 180.703221] hisi_sas_v2_hw HISI0162:01: found dev[0:2] is gone
>
> root@(none)$ echo 1 > ./phy-0:7/sas_phy/phy-0:7/enable
> root@(none)$ [ 185.937831] hisi_sas_v2_hw HISI0162:01: phyup: phy7 link_rate=11
> [ 185.996575] scsi 0:0:8:0: Direct-Access SanDisk LT0200MO P404 PQ: 0 ANSI: 6
> [ 187.059642] ata2.00: ATA-8: HGST HUS724040ALA640, MFAOA8B0, max UDMA/133
> [ 187.066341] ata2.00: 7814037168 sectors, multi 0: LBA48 NCQ (depth 31/32)
> [ 187.073278] ata2.00: ATA Identify Device Log not supported
> [ 187.078755] ata2.00: Security Log not supported
> [ 187.085239] ata2.00: ATA Identify Device Log not supported
> [ 187.090715] ata2.00: Security Log not supported
> [ 187.095236] ata2.00: configured for UDMA/133
> [ 187.136917] scsi 0:0:9:0: Direct-Access ATA HGST HUS724040AL A8B0 PQ: 0 ANSI: 5
> [ 187.187612] sd 0:0:9:0: [sdb] 7814037168 512-byte logical blocks: (4.00 TB/3.64 TiB)
> [ 187.195365] sd 0:0:9:0: [sdb] Write Protect is off
> [ 187.200161] sd 0:0:9:0: [sdb] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
> [ 187.223844] sd 0:0:9:0: [sdb] Attached SCSI disk
> [ 187.225498] scsi 0:0:10:0: Direct-Access SanDisk LT0200MO P404 PQ: 0 ANSI: 6
> [ 187.243864] sd 0:0:8:0: [sda] 390721968 512-byte logical blocks: (200 GB/186 GiB)
> [ 187.285879] sd 0:0:8:0: [sda] Write Protect is off
> [ 187.367898] sd 0:0:8:0: [sda] Write cache: disabled, read cache: disabled, supports DPO and FUA
> [ 187.524043] scsi 0:0:11:0: Direct-Access SanDisk LT0200MO P404 PQ: 0 ANSI: 6
> [ 187.701505] sd 0:0:10:0: [sdc] 390721968 512-byte logical blocks: (200 GB/186 GiB)
> [ 187.743547] sd 0:0:10:0: [sdc] Write Protect is off
> [ 187.822546] scsi 0:0:12:0: Direct-Access SanDisk LT0200MO P404 PQ: 0 ANSI: 6
> [ 187.825531] sd 0:0:10:0: [sdc] Write cache: disabled, read cache: disabled, supports DPO and FUA
> [ 188.000167] sd 0:0:11:0: [sdd] 390721968 512-byte logical blocks: (200 GB/186 GiB)
> [ 188.042205] sd 0:0:11:0: [sdd] Write Protect is off
> [ 188.121527] scsi 0:0:13:0: Direct-Access SanDisk LT0200MO P404 PQ: 0 ANSI: 6
> [ 188.124274] sd 0:0:11:0: [sdd] Write cache: disabled, read cache: disabled, supports DPO and FUA
> [ 188.298942] sd 0:0:12:0: [sde] 390721968 512-byte logical blocks: (200 GB/186 GiB)
> [ 188.340960] sd 0:0:12:0: [sde] Write Protect is off
> [ 188.420023] scsi 0:0:14:0: Direct-Access SanDisk LT0200MO P404 PQ: 0 ANSI: 6
> [ 188.422969] sd 0:0:12:0: [sde] Write cache: disabled, read cache: disabled, supports DPO and FUA
> [ 188.597501] sd 0:0:13:0: [sdf] 390721968 512-byte logical blocks: (200 GB/186 GiB)
> [ 188.605069] sd 0:0:8:0: [sda] Attached SCSI disk
> [ 188.639520] sd 0:0:13:0: [sdf] Write Protect is off
> [ 188.682445] scsi 0:0:15:0: Enclosure 12G SAS Expander RevB PQ: 0 ANSI: 6
> [ 188.721540] sd 0:0:13:0: [sdf] Write cache: disabled, read cache: disabled, supports DPO and FUA
> [ 188.896399] sd 0:0:14:0: [sdg] 390721968 512-byte logical blocks: (200 GB/186 GiB)
> [ 188.938445] sd 0:0:14:0: [sdg] Write Protect is off
> [ 189.020444] sd 0:0:14:0: [sdg] Write cache: disabled, read cache: disabled, supports DPO and FUA
> [ 189.060608] sd 0:0:10:0: [sdc] Attached SCSI disk
> [ 189.359073] sd 0:0:11:0: [sdd] Attached SCSI disk
> [ 189.657643] sd 0:0:12:0: [sde] Attached SCSI disk
> [ 189.956585] sd 0:0:13:0: [sdf] Attached SCSI disk
> [ 190.255148] sd 0:0:14:0: [sdg] Attached SCSI disk
>
> root@(none)$ echo 0 > ./phy-0:7/sas_phy/phy-0:7/enable
> root@(none)$ [ 192.895718] hisi_sas_v2_hw HISI0162:01: found dev[8:1] is gone
> [ 192.964671] hisi_sas_v2_hw HISI0162:01: found dev[7:1] is gone
> [ 193.032744] hisi_sas_v2_hw HISI0162:01: found dev[6:1] is gone
> [ 193.096755] hisi_sas_v2_hw HISI0162:01: found dev[5:1] is gone
> [ 193.157072] hisi_sas_v2_hw HISI0162:01: found dev[4:1] is gone
> [ 193.221062] hisi_sas_v2_hw HISI0162:01: found dev[3:1] is gone
> [ 193.247684] sd 0:0:9:0: [sdb] Synchronizing SCSI cache
> [ 193.252834] sd 0:0:9:0: [sdb] Synchronize Cache(10) failed: Result: hostbyte=0x04 driverbyte=0x00
> [ 193.261701] sd 0:0:9:0: [sdb] Stopping disk
> [ 193.265879] sd 0:0:9:0: [sdb] Start/Stop Unit failed: Result: hostbyte=0x04 driverbyte=0x00
> [ 193.325165] hisi_sas_v2_hw HISI0162:01: found dev[2:5] is gone
> [ 193.381094] hisi_sas_v2_hw HISI0162:01: found dev[1:1] is gone
> [ 193.388719] hisi_sas_v2_hw HISI0162:01: found dev[0:2] is gone
>
> root@(none)$ echo 1 > ./phy-0:7/sas_phy/phy-0:7/enable
> root@(none)$ [ 196.221879] hisi_sas_v2_hw HISI0162:01: phyup: phy7 link_rate=11
> [ 196.281178] scsi 0:0:16:0: Direct-Access SanDisk LT0200MO P404 PQ: 0 ANSI: 6
> [ 196.973390] ata3.00: ATA-8: HGST HUS724040ALA640, MFAOA8B0, max UDMA/133
> [ 196.980088] ata3.00: 7814037168 sectors, multi 0: LBA48 NCQ (depth 31/32)
> [ 196.987022] ata3.00: ATA Identify Device Log not supported
> [ 196.992499] ata3.00: Security Log not supported
> [ 196.998953] ata3.00: ATA Identify Device Log not supported
> [ 197.004434] ata3.00: Security Log not supported
> [ 197.008954] ata3.00: configured for UDMA/133
> [ 197.050428] scsi 0:0:17:0: Direct-Access ATA HGST HUS724040AL A8B0 PQ: 0 ANSI: 5
> [ 197.091593] sd 0:0:17:0: [sdb] 7814037168 512-byte logical blocks: (4.00 TB/3.64 TiB)
> [ 197.099441] sd 0:0:17:0: [sdb] Write Protect is off
> [ 197.104326] sd 0:0:17:0: [sdb] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
> [ 197.122982] sd 0:0:17:0: [sdb] Attached SCSI disk
> [ 197.129367] scsi 0:0:18:0: Direct-Access SanDisk LT0200MO P404 PQ: 0 ANSI: 6
> [ 197.157605] sd 0:0:16:0: [sda] 390721968 512-byte logical blocks: (200 GB/186 GiB)
> [ 197.199622] sd 0:0:16:0: [sda] Write Protect is off
> [ 197.281604] sd 0:0:16:0: [sda] Write cache: disabled, read cache: disabled, supports DPO and FUA
> [ 197.427727] scsi 0:0:19:0: Direct-Access SanDisk LT0200MO P404 PQ: 0 ANSI: 6
> [ 197.605362] sd 0:0:18:0: [sdc] 390721968 512-byte logical blocks: (200 GB/186 GiB)
> [ 197.647409] sd 0:0:18:0: [sdc] Write Protect is off
> [ 197.726403] scsi 0:0:20:0: Direct-Access SanDisk LT0200MO P404 PQ: 0 ANSI: 6
> [ 197.729389] sd 0:0:18:0: [sdc] Write cache: disabled, read cache: disabled, supports DPO and FUA
> [ 197.903664] sd 0:0:19:0: [sdd] 390721968 512-byte logical blocks: (200 GB/186 GiB)
> [ 197.945699] sd 0:0:19:0: [sdd] Write Protect is off
> [ 198.024789] scsi 0:0:21:0: Direct-Access SanDisk LT0200MO P404 PQ: 0 ANSI: 6
> [ 198.027685] sd 0:0:19:0: [sdd] Write cache: disabled, read cache: disabled, supports DPO and FUA
> [ 198.202410] sd 0:0:20:0: [sde] 390721968 512-byte logical blocks: (200 GB/186 GiB)
> [ 198.244458] sd 0:0:20:0: [sde] Write Protect is off
> [ 198.323137] scsi 0:0:22:0: Direct-Access SanDisk LT0200MO P404 PQ: 0 ANSI: 6
> [ 198.326491] sd 0:0:20:0: [sde] Write cache: disabled, read cache: disabled, supports DPO and FUA
> [ 198.500754] sd 0:0:21:0: [sdf] 390721968 512-byte logical blocks: (200 GB/186 GiB)
> [ 198.516373] sd 0:0:16:0: [sda] Attached SCSI disk
> [ 198.542773] sd 0:0:21:0: [sdf] Write Protect is off
> [ 198.585184] scsi 0:0:23:0: Enclosure 12G SAS Expander RevB PQ: 0 ANSI: 6
> [ 198.624911] sd 0:0:21:0: [sdf] Write cache: disabled, read cache: disabled, supports DPO and FUA
> [ 198.799446] sd 0:0:22:0: [sdg] 390721968 512-byte logical blocks: (200 GB/186 GiB)
> [ 198.841484] sd 0:0:22:0: [sdg] Write Protect is off
> [ 198.923508] sd 0:0:22:0: [sdg] Write cache: disabled, read cache: disabled, supports DPO and FUA
> [ 198.964420] sd 0:0:18:0: [sdc] Attached SCSI disk
> [ 199.262903] sd 0:0:19:0: [sdd] Attached SCSI disk
> [ 199.561175] sd 0:0:20:0: [sde] Attached SCSI disk
> [ 199.859533] sd 0:0:21:0: [sdf] Attached SCSI disk
>
> root@(none)$ [ 200.158121] sd 0:0:22:0: [sdg] Attached SCSI disk
>
> root@(none)$
>
>
>>
>>
>>> >
>>> > John
>
>
>
> .
>
On 10/07/2017 08:06, Yijing Wang wrote:
> Disco mutex was introudced to prevent domain rediscovery competing
> with ata error handling(87c8331). If we have already hold the lock
> in sas_revalidate_domain and sync executing probe, deadlock caused,
> because, sas_probe_sata() also need hold disco_mutex. Since disco mutex
> use to prevent revalidata domain happen during ata error handler,
> it should be safe to release disco mutex when sync probe, because
> no new revalidate domain event would be process until the sync return,
> and the current sas revalidate domain finish.
>
So with your changes we have a chain of synchronised events, running in
separate queues. In theory it sounds ok.
Then, as you said in the commit message, sas_revalidate_domain() holds
the disco mutex while *all* these chained events occur; so we will
continue to hold the mutex until we have revalidated the domain, meaning
until we have finished destroying or probing new devices.
But in the domain revalidation when you discover a new ATA device,
function sas_probe_sata() wants to grab the disco mutex and you just
temporarily release it, even though adding a new ATA device kicks in EH.
This defeats the principal of using a mutex at all, which is (according
to 87c8331 commit message) to mutually exclude the domain re-discovery
(which has not actually finished) and the ATA EH (and device destruction).
Anyway, since we are synchronising this series of events (broadcast
event, domain rediscovery, and device destruction), surely it should be
possible to include the ATA EH as well, so we can actually get rid of
the disco mutex finally, right?
Note: I think that there is a problem which you have not seen. Consider
removing a ATA disk with IO active conncted to an expander:
- LLDD sends brodcast event
- sas_revalidate_domain(), which grabs disco mutex
- revalidate finds dev is gone
- destruct device, which calls sas_rphy_delete
- this waits on command queue to drain
- commands time out and EH thread kicks in
- sas_ata_strategy_handler() called
- domain revalidation disable attempted
- try to grab disco mutex = Deadlock.
Thanks,
John
> Signed-off-by: Yijing Wang <[email protected]>
> CC: John Garry <[email protected]>
> CC: Johannes Thumshirn <[email protected]>
> CC: Ewan Milne <[email protected]>
> CC: Christoph Hellwig <[email protected]>
> CC: Tomas Henzl <[email protected]>
> CC: Dan Williams <[email protected]>
> ---
> drivers/scsi/libsas/sas_expander.c | 10 ++++++++++
> 1 file changed, 10 insertions(+)
>
> diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
> index 9d26c28..077024e 100644
> --- a/drivers/scsi/libsas/sas_expander.c
> +++ b/drivers/scsi/libsas/sas_expander.c
> @@ -776,6 +776,7 @@ static struct domain_device *sas_ex_discover_end_dev(
> struct ex_phy *phy = &parent_ex->ex_phy[phy_id];
> struct domain_device *child = NULL;
> struct sas_rphy *rphy;
> + bool prev_lock;
> int res;
>
> if (phy->attached_sata_host || phy->attached_sata_ps)
> @@ -803,6 +804,7 @@ static struct domain_device *sas_ex_discover_end_dev(
> sas_ex_get_linkrate(parent, child, phy);
> sas_device_set_phy(child, phy->port);
>
> + prev_lock = mutex_is_locked(&child->port->ha->disco_mutex);
> #ifdef CONFIG_SCSI_SAS_ATA
> if ((phy->attached_tproto & SAS_PROTOCOL_STP) || phy->attached_sata_dev) {
> res = sas_get_ata_info(child, phy);
> @@ -832,7 +834,11 @@ static struct domain_device *sas_ex_discover_end_dev(
> SAS_ADDR(parent->sas_addr), phy_id, res);
> goto out_list_del;
> }
> + if (prev_lock)
> + mutex_unlock(&child->port->ha->disco_mutex);
> sas_disc_wait_completion(child->port, DISCE_PROBE);
> + if (prev_lock)
> + mutex_lock(&child->port->ha->disco_mutex);
>
> } else
> #endif
> @@ -861,7 +867,11 @@ static struct domain_device *sas_ex_discover_end_dev(
> SAS_ADDR(parent->sas_addr), phy_id, res);
> goto out_list_del;
> }
> + if (prev_lock)
> + mutex_unlock(&child->port->ha->disco_mutex);
> sas_disc_wait_completion(child->port, DISCE_PROBE);
> + if (prev_lock)
> + mutex_lock(&child->port->ha->disco_mutex);
> } else {
> SAS_DPRINTK("target proto 0x%x at %016llx:0x%x not handled\n",
> phy->attached_tproto, SAS_ADDR(parent->sas_addr),
>
在 2017/7/14 0:10, John Garry 写道:
> On 10/07/2017 08:06, Yijing Wang wrote:
>> Disco mutex was introudced to prevent domain rediscovery competing
>> with ata error handling(87c8331). If we have already hold the lock
>> in sas_revalidate_domain and sync executing probe, deadlock caused,
>> because, sas_probe_sata() also need hold disco_mutex. Since disco mutex
>> use to prevent revalidata domain happen during ata error handler,
>> it should be safe to release disco mutex when sync probe, because
>> no new revalidate domain event would be process until the sync return,
>> and the current sas revalidate domain finish.
>>
>
> So with your changes we have a chain of synchronised events, running in separate queues. In theory it sounds ok.
>
> Then, as you said in the commit message, sas_revalidate_domain() holds the disco mutex while *all* these chained events occur; so we will continue to hold the mutex until we have revalidated the domain, meaning until we have finished destroying or probing new devices.
>
> But in the domain revalidation when you discover a new ATA device, function sas_probe_sata() wants to grab the disco mutex and you just temporarily release it, even though adding a new ATA device kicks in EH. This defeats the principal of using a mutex at all, which is (according to 87c8331 commit message) to mutually exclude the domain re-discovery (which has not actually finished) and the ATA EH (and device destruction).
>
> Anyway, since we are synchronising this series of events (broadcast event, domain rediscovery, and device destruction), surely it should be possible to include the ATA EH as well, so we can actually get rid of the disco mutex finally, right?
Yes, disco mutex make this issue complex, I checked the commit history, Dan introudce disco mutex and probe/destruct discovery event, so it seems to
need a big rework to the libsas process logic, I am so sorry that I have no more time to deal with it, I will leave today, if you like, you could
rework my patchset or add additional changes based this patchset.
>
> Note: I think that there is a problem which you have not seen. Consider removing a ATA disk with IO active conncted to an expander:
> - LLDD sends brodcast event
> - sas_revalidate_domain(), which grabs disco mutex
> - revalidate finds dev is gone
> - destruct device, which calls sas_rphy_delete
> - this waits on command queue to drain
> - commands time out and EH thread kicks in
> - sas_ata_strategy_handler() called
> - domain revalidation disable attempted
> - try to grab disco mutex = Deadlock.
Yes, it's a issue I haven't found.
Thanks!
Yijing.
Hi John, I also agree to rework disco mutex
>
> Thanks,
> John
>
>> Signed-off-by: Yijing Wang <[email protected]>
>> CC: John Garry <[email protected]>
>> CC: Johannes Thumshirn <[email protected]>
>> CC: Ewan Milne <[email protected]>
>> CC: Christoph Hellwig <[email protected]>
>> CC: Tomas Henzl <[email protected]>
>> CC: Dan Williams <[email protected]>
>> ---
>> drivers/scsi/libsas/sas_expander.c | 10 ++++++++++
>> 1 file changed, 10 insertions(+)
>>
>> diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
>> index 9d26c28..077024e 100644
>> --- a/drivers/scsi/libsas/sas_expander.c
>> +++ b/drivers/scsi/libsas/sas_expander.c
>> @@ -776,6 +776,7 @@ static struct domain_device *sas_ex_discover_end_dev(
>> struct ex_phy *phy = &parent_ex->ex_phy[phy_id];
>> struct domain_device *child = NULL;
>> struct sas_rphy *rphy;
>> + bool prev_lock;
>> int res;
>>
>> if (phy->attached_sata_host || phy->attached_sata_ps)
>> @@ -803,6 +804,7 @@ static struct domain_device *sas_ex_discover_end_dev(
>> sas_ex_get_linkrate(parent, child, phy);
>> sas_device_set_phy(child, phy->port);
>>
>> + prev_lock = mutex_is_locked(&child->port->ha->disco_mutex);
>> #ifdef CONFIG_SCSI_SAS_ATA
>> if ((phy->attached_tproto & SAS_PROTOCOL_STP) || phy->attached_sata_dev) {
>> res = sas_get_ata_info(child, phy);
>> @@ -832,7 +834,11 @@ static struct domain_device *sas_ex_discover_end_dev(
>> SAS_ADDR(parent->sas_addr), phy_id, res);
>> goto out_list_del;
>> }
>> + if (prev_lock)
>> + mutex_unlock(&child->port->ha->disco_mutex);
>> sas_disc_wait_completion(child->port, DISCE_PROBE);
>> + if (prev_lock)
>> + mutex_lock(&child->port->ha->disco_mutex);
>>
>> } else
>> #endif
>> @@ -861,7 +867,11 @@ static struct domain_device *sas_ex_discover_end_dev(
>> SAS_ADDR(parent->sas_addr), phy_id, res);
>> goto out_list_del;
>> }
>> + if (prev_lock)
>> + mutex_unlock(&child->port->ha->disco_mutex);
>> sas_disc_wait_completion(child->port, DISCE_PROBE);
>> + if (prev_lock)
>> + mutex_lock(&child->port->ha->disco_mutex);
>> } else {
>> SAS_DPRINTK("target proto 0x%x at %016llx:0x%x not handled\n",
>> phy->attached_tproto, SAS_ADDR(parent->sas_addr),
>>
>
>
>
> .
>
On 07/10/2017 09:06 AM, Yijing Wang wrote:
> Now libsas hotplug work is static, every sas event type has its own
> static work, LLDD driver queue the hotplug work into shost->work_q.
> If LLDD driver burst post lots hotplug events to libsas, the hotplug
> events may pending in the workqueue like
>
> shost->work_q
> new work[PORTE_BYTES_DMAED] --> |[PHYE_LOSS_OF_SIGNAL][PORTE_BYTES_DMAED] -> processing
> |<-------wait worker to process-------->|
> In this case, a new PORTE_BYTES_DMAED event coming, libsas try to queue it
> to shost->work_q, but this work is already pending, so it would be lost.
> Finally, libsas delete the related sas port and sas devices, but LLDD driver
> expect libsas add the sas port and devices(last sas event).
>
> This patch and use static sas event work pool to appease this issue, since
> it's static work pool, it won't make memory exhaust.
>
> Signed-off-by: Yijing Wang <[email protected]>
> CC: John Garry <[email protected]>
> CC: Johannes Thumshirn <[email protected]>
> CC: Ewan Milne <[email protected]>
> CC: Christoph Hellwig <[email protected]>
> CC: Tomas Henzl <[email protected]>
> CC: Dan Williams <[email protected]>
> ---
> drivers/scsi/libsas/sas_event.c | 208 ++++++++++++++++++++++++++++++++-----
> drivers/scsi/libsas/sas_init.c | 6 --
> drivers/scsi/libsas/sas_internal.h | 3 +
> drivers/scsi/libsas/sas_phy.c | 48 +++------
> drivers/scsi/libsas/sas_port.c | 18 ++--
> include/scsi/libsas.h | 16 +--
> 6 files changed, 216 insertions(+), 83 deletions(-)
>
> diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c
> index c0d0d97..a1370bd 100644
> --- a/drivers/scsi/libsas/sas_event.c
> +++ b/drivers/scsi/libsas/sas_event.c
> @@ -27,13 +27,20 @@
> #include "sas_internal.h"
> #include "sas_dump.h"
>
> +static DEFINE_SPINLOCK(sas_event_lock);
> +
> +static const work_func_t sas_ha_event_fns[HA_NUM_EVENTS] = {
> + [HAE_RESET] = sas_hae_reset,
> +};
> +
> int sas_queue_work(struct sas_ha_struct *ha, struct sas_work *sw)
> {
> int rc = 0;
>
> if (!test_bit(SAS_HA_REGISTERED, &ha->state))
> - return 0;
> + return rc;
>
> + rc = 1;
> if (test_bit(SAS_HA_DRAINING, &ha->state)) {
> /* add it to the defer list, if not already pending */
> if (list_empty(&sw->drain_node))
> @@ -44,19 +51,15 @@ int sas_queue_work(struct sas_ha_struct *ha, struct sas_work *sw)
> return rc;
> }
>
> -static int sas_queue_event(int event, unsigned long *pending,
> - struct sas_work *work,
> +static int sas_queue_event(int event, struct sas_work *work,
> struct sas_ha_struct *ha)
> {
> int rc = 0;
> + unsigned long flags;
>
> - if (!test_and_set_bit(event, pending)) {
> - unsigned long flags;
> -
> - spin_lock_irqsave(&ha->lock, flags);
> - rc = sas_queue_work(ha, work);
> - spin_unlock_irqrestore(&ha->lock, flags);
> - }
> + spin_lock_irqsave(&ha->lock, flags);
> + rc = sas_queue_work(ha, work);
> + spin_unlock_irqrestore(&ha->lock, flags);
>
> return rc;
> }
> @@ -64,6 +67,8 @@ static int sas_queue_event(int event, unsigned long *pending,
>
> void __sas_drain_work(struct sas_ha_struct *ha)
> {
> + int ret;
> + unsigned long flags;
> struct workqueue_struct *wq = ha->core.shost->work_q;
> struct sas_work *sw, *_sw;
>
> @@ -78,7 +83,12 @@ void __sas_drain_work(struct sas_ha_struct *ha)
> clear_bit(SAS_HA_DRAINING, &ha->state);
> list_for_each_entry_safe(sw, _sw, &ha->defer_q, drain_node) {
> list_del_init(&sw->drain_node);
> - sas_queue_work(ha, sw);
> + ret = sas_queue_work(ha, sw);
> + if (ret != 1) {
> + spin_lock_irqsave(&sas_event_lock, flags);
> + sw->used = false;
> + spin_unlock_irqrestore(&sas_event_lock, flags);
> + }
> }
> spin_unlock_irq(&ha->lock);
> }
> @@ -119,51 +129,197 @@ void sas_enable_revalidation(struct sas_ha_struct *ha)
> if (!test_and_clear_bit(ev, &d->pending))
> continue;
>
> - sas_queue_event(ev, &d->pending, &d->disc_work[ev].work, ha);
> + sas_queue_event(ev, &d->disc_work[ev].work, ha);
> }
> mutex_unlock(&ha->disco_mutex);
> }
>
> +static void sas_free_ha_event(struct sas_ha_event *event)
> +{
> + unsigned long flags;
> + spin_lock_irqsave(&sas_event_lock, flags);
> + event->work.used = false;
> + spin_unlock_irqrestore(&sas_event_lock, flags);
> +}
> +
> +static void sas_free_port_event(struct asd_sas_event *event)
> +{
> + unsigned long flags;
> + spin_lock_irqsave(&sas_event_lock, flags);
> + event->work.used = false;
> + spin_unlock_irqrestore(&sas_event_lock, flags);
> +}
> +
> +static void sas_free_phy_event(struct asd_sas_event *event)
> +{
> + unsigned long flags;
> + spin_lock_irqsave(&sas_event_lock, flags);
> + event->work.used = false;
> + spin_unlock_irqrestore(&sas_event_lock, flags);
> +}
> +
> +static void sas_ha_event_worker(struct work_struct *work)
> +{
> + struct sas_ha_event *ev = to_sas_ha_event(work);
> +
> + sas_ha_event_fns[ev->type](work);
> + sas_free_ha_event(ev);
> +}
> +
> +static void sas_port_event_worker(struct work_struct *work)
> +{
> + struct asd_sas_event *ev = to_asd_sas_event(work);
> +
> + sas_port_event_fns[ev->type](work);
> + sas_free_port_event(ev);
> +}
> +
> +static void sas_phy_event_worker(struct work_struct *work)
> +{
> + struct asd_sas_event *ev = to_asd_sas_event(work);
> +
> + sas_phy_event_fns[ev->type](work);
> + sas_free_phy_event(ev);
> +}
> +
> +static struct sas_ha_event *sas_alloc_ha_event(struct sas_ha_struct *sas_ha)
> +{
> + int i;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&sas_event_lock, flags);
> + for (i = 0; i < HA_NUM_EVENTS; i++)
> + if (!sas_ha->ha_events[i].work.used)
> + break;
> +
> + if (i == HA_NUM_EVENTS) {
> + spin_unlock_irqrestore(&sas_event_lock, flags);
> + return NULL;
> + }
> +
> + sas_ha->ha_events[i].work.used = true;
> + spin_unlock_irqrestore(&sas_event_lock, flags);
> + return &sas_ha->ha_events[i];
> +}
> +
> static int notify_ha_event(struct sas_ha_struct *sas_ha, enum ha_event event)
> {
> + int ret;
> + struct sas_ha_event *ev;
> +
> BUG_ON(event >= HA_NUM_EVENTS);
>
> - return sas_queue_event(event, &sas_ha->pending,
> - &sas_ha->ha_events[event].work, sas_ha);
> + ev = sas_alloc_ha_event(sas_ha);
> + if (!ev) {
> + pr_err("%s: alloc sas ha event fail!\n", __func__);
> + return 0;
> + }
> +
> + INIT_SAS_WORK(&ev->work, sas_ha_event_worker);
> + ev->ha = sas_ha;
> + ev->type = event;
> + ret = sas_queue_event(event, &ev->work, sas_ha);
> + if (ret != 1)
> + sas_free_ha_event(ev);
> +
> + return ret;
> +}
> +
> +struct asd_sas_event *sas_alloc_port_event(struct asd_sas_phy *phy)
> +{
> + int i;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&sas_event_lock, flags);
> + for (i = 0; i < PORT_POOL_SIZE; i++)
> + {
> + if (!phy->port_events[i].work.used)
> + break;
> + }
> +
> + if (i == PORT_POOL_SIZE) {
> + spin_unlock_irqrestore(&sas_event_lock, flags);
> + return NULL;
> + }
> +
> + phy->port_events[i].work.used = true;
> + spin_unlock_irqrestore(&sas_event_lock, flags);
> + return &phy->port_events[i];
> }
>
> static int notify_port_event(struct asd_sas_phy *phy, enum port_event event)
> {
> + int ret;
> + struct asd_sas_event *ev;
> struct sas_ha_struct *ha = phy->ha;
>
> BUG_ON(event >= PORT_NUM_EVENTS);
>
> - return sas_queue_event(event, &phy->port_events_pending,
> - &phy->port_events[event].work, ha);
> + ev = sas_alloc_port_event(phy);
> + if (!ev) {
> + pr_err("%s: alloc sas port event fail!\n", __func__);
> + return 0;
> + }
> +
> + INIT_SAS_WORK(&ev->work, sas_port_event_worker);
> + ev->phy = phy;
> + ev->type = event;
> + ret = sas_queue_event(event, &ev->work, ha);
> + if (ret != 1)
> + sas_free_port_event(ev);
> +
> + return ret;
> }
>
> +struct asd_sas_event *sas_alloc_phy_event(struct asd_sas_phy *phy)
> +{
> + int i;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&sas_event_lock, flags);
> + for (i = 0; i < PHY_POOL_SIZE; i++)
> + if (!phy->phy_events[i].work.used)
> + break;
> +
> + if (i == PHY_POOL_SIZE) {
> + spin_unlock_irqrestore(&sas_event_lock, flags);
> + return NULL;
> + }
> +
> + phy->phy_events[i].work.used = true;
> + spin_unlock_irqrestore(&sas_event_lock, flags);
> + return &phy->phy_events[i];
> +}
> int sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event)
> {
> + int ret;
> + struct asd_sas_event *ev;
> struct sas_ha_struct *ha = phy->ha;
>
> BUG_ON(event >= PHY_NUM_EVENTS);
>
> - return sas_queue_event(event, &phy->phy_events_pending,
> - &phy->phy_events[event].work, ha);
> + ev = sas_alloc_phy_event(phy);
> + if (!ev) {
> + pr_err("%s: alloc sas phy event fail!\n", __func__);
> + return 0;
> + }
> +
> + INIT_SAS_WORK(&ev->work, sas_phy_event_worker);
> + ev->phy = phy;
> + ev->type = event;
> + ret = sas_queue_event(event, &ev->work, ha);
> + if (ret != 1)
> + sas_free_phy_event(ev);
> +
> + return ret;
> }
>
> int sas_init_events(struct sas_ha_struct *sas_ha)
> {
> - static const work_func_t sas_ha_event_fns[HA_NUM_EVENTS] = {
> - [HAE_RESET] = sas_hae_reset,
> - };
> -
> int i;
>
> - for (i = 0; i < HA_NUM_EVENTS; i++) {
> - INIT_SAS_WORK(&sas_ha->ha_events[i].work, sas_ha_event_fns[i]);
> - sas_ha->ha_events[i].ha = sas_ha;
> - }
> + for (i = 0; i < HA_NUM_EVENTS; i++)
> + sas_ha->ha_events[i].work.used = false;
>
> sas_ha->notify_ha_event = notify_ha_event;
> sas_ha->notify_port_event = notify_port_event;
> diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
> index 64e9cdd..c227a8b 100644
> --- a/drivers/scsi/libsas/sas_init.c
> +++ b/drivers/scsi/libsas/sas_init.c
> @@ -111,10 +111,6 @@ void sas_hash_addr(u8 *hashed, const u8 *sas_addr)
>
> void sas_hae_reset(struct work_struct *work)
> {
> - struct sas_ha_event *ev = to_sas_ha_event(work);
> - struct sas_ha_struct *ha = ev->ha;
> -
> - clear_bit(HAE_RESET, &ha->pending);
> }
>
> int sas_register_ha(struct sas_ha_struct *sas_ha)
> @@ -375,8 +371,6 @@ void sas_prep_resume_ha(struct sas_ha_struct *ha)
> struct asd_sas_phy *phy = ha->sas_phy[i];
>
> memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE);
> - phy->port_events_pending = 0;
> - phy->phy_events_pending = 0;
> phy->frame_rcvd_size = 0;
> }
> }
> diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
> index a216c95..f03ce64 100644
> --- a/drivers/scsi/libsas/sas_internal.h
> +++ b/drivers/scsi/libsas/sas_internal.h
> @@ -97,6 +97,9 @@ void sas_hae_reset(struct work_struct *work);
>
> void sas_free_device(struct kref *kref);
>
> +extern const work_func_t sas_phy_event_fns[PHY_NUM_EVENTS];
> +extern const work_func_t sas_port_event_fns[PORT_NUM_EVENTS];
> +
> #ifdef CONFIG_SCSI_SAS_HOST_SMP
> extern int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
> struct request *rsp);
> diff --git a/drivers/scsi/libsas/sas_phy.c b/drivers/scsi/libsas/sas_phy.c
> index cdee446c..07766ad 100644
> --- a/drivers/scsi/libsas/sas_phy.c
> +++ b/drivers/scsi/libsas/sas_phy.c
> @@ -35,7 +35,6 @@ static void sas_phye_loss_of_signal(struct work_struct *work)
> struct asd_sas_event *ev = to_asd_sas_event(work);
> struct asd_sas_phy *phy = ev->phy;
>
> - clear_bit(PHYE_LOSS_OF_SIGNAL, &phy->phy_events_pending);
> phy->error = 0;
> sas_deform_port(phy, 1);
> }
> @@ -45,7 +44,6 @@ static void sas_phye_oob_done(struct work_struct *work)
> struct asd_sas_event *ev = to_asd_sas_event(work);
> struct asd_sas_phy *phy = ev->phy;
>
> - clear_bit(PHYE_OOB_DONE, &phy->phy_events_pending);
> phy->error = 0;
> }
>
> @@ -58,8 +56,6 @@ static void sas_phye_oob_error(struct work_struct *work)
> struct sas_internal *i =
> to_sas_internal(sas_ha->core.shost->transportt);
>
> - clear_bit(PHYE_OOB_ERROR, &phy->phy_events_pending);
> -
> sas_deform_port(phy, 1);
>
> if (!port && phy->enabled && i->dft->lldd_control_phy) {
> @@ -88,8 +84,6 @@ static void sas_phye_spinup_hold(struct work_struct *work)
> struct sas_internal *i =
> to_sas_internal(sas_ha->core.shost->transportt);
>
> - clear_bit(PHYE_SPINUP_HOLD, &phy->phy_events_pending);
> -
> phy->error = 0;
> i->dft->lldd_control_phy(phy, PHY_FUNC_RELEASE_SPINUP_HOLD, NULL);
> }
> @@ -99,8 +93,6 @@ static void sas_phye_resume_timeout(struct work_struct *work)
> struct asd_sas_event *ev = to_asd_sas_event(work);
> struct asd_sas_phy *phy = ev->phy;
>
> - clear_bit(PHYE_RESUME_TIMEOUT, &phy->phy_events_pending);
> -
> /* phew, lldd got the phy back in the nick of time */
> if (!phy->suspended) {
> dev_info(&phy->phy->dev, "resume timeout cancelled\n");
> @@ -112,30 +104,12 @@ static void sas_phye_resume_timeout(struct work_struct *work)
> sas_deform_port(phy, 1);
> }
>
> -
> /* ---------- Phy class registration ---------- */
>
> int sas_register_phys(struct sas_ha_struct *sas_ha)
> {
> int i;
>
> - static const work_func_t sas_phy_event_fns[PHY_NUM_EVENTS] = {
> - [PHYE_LOSS_OF_SIGNAL] = sas_phye_loss_of_signal,
> - [PHYE_OOB_DONE] = sas_phye_oob_done,
> - [PHYE_OOB_ERROR] = sas_phye_oob_error,
> - [PHYE_SPINUP_HOLD] = sas_phye_spinup_hold,
> - [PHYE_RESUME_TIMEOUT] = sas_phye_resume_timeout,
> -
> - };
> -
> - static const work_func_t sas_port_event_fns[PORT_NUM_EVENTS] = {
> - [PORTE_BYTES_DMAED] = sas_porte_bytes_dmaed,
> - [PORTE_BROADCAST_RCVD] = sas_porte_broadcast_rcvd,
> - [PORTE_LINK_RESET_ERR] = sas_porte_link_reset_err,
> - [PORTE_TIMER_EVENT] = sas_porte_timer_event,
> - [PORTE_HARD_RESET] = sas_porte_hard_reset,
> - };
> -
> /* Now register the phys. */
> for (i = 0; i < sas_ha->num_phys; i++) {
> int k;
> @@ -143,15 +117,12 @@ int sas_register_phys(struct sas_ha_struct *sas_ha)
>
> phy->error = 0;
> INIT_LIST_HEAD(&phy->port_phy_el);
> - for (k = 0; k < PORT_NUM_EVENTS; k++) {
> - INIT_SAS_WORK(&phy->port_events[k].work, sas_port_event_fns[k]);
> - phy->port_events[k].phy = phy;
> - }
>
> - for (k = 0; k < PHY_NUM_EVENTS; k++) {
> - INIT_SAS_WORK(&phy->phy_events[k].work, sas_phy_event_fns[k]);
> - phy->phy_events[k].phy = phy;
> - }
> + for (k = 0; k < PORT_POOL_SIZE; k++)
> + phy->port_events[k].work.used = false;
> +
> + for (k = 0; k < PHY_POOL_SIZE; k++)
> + phy->phy_events[k].work.used = false;
>
> phy->port = NULL;
> phy->ha = sas_ha;
> @@ -179,3 +150,12 @@ int sas_register_phys(struct sas_ha_struct *sas_ha)
>
> return 0;
> }
> +
> +const work_func_t sas_phy_event_fns[PHY_NUM_EVENTS] = {
> + [PHYE_LOSS_OF_SIGNAL] = sas_phye_loss_of_signal,
> + [PHYE_OOB_DONE] = sas_phye_oob_done,
> + [PHYE_OOB_ERROR] = sas_phye_oob_error,
> + [PHYE_SPINUP_HOLD] = sas_phye_spinup_hold,
> + [PHYE_RESUME_TIMEOUT] = sas_phye_resume_timeout,
> +
> +};
> diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
> index d3c5297..9326628 100644
> --- a/drivers/scsi/libsas/sas_port.c
> +++ b/drivers/scsi/libsas/sas_port.c
> @@ -261,8 +261,6 @@ void sas_porte_bytes_dmaed(struct work_struct *work)
> struct asd_sas_event *ev = to_asd_sas_event(work);
> struct asd_sas_phy *phy = ev->phy;
>
> - clear_bit(PORTE_BYTES_DMAED, &phy->port_events_pending);
> -
> sas_form_port(phy);
> }
>
> @@ -273,8 +271,6 @@ void sas_porte_broadcast_rcvd(struct work_struct *work)
> unsigned long flags;
> u32 prim;
>
> - clear_bit(PORTE_BROADCAST_RCVD, &phy->port_events_pending);
> -
> spin_lock_irqsave(&phy->sas_prim_lock, flags);
> prim = phy->sas_prim;
> spin_unlock_irqrestore(&phy->sas_prim_lock, flags);
> @@ -288,8 +284,6 @@ void sas_porte_link_reset_err(struct work_struct *work)
> struct asd_sas_event *ev = to_asd_sas_event(work);
> struct asd_sas_phy *phy = ev->phy;
>
> - clear_bit(PORTE_LINK_RESET_ERR, &phy->port_events_pending);
> -
> sas_deform_port(phy, 1);
> }
>
> @@ -298,8 +292,6 @@ void sas_porte_timer_event(struct work_struct *work)
> struct asd_sas_event *ev = to_asd_sas_event(work);
> struct asd_sas_phy *phy = ev->phy;
>
> - clear_bit(PORTE_TIMER_EVENT, &phy->port_events_pending);
> -
> sas_deform_port(phy, 1);
> }
>
> @@ -308,8 +300,6 @@ void sas_porte_hard_reset(struct work_struct *work)
> struct asd_sas_event *ev = to_asd_sas_event(work);
> struct asd_sas_phy *phy = ev->phy;
>
> - clear_bit(PORTE_HARD_RESET, &phy->port_events_pending);
> -
> sas_deform_port(phy, 1);
> }
>
> @@ -353,3 +343,11 @@ void sas_unregister_ports(struct sas_ha_struct *sas_ha)
> sas_deform_port(sas_ha->sas_phy[i], 0);
>
> }
> +
> +const work_func_t sas_port_event_fns[PORT_NUM_EVENTS] = {
> + [PORTE_BYTES_DMAED] = sas_porte_bytes_dmaed,
> + [PORTE_BROADCAST_RCVD] = sas_porte_broadcast_rcvd,
> + [PORTE_LINK_RESET_ERR] = sas_porte_link_reset_err,
> + [PORTE_TIMER_EVENT] = sas_porte_timer_event,
> + [PORTE_HARD_RESET] = sas_porte_hard_reset,
> +};
> diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
> index cfaeed2..c41328d 100644
> --- a/include/scsi/libsas.h
> +++ b/include/scsi/libsas.h
> @@ -229,6 +229,8 @@ struct domain_device {
> struct sas_work {
> struct list_head drain_node;
> struct work_struct work;
> + struct list_head list;
> + bool used;
> };
>
> static inline void INIT_SAS_WORK(struct sas_work *sw, void (*fn)(struct work_struct *))
> @@ -300,6 +302,7 @@ struct asd_sas_port {
> struct asd_sas_event {
> struct sas_work work;
> struct asd_sas_phy *phy;
> + int type;
> };
>
> static inline struct asd_sas_event *to_asd_sas_event(struct work_struct *work)
> @@ -309,16 +312,16 @@ static inline struct asd_sas_event *to_asd_sas_event(struct work_struct *work)
> return ev;
> }
>
> +#define PORT_POOL_SIZE (PORT_NUM_EVENTS * 5)
> +#define PHY_POOL_SIZE (PHY_NUM_EVENTS * 5)
> +
> /* The phy pretty much is controlled by the LLDD.
> * The class only reads those fields.
> */
> struct asd_sas_phy {
> /* private: */
> - struct asd_sas_event port_events[PORT_NUM_EVENTS];
> - struct asd_sas_event phy_events[PHY_NUM_EVENTS];
> -
> - unsigned long port_events_pending;
> - unsigned long phy_events_pending;
> + struct asd_sas_event port_events[PORT_POOL_SIZE];
> + struct asd_sas_event phy_events[PHY_POOL_SIZE];
>
> int error;
> int suspended;
> @@ -365,6 +368,7 @@ struct scsi_core {
> struct sas_ha_event {
> struct sas_work work;
> struct sas_ha_struct *ha;
> + int type;
> };
>
> static inline struct sas_ha_event *to_sas_ha_event(struct work_struct *work)
> @@ -384,8 +388,6 @@ enum sas_ha_state {
> struct sas_ha_struct {
> /* private: */
> struct sas_ha_event ha_events[HA_NUM_EVENTS];
> - unsigned long pending;
> -
> struct list_head defer_q; /* work queued while draining */
> struct mutex drain_mutex;
> unsigned long state;
>
I would make 'PORT_NUM_EVENTS' and 'PHY_NUM_EVENTS' configurable (module
parameter?); after all, there _might_ be an event burst or even a large
fabric causing the driver to overflow the event table again.
Otherwise looks good to me.
Cheers,
Hannes
--
Dr. Hannes Reinecke Teamlead Storage & Networking
[email protected] +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)
On 07/10/2017 09:06 AM, Yijing Wang wrote:
> No one uses the port_gone_completion in struct asd_sas_port,
> clean it out.
>
> Signed-off-by: Yijing Wang <[email protected]>
> ---
> include/scsi/libsas.h | 2 --
> 1 file changed, 2 deletions(-)
>
> diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
> index c41328d..628f48b 100644
> --- a/include/scsi/libsas.h
> +++ b/include/scsi/libsas.h
> @@ -263,8 +263,6 @@ struct sas_discovery {
> /* The port struct is Class:RW, driver:RO */
> struct asd_sas_port {
> /* private: */
> - struct completion port_gone_completion;
> -
> struct sas_discovery disc;
> struct domain_device *port_dev;
> spinlock_t dev_list_lock;
>
Reviewed-by: Hannes Reinecke <[email protected]>
Cheers,
Hannes
--
Dr. Hannes Reinecke Teamlead Storage & Networking
[email protected] +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)
On 07/10/2017 09:06 AM, Yijing Wang wrote:
> Now all libsas works are queued to scsi host workqueue,
> include sas event work post by LLDD and sas discovery
> work, and a sas hotplug flow may be divided into several
> works, e.g libsas receive a PORTE_BYTES_DMAED event,
> now we process it as following steps:
> sas_form_port --- run in work in shot workq
> sas_discover_domain --- run in another work in shost workq
> ...
> sas_probe_devices --- run in new work in shost workq
> We found during hot-add a device, libsas may need run several
> works in same workqueue to add device in system, the process is
> not atomic, it may interrupt by other sas event works, like
> PHYE_LOSS_OF_SIGNAL. Finally, we would found lots unexpected
> errors. This patch is preparation of execute libsas sas event
> in sync.
>
> Signed-off-by: Yijing Wang <[email protected]>
> CC: John Garry <[email protected]>
> CC: Johannes Thumshirn <[email protected]>
> CC: Ewan Milne <[email protected]>
> CC: Christoph Hellwig <[email protected]>
> CC: Tomas Henzl <[email protected]>
> CC: Dan Williams <[email protected]>
> ---
> drivers/scsi/libsas/sas_event.c | 4 ++--
> drivers/scsi/libsas/sas_init.c | 7 +++++++
> include/scsi/libsas.h | 1 +
> 3 files changed, 10 insertions(+), 2 deletions(-)
>
Reviewed-by: Hannes Reinecke <[email protected]>
Cheers,
Hannes
--
Dr. Hannes Reinecke Teamlead Storage & Networking
[email protected] +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)
On 07/10/2017 09:06 AM, Yijing Wang wrote:
> Introduce wait-complete for libsas sas event processing,
> execute sas port create/destruct in sync.
>
> Signed-off-by: Yijing Wang <[email protected]>
> CC: John Garry <[email protected]>
> CC: Johannes Thumshirn <[email protected]>
> CC: Ewan Milne <[email protected]>
> CC: Christoph Hellwig <[email protected]>
> CC: Tomas Henzl <[email protected]>
> CC: Dan Williams <[email protected]>
> ---
> drivers/scsi/libsas/sas_discover.c | 41 ++++++++++++++++++++++++++++----------
> drivers/scsi/libsas/sas_internal.h | 34 +++++++++++++++++++++++++++++++
> drivers/scsi/libsas/sas_port.c | 4 ++++
> include/scsi/libsas.h | 5 ++++-
> 4 files changed, 72 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
> index 60de662..5d4a3a8 100644
> --- a/drivers/scsi/libsas/sas_discover.c
> +++ b/drivers/scsi/libsas/sas_discover.c
> @@ -525,16 +525,43 @@ static void sas_revalidate_domain(struct work_struct *work)
> mutex_unlock(&ha->disco_mutex);
> }
>
> +static const work_func_t sas_event_fns[DISC_NUM_EVENTS] = {
> + [DISCE_DISCOVER_DOMAIN] = sas_discover_domain,
> + [DISCE_REVALIDATE_DOMAIN] = sas_revalidate_domain,
> + [DISCE_PROBE] = sas_probe_devices,
> + [DISCE_SUSPEND] = sas_suspend_devices,
> + [DISCE_RESUME] = sas_resume_devices,
> + [DISCE_DESTRUCT] = sas_destruct_devices,
> +};
> +
> +/* a simple wrapper for sas discover event funtions */
> +static void sas_discover_common_fn(struct work_struct *work)
> +{
> + struct sas_discovery_event *ev = to_sas_discovery_event(work);
> + struct asd_sas_port *port = ev->port;
> +
> + sas_event_fns[ev->type](work);
> + sas_port_put(port);
> +}
> +
> +
> /* ---------- Events ---------- */
>
> static void sas_chain_work(struct sas_ha_struct *ha, struct sas_work *sw)
> {
> + int ret;
> + struct sas_discovery_event *ev = to_sas_discovery_event(&sw->work);
> + struct asd_sas_port *port = ev->port;
> +
> /* chained work is not subject to SA_HA_DRAINING or
> * SAS_HA_REGISTERED, because it is either submitted in the
> * workqueue, or known to be submitted from a context that is
> * not racing against draining
> */
> - scsi_queue_work(ha->core.shost, &sw->work);
> + sas_port_get(port);
> + ret = scsi_queue_work(ha->core.shost, &sw->work);
> + if (ret != 1)
> + sas_port_put(port);
> }
>
> static void sas_chain_event(int event, unsigned long *pending,
> @@ -575,18 +602,10 @@ void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *port)
> {
> int i;
>
> - static const work_func_t sas_event_fns[DISC_NUM_EVENTS] = {
> - [DISCE_DISCOVER_DOMAIN] = sas_discover_domain,
> - [DISCE_REVALIDATE_DOMAIN] = sas_revalidate_domain,
> - [DISCE_PROBE] = sas_probe_devices,
> - [DISCE_SUSPEND] = sas_suspend_devices,
> - [DISCE_RESUME] = sas_resume_devices,
> - [DISCE_DESTRUCT] = sas_destruct_devices,
> - };
> -
> disc->pending = 0;
> for (i = 0; i < DISC_NUM_EVENTS; i++) {
> - INIT_SAS_WORK(&disc->disc_work[i].work, sas_event_fns[i]);
> + INIT_SAS_WORK(&disc->disc_work[i].work, sas_discover_common_fn);
> disc->disc_work[i].port = port;
> + disc->disc_work[i].type = i;
> }
> }
> diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
> index f03ce64..890b5d26 100644
> --- a/drivers/scsi/libsas/sas_internal.h
> +++ b/drivers/scsi/libsas/sas_internal.h
> @@ -100,6 +100,40 @@ void sas_free_device(struct kref *kref);
> extern const work_func_t sas_phy_event_fns[PHY_NUM_EVENTS];
> extern const work_func_t sas_port_event_fns[PORT_NUM_EVENTS];
>
> +static void sas_complete_event(struct kref *kref)
> +{
> + struct asd_sas_port *port = container_of(kref, struct asd_sas_port, ref);
> +
> + complete_all(&port->completion);
> +}
> +
> +static inline void sas_port_put(struct asd_sas_port *port)
> +{
> + if (port->is_sync)
> + kref_put(&port->ref, sas_complete_event);
> +}
> +
> +static inline void sas_port_wait_init(struct asd_sas_port *port)
> +{
> + init_completion(&port->completion);
> + kref_init(&port->ref);
> + port->is_sync = true;
> +}
> +
> +static inline void sas_port_wait_completion(
> + struct asd_sas_port *port)
> +{
> + sas_port_put(port);
> + wait_for_completion(&port->completion);
> + port->is_sync = false;
> +}
> +
> +static inline void sas_port_get(struct asd_sas_port *port)
> +{
> + if (port && port->is_sync)
> + kref_get(&port->ref);
> +}
> +
> #ifdef CONFIG_SCSI_SAS_HOST_SMP
> extern int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
> struct request *rsp);
> diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
> index 9326628..d589adb 100644
> --- a/drivers/scsi/libsas/sas_port.c
> +++ b/drivers/scsi/libsas/sas_port.c
> @@ -191,7 +191,9 @@ static void sas_form_port(struct asd_sas_phy *phy)
> if (si->dft->lldd_port_formed)
> si->dft->lldd_port_formed(phy);
>
> + sas_port_wait_init(port);
> sas_discover_event(phy->port, DISCE_DISCOVER_DOMAIN);
> + sas_port_wait_completion(port);
> }
>
> /**
> @@ -218,7 +220,9 @@ void sas_deform_port(struct asd_sas_phy *phy, int gone)
> dev->pathways--;
>
> if (port->num_phys == 1) {
> + sas_port_wait_init(port);
> sas_unregister_domain_devices(port, gone);
> + sas_port_wait_completion(port);
> sas_port_delete(port->port);
> port->port = NULL;
> } else {
I would rather use the standard on-stack completion here;
like this:
DECLARE_COMPLETION_ONSTACK(complete);
port->completion = &complete;
sas_unregister_domain_devices(port, gone);
wait_for_completion(&complete);
sas_port_delete(port->port);
which would simplify the above helpers to:
static inline void sas_port_put(struct asd_sas_port *port)
{
if (port->completion)
kref_put(&port->ref, sas_complete_event);
}
and you could do away with the 'is_sync' helper.
Cheers,
Hannes
--
Dr. Hannes Reinecke Teamlead Storage & Networking
[email protected] +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)
On 07/10/2017 09:06 AM, Yijing Wang wrote:
> Sometimes, we want sync libsas probe or destruct in sas discovery work,
> like when libsas revalidate domain. We need to split probe and destruct
> work from the scsi host workqueue.
>
> Signed-off-by: Yijing Wang <[email protected]>
> CC: John Garry <[email protected]>
> CC: Johannes Thumshirn <[email protected]>
> CC: Ewan Milne <[email protected]>
> CC: Christoph Hellwig <[email protected]>
> CC: Tomas Henzl <[email protected]>
> CC: Dan Williams <[email protected]>
> ---
> drivers/scsi/libsas/sas_discover.c | 13 ++++++++++++-
> drivers/scsi/libsas/sas_init.c | 8 ++++++++
> include/scsi/libsas.h | 1 +
> 3 files changed, 21 insertions(+), 1 deletion(-)
>
Reviewed-by: Hannes Reinecke <[email protected]>
Cheers,
Hannes
--
Dr. Hannes Reinecke Teamlead Storage & Networking
[email protected] +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)
On 07/10/2017 09:06 AM, Yijing Wang wrote:
> Introduce a sync flag to tag discovery event whether need to
> sync execute, per-event wait-complete ensure sync.
>
> Signed-off-by: Yijing Wang <[email protected]>
> CC: John Garry <[email protected]>
> CC: Johannes Thumshirn <[email protected]>
> CC: Ewan Milne <[email protected]>
> CC: Christoph Hellwig <[email protected]>
> CC: Tomas Henzl <[email protected]>
> CC: Dan Williams <[email protected]>
> ---
> drivers/scsi/libsas/sas_discover.c | 8 ++++++--
> drivers/scsi/libsas/sas_expander.c | 12 +++++++++++-
> drivers/scsi/libsas/sas_internal.h | 27 +++++++++++++++++++++++++++
> include/scsi/libsas.h | 2 ++
> 4 files changed, 46 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
> index a25d648..d68f8dd 100644
> --- a/drivers/scsi/libsas/sas_discover.c
> +++ b/drivers/scsi/libsas/sas_discover.c
> @@ -378,6 +378,7 @@ void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *dev)
> list_del_init(&dev->disco_list_node);
> sas_rphy_free(dev->rphy);
> sas_unregister_common_dev(port, dev);
> + sas_disc_cancel_sync(&port->disc.disc_work[DISCE_DESTRUCT]);
> return;
> }
>
> @@ -541,6 +542,7 @@ static void sas_discover_common_fn(struct work_struct *work)
> struct asd_sas_port *port = ev->port;
>
> sas_event_fns[ev->type](work);
> + sas_disc_wakeup(ev);
> sas_port_put(port);
> }
>
> @@ -571,8 +573,10 @@ static void sas_chain_work(struct sas_ha_struct *ha, struct sas_work *sw)
> else
> ret = scsi_queue_work(ha->core.shost, &sw->work);
>
> - if (ret != 1)
> + if (ret != 1) {
> sas_port_put(port);
> + sas_disc_cancel_sync(ev);
> + }
> }
>
> static void sas_chain_event(int event, unsigned long *pending,
> @@ -592,9 +596,9 @@ int sas_discover_event(struct asd_sas_port *port, enum discover_event ev)
> {
> struct sas_discovery *disc;
>
> + disc = &port->disc;
> if (!port)
> return 0;
> - disc = &port->disc;
>
> BUG_ON(ev >= DISC_NUM_EVENTS);
>
> diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
> index 570b2cb..9d26c28 100644
> --- a/drivers/scsi/libsas/sas_expander.c
> +++ b/drivers/scsi/libsas/sas_expander.c
> @@ -822,14 +822,18 @@ static struct domain_device *sas_ex_discover_end_dev(
>
> list_add_tail(&child->disco_list_node, &parent->port->disco_list);
>
> + sas_disc_wait_init(child->port, DISCE_PROBE);
> res = sas_discover_sata(child);
> if (res) {
> + sas_disc_cancel_sync(&child->port->disc.disc_work[DISCE_PROBE]);
> SAS_DPRINTK("sas_discover_sata() for device %16llx at "
> "%016llx:0x%x returned 0x%x\n",
> SAS_ADDR(child->sas_addr),
> SAS_ADDR(parent->sas_addr), phy_id, res);
> goto out_list_del;
> }
> + sas_disc_wait_completion(child->port, DISCE_PROBE);
> +
> } else
> #endif
> if (phy->attached_tproto & SAS_PROTOCOL_SSP) {
> @@ -847,14 +851,17 @@ static struct domain_device *sas_ex_discover_end_dev(
>
> list_add_tail(&child->disco_list_node, &parent->port->disco_list);
>
> + sas_disc_wait_init(child->port, DISCE_PROBE);
> res = sas_discover_end_dev(child);
> if (res) {
> + sas_disc_cancel_sync(&child->port->disc.disc_work[DISCE_PROBE]);
> SAS_DPRINTK("sas_discover_end_dev() for device %16llx "
> "at %016llx:0x%x returned 0x%x\n",
> SAS_ADDR(child->sas_addr),
> SAS_ADDR(parent->sas_addr), phy_id, res);
> goto out_list_del;
> }
> + sas_disc_wait_completion(child->port, DISCE_PROBE);
> } else {
> SAS_DPRINTK("target proto 0x%x at %016llx:0x%x not handled\n",
> phy->attached_tproto, SAS_ADDR(parent->sas_addr),
> @@ -1890,8 +1897,11 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent,
> if (child->dev_type == SAS_EDGE_EXPANDER_DEVICE ||
> child->dev_type == SAS_FANOUT_EXPANDER_DEVICE)
> sas_unregister_ex_tree(parent->port, child);
> - else
> + else {
> + sas_disc_wait_init(parent->port, DISCE_DESTRUCT);
> sas_unregister_dev(parent->port, child);
> + sas_disc_wait_completion(parent->port, DISCE_DESTRUCT);
> + }
> found = child;
> break;
> }
> diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
> index 890b5d26..09a9b10 100644
> --- a/drivers/scsi/libsas/sas_internal.h
> +++ b/drivers/scsi/libsas/sas_internal.h
> @@ -134,6 +134,33 @@ static inline void sas_port_get(struct asd_sas_port *port)
> kref_get(&port->ref);
> }
>
> +static inline void sas_disc_cancel_sync(struct sas_discovery_event *event)
> +{
> + event->is_sync = false;
> +}
> +
> +static inline void sas_disc_wakeup(struct sas_discovery_event *event)
> +{
> + if (event->is_sync)
> + complete(&event->completion);
> +}
> +
> +static inline void sas_disc_wait_init(struct asd_sas_port *port,
> + enum discover_event event)
> +{
> + port->disc.disc_work[event].is_sync = true;
> + init_completion(&port->disc.disc_work[event].completion);
> +}
> +
> +static inline void sas_disc_wait_completion(struct asd_sas_port *port,
> + enum discover_event event)
> +{
> + if (port->disc.disc_work[event].is_sync) {
> + wait_for_completion(&port->disc.disc_work[event].completion);
> + port->disc.disc_work[event].is_sync = false;
> + }
> +}
> +
> #ifdef CONFIG_SCSI_SAS_HOST_SMP
> extern int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
> struct request *rsp);
> diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
> index 4bcb9fe..21e9fb140 100644
> --- a/include/scsi/libsas.h
> +++ b/include/scsi/libsas.h
> @@ -243,6 +243,8 @@ struct sas_discovery_event {
> struct sas_work work;
> struct asd_sas_port *port;
> enum discover_event type;
> + bool is_sync;
> + struct completion completion;
> };
>
> static inline struct sas_discovery_event *to_sas_discovery_event(struct work_struct *work)
>
Some comments as the earlier patch: please use DECLARE_COMPETION_ONSTACK
here.
Cheers,
Hannes
--
Dr. Hannes Reinecke Teamlead Storage & Networking
[email protected] +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)
On 07/10/2017 09:06 AM, Yijing Wang wrote:
> Disco mutex was introudced to prevent domain rediscovery competing
> with ata error handling(87c8331). If we have already hold the lock
> in sas_revalidate_domain and sync executing probe, deadlock caused,
> because, sas_probe_sata() also need hold disco_mutex. Since disco mutex
> use to prevent revalidata domain happen during ata error handler,
> it should be safe to release disco mutex when sync probe, because
> no new revalidate domain event would be process until the sync return,
> and the current sas revalidate domain finish.
>
> Signed-off-by: Yijing Wang <[email protected]>
> CC: John Garry <[email protected]>
> CC: Johannes Thumshirn <[email protected]>
> CC: Ewan Milne <[email protected]>
> CC: Christoph Hellwig <[email protected]>
> CC: Tomas Henzl <[email protected]>
> CC: Dan Williams <[email protected]>
> ---
> drivers/scsi/libsas/sas_expander.c | 10 ++++++++++
> 1 file changed, 10 insertions(+)
>
> diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
> index 9d26c28..077024e 100644
> --- a/drivers/scsi/libsas/sas_expander.c
> +++ b/drivers/scsi/libsas/sas_expander.c
> @@ -776,6 +776,7 @@ static struct domain_device *sas_ex_discover_end_dev(
> struct ex_phy *phy = &parent_ex->ex_phy[phy_id];
> struct domain_device *child = NULL;
> struct sas_rphy *rphy;
> + bool prev_lock;
> int res;
>
> if (phy->attached_sata_host || phy->attached_sata_ps)
> @@ -803,6 +804,7 @@ static struct domain_device *sas_ex_discover_end_dev(
> sas_ex_get_linkrate(parent, child, phy);
> sas_device_set_phy(child, phy->port);
>
> + prev_lock = mutex_is_locked(&child->port->ha->disco_mutex);
> #ifdef CONFIG_SCSI_SAS_ATA
> if ((phy->attached_tproto & SAS_PROTOCOL_STP) || phy->attached_sata_dev) {
> res = sas_get_ata_info(child, phy);
> @@ -832,7 +834,11 @@ static struct domain_device *sas_ex_discover_end_dev(
> SAS_ADDR(parent->sas_addr), phy_id, res);
> goto out_list_del;
> }
> + if (prev_lock)
> + mutex_unlock(&child->port->ha->disco_mutex);
> sas_disc_wait_completion(child->port, DISCE_PROBE);
> + if (prev_lock)
> + mutex_lock(&child->port->ha->disco_mutex);
>
> } else
> #endif
> @@ -861,7 +867,11 @@ static struct domain_device *sas_ex_discover_end_dev(
> SAS_ADDR(parent->sas_addr), phy_id, res);
> goto out_list_del;
> }
> + if (prev_lock)
> + mutex_unlock(&child->port->ha->disco_mutex);
> sas_disc_wait_completion(child->port, DISCE_PROBE);
> + if (prev_lock)
> + mutex_lock(&child->port->ha->disco_mutex);
> } else {
> SAS_DPRINTK("target proto 0x%x at %016llx:0x%x not handled\n",
> phy->attached_tproto, SAS_ADDR(parent->sas_addr),
>
I would rather have an analysis if this really cannot happen; 'should
not' is rather vague. But seeing that it _is_ quite complex:
Reviewed-by: Hannes Reinecke <[email protected]>
Cheers,
Hannes
--
Dr. Hannes Reinecke Teamlead Storage & Networking
[email protected] +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)
在 2017/7/14 14:51, Hannes Reinecke 写道:
> On 07/10/2017 09:06 AM, Yijing Wang wrote:
>> Introduce wait-complete for libsas sas event processing,
>> execute sas port create/destruct in sync.
>>
>> Signed-off-by: Yijing Wang <[email protected]>
>> CC: John Garry <[email protected]>
>> CC: Johannes Thumshirn <[email protected]>
>> CC: Ewan Milne <[email protected]>
>> CC: Christoph Hellwig <[email protected]>
>> CC: Tomas Henzl <[email protected]>
>> CC: Dan Williams <[email protected]>
>> ---
>> drivers/scsi/libsas/sas_discover.c | 41 ++++++++++++++++++++++++++++----------
>> drivers/scsi/libsas/sas_internal.h | 34 +++++++++++++++++++++++++++++++
>> drivers/scsi/libsas/sas_port.c | 4 ++++
>> include/scsi/libsas.h | 5 ++++-
>> 4 files changed, 72 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
>> index 60de662..5d4a3a8 100644
>> --- a/drivers/scsi/libsas/sas_discover.c
>> +++ b/drivers/scsi/libsas/sas_discover.c
>> @@ -525,16 +525,43 @@ static void sas_revalidate_domain(struct work_struct *work)
>> mutex_unlock(&ha->disco_mutex);
>> }
>>
>> +static const work_func_t sas_event_fns[DISC_NUM_EVENTS] = {
>> + [DISCE_DISCOVER_DOMAIN] = sas_discover_domain,
>> + [DISCE_REVALIDATE_DOMAIN] = sas_revalidate_domain,
>> + [DISCE_PROBE] = sas_probe_devices,
>> + [DISCE_SUSPEND] = sas_suspend_devices,
>> + [DISCE_RESUME] = sas_resume_devices,
>> + [DISCE_DESTRUCT] = sas_destruct_devices,
>> +};
>> +
>> +/* a simple wrapper for sas discover event funtions */
>> +static void sas_discover_common_fn(struct work_struct *work)
>> +{
>> + struct sas_discovery_event *ev = to_sas_discovery_event(work);
>> + struct asd_sas_port *port = ev->port;
>> +
>> + sas_event_fns[ev->type](work);
>> + sas_port_put(port);
>> +}
>> +
>> +
>> /* ---------- Events ---------- */
>>
>> static void sas_chain_work(struct sas_ha_struct *ha, struct sas_work *sw)
>> {
>> + int ret;
>> + struct sas_discovery_event *ev = to_sas_discovery_event(&sw->work);
>> + struct asd_sas_port *port = ev->port;
>> +
>> /* chained work is not subject to SA_HA_DRAINING or
>> * SAS_HA_REGISTERED, because it is either submitted in the
>> * workqueue, or known to be submitted from a context that is
>> * not racing against draining
>> */
>> - scsi_queue_work(ha->core.shost, &sw->work);
>> + sas_port_get(port);
>> + ret = scsi_queue_work(ha->core.shost, &sw->work);
>> + if (ret != 1)
>> + sas_port_put(port);
>> }
>>
>> static void sas_chain_event(int event, unsigned long *pending,
>> @@ -575,18 +602,10 @@ void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *port)
>> {
>> int i;
>>
>> - static const work_func_t sas_event_fns[DISC_NUM_EVENTS] = {
>> - [DISCE_DISCOVER_DOMAIN] = sas_discover_domain,
>> - [DISCE_REVALIDATE_DOMAIN] = sas_revalidate_domain,
>> - [DISCE_PROBE] = sas_probe_devices,
>> - [DISCE_SUSPEND] = sas_suspend_devices,
>> - [DISCE_RESUME] = sas_resume_devices,
>> - [DISCE_DESTRUCT] = sas_destruct_devices,
>> - };
>> -
>> disc->pending = 0;
>> for (i = 0; i < DISC_NUM_EVENTS; i++) {
>> - INIT_SAS_WORK(&disc->disc_work[i].work, sas_event_fns[i]);
>> + INIT_SAS_WORK(&disc->disc_work[i].work, sas_discover_common_fn);
>> disc->disc_work[i].port = port;
>> + disc->disc_work[i].type = i;
>> }
>> }
>> diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
>> index f03ce64..890b5d26 100644
>> --- a/drivers/scsi/libsas/sas_internal.h
>> +++ b/drivers/scsi/libsas/sas_internal.h
>> @@ -100,6 +100,40 @@ void sas_free_device(struct kref *kref);
>> extern const work_func_t sas_phy_event_fns[PHY_NUM_EVENTS];
>> extern const work_func_t sas_port_event_fns[PORT_NUM_EVENTS];
>>
>> +static void sas_complete_event(struct kref *kref)
>> +{
>> + struct asd_sas_port *port = container_of(kref, struct asd_sas_port, ref);
>> +
>> + complete_all(&port->completion);
>> +}
>> +
>> +static inline void sas_port_put(struct asd_sas_port *port)
>> +{
>> + if (port->is_sync)
>> + kref_put(&port->ref, sas_complete_event);
>> +}
>> +
>> +static inline void sas_port_wait_init(struct asd_sas_port *port)
>> +{
>> + init_completion(&port->completion);
>> + kref_init(&port->ref);
>> + port->is_sync = true;
>> +}
>> +
>> +static inline void sas_port_wait_completion(
>> + struct asd_sas_port *port)
>> +{
>> + sas_port_put(port);
>> + wait_for_completion(&port->completion);
>> + port->is_sync = false;
>> +}
>> +
>> +static inline void sas_port_get(struct asd_sas_port *port)
>> +{
>> + if (port && port->is_sync)
>> + kref_get(&port->ref);
>> +}
>> +
>> #ifdef CONFIG_SCSI_SAS_HOST_SMP
>> extern int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
>> struct request *rsp);
>> diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
>> index 9326628..d589adb 100644
>> --- a/drivers/scsi/libsas/sas_port.c
>> +++ b/drivers/scsi/libsas/sas_port.c
>> @@ -191,7 +191,9 @@ static void sas_form_port(struct asd_sas_phy *phy)
>> if (si->dft->lldd_port_formed)
>> si->dft->lldd_port_formed(phy);
>>
>> + sas_port_wait_init(port);
>> sas_discover_event(phy->port, DISCE_DISCOVER_DOMAIN);
>> + sas_port_wait_completion(port);
>> }
>>
>> /**
>> @@ -218,7 +220,9 @@ void sas_deform_port(struct asd_sas_phy *phy, int gone)
>> dev->pathways--;
>>
>> if (port->num_phys == 1) {
>> + sas_port_wait_init(port);
>> sas_unregister_domain_devices(port, gone);
>> + sas_port_wait_completion(port);
>> sas_port_delete(port->port);
>> port->port = NULL;
>> } else {
>
> I would rather use the standard on-stack completion here;
> like this:
>
> DECLARE_COMPLETION_ONSTACK(complete);
> port->completion = &complete;
> sas_unregister_domain_devices(port, gone);
> wait_for_completion(&complete);
> sas_port_delete(port->port);
>
> which would simplify the above helpers to:
>
> static inline void sas_port_put(struct asd_sas_port *port)
> {
> if (port->completion)
> kref_put(&port->ref, sas_complete_event);
> }
>
> and you could do away with the 'is_sync' helper.
It looks better, thanks!
>
> Cheers,
>
> Hannes
>
Hi, I'm sorry to say that I have to stop the libsas hotplug improvement work, I will resign from
Huawei, so I have no time and hardware to continue to work at this issue. John is very familiar with
this work, and provide a lot of good suggestions. So if John like, I am glad he could join to work
at this issues, And my colleague Jason Yan could also provide helps.
Thanks!
Yijing.
在 2017/7/10 15:06, Yijing Wang 写道:
> This patchset is based Johannes's patch
> "scsi: sas: scsi_queue_work can fail, so make callers aware"
>
> Now the libsas hotplug has some issues, Dan Williams report
> a similar bug here before
> https://www.mail-archive.com/[email protected]/msg39187.html
>
> The issues we have found
> 1. if LLDD burst reports lots of phy-up/phy-down sas events, some events
> may lost because a same sas events is pending now, finally libsas topo
> may different the hardware.
> 2. receive a phy down sas event, libsas call sas_deform_port to remove
> devices, it would first delete the sas port, then put a destruction
> discovery event in a new work, and queue it at the tail of workqueue,
> once the sas port be deleted, its children device will be deleted too,
> when the destruction work start, it will found the target device has
> been removed, and report a sysfs warnning.
> 3. since a hotplug process will be devided into several works, if a phy up
> sas event insert into phydown works, like
> destruction work ---> PORTE_BYTES_DMAED (sas_form_port) ---->PHYE_LOSS_OF_SIGNAL
> the hot remove flow would broken by PORTE_BYTES_DMAED event, it's not
> we expected, and issues would occur.
>
> The first patch fix the sas events lost, and the second one introudce wait-complete
> to fix the hotplug order issues.
>
> v2->v3: some code improvements suggested by Johannes and John,
> split v2 patch 2 into several small pathes.
> v1->v2: some code improvements suggested by John Garry
>
> Yijing Wang (7):
> libsas: Use static sas event pool to appease sas event lost
> libsas: remove unused port_gone_completion
> libsas: Use new workqueue to run sas event
> libsas: add sas event wait-complete support
> libsas: add a new workqueue to run probe/destruct discovery event
> libsas: add wait-complete support to sync discovery event
> libsas: release disco mutex during waiting in sas_ex_discover_end_dev
>
> drivers/scsi/libsas/sas_discover.c | 58 +++++++---
> drivers/scsi/libsas/sas_event.c | 212 ++++++++++++++++++++++++++++++++-----
> drivers/scsi/libsas/sas_expander.c | 22 +++-
> drivers/scsi/libsas/sas_init.c | 21 ++--
> drivers/scsi/libsas/sas_internal.h | 64 +++++++++++
> drivers/scsi/libsas/sas_phy.c | 48 +++------
> drivers/scsi/libsas/sas_port.c | 22 ++--
> include/scsi/libsas.h | 27 +++--
> 8 files changed, 373 insertions(+), 101 deletions(-)
>
On 14/07/2017 02:44, wangyijing wrote:
>
>
> 在 2017/7/14 0:10, John Garry 写道:
>> On 10/07/2017 08:06, Yijing Wang wrote:
>>> Disco mutex was introudced to prevent domain rediscovery competing
>>> with ata error handling(87c8331). If we have already hold the lock
>>> in sas_revalidate_domain and sync executing probe, deadlock caused,
>>> because, sas_probe_sata() also need hold disco_mutex. Since disco mutex
>>> use to prevent revalidata domain happen during ata error handler,
>>> it should be safe to release disco mutex when sync probe, because
>>> no new revalidate domain event would be process until the sync return,
>>> and the current sas revalidate domain finish.
>>>
>>
>> So with your changes we have a chain of synchronised events, running in separate queues. In theory it sounds ok.
>>
>> Then, as you said in the commit message, sas_revalidate_domain() holds the disco mutex while *all* these chained events occur; so we will continue to hold the mutex until we have revalidated the domain, meaning until we have finished destroying or probing new devices.
>>
>> But in the domain revalidation when you discover a new ATA device, function sas_probe_sata() wants to grab the disco mutex and you just temporarily release it, even though adding a new ATA device kicks in EH. This defeats the principal of using a mutex at all, which is (according to 87c8331 commit message) to mutually exclude the domain re-discovery (which has not actually finished) and the ATA EH (and device destruction).
>>
>> Anyway, since we are synchronising this series of events (broadcast event, domain rediscovery, and device destruction), surely it should be possible to include the ATA EH as well, so we can actually get rid of the disco mutex finally, right?
>
> Yes, disco mutex make this issue complex, I checked the commit history, Dan introudce disco mutex and probe/destruct discovery event, so it seems to
> need a big rework to the libsas process logic, I am so sorry that I have no more time to deal with it, I will leave today, if you like, you could
> rework my patchset or add additional changes based this patchset.
>
>
Since we are now synchronising everything, we should work on removing
the disco mutex. After all, that is what a mutex is for - synchronising.
But the devil is in the detail...
>
>>
>> Note: I think that there is a problem which you have not seen. Consider removing a ATA disk with IO active conncted to an expander:
>> - LLDD sends brodcast event
>> - sas_revalidate_domain(), which grabs disco mutex
>> - revalidate finds dev is gone
>> - destruct device, which calls sas_rphy_delete
>> - this waits on command queue to drain
>> - commands time out and EH thread kicks in
>> - sas_ata_strategy_handler() called
>> - domain revalidation disable attempted
>> - try to grab disco mutex = Deadlock.
>
> Yes, it's a issue I haven't found.
>
>
> Thanks!
> Yijing.
>
>
>
>
> Hi John, I also agree to rework disco mutex
>
>
>>
>> Thanks,
>> John
>>
>>> Signed-off-by: Yijing Wang <[email protected]>
>>> CC: John Garry <[email protected]>
>>> CC: Johannes Thumshirn <[email protected]>
>>> CC: Ewan Milne <[email protected]>
>>> CC: Christoph Hellwig <[email protected]>
>>> CC: Tomas Henzl <[email protected]>
>>> CC: Dan Williams <[email protected]>
>>> ---
>>> drivers/scsi/libsas/sas_expander.c | 10 ++++++++++
>>> 1 file changed, 10 insertions(+)
>>>
>>> diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
>>> index 9d26c28..077024e 100644
>>> --- a/drivers/scsi/libsas/sas_expander.c
>>> +++ b/drivers/scsi/libsas/sas_expander.c
>>> @@ -776,6 +776,7 @@ static struct domain_device *sas_ex_discover_end_dev(
>>> struct ex_phy *phy = &parent_ex->ex_phy[phy_id];
>>> struct domain_device *child = NULL;
>>> struct sas_rphy *rphy;
>>> + bool prev_lock;
>>> int res;
>>>
>>> if (phy->attached_sata_host || phy->attached_sata_ps)
>>> @@ -803,6 +804,7 @@ static struct domain_device *sas_ex_discover_end_dev(
>>> sas_ex_get_linkrate(parent, child, phy);
>>> sas_device_set_phy(child, phy->port);
>>>
>>> + prev_lock = mutex_is_locked(&child->port->ha->disco_mutex);
>>> #ifdef CONFIG_SCSI_SAS_ATA
>>> if ((phy->attached_tproto & SAS_PROTOCOL_STP) || phy->attached_sata_dev) {
>>> res = sas_get_ata_info(child, phy);
>>> @@ -832,7 +834,11 @@ static struct domain_device *sas_ex_discover_end_dev(
>>> SAS_ADDR(parent->sas_addr), phy_id, res);
>>> goto out_list_del;
>>> }
>>> + if (prev_lock)
>>> + mutex_unlock(&child->port->ha->disco_mutex);
>>> sas_disc_wait_completion(child->port, DISCE_PROBE);
>>> + if (prev_lock)
>>> + mutex_lock(&child->port->ha->disco_mutex);
>>>
>>> } else
>>> #endif
>>> @@ -861,7 +867,11 @@ static struct domain_device *sas_ex_discover_end_dev(
>>> SAS_ADDR(parent->sas_addr), phy_id, res);
>>> goto out_list_del;
>>> }
>>> + if (prev_lock)
>>> + mutex_unlock(&child->port->ha->disco_mutex);
>>> sas_disc_wait_completion(child->port, DISCE_PROBE);
>>> + if (prev_lock)
>>> + mutex_lock(&child->port->ha->disco_mutex);
>>> } else {
>>> SAS_DPRINTK("target proto 0x%x at %016llx:0x%x not handled\n",
>>> phy->attached_tproto, SAS_ADDR(parent->sas_addr),
>>>
>>
>>
>>
>> .
>>
>
>
> .
>
On 14/07/2017 07:51, Hannes Reinecke wrote:
>> #ifdef CONFIG_SCSI_SAS_HOST_SMP
>> > extern int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
>> > struct request *rsp);
>> > diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
>> > index 9326628..d589adb 100644
>> > --- a/drivers/scsi/libsas/sas_port.c
>> > +++ b/drivers/scsi/libsas/sas_port.c
>> > @@ -191,7 +191,9 @@ static void sas_form_port(struct asd_sas_phy *phy)
>> > if (si->dft->lldd_port_formed)
>> > si->dft->lldd_port_formed(phy);
>> >
>> > + sas_port_wait_init(port);
>> > sas_discover_event(phy->port, DISCE_DISCOVER_DOMAIN);
>> > + sas_port_wait_completion(port);
>> > }
>> >
>> > /**
>> > @@ -218,7 +220,9 @@ void sas_deform_port(struct asd_sas_phy *phy, int gone)
>> > dev->pathways--;
>> >
Hannes thanks for checking.
>> > if (port->num_phys == 1) {
>> > + sas_port_wait_init(port);
>> > sas_unregister_domain_devices(port, gone);
>> > + sas_port_wait_completion(port);
>> > sas_port_delete(port->port);
>> > port->port = NULL;
>> > } else {
> I would rather use the standard on-stack completion here;
> like this:
>
> DECLARE_COMPLETION_ONSTACK(complete);
> port->completion = &complete;
> sas_unregister_domain_devices(port, gone);
> wait_for_completion(&complete);
> sas_port_delete(port->port);
>
> which would simplify the above helpers to:
>
> static inline void sas_port_put(struct asd_sas_port *port)
> {
> if (port->completion)
> kref_put(&port->ref, sas_complete_event);
> }
>
> and you could do away with the 'is_sync' helper.
>
I did wonder if we could avoid using completion altogether and just
flush the respective queue which the work item is being processed in.
But, due to the intricacy of SCSI/ATA EH, and since we still use shost
workqueue for the libsas hotplug processing, maybe it best to keep it
straightforward and keep using completions.
Anyway, The idea to declare the completion on the stack seems sound.
And, for patch 6/7, I don't think the is_sync element is even required
without any change to declaration of completion in struct
sas_discovery_event.
John
> Cheers,
>
> Hannes
> --