2020-08-27 10:00:45

by Daniel Wagner

[permalink] [raw]
Subject: [PATCH 0/4] qla2xxx: A couple crash fixes

Hi,

The first crash we observed is due memory corruption in the srb memory
pool. Unforuntatly, I couldn't find the source of the problem but the
workaround by resetting the cleanup callbacks 'fixes' this problem
(patch #1). I think as intermeditate step this should be merged until
the real cause can be identified.

The second crash is due a race condition(?) in the firmware. The sts
entries are not updated in time which leads to this crash pattern
which several customers have reported:

#0 [c00000ffffd1bb80] scsi_dma_unmap at d00000001e4904d4 [scsi_mod]
#1 [c00000ffffd1bbe0] qla2x00_sp_compl at d0000000204803cc [qla2xxx]
#2 [c00000ffffd1bc20] qla24xx_process_response_queue at d0000000204c5810 [qla2xxx]
#3 [c00000ffffd1bd50] qla24xx_msix_rsp_q at d0000000204c8fd8 [qla2xxx]
#4 [c00000ffffd1bde0] __handle_irq_event_percpu at c000000000189510
#5 [c00000ffffd1bea0] handle_irq_event_percpu at c00000000018978c
#6 [c00000ffffd1bee0] handle_irq_event at c00000000018984c
#7 [c00000ffffd1bf10] handle_fasteoi_irq at c00000000018efc0
#8 [c00000ffffd1bf40] generic_handle_irq at c000000000187f10
#9 [c00000ffffd1bf60] __do_irq at c000000000018784
#10 [c00000ffffd1bf90] call_do_irq at c00000000002caa4
#11 [c00000ecca417a00] do_IRQ at c000000000018970
#12 [c00000ecca417a50] restore_check_irq_replay at c00000000000de98

From analyzing the crash dump it was clear that
qla24xx_mbx_iocb_entry() calls sp->done (qla2x00_sp_compl) which
crashes because the response is not a mailbox entry, it is a status
entry. Patch #4 changes the process logic for mailbox commands so that
the sp is parsed before calling the correct proccess function.

Thanks,
Daniel

Daniel Wagner (4):
qla2xxx: Reset done and free callback pointer on release
qla2xxx: Simplify return value logic in qla2x00_get_sp_from_handle()
qla2xxx: Drop unused function argument from
qla2x00_get_sp_from_handle()
qla2xxx: Handle incorrect entry_type entries

drivers/scsi/qla2xxx/qla_gbl.h | 3 +-
drivers/scsi/qla2xxx/qla_inline.h | 2 ++
drivers/scsi/qla2xxx/qla_isr.c | 72 ++++++++++++++++++++++-----------------
drivers/scsi/qla2xxx/qla_mr.c | 9 ++---
4 files changed, 47 insertions(+), 39 deletions(-)

--
2.16.4


2020-08-27 10:01:57

by Daniel Wagner

[permalink] [raw]
Subject: [PATCH 3/4] qla2xxx: Drop unused function argument from qla2x00_get_sp_from_handle()

Commit 7c3df1320e5e ("[SCSI] qla2xxx: Code changes to support new
dynamic logging infrastructure.") removed the use of the func
argument.

Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/scsi/qla2xxx/qla_gbl.h | 3 +--
drivers/scsi/qla2xxx/qla_isr.c | 36 ++++++++++++------------------------
drivers/scsi/qla2xxx/qla_mr.c | 9 +++------
3 files changed, 16 insertions(+), 32 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 0ced18f3104e..bbe3dca6d0ab 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -561,8 +561,7 @@ extern void qla2x00_free_irqs(scsi_qla_host_t *);
extern int qla2x00_get_data_rate(scsi_qla_host_t *);
extern const char *qla2x00_get_link_speed_str(struct qla_hw_data *, uint16_t);
extern srb_t *
-qla2x00_get_sp_from_handle(scsi_qla_host_t *, const char *, struct req_que *,
- void *);
+qla2x00_get_sp_from_handle(scsi_qla_host_t *, struct req_que *, void *);
extern void
qla2x00_process_completed_request(struct scsi_qla_host *, struct req_que *,
uint32_t);
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 5d278155e4e7..b787643f5031 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1711,8 +1711,7 @@ qla2x00_process_completed_request(struct scsi_qla_host *vha,
}

srb_t *
-qla2x00_get_sp_from_handle(scsi_qla_host_t *vha, const char *func,
- struct req_que *req, void *iocb)
+qla2x00_get_sp_from_handle(scsi_qla_host_t *vha, struct req_que *req, void *iocb)
{
struct qla_hw_data *ha = vha->hw;
sts_entry_t *pkt = iocb;
@@ -1750,7 +1749,6 @@ static void
qla2x00_mbx_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
struct mbx_entry *mbx)
{
- const char func[] = "MBX-IOCB";
const char *type;
fc_port_t *fcport;
srb_t *sp;
@@ -1758,7 +1756,7 @@ qla2x00_mbx_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
uint16_t *data;
uint16_t status;

- sp = qla2x00_get_sp_from_handle(vha, func, req, mbx);
+ sp = qla2x00_get_sp_from_handle(vha, req, mbx);
if (!sp)
return;

@@ -1836,13 +1834,12 @@ static void
qla24xx_mbx_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
struct mbx_24xx_entry *pkt)
{
- const char func[] = "MBX-IOCB2";
srb_t *sp;
struct srb_iocb *si;
u16 sz, i;
int res;

- sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
+ sp = qla2x00_get_sp_from_handle(vha, req, pkt);
if (!sp)
return;

@@ -1861,11 +1858,10 @@ static void
qla24xxx_nack_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
struct nack_to_isp *pkt)
{
- const char func[] = "nack";
srb_t *sp;
int res = 0;

- sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
+ sp = qla2x00_get_sp_from_handle(vha, req, pkt);
if (!sp)
return;

@@ -1879,7 +1875,6 @@ static void
qla2x00_ct_entry(scsi_qla_host_t *vha, struct req_que *req,
sts_entry_t *pkt, int iocb_type)
{
- const char func[] = "CT_IOCB";
const char *type;
srb_t *sp;
struct bsg_job *bsg_job;
@@ -1887,7 +1882,7 @@ qla2x00_ct_entry(scsi_qla_host_t *vha, struct req_que *req,
uint16_t comp_status;
int res = 0;

- sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
+ sp = qla2x00_get_sp_from_handle(vha, req, pkt);
if (!sp)
return;

@@ -1952,7 +1947,6 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req,
struct sts_entry_24xx *pkt, int iocb_type)
{
struct els_sts_entry_24xx *ese = (struct els_sts_entry_24xx *)pkt;
- const char func[] = "ELS_CT_IOCB";
const char *type;
srb_t *sp;
struct bsg_job *bsg_job;
@@ -1962,7 +1956,7 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req,
int res;
struct srb_iocb *els;

- sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
+ sp = qla2x00_get_sp_from_handle(vha, req, pkt);
if (!sp)
return;

@@ -2077,7 +2071,6 @@ static void
qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
struct logio_entry_24xx *logio)
{
- const char func[] = "LOGIO-IOCB";
const char *type;
fc_port_t *fcport;
srb_t *sp;
@@ -2085,7 +2078,7 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
uint16_t *data;
uint32_t iop[2];

- sp = qla2x00_get_sp_from_handle(vha, func, req, logio);
+ sp = qla2x00_get_sp_from_handle(vha, req, logio);
if (!sp)
return;

@@ -2206,14 +2199,13 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req,
static void
qla24xx_tm_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, void *tsk)
{
- const char func[] = "TMF-IOCB";
const char *type;
fc_port_t *fcport;
srb_t *sp;
struct srb_iocb *iocb;
struct sts_entry_24xx *sts = (struct sts_entry_24xx *)tsk;

- sp = qla2x00_get_sp_from_handle(vha, func, req, tsk);
+ sp = qla2x00_get_sp_from_handle(vha, req, tsk);
if (!sp)
return;

@@ -2385,11 +2377,10 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
static void qla_ctrlvp_completed(scsi_qla_host_t *vha, struct req_que *req,
struct vp_ctrl_entry_24xx *vce)
{
- const char func[] = "CTRLVP-IOCB";
srb_t *sp;
int rval = QLA_SUCCESS;

- sp = qla2x00_get_sp_from_handle(vha, func, req, vce);
+ sp = qla2x00_get_sp_from_handle(vha, req, vce);
if (!sp)
return;

@@ -3287,7 +3278,6 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt)
{
srb_t *sp;
struct qla_hw_data *ha = vha->hw;
- const char func[] = "ERROR-IOCB";
uint16_t que = MSW(pkt->handle);
struct req_que *req = NULL;
int res = DID_ERROR << 16;
@@ -3317,7 +3307,7 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt)
case ABORT_IOCB_TYPE:
case MBX_IOCB_TYPE:
default:
- sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
+ sp = qla2x00_get_sp_from_handle(vha, req, pkt);
if (sp) {
sp->done(sp, res);
return 0;
@@ -3376,11 +3366,10 @@ static void
qla24xx_abort_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
struct abort_entry_24xx *pkt)
{
- const char func[] = "ABT_IOCB";
srb_t *sp;
struct srb_iocb *abt;

- sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
+ sp = qla2x00_get_sp_from_handle(vha, req, pkt);
if (!sp)
return;

@@ -3393,10 +3382,9 @@ void qla24xx_nvme_ls4_iocb(struct scsi_qla_host *vha,
struct pt_ls4_request *pkt, struct req_que *req)
{
srb_t *sp;
- const char func[] = "LS4_IOCB";
uint16_t comp_status;

- sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
+ sp = qla2x00_get_sp_from_handle(vha, req, pkt);
if (!sp)
return;

diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c
index a8fe4f725fa0..ba41c78d063c 100644
--- a/drivers/scsi/qla2xxx/qla_mr.c
+++ b/drivers/scsi/qla2xxx/qla_mr.c
@@ -2187,11 +2187,10 @@ static void
qlafx00_abort_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
struct abort_iocb_entry_fx00 *pkt)
{
- const char func[] = "ABT_IOCB";
srb_t *sp;
struct srb_iocb *abt;

- sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
+ sp = qla2x00_get_sp_from_handle(vha, req, pkt);
if (!sp)
return;

@@ -2204,7 +2203,6 @@ static void
qlafx00_ioctl_iosb_entry(scsi_qla_host_t *vha, struct req_que *req,
struct ioctl_iocb_entry_fx00 *pkt)
{
- const char func[] = "IOSB_IOCB";
srb_t *sp;
struct bsg_job *bsg_job;
struct fc_bsg_reply *bsg_reply;
@@ -2213,7 +2211,7 @@ qlafx00_ioctl_iosb_entry(scsi_qla_host_t *vha, struct req_que *req,
struct qla_mt_iocb_rsp_fx00 fstatus;
uint8_t *fw_sts_ptr;

- sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
+ sp = qla2x00_get_sp_from_handle(vha, req, pkt);
if (!sp)
return;

@@ -2687,14 +2685,13 @@ qlafx00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp,
{
srb_t *sp;
struct qla_hw_data *ha = vha->hw;
- const char func[] = "ERROR-IOCB";
uint16_t que = 0;
struct req_que *req = NULL;
int res = DID_ERROR << 16;

req = ha->req_q_map[que];

- sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
+ sp = qla2x00_get_sp_from_handle(vha, req, pkt);
if (sp) {
sp->done(sp, res);
return;
--
2.16.4

2020-08-27 10:02:26

by Daniel Wagner

[permalink] [raw]
Subject: [PATCH 2/4] qla2xxx: Simplify return value logic in qla2x00_get_sp_from_handle()

Refactor qla2x00_get_sp_from_handle() to avoid the unecessary
goto if early returns are used. With this we can also avoid
preinitilzing the sp pointer.

Signed-off-by: Daniel Wagner <[email protected]>
---
drivers/scsi/qla2xxx/qla_isr.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 27bcd346af7c..5d278155e4e7 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1716,7 +1716,7 @@ qla2x00_get_sp_from_handle(scsi_qla_host_t *vha, const char *func,
{
struct qla_hw_data *ha = vha->hw;
sts_entry_t *pkt = iocb;
- srb_t *sp = NULL;
+ srb_t *sp;
uint16_t index;

index = LSW(pkt->handle);
@@ -1728,13 +1728,13 @@ qla2x00_get_sp_from_handle(scsi_qla_host_t *vha, const char *func,
set_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags);
else
set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
- goto done;
+ return NULL;
}
sp = req->outstanding_cmds[index];
if (!sp) {
ql_log(ql_log_warn, vha, 0x5032,
"Invalid completion handle (%x) -- timed-out.\n", index);
- return sp;
+ return NULL;
}
if (sp->handle != index) {
ql_log(ql_log_warn, vha, 0x5033,
@@ -1743,8 +1743,6 @@ qla2x00_get_sp_from_handle(scsi_qla_host_t *vha, const char *func,
}

req->outstanding_cmds[index] = NULL;
-
-done:
return sp;
}

--
2.16.4