LinuxLists.cc - [RFC/PATCH, 2/4] readX_check() performance evaluation

2004-01-28 01:58:34

Subject: [RFC/PATCH, 2/4] readX_check() performance evaluation

This is a readX_check() prototype patch to evaluate
the performance disadvantage.

Thanks,
Hironobu Ishii
--------------------
diff -prN linux-2.6.1/drivers/message/fusion/Makefile linux-2.6.1.modified/drivers/message/fusion/Makefile
*** linux-2.6.1/drivers/message/fusion/Makefile 2004-01-09 15:59:45.000000000 +0900
--- linux-2.6.1.modified/drivers/message/fusion/Makefile 2004-01-26 20:25:21.691512363 +0900
***************
*** 15,20 ****
--- 15,23 ----

EXTRA_CFLAGS += ${MPT_CFLAGS}

+ # for PCI_RECOVERY
+ EXTRA_CFLAGS += -DCONFIG_PCI_RECOVERY
+
# Fusion MPT drivers; recognized debug defines...
# MPT general:
#EXTRA_CFLAGS += -DDEBUG
*************** obj-$(CONFIG_FUSION) += mptbase.o mptsc
*** 50,52 ****
--- 53,60 ----
obj-$(CONFIG_FUSION_ISENSE) += isense.o
obj-$(CONFIG_FUSION_CTL) += mptctl.o
obj-$(CONFIG_FUSION_LAN) += mptlan.o
+
+ # for PCI error recovery
+ mptbase-objs := mptbase_main.o read_check.o
+
+
diff -prN linux-2.6.1/drivers/message/fusion/mptbase.c linux-2.6.1.modified/drivers/message/fusion/mptbase.c
*** linux-2.6.1/drivers/message/fusion/mptbase.c 2004-01-09 15:59:18.000000000 +0900
--- linux-2.6.1.modified/drivers/message/fusion/mptbase.c 2004-01-26 22:29:18.185561891 +0900
*************** struct _mpt_ioc_proc_list {
*** 260,265 ****
--- 260,305 ----

#endif

+
+ #ifdef CONFIG_PCI_RECOVERY
+ /*
+ * Try to recover from PCI intermittent read errors
+ */
+ #define REG_READ_SUCCESS 0
+
+ #define REG_READ_OK 0
+ #define REG_READ_RETRY_OK 1
+ #define REG_READ_NG -1
+
+ #define REG_READ_RETRY 5 /* retry limit */
+
+ extern inline int readl_check(volatile u32 *d, volatile u32 *a);
+
+ static inline int do_readl(volatile u32 *d, volatile u32 *a)
+ {
+ int retry ;
+ int fail = 0;
+
+ for (retry = REG_READ_RETRY; retry; retry--) {
+ if (readl_check(d, a) == REG_READ_SUCCESS) {
+ return (fail ? REG_READ_RETRY_OK : REG_READ_OK);
+ }
+ fail++;
+ }
+ /* retry out */
+ /* PCI reset? */
+ return REG_READ_NG;
+ }
+
+ static inline int CHIPREG_READ32(volatile u32 *d, volatile u32 *a)
+ {
+ if (PortIo) {
+ *d =inl((unsigned long)a);
+ return REG_READ_OK;
+ } else
+ return do_readl(d, a);
+ }
+ #else
/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
/* 20000207 -sralston
* GRRRRR... IOSpace (port i/o) register access (for the 909) is back!
*************** static inline u32 CHIPREG_READ32(volatil
*** 274,279 ****
--- 314,320 ----
else
return readl(a);
}
+ #endif

static inline void CHIPREG_WRITE32(volatile u32 *a, u32 v)
{
*************** mpt_interrupt(int irq, void *bus_id, str
*** 352,360 ****
*/
while (1) {

if ((pa = CHIPREG_READ32(&ioc->chip->ReplyFifo)) == 0xFFFFFFFF)
return IRQ_HANDLED;
!
cb_idx = 0;
freeme = 0;

--- 393,413 ----
*/
while (1) {

+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&pa, &ioc->chip->ReplyFifo);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ if (pa == 0xFFFFFFFF)
+ return IRQ_HANDLED;
+ }
+ #else
if ((pa = CHIPREG_READ32(&ioc->chip->ReplyFifo)) == 0xFFFFFFFF)
return IRQ_HANDLED;
! #endif
cb_idx = 0;
freeme = 0;

*************** mpt_send_handshake_request(int handle, i
*** 1066,1073 ****
--- 1119,1140 ----
}

/* Read doorbell and check for active bit */
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ u32 dummy;
+ read_fail = CHIPREG_READ32(&dummy, &iocp->chip->Doorbell);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ if (!(dummy & MPI_DOORBELL_ACTIVE))
+ return -5;
+ }
+ #else
if (!(CHIPREG_READ32(&iocp->chip->Doorbell) & MPI_DOORBELL_ACTIVE))
return -5;
+ #endif

dhsprintk((KERN_INFO MYNAM ": %s: mpt_send_handshake_request start, WaitCnt=%d\n",
iocp->name, ii));
*************** mpt_GetIocState(MPT_ADAPTER *ioc, int co
*** 2170,2176 ****
--- 2237,2255 ----
u32 s, sc;

/* Get! */
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&s, &ioc->chip->Doorbell);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
s = CHIPREG_READ32(&ioc->chip->Doorbell);
+ #endif
+
// dprintk((MYIOC_s_INFO_FMT "raw state = %08x\n", ioc->name, s));
sc = s & MPI_IOC_STATE_MASK;

*************** mpt_downloadboot(MPT_ADAPTER *ioc, int s
*** 2871,2879 ****
--- 2950,2981 ----
ddlprintk((MYIOC_s_INFO_FMT "DbGb0: downloadboot entered.\n",
ioc->name));
#ifdef MPT_DEBUG
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail ;
+ read_fail = CHIPREG_READ32(&diag0val, &ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery codes */
+ }
+ }
+ #else
diag0val = CHIPREG_READ32(&ioc->chip->Diagnostic);
+ #endif
+
+ #ifdef CONFIG_PCI_RECOVERY
+ if (ioc->alt_ioc) {
+ int read_fail ;
+ read_fail = CHIPREG_READ32(&diag1val, &ioc->alt_ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
if (ioc->alt_ioc)
diag1val = CHIPREG_READ32(&ioc->alt_ioc->chip->Diagnostic);
+ #endif
ddlprintk((MYIOC_s_INFO_FMT "DbGb1: diag0=%08x, diag1=%08x\n",
ioc->name, diag0val, diag1val));
#endif
*************** mpt_downloadboot(MPT_ADAPTER *ioc, int s
*** 2903,2909 ****
--- 3005,3023 ----
/* Write magic sequence to WriteSequence register
* until enter diagnostic mode
*/
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&diag0val, &ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
diag0val = CHIPREG_READ32(&ioc->chip->Diagnostic);
+ #endif
+
while ((diag0val & MPI_DIAG_DRWE) == 0) {
CHIPREG_WRITE32(&ioc->chip->WriteSequence, 0xFF);
CHIPREG_WRITE32(&ioc->chip->WriteSequence, MPI_WRSEQ_1ST_KEY_VALUE);
*************** mpt_downloadboot(MPT_ADAPTER *ioc, int s
*** 2928,2937 ****
--- 3042,3075 ----

}

+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail ;
+ read_fail = CHIPREG_READ32(&diag0val, &ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
diag0val = CHIPREG_READ32(&ioc->chip->Diagnostic);
+ #endif
+
#ifdef MPT_DEBUG
+ #ifdef CONFIG_PCI_RECOVERY
+ if (ioc->alt_ioc) {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&diag1val, &ioc->alt_ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
if (ioc->alt_ioc)
diag1val = CHIPREG_READ32(&ioc->alt_ioc->chip->Diagnostic);
+ #endif
+
ddlprintk((MYIOC_s_INFO_FMT "DbGb3: diag0=%08x, diag1=%08x\n",
ioc->name, diag0val, diag1val));
#endif
*************** mpt_downloadboot(MPT_ADAPTER *ioc, int s
*** 2944,2951 ****
--- 3082,3100 ----
CHIPREG_WRITE32(&ioc->chip->Diagnostic, diag0val);

#ifdef MPT_DEBUG
+ #ifdef CONFIG_PCI_RECOVERY
+ if (ioc->alt_ioc) {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&diag1val, &ioc->alt_ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
if (ioc->alt_ioc)
diag1val = CHIPREG_READ32(&ioc->alt_ioc->chip->Diagnostic);
+ #endif

ddlprintk((MYIOC_s_INFO_FMT "DbGb3: diag0=%08x, diag1=%08x\n",
ioc->name, diag0val, diag1val));
*************** mpt_downloadboot(MPT_ADAPTER *ioc, int s
*** 3097,3103 ****
--- 3246,3265 ----
CHIPREG_PIO_WRITE32(&ioc->pio_chip->DiagRwData, diagRwData);

/* clear the RW enable and DISARM bits */
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail ;
+
+ read_fail = CHIPREG_READ32(&diag0val, &ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
diag0val = CHIPREG_READ32(&ioc->chip->Diagnostic);
+ #endif
+
diag0val &= ~(MPI_DIAG_DISABLE_ARM | MPI_DIAG_RW_ENABLE | MPI_DIAG_FLASH_BAD_SIG);
CHIPREG_WRITE32(&ioc->chip->Diagnostic, diag0val);

*************** mpt_diag_reset(MPT_ADAPTER *ioc, int ign
*** 3215,3225 ****
--- 3377,3410 ----
CHIPREG_WRITE32(&ioc->chip->IntStatus, 0);

/* Use "Diagnostic reset" method! (only thing available!) */
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&diag0val, &ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
diag0val = CHIPREG_READ32(&ioc->chip->Diagnostic);
+ #endif

#ifdef MPT_DEBUG
+ #ifdef CONFIG_PCI_RECOVERY
+ if (ioc->alt_ioc) {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&diag1val, &ioc->alt_ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
if (ioc->alt_ioc)
diag1val = CHIPREG_READ32(&ioc->alt_ioc->chip->Diagnostic);
+ #endif
+
dprintk((MYIOC_s_INFO_FMT "DbG1: diag0=%08x, diag1=%08x\n",
ioc->name, diag0val, diag1val));
#endif
*************** mpt_diag_reset(MPT_ADAPTER *ioc, int ign
*** 3255,3269 ****
--- 3440,3477 ----

}

+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&diag0val, &ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
diag0val = CHIPREG_READ32(&ioc->chip->Diagnostic);
+ #endif

dprintk((MYIOC_s_INFO_FMT "Wrote magic DiagWriteEn sequence (%x)\n",
ioc->name, diag0val));
}

#ifdef MPT_DEBUG
+ #ifndef CONFIG_PCI_RECOVERY
+ if (ioc->alt_ioc) {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&diag1val, &ioc->alt_ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
if (ioc->alt_ioc)
diag1val = CHIPREG_READ32(&ioc->alt_ioc->chip->Diagnostic);
+ #endif
+
dprintk((MYIOC_s_INFO_FMT "DbG2: diag0=%08x, diag1=%08x\n",
ioc->name, diag0val, diag1val));
#endif
*************** mpt_diag_reset(MPT_ADAPTER *ioc, int ign
*** 3320,3329 ****
--- 3528,3561 ----
* case. _diag_reset will return < 0
*/
for (count = 0; count < 30; count ++) {
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&diag0val, &ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
diag0val = CHIPREG_READ32(&ioc->chip->Diagnostic);
+ #endif
+
#ifdef MPT_DEBUG
+ #ifdef CONFIG_PCI_RECOVERY
+ if (ioc->alt_ioc) {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&diag1val, &ioc->alt_ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
if (ioc->alt_ioc)
diag1val = CHIPREG_READ32(&ioc->alt_ioc->chip->Diagnostic);
+ #endif
+
dprintk((MYIOC_s_INFO_FMT
"DbG2b: diag0=%08x, diag1=%08x\n",
ioc->name, diag0val, diag1val));
*************** mpt_diag_reset(MPT_ADAPTER *ioc, int ign
*** 3353,3359 ****
--- 3585,3603 ----
* with calling program.
*/
for (count = 0; count < 30; count ++) {
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&doorbell, &ioc->chip->Doorbell);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
doorbell = CHIPREG_READ32(&ioc->chip->Doorbell);
+ #endif
+
doorbell &= MPI_IOC_STATE_MASK;

if (doorbell == MPI_IOC_STATE_READY) {
*************** mpt_diag_reset(MPT_ADAPTER *ioc, int ign
*** 3371,3380 ****
--- 3615,3648 ----
}
}

+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&diag0val, &ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
diag0val = CHIPREG_READ32(&ioc->chip->Diagnostic);
+ #endif
+
#ifdef MPT_DEBUG
+ #ifdef CONFIG_PCI_RECOVERY
+ if (ioc->alt_ioc) {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&diag1val, &ioc->alt_ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
if (ioc->alt_ioc)
diag1val = CHIPREG_READ32(&ioc->alt_ioc->chip->Diagnostic);
+ #endif
+
dprintk((MYIOC_s_INFO_FMT "DbG3: diag0=%08x, diag1=%08x\n",
ioc->name, diag0val, diag1val));
#endif
*************** mpt_diag_reset(MPT_ADAPTER *ioc, int ign
*** 3382,3388 ****
--- 3650,3668 ----
/* Clear RESET_HISTORY bit! Place board in the
* diagnostic mode to update the diag register.
*/
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&diag0val, &ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
diag0val = CHIPREG_READ32(&ioc->chip->Diagnostic);
+ #endif
+
count = 0;
while ((diag0val & MPI_DIAG_DRWE) == 0) {
/* Write magic sequence to WriteSequence register
*************** mpt_diag_reset(MPT_ADAPTER *ioc, int ign
*** 3408,3418 ****
--- 3688,3722 ----
ioc->name, diag0val);
break;
}
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&diag0val, &ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
diag0val = CHIPREG_READ32(&ioc->chip->Diagnostic);
+ #endif
+
}
diag0val &= ~MPI_DIAG_RESET_HISTORY;
CHIPREG_WRITE32(&ioc->chip->Diagnostic, diag0val);
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&diag0val, &ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
diag0val = CHIPREG_READ32(&ioc->chip->Diagnostic);
+ #endif
+
if (diag0val & MPI_DIAG_RESET_HISTORY) {
printk(MYIOC_s_WARN_FMT "ResetHistory bit failed to clear!\n",
ioc->name);
*************** mpt_diag_reset(MPT_ADAPTER *ioc, int ign
*** 3424,3430 ****
--- 3728,3746 ----

/* Check FW reload status flags.
*/
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&diag0val, &ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
diag0val = CHIPREG_READ32(&ioc->chip->Diagnostic);
+ #endif
+
if (diag0val & (MPI_DIAG_FLASH_BAD_SIG | MPI_DIAG_RESET_ADAPTER | MPI_DIAG_DISABLE_ARM)) {
printk(MYIOC_s_ERR_FMT "Diagnostic reset FAILED! (%02xh)\n",
ioc->name, diag0val);
*************** mpt_diag_reset(MPT_ADAPTER *ioc, int ign
*** 3432,3439 ****
--- 3748,3767 ----
}

#ifdef MPT_DEBUG
+ #ifdef CONFIG_PCI_RECOVERY
+ if (ioc->alt_ioc) {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&diag1val, &ioc->chip->Diagnostic);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
if (ioc->alt_ioc)
diag1val = CHIPREG_READ32(&ioc->alt_ioc->chip->Diagnostic);
+ #endif
+
dprintk((MYIOC_s_INFO_FMT "DbG4: diag0=%08x, diag1=%08x\n",
ioc->name, diag0val, diag1val));
#endif
*************** mpt_handshake_req_reply_wait(MPT_ADAPTER
*** 3743,3750 ****
--- 4071,4092 ----
ioc->name, t, failcnt ? " - MISSING DOORBELL HANDSHAKE!" : ""));

/* Read doorbell and check for active bit */
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ u32 dummy;
+ read_fail = CHIPREG_READ32(&dummy, &ioc->chip->Doorbell);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ if (!(dummy & MPI_DOORBELL_ACTIVE))
+ return -1;
+ }
+ #else
if (!(CHIPREG_READ32(&ioc->chip->Doorbell) & MPI_DOORBELL_ACTIVE))
return -1;
+ #endif

/*
* Clear doorbell int (WRITE 0 to IntStatus reg),
*************** WaitForDoorbellAck(MPT_ADAPTER *ioc, int
*** 3822,3828 ****
--- 4164,4182 ----

if (sleepFlag == CAN_SLEEP) {
while (--cntdn) {
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&intstat, &ioc->chip->IntStatus);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
intstat = CHIPREG_READ32(&ioc->chip->IntStatus);
+ #endif
+
if (! (intstat & MPI_HIS_IOP_DOORBELL_STATUS))
break;
set_current_state(TASK_INTERRUPTIBLE);
*************** WaitForDoorbellAck(MPT_ADAPTER *ioc, int
*** 3831,3837 ****
--- 4185,4203 ----
}
} else {
while (--cntdn) {
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&intstat, &ioc->chip->IntStatus);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
intstat = CHIPREG_READ32(&ioc->chip->IntStatus);
+ #endif
+
if (! (intstat & MPI_HIS_IOP_DOORBELL_STATUS))
break;
mdelay (1);
*************** WaitForDoorbellInt(MPT_ADAPTER *ioc, int
*** 3872,3878 ****
--- 4238,4256 ----
cntdn = ((sleepFlag == CAN_SLEEP) ? HZ : 1000) * howlong;
if (sleepFlag == CAN_SLEEP) {
while (--cntdn) {
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&intstat, &ioc->chip->IntStatus);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
intstat = CHIPREG_READ32(&ioc->chip->IntStatus);
+ #endif
+
if (intstat & MPI_HIS_DOORBELL_INTERRUPT)
break;
set_current_state(TASK_INTERRUPTIBLE);
*************** WaitForDoorbellInt(MPT_ADAPTER *ioc, int
*** 3881,3887 ****
--- 4259,4277 ----
}
} else {
while (--cntdn) {
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ read_fail = CHIPREG_READ32(&intstat, &ioc->chip->IntStatus);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
intstat = CHIPREG_READ32(&ioc->chip->IntStatus);
+ #endif
+
if (intstat & MPI_HIS_DOORBELL_INTERRUPT)
break;
mdelay(1);
*************** WaitForDoorbellReply(MPT_ADAPTER *ioc, i
*** 3932,3943 ****
--- 4322,4361 ----
if ((t = WaitForDoorbellInt(ioc, howlong, sleepFlag)) < 0) {
failcnt++;
} else {
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ u32 dummy;
+ read_fail = CHIPREG_READ32(&dummy, &ioc->chip->Doorbell);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ hs_reply[u16cnt++] = le16_to_cpu(dummy & 0x0000FFFF);
+ }
+ #else
hs_reply[u16cnt++] = le16_to_cpu(CHIPREG_READ32(&ioc->chip->Doorbell) & 0x0000FFFF);
+ #endif
+
CHIPREG_WRITE32(&ioc->chip->IntStatus, 0);
if ((t = WaitForDoorbellInt(ioc, 2, sleepFlag)) < 0)
failcnt++;
else {
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ u32 dummy;
+ read_fail = CHIPREG_READ32(&dummy, &ioc->chip->Doorbell);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ hs_reply[u16cnt++] = le16_to_cpu(dummy & 0x0000FFFF);
+ }
+ #else
hs_reply[u16cnt++] = le16_to_cpu(CHIPREG_READ32(&ioc->chip->Doorbell) & 0x0000FFFF);
+ #endif
+
CHIPREG_WRITE32(&ioc->chip->IntStatus, 0);
}
}
*************** WaitForDoorbellReply(MPT_ADAPTER *ioc, i
*** 3953,3959 ****
--- 4371,4391 ----
for (u16cnt=2; !failcnt && u16cnt < (2 * mptReply->MsgLength); u16cnt++) {
if ((t = WaitForDoorbellInt(ioc, 2, sleepFlag)) < 0)
failcnt++;
+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ u32 dummy;
+ read_fail = CHIPREG_READ32(&dummy, &ioc->chip->Doorbell);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ hword = le16_to_cpu(dummy & 0x0000FFFF);
+ }
+ #else
hword = le16_to_cpu(CHIPREG_READ32(&ioc->chip->Doorbell) & 0x0000FFFF);
+ #endif
+
/* don't overflow our IOC hs_reply[] buffer! */
if (u16cnt < sizeof(ioc->hs_reply) / sizeof(ioc->hs_reply[0]))
hs_reply[u16cnt] = hword;
*************** fusion_init(void)
*** 5896,5901 ****
--- 6328,6339 ----
show_mptmod_ver(my_NAME, my_VERSION);
printk(KERN_INFO COPYRIGHT "\n");

+ #ifdef CONFIG_PCI_RECOVERY
+ printk(KERN_INFO "Enable PCI PIO read error recovery\n");
+ #else
+ printk(KERN_INFO "Disable PCI PIO read error recovery\n");
+ #endif
+
Q_INIT(&MptAdapters, MPT_ADAPTER); /* set to empty */
for (i = 0; i < MPT_MAX_PROTOCOL_DRIVERS; i++) {
MptCallbacks[i] = NULL;
*************** fusion_exit(void)
*** 5962,5968 ****
--- 6400,6418 ----
/* Clear any lingering interrupt */
CHIPREG_WRITE32(&this->chip->IntStatus, 0);

+ #ifdef CONFIG_PCI_RECOVERY
+ {
+ int read_fail;
+ u32 dummy;
+ read_fail = CHIPREG_READ32(&dummy, &this->chip->IntStatus);
+ if (read_fail) {
+ printk("PCI PIO read error:%d\n", read_fail);
+ /* recovery code */
+ }
+ }
+ #else
CHIPREG_READ32(&this->chip->IntStatus);
+ #endif

Q_DEL_ITEM(this);
mpt_adapter_dispose(this);
diff -prN linux-2.6.1/drivers/message/fusion/mptbase.h linux-2.6.1.modified/drivers/message/fusion/mptbase.h
*** linux-2.6.1/drivers/message/fusion/mptbase.h 2004-01-09 15:59:10.000000000 +0900
--- linux-2.6.1.modified/drivers/message/fusion/mptbase.h 2004-01-26 16:41:43.394801737 +0900
***************
*** 80,87 ****
#define COPYRIGHT "Copyright (c) 1999-2003 " MODULEAUTHOR
#endif

! #define MPT_LINUX_VERSION_COMMON "2.05.00.05"
! #define MPT_LINUX_PACKAGE_NAME "@(#)mptlinux-2.05.00.05"
#define WHAT_MAGIC_STRING "@" "(" "#" ")"

#define show_mptmod_ver(s,ver) \
--- 80,87 ----
#define COPYRIGHT "Copyright (c) 1999-2003 " MODULEAUTHOR
#endif

! #define MPT_LINUX_VERSION_COMMON "2.05.00.05PCI"
! #define MPT_LINUX_PACKAGE_NAME "@(#)mptlinux-2.05.00.05PCI"
#define WHAT_MAGIC_STRING "@" "(" "#" ")"

#define show_mptmod_ver(s,ver) \
diff -prN linux-2.6.1/drivers/message/fusion/read_check.S linux-2.6.1.modified/drivers/message/fusion/read_check.S
*** linux-2.6.1/drivers/message/fusion/read_check.S 1970-01-01 09:00:00.000000000 +0900
--- linux-2.6.1.modified/drivers/message/fusion/read_check.S 2004-01-26 22:19:20.426780151 +0900
***************
*** 0 ****
--- 1,69 ----
+ #define ENTRY(name) \
+ .align 32; \
+ .global name; \
+ .proc name; \
+ name:
+ #define END(name) \
+ .endp name
+
+ /****************************************************************************/
+ .rodata
+ .align 8
+ .global readchk_addrs
+ readchk_addrs:
+ data8 .readl_start#
+ data8 .readl_end#
+
+ /* This should be per CPU data */
+ .global readl_mca#
+ .sbss
+ .align 4
+ .type readl_mca#,@object
+ .size readl_mca#,4
+ readl_mca:
+ .skip 4
+ /*
+ *
+ * int readl_check( __u32 *valp, __u32 *addr ) ;
+ * in0, in1
+ */
+ .text
+ .align 16
+ ENTRY(readl_check)
+ .prologue
+ .body
+ {.mmi addl r15 = @ltoffx(readl_mca#), r1
+ ;;
+ ld8.mov r14 = [r15], readl_mca# /* r14 = &readl_mca */
+ nop.i 0
+ ;;
+ }
+ .readl_start:
+ {.mmi ld4.acq r15 = [r14] /* r15 = readl_mca */
+ ld4 r33 = [r33] /* r33 = *addr(r33) */
+ nop.i 0
+ ;;
+ }
+ {.mmi add r16 =1, r33 /* consume r33 */
+ nop.m 0
+ nop.i 0
+ ;;
+ }
+ .readl_end:
+ {.mmi ld4.acq r14 = [r14] /* r14 = readl_mca */
+ ;;
+ cmp4.ne p6, p7 = r14, r15 /* old readl_mca == readl_mca? */
+ nop.i 0
+ ;;
+ }
+ {.mmi (p7) st4 [r32] = r33 /* if (old==new) *valp(r32) = r33 */
+ (p7) mov r8 = r0 /* if (old==new) ret = 0 */
+ nop.i 0
+ }
+ {.mmb (p6) addl r8 = 1, r0 /* if (old!=new) ret = 1 */
+ nop.m 0
+ br.ret.sptk.many rp
+ ;;
+ }
+
+ END(readl_check)

2004-01-28 02:55:34

by Linus Torvalds

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Wed, 28 Jan 2004, Hironobu Ishii wrote:
>
> This is a readX_check() prototype patch to evaluate
> the performance disadvantage.

Quite frankly, I'd much rather have something more like this:

clear_pcix_errors(dev);
..
x = readX_check(dev, offset); /* Maybe several ones, maybe in a loop */
..
error = read_pcix_errors(dev);
if (error)
take_pcix_offline(dev);

in other words, I'd rather _not_ see the "readX_check()" code itself have
the retry logic and error value handling.

Why? Because on a number of architectures it is entirely possible that the
error comes as a _asynchronous_ machine exception or similar. So I'd much
rather have the interfaces be designed for that. Also, it's likely to
perform a lot better, and result in much clearer code this way (ie you can
try to set up the whole command before reading the error just once).

It is _also_ going to be a hell of a lot easier to disable the code if you
want to, with just a

#ifndef CONFIG_PCI_RECOVERY
#define clear_pcix_errors(dev) do { } while (0)
#define read_pcix_errors(dev) (0)
#define take_pcix_offline(dev) do { } while (0)
#endif

in a header file for architectures that don't support it.

Does anybody see any downsides to something like this?

Linus

2004-01-28 03:10:08

by Matthew Wilcox

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Wed, Jan 28, 2004 at 10:54:37AM +0900, Hironobu Ishii wrote:
> This is a readX_check() prototype patch to evaluate
> the performance disadvantage.

I think you've just demonstrated why this type of interface is unacceptable:

> + #ifdef CONFIG_PCI_RECOVERY
> + {
> + int read_fail;
> + read_fail = CHIPREG_READ32(&pa, &ioc->chip->ReplyFifo);
> + if (read_fail) {
> + printk("PCI PIO read error:%d\n", read_fail);
> + /* recovery code */
> + }
> + if (pa == 0xFFFFFFFF)
> + return IRQ_HANDLED;
> + }
> + #else
> if ((pa = CHIPREG_READ32(&ioc->chip->ReplyFifo)) == 0xFFFFFFFF)
> return IRQ_HANDLED;
> ! #endif

We go from two easily understood lines to ten plus the recovery code.
If indeed recovery is even possible. An exception framework is clearly
the way to do this.

--
"Next the statesmen will invent cheap lies, putting the blame upon
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince
himself that the war is just, and will thank God for the better sleep
he enjoys after this process of grotesque self-deception." -- Mark Twain

2004-01-28 04:50:38

by Paul Mackerras

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

Linus Torvalds writes:

> Does anybody see any downsides to something like this?

Looks OK to me.

On pSeries (ppc64) machines, we don't get an asynchronous machine
check, but instead the read will return all 1s, and the system will
isolate the slot and arrange that all further reads return all 1s.
If you get all 1s back on a read, you are supposed to do a firmware
call to find out if there was actually an error.

With your design, I would make readX_check set a bit somewhere
(associated with the dev argument) if it saw all 1s, and then make
read_pcix_errors do the firmware call if the bit is set.

The only thing to be careful of is that drivers cope correctly with an
all-1s value returned. E.g. they shouldn't do:

while (readb_check(dev, offset) & BUSY)
udelay(1);

But of course they shouldn't do that anyway. :)

Paul.

2004-01-28 08:58:35

by Russell King

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Tue, Jan 27, 2004 at 06:55:17PM -0800, Linus Torvalds wrote:
> Does anybody see any downsides to something like this?

What if the failing PCI access happened in an interrupt routine?
(I'm thinking of the situation where you may need to read the PCI
status registers to find out whether an error occurred.)

Also, for that matter, what if a network device receives an abort
while performing BM-DMA?

Do we even care about either of these two scenarios?

--
Russell King
Linux kernel 2.6 ARM Linux - http://www.arm.linux.org.uk/
maintainer of: 2.6 PCMCIA - http://pcmcia.arm.linux.org.uk/
2.6 Serial core

2004-01-28 16:15:46

by Linus Torvalds

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Wed, 28 Jan 2004, Russell King wrote:
>
> What if the failing PCI access happened in an interrupt routine?
> (I'm thinking of the situation where you may need to read the PCI
> status registers to find out whether an error occurred.)
>
> Also, for that matter, what if a network device receives an abort
> while performing BM-DMA?
>
> Do we even care about either of these two scenarios?

We do, and the people who care about readX_check() had better be careful.
Quite possibly the "clear_pcix_error()" has to get a lock and disable
interrupts, and the "read_pcix_error()" routine would release the lock.
But that depends on the hardware - details like whether hardware can track
individual errors on multiple CPU's or not.

And keep in mind, 99% of all people won't ever care.

Linus

2004-01-28 17:00:35

by Grant Grundler

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Wed, Jan 28, 2004 at 08:58:25AM +0000, Russell King wrote:
> On Tue, Jan 27, 2004 at 06:55:17PM -0800, Linus Torvalds wrote:
> > Does anybody see any downsides to something like this?
>
> What if the failing PCI access happened in an interrupt routine?
> (I'm thinking of the situation where you may need to read the PCI
> status registers to find out whether an error occurred.)

The driver needs to be able to clean up in any context.
That's why I'm advocating what willy called an "exception framework".

While I like linus' suggestion is better than the original,
it spreads the driver error recovery code throughout the driver.
That upside is it can handle every situation.
The downside is numerous error paths makes the regular code alot
harder to read and maintain.

> Also, for that matter, what if a network device receives an abort
> while performing BM-DMA?

The next PIO read will see the error caused by BM-DMA.

> Do we even care about either of these two scenarios?

yes. IO Error recovery has to deal with every scenario.

grant

2004-01-28 18:20:59

by Matthew Wilcox

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Tue, Jan 27, 2004 at 06:55:17PM -0800, Linus Torvalds wrote:
> Quite frankly, I'd much rather have something more like this:
>
> clear_pcix_errors(dev);
> ..
> x = readX_check(dev, offset); /* Maybe several ones, maybe in a loop */
> ..
> error = read_pcix_errors(dev);
> if (error)
> take_pcix_offline(dev);
>
> in other words, I'd rather _not_ see the "readX_check()" code itself have
> the retry logic and error value handling.
>
> Why? Because on a number of architectures it is entirely possible that the
> error comes as a _asynchronous_ machine exception or similar. So I'd much
> rather have the interfaces be designed for that. Also, it's likely to
> perform a lot better, and result in much clearer code this way (ie you can
> try to set up the whole command before reading the error just once).

Well, read() is a bad example for that -- errors are always going
to come back straight away for a read. write() is a better example.
I'd really like to hear from someone who's done this kind of thing before.
Are there any actions worth taking when an error occurs *other* than
taking the card offline and notifying the user?

If there are, Linus' interface is probably the best one. If not, we could
simply have readX_check() / writeX_check() call dev->driver->unregister()
if they notice an error has occurred and then the driver doesn't even
need to call read_pcix_errors().

--
"Next the statesmen will invent cheap lies, putting the blame upon
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince
himself that the war is just, and will thank God for the better sleep
he enjoys after this process of grotesque self-deception." -- Mark Twain

2004-01-28 19:19:50

by Andi Kleen

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Wed, 28 Jan 2004 18:20:03 +0000
Matthew Wilcox <[email protected]> wrote:

> If there are, Linus' interface is probably the best one. If not, we could
> simply have readX_check() / writeX_check() call dev->driver->unregister()
> if they notice an error has occurred and then the driver doesn't even
> need to call read_pcix_errors().

It just won't really work for platforms with inexact MCEs for IO errors.
And even for those with exact MCEs it would probably be a nightmare
to implement (writing MCE handlers is extremly hard because you cannot
rely on any locking guarantees - even a printk can randomly deadlock)

For those the per pci_dev callback is the only realistic way.

-Andi

2004-01-28 19:41:23

by Andi Kleen

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Wed, 28 Jan 2004 11:33:33 -0800 (PST)
Linus Torvalds <[email protected]> wrote:

> For example, if checking for an error involves actually reading a value
> from a bridge register, then that implies some _serious_ amount of
> serialization and external CPU stuff.

Which is _extremly_ hard to do from an MCE handler ...

(currently all our MCE handlers are buggy because they can deadlock on the
printk lock)

-Andi

2004-01-28 19:33:42

by Linus Torvalds

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Wed, 28 Jan 2004, Matthew Wilcox wrote:
>
> If there are, Linus' interface is probably the best one. If not, we could
> simply have readX_check() / writeX_check() call dev->driver->unregister()
> if they notice an error has occurred and then the driver doesn't even
> need to call read_pcix_errors().

What worries me is that not only can the errors be asynchronous, they may
well be relatively expensive to check for.

For example, if checking for an error involves actually reading a value
from a bridge register, then that implies some _serious_ amount of
serialization and external CPU stuff.

The advantage of having the "check for errors" be done later and
independently of the actual IO access itself is not only that I think it
makes the code look nicer - it also allows you to do the IO independently
of the check. Which potentially means that the CPU can burst out the
writes as a burst write, rather than doing them one at a time and then
doing a read of a bridge register in between that serializes everything.

Linus

2004-01-28 20:06:26

by Linus Torvalds

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Wed, 28 Jan 2004, Andi Kleen wrote:
>
> On Wed, 28 Jan 2004 11:33:33 -0800 (PST)
> Linus Torvalds <[email protected]> wrote:
>
> > For example, if checking for an error involves actually reading a value
> > from a bridge register, then that implies some _serious_ amount of
> > serialization and external CPU stuff.
>
> Which is _extremly_ hard to do from an MCE handler ...

So don't do it in the MCE handler.

Just set a flag aka "may need checking", and let the check be done by the
actual "read_pcix_error()" code.

Linus

2004-01-28 20:26:15

by Andi Kleen

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation II

On Wed, 28 Jan 2004 21:15:54 +0100
Andi Kleen <[email protected]> wrote:

>
> >
> > Just set a flag aka "may need checking", and let the check be done by the
> > actual "read_pcix_error()" code.
>
> Where would you put the flag?
>
> Doing it global may give false errors for the wrong device with async MCEs
> and on SMP.
>
> For putting it into the pci_dev you need to take logs to walk the list.
> If you delay it to a softirq for safely getting the lock it would be set too late.
>
> Putting it into a different table indexed by pci index would be also racy
> with hotplug.

... to follow up myself ...

I suppose moving the pci_dev lists to RCU could make the flag in pci-dev work. But it would be still
a bit tricky with preemptive kernels.

-Andi

2004-01-28 20:16:03

by Andi Kleen

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Wed, 28 Jan 2004 12:06:12 -0800 (PST)
Linus Torvalds <[email protected]> wrote:

>
>
> On Wed, 28 Jan 2004, Andi Kleen wrote:
> >
> > On Wed, 28 Jan 2004 11:33:33 -0800 (PST)
> > Linus Torvalds <[email protected]> wrote:
> >
> > > For example, if checking for an error involves actually reading a value
> > > from a bridge register, then that implies some _serious_ amount of
> > > serialization and external CPU stuff.
> >
> > Which is _extremly_ hard to do from an MCE handler ...
>
> So don't do it in the MCE handler.
>
> Just set a flag aka "may need checking", and let the check be done by the
> actual "read_pcix_error()" code.

Where would you put the flag?

Doing it global may give false errors for the wrong device with async MCEs
and on SMP.

For putting it into the pci_dev you need to take logs to walk the list.
If you delay it to a softirq for safely getting the lock it would be set too late.

Putting it into a different table indexed by pci index would be also racy
with hotplug.

-Andi

2004-01-28 20:29:11

by Linus Torvalds

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Wed, 28 Jan 2004, Andi Kleen wrote:
>
> Where would you put the flag?
>
> Doing it global may give false errors for the wrong device with async MCEs
> and on SMP.

That entirely depends on what the hardware supports. How much information
will you get about the error?

If the real error is on the bridge somewhere but you don't even know which
CPU did the access (and just "somebody" gets an MCE), just set a global
flag, and have "read_pcix_error()" check the bridge (since it doesn't need
to look anything up - it already knows the device).

And in that case then you need to take the proper locks (per-bridge, or
global, depending on just how much you care) in "clear_pcix_error()" and
release them in "read_pcix_error()".

Alternatively, if you get a lot of information at MCE time (CPU that did
the access + some device data), just queue up the information in a per-CPU
queue. You don't have to worry about overflow - you can just drop if if
you get many errors (or maybe maintain a count), since the only thing that
matters is "we got an error for this device" along with maybe some small
amount of info on what kind(s) of error(s).

Basically: it all boils down to what the hardware offers.

Linus

2004-01-28 20:31:05

by Linus Torvalds

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Wed, 28 Jan 2004, Linus Torvalds wrote:
>
> If the real error is on the bridge somewhere but you don't even know which
> CPU did the access (and just "somebody" gets an MCE), just set a global
> flag, and have "read_pcix_error()" check the bridge (since it doesn't need
> to look anything up - it already knows the device).
>
> And in that case then you need to take the proper locks (per-bridge, or
> global, depending on just how much you care) in "clear_pcix_error()" and
> release them in "read_pcix_error()".

Note, in case this wasn't clear already: in this case, the "MCE flag" is
just a lazy flag saying "you need to check more deeply". It wouldn't cause
false positives, simply because the _real_ check ends up being
"read_pcix_error()" actually reading the error status from the bridge or
the device.

It's just that 99% of the time, you don't want to do even that.

Linus

2004-01-28 21:10:44

by Andi Kleen

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Wed, 28 Jan 2004 12:28:56 -0800 (PST)
Linus Torvalds <[email protected]> wrote:

>
> Alternatively, if you get a lot of information at MCE time (CPU that did
> the access + some device data), just queue up the information in a per-CPU
> queue. You don't have to worry about overflow - you can just drop if if

That assumes that the access happened with preempt off ?

That's fine if it's guaranteed that the MCE still happened inside
readl/writel. But if it's delayed longer for some reason there is no guarantee
that you can find back to the CPU that caused the fault.

Putting it into the pci_dev and using RCU would be probably better.

-Andi

2004-01-28 21:52:10

by Andi Kleen

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Wed, 28 Jan 2004 13:43:54 -0800 (PST)
Linus Torvalds <[email protected]> wrote:

> Again, this is all depending on hardware implementation issues. It is
> entirely possible that "read_pcix_error()" has to do some kind of
> synchronization to make sure that any async operations have finished and
> all errors have been accounted for.

I have no idea how to do such an synchronization on i386/x86-64. E.g. Opteron
chipsets would likely support MCEs for bus aborts, but there is no way to
synchronize it for writes.

-Andi

2004-01-28 21:46:07

by Linus Torvalds

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Wed, 28 Jan 2004, Andi Kleen wrote:
>
> On Wed, 28 Jan 2004 12:28:56 -0800 (PST)
> Linus Torvalds <[email protected]> wrote:
>
> >
> > Alternatively, if you get a lot of information at MCE time (CPU that did
> > the access + some device data), just queue up the information in a per-CPU
> > queue. You don't have to worry about overflow - you can just drop if if
>
> That assumes that the access happened with preempt off ?

Yes. I assume you want _some_ locking anyway, at least within that
particular device driver (you don't want to have an irq handler touch the
device at the same time you are doing this thing), so any such spinlock
would have disabled preemption anyway.

> That's fine if it's guaranteed that the MCE still happened inside
> readl/writel. But if it's delayed longer for some reason there is no guarantee
> that you can find back to the CPU that caused the fault.

Again, this is all depending on hardware implementation issues. It is
entirely possible that "read_pcix_error()" has to do some kind of
synchronization to make sure that any async operations have finished and
all errors have been accounted for.

Again, this is the whole reason for doing the separate "clear/read"
phases in error handling - exactly because hardware implementation may
make error handling fairly heavy-weight, so you don't want to do it on a
per-IO basis, but rather on a "per transaction" basis.

Linus

2004-01-28 22:15:38

by David Miller

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Wed, 28 Jan 2004 13:43:54 -0800 (PST)
Linus Torvalds <[email protected]> wrote:

> On Wed, 28 Jan 2004, Andi Kleen wrote:
> >
> > On Wed, 28 Jan 2004 12:28:56 -0800 (PST)
> > Linus Torvalds <[email protected]> wrote:
> >
> > >
> > > Alternatively, if you get a lot of information at MCE time (CPU that did
> > > the access + some device data), just queue up the information in a per-CPU
> > > queue. You don't have to worry about overflow - you can just drop if if
> >
> > That assumes that the access happened with preempt off ?
>
> Yes. I assume you want _some_ locking anyway, at least within that
> particular device driver (you don't want to have an irq handler touch the
> device at the same time you are doing this thing), so any such spinlock
> would have disabled preemption anyway.

I am rather certain you are going to need to do a per-controller lock
the driver will need to grab during such sequences. It is the only way
I can see, if the state sits in some controller register or resetting
that status must be done in the controller, to keep two driver inits
or resets or whatever from bumping into each other.

2004-01-28 22:21:48

by Linus Torvalds

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Wed, 28 Jan 2004, Andi Kleen wrote:
>
> I have no idea how to do such an synchronization on i386/x86-64. E.g. Opteron
> chipsets would likely support MCEs for bus aborts, but there is no way to
> synchronize it for writes.

Doing a status read from the device should do it (just read the config
space, for example).

But remember: I suspect there are very _very_ few people who care at that
stage.

Linus

2004-01-28 22:45:00

by Andi Kleen

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Wed, 28 Jan 2004 14:21:40 -0800 (PST)
Linus Torvalds <[email protected]> wrote:

>
>
> On Wed, 28 Jan 2004, Andi Kleen wrote:
> >
> > I have no idea how to do such an synchronization on i386/x86-64. E.g. Opteron
> > chipsets would likely support MCEs for bus aborts, but there is no way to
> > synchronize it for writes.
>
> Doing a status read from the device should do it (just read the config
> space, for example).

The device is just not known. iirc you only get a bit in the bridge, which
leaves a wide choice. Ok, I guess you could read all config spaces in this
case

(I don't remember the ordering rules well enough - maybe one single config
space read is enough for all devices behind the bridge)

> But remember: I suspect there are very _very_ few people who care at that
> stage.

Driver writers caring would be a good start.

I definitely agree that it shouldn't be enabled on anything near a production kernel.

-Andi

2004-01-28 22:59:20

by Linus Torvalds

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

On Wed, 28 Jan 2004, Andi Kleen wrote:
> > Doing a status read from the device should do it (just read the config
> > space, for example).
>
> The device is just not known. iirc you only get a bit in the bridge, which
> leaves a wide choice.

read_pcix_error() _does_ know the device. The driver tells it.

Remember: none of this should be done at machine check time.

Linus

2004-01-29 12:25:11

by Hironobu Ishii

[permalink] [raw]

Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

From: "Linus Torvalds" <[email protected]>
To: "Andi Kleen" <[email protected]>
Cc: <[email protected]>; <[email protected]>; <[email protected]>; <[email protected]>
Sent: Thursday, January 29, 2004 7:59 AM
Subject: Re: [RFC/PATCH, 2/4] readX_check() performance evaluation

>
>
> On Wed, 28 Jan 2004, Andi Kleen wrote:
> > > Doing a status read from the device should do it (just read the config
> > > space, for example).
> >
> > The device is just not known. iirc you only get a bit in the bridge, which
> > leaves a wide choice.
>
> read_pcix_error() _does_ know the device. The driver tells it.
>
> Remember: none of this should be done at machine check time.
>
> Linus

Thank you for a lot of comments.
I prefer Linus's I/F than callback(exception) I/F,
because I can recover from intermittent errors.

I'd need time to consider how to map these I/F onto ia64 platform.
Later, I will post the result.

Thank you.
Hironobu Ishii