Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S965096AbVKGVbs (ORCPT ); Mon, 7 Nov 2005 16:31:48 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S965151AbVKGVbs (ORCPT ); Mon, 7 Nov 2005 16:31:48 -0500 Received: from e6.ny.us.ibm.com ([32.97.182.146]:26317 "EHLO e6.ny.us.ibm.com") by vger.kernel.org with ESMTP id S965096AbVKGVbq (ORCPT ); Mon, 7 Nov 2005 16:31:46 -0500 Date: Mon, 7 Nov 2005 15:31:40 -0600 To: Greg KH Cc: linux-kernel@vger.kernel.org, bluesmoke-devel@lists.sourceforge.net, Paul Mackerras , linuxppc64-dev@ozlabs.org, linux-pci@atrey.karlin.mff.cuni.cz Subject: [PATCH 3/7]: Revised [PATCH 28/42]: SCSI: add PCI error recovery to Symbios dev driver Message-ID: <20051107213139.GJ19593@austin.ibm.com> References: <20051103235918.GA25616@mail.gnucash.org> <20051104005035.GA26929@mail.gnucash.org> <20051105061114.GA27016@kroah.com> <17262.37107.857718.184055@cargo.ozlabs.ibm.com> <20051107175541.GB19593@austin.ibm.com> <20051107182727.GD18861@kroah.com> <20051107195727.GF19593@austin.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20051107195727.GF19593@austin.ibm.com> User-Agent: Mutt/1.5.6+20040907i From: linas Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 7206 Lines: 236 On Mon, Nov 07, 2005 at 01:57:27PM -0600, linas was heard to remark: > On Mon, Nov 07, 2005 at 10:27:27AM -0800, Greg KH was heard to remark: > > 3) realy strong typing that sparse can detect. Various PCI bus errors can be signaled by newer PCI controllers. This patch adds the PCI error recovery callbacks to the Symbios SCSI device driver. The patch has been tested, and appears to work well. Signed-off-by: Linas Vepstas -- Index: linux-2.6.14-mm1/drivers/scsi/sym53c8xx_2/sym_glue.c =================================================================== --- linux-2.6.14-mm1.orig/drivers/scsi/sym53c8xx_2/sym_glue.c 2005-11-07 13:55:26.839081234 -0600 +++ linux-2.6.14-mm1/drivers/scsi/sym53c8xx_2/sym_glue.c 2005-11-07 15:02:08.152337375 -0600 @@ -686,6 +686,10 @@ if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("["); + /* Avoid spinloop trying to handle interrupts on frozen device */ + if (np->s.io_state != pci_channel_io_normal) + return IRQ_HANDLED; + spin_lock_irqsave(np->s.host->host_lock, flags); sym_interrupt(np); spin_unlock_irqrestore(np->s.host->host_lock, flags); @@ -759,6 +763,25 @@ */ static void sym_eh_timeout(u_long p) { __sym_eh_done((struct scsi_cmnd *)p, 1); } +static void sym_eeh_timeout(u_long p) +{ + struct sym_eh_wait *ep = (struct sym_eh_wait *) p; + if (!ep) + return; + complete(&ep->done); +} + +static void sym_eeh_done(struct sym_eh_wait *ep) +{ + if (!ep) + return; + ep->timed_out = 0; + if (!del_timer(&ep->timer)) + return; + + complete(&ep->done); +} + /* * Generic method for our eh processing. * The 'op' argument tells what we have to do. @@ -799,6 +822,35 @@ /* Try to proceed the operation we have been asked for */ sts = -1; + + /* We may be in an error condition because the PCI bus + * went down. In this case, we need to wait until the + * PCI bus is reset, the card is reset, and only then + * proceed with the scsi error recovery. We'll wait + * for 15 seconds for this to happen. + */ +#define WAIT_FOR_PCI_RECOVERY 15 + if (np->s.io_state != pci_channel_io_normal) { + struct sym_eh_wait eeh, *eep = &eeh; + np->s.io_reset_wait = eep; + init_completion(&eep->done); + init_timer(&eep->timer); + eep->to_do = SYM_EH_DO_WAIT; + eep->timer.expires = jiffies + (WAIT_FOR_PCI_RECOVERY*HZ); + eep->timer.function = sym_eeh_timeout; + eep->timer.data = (u_long)eep; + eep->timed_out = 1; /* Be pessimistic for once :) */ + add_timer(&eep->timer); + spin_unlock_irq(np->s.host->host_lock); + wait_for_completion(&eep->done); + spin_lock_irq(np->s.host->host_lock); + if (eep->timed_out) { + printk (KERN_ERR "%s: Timed out waiting for PCI reset\n", + sym_name(np)); + } + np->s.io_reset_wait = NULL; + } + switch(op) { case SYM_EH_ABORT: sts = sym_abort_scsiio(np, cmd, 1); @@ -1584,6 +1636,8 @@ np->maxoffs = dev->chip.offset_max; np->maxburst = dev->chip.burst_max; np->myaddr = dev->host_id; + np->s.io_state = pci_channel_io_normal; + np->s.io_reset_wait = NULL; /* * Edit its name. @@ -1916,6 +1970,58 @@ return 1; } +/* ------------- PCI Error Recovery infrastructure -------------- */ +/** sym2_io_error_detected() is called when PCI error is detected */ +static pers_result_t sym2_io_error_detected (struct pci_dev *pdev, pci_channel_state_t state) +{ + struct sym_hcb *np = pci_get_drvdata(pdev); + + np->s.io_state = state; + // XXX If slot is permanently frozen, then what? + // Should we scsi_remove_host() maybe ?? + + /* Request a slot slot reset. */ + return PERS_RESULT_NEED_RESET; +} + +/** sym2_io_slot_reset is called when the pci bus has been reset. + * Restart the card from scratch. */ +static pers_result_t sym2_io_slot_reset (struct pci_dev *pdev) +{ + struct sym_hcb *np = pci_get_drvdata(pdev); + + printk (KERN_INFO "%s: recovering from a PCI slot reset\n", + sym_name(np)); + + if (pci_enable_device(pdev)) + printk (KERN_ERR "%s: device setup failed most egregiously\n", + sym_name(np)); + + pci_set_master(pdev); + enable_irq (pdev->irq); + + /* Perform host reset only on one instance of the card */ + if (0 == PCI_FUNC (pdev->devfn)) + sym_reset_scsi_bus(np, 0); + + return PERS_RESULT_RECOVERED; +} + +/** sym2_io_resume is called when the error recovery driver + * tells us that its OK to resume normal operation. + */ +static void sym2_io_resume (struct pci_dev *pdev) +{ + struct sym_hcb *np = pci_get_drvdata(pdev); + + /* Perform device startup only once for this card. */ + if (0 == PCI_FUNC (pdev->devfn)) + sym_start_up (np, 1); + + np->s.io_state = pci_channel_io_normal; + sym_eeh_done (np->s.io_reset_wait); +} + /* * Driver host template. */ @@ -2169,11 +2275,18 @@ MODULE_DEVICE_TABLE(pci, sym2_id_table); +static struct pci_error_handlers sym2_err_handler = { + .error_detected = sym2_io_error_detected, + .slot_reset = sym2_io_slot_reset, + .resume = sym2_io_resume, +}; + static struct pci_driver sym2_driver = { .name = NAME53C8XX, .id_table = sym2_id_table, .probe = sym2_probe, .remove = __devexit_p(sym2_remove), + .err_handler = &sym2_err_handler, }; static int __init sym2_init(void) Index: linux-2.6.14-mm1/drivers/scsi/sym53c8xx_2/sym_glue.h =================================================================== --- linux-2.6.14-mm1.orig/drivers/scsi/sym53c8xx_2/sym_glue.h 2005-11-07 13:55:26.839081234 -0600 +++ linux-2.6.14-mm1/drivers/scsi/sym53c8xx_2/sym_glue.h 2005-11-07 15:02:08.154337094 -0600 @@ -181,6 +181,10 @@ char chip_name[8]; struct pci_dev *device; + /* pci bus i/o state; waiter for clearing of i/o state */ + pci_channel_state_t io_state; + struct sym_eh_wait *io_reset_wait; + struct Scsi_Host *host; void __iomem * ioaddr; /* MMIO kernel io address */ Index: linux-2.6.14-mm1/drivers/scsi/sym53c8xx_2/sym_hipd.c =================================================================== --- linux-2.6.14-mm1.orig/drivers/scsi/sym53c8xx_2/sym_hipd.c 2005-11-07 13:55:26.840081093 -0600 +++ linux-2.6.14-mm1/drivers/scsi/sym53c8xx_2/sym_hipd.c 2005-11-07 15:02:08.162335970 -0600 @@ -2810,6 +2810,7 @@ u_char istat, istatc; u_char dstat; u_short sist; + u_int icnt; /* * interrupt on the fly ? @@ -2851,6 +2852,7 @@ sist = 0; dstat = 0; istatc = istat; + icnt = 0; do { if (istatc & SIP) sist |= INW(np, nc_sist); @@ -2858,6 +2860,19 @@ dstat |= INB(np, nc_dstat); istatc = INB(np, nc_istat); istat |= istatc; + + /* Prevent deadlock waiting on a condition that may never clear. */ + /* XXX this is a temporary kludge; the correct to detect + * a PCI bus error would be to use the io_check interfaces + * proposed by Hidetoshi Seto + * Problem with polling like that is the state flag might not + * be set. + */ + icnt ++; + if (100 < icnt) { + if (np->s.device->error_state != pci_channel_io_normal) + return; + } } while (istatc & (SIP|DIP)); if (DEBUG_FLAGS & DEBUG_TINY) - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/