Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752181AbeAJVpu (ORCPT + 1 other); Wed, 10 Jan 2018 16:45:50 -0500 Received: from mail.kernel.org ([198.145.29.99]:33310 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750782AbeAJVpt (ORCPT ); Wed, 10 Jan 2018 16:45:49 -0500 DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org E047620C51 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=kernel.org Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=helgaas@kernel.org Date: Wed, 10 Jan 2018 15:45:36 -0600 From: Bjorn Helgaas To: Alex Williamson Cc: linux-pci@vger.kernel.org, bhelgaas@google.com, keith.busch@intel.com, liudongdong3@huawei.com, linux-kernel@vger.kernel.org Subject: Re: [PATCH v2] PCI/DPC: Fix shared interrupt handling Message-ID: <20180110214536.GD241460@bhelgaas-glaptop.roam.corp.google.com> References: <20171214151854.7578.20594.stgit@gimli.home> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20171214151854.7578.20594.stgit@gimli.home> User-Agent: Mutt/1.5.21 (2010-09-15) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Return-Path: On Thu, Dec 14, 2017 at 08:20:18AM -0700, Alex Williamson wrote: > DPC supports shared interrupts, but it plays very loosely with testing > whether the interrupt is generated by DPC before generating spurious > log messages, such as: > > dpc 0000:10:01.2:pcie010: DPC containment event, status:0x1f00 source:0x0000 > > Testing the status register for zero or -1 is not sufficient when the > device supports the RP PIO First Error Pointer register. Change this > to test whether the interrupt is enabled in the control register, > retaining the device present test, and that the status reports the > interrupt as signaled and DPC is triggered, clearing as a spurious > interrupt otherwise. > > Additionally, since the interrupt is actually serviced by a workqueue, > disable the interrupt in the control register until that completes or > else we may never see it execute due to further incoming interrupts. > A software generated DPC floods the system otherwise. > > Signed-off-by: Alex Williamson Applied with Keith's reviewed-by on pci/dpc for v4.16, thanks! > --- > > v2: Fix interrupt re-enable as spotted by Keith, tested multiple > injections via software trigger. > > drivers/pci/pcie/pcie-dpc.c | 60 +++++++++++++++++++++++++++++-------------- > 1 file changed, 40 insertions(+), 20 deletions(-) > > diff --git a/drivers/pci/pcie/pcie-dpc.c b/drivers/pci/pcie/pcie-dpc.c > index 2d976a623ddc..f7cf5ae7dec2 100644 > --- a/drivers/pci/pcie/pcie-dpc.c > +++ b/drivers/pci/pcie/pcie-dpc.c > @@ -109,6 +109,7 @@ static void interrupt_event_handler(struct work_struct *work) > struct dpc_dev *dpc = container_of(work, struct dpc_dev, work); > struct pci_dev *dev, *temp, *pdev = dpc->dev->port; > struct pci_bus *parent = pdev->subordinate; > + u16 ctl; > > pci_lock_rescan_remove(); > list_for_each_entry_safe_reverse(dev, temp, &parent->devices, > @@ -135,6 +136,10 @@ static void interrupt_event_handler(struct work_struct *work) > > pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS, > PCI_EXP_DPC_STATUS_TRIGGER | PCI_EXP_DPC_STATUS_INTERRUPT); > + > + pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl); > + pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, > + ctl | PCI_EXP_DPC_CTL_INT_EN); > } > > static void dpc_rp_pio_print_tlp_header(struct device *dev, > @@ -249,34 +254,49 @@ static irqreturn_t dpc_irq(int irq, void *context) > struct dpc_dev *dpc = (struct dpc_dev *)context; > struct pci_dev *pdev = dpc->dev->port; > struct device *dev = &dpc->dev->device; > - u16 status, source; > + u16 ctl, status, source, reason, ext_reason; > + > + pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, &ctl); > + > + if (!(ctl & PCI_EXP_DPC_CTL_INT_EN) || ctl == (u16)(~0)) > + return IRQ_NONE; > > pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS, &status); > + > + if (!(status & PCI_EXP_DPC_STATUS_INTERRUPT)) > + return IRQ_NONE; > + > + if (!(status & PCI_EXP_DPC_STATUS_TRIGGER)) { > + pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS, > + PCI_EXP_DPC_STATUS_INTERRUPT); > + return IRQ_HANDLED; > + } > + > + pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, > + ctl & ~PCI_EXP_DPC_CTL_INT_EN); > + > pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_SOURCE_ID, > &source); > - if (!status || status == (u16)(~0)) > - return IRQ_NONE; > > dev_info(dev, "DPC containment event, status:%#06x source:%#06x\n", > status, source); > > - if (status & PCI_EXP_DPC_STATUS_TRIGGER) { > - u16 reason = (status >> 1) & 0x3; > - u16 ext_reason = (status >> 5) & 0x3; > - > - dev_warn(dev, "DPC %s detected, remove downstream devices\n", > - (reason == 0) ? "unmasked uncorrectable error" : > - (reason == 1) ? "ERR_NONFATAL" : > - (reason == 2) ? "ERR_FATAL" : > - (ext_reason == 0) ? "RP PIO error" : > - (ext_reason == 1) ? "software trigger" : > - "reserved error"); > - /* show RP PIO error detail information */ > - if (reason == 3 && ext_reason == 0) > - dpc_process_rp_pio_error(dpc); > - > - schedule_work(&dpc->work); > - } > + reason = (status >> 1) & 0x3; > + ext_reason = (status >> 5) & 0x3; > + > + dev_warn(dev, "DPC %s detected, remove downstream devices\n", > + (reason == 0) ? "unmasked uncorrectable error" : > + (reason == 1) ? "ERR_NONFATAL" : > + (reason == 2) ? "ERR_FATAL" : > + (ext_reason == 0) ? "RP PIO error" : > + (ext_reason == 1) ? "software trigger" : > + "reserved error"); > + /* show RP PIO error detail information */ > + if (reason == 3 && ext_reason == 0) > + dpc_process_rp_pio_error(dpc); > + > + schedule_work(&dpc->work); > + > return IRQ_HANDLED; > } > >