This patch set brings in error handling support for DPC.
The current implementation of AER and error message broadcasting to the
EP driver is tightly coupled and limited to AER service driver.
It is important to factor out broadcasting and other link handling
callbacks, so that callbacks are handled appropriately not only when AER
gets triggered, but also when DPC gets triggered (e.g. for ERR_FATAL).
DPC should enumerate the devices after recovering the link, which is
achieved by implementing error_resume callback.
Changes since v5:
Sinan's and Keith's comments incorporated.
> made separate patch for mutex
> unified error reporting codes into drivers/pci/pci.h
> got rid of wait link active/inactive and
made generic function in drivers/pci/pci.c
Changes since v4:
Bjorn's comments incorporated.
> Renamed only do_recovery.
> moved the things more locally to drivers/pci/pci.h
Changes since v3:
Bjorn's comments incorporated.
> Made separate patch renaming generic pci_err.c
> Introduce pci_err.h to contain all the error types and recovery
> removed all the dependencies on pci.h
Changes since v2:
Based on feedback from Keith:
"
When DPC is triggered due to receipt of an uncorrectable error Message,
the Requester ID from the Message is recorded in the DPC Error
Source ID register and that Message is discarded and not forwarded Upstream.
"
Removed the patch where AER checks if DPC service is active
Changes since v1:
Kbuild errors fixed:
> pci_find_dpc_dev made static
> ras_event.h updated
> pci_find_aer_service call with CONFIG check
> pci_find_dpc_service call with CONFIG check
Oza Pawandeep (7):
PCI/AER: Rename error recovery to generic pci naming
PCI/AER: factor out error reporting from AER
PCI/ERR: add mutex to synchronize recovery
PCI/DPC: Unify and plumb error handling into DPC
PCI/AER: Unify aer error defines at single space
PCI/DPC: Enumerate the devices after DPC trigger event
PCI: Unify wait for link active into generic pci
drivers/acpi/apei/ghes.c | 1 +
drivers/pci/hotplug/pciehp_hpc.c | 21 +-
drivers/pci/pci.c | 39 +++-
drivers/pci/pci.h | 11 +
drivers/pci/pcie/Makefile | 2 +-
drivers/pci/pcie/aer/aerdrv.h | 30 ---
drivers/pci/pcie/aer/aerdrv_core.c | 293 +-------------------------
drivers/pci/pcie/aer/aerdrv_errprint.c | 1 +
drivers/pci/pcie/pcie-dpc.c | 115 ++++++++++-
drivers/pci/pcie/pcie-err.c | 366 +++++++++++++++++++++++++++++++++
drivers/pci/pcie/portdrv.h | 2 +
include/linux/aer.h | 4 -
include/linux/pci.h | 1 +
13 files changed, 534 insertions(+), 352 deletions(-)
create mode 100644 drivers/pci/pcie/pcie-err.c
--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.,
a Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.
This patch renames do_recovery() to pci_do_recovery(), giving the error
recovery routine a generic name with the pci prefix.
Signed-off-by: Oza Pawandeep <[email protected]>
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 7448052..6cb1b36 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -482,7 +482,7 @@ static pci_ers_result_t reset_link(struct pci_dev *dev)
}
/**
- * do_recovery - handle nonfatal/fatal error recovery process
+ * pci_do_recovery - handle nonfatal/fatal error recovery process
* @dev: pointer to a pci_dev data structure of agent detecting an error
* @severity: error severity type
*
@@ -490,7 +490,7 @@ static pci_ers_result_t reset_link(struct pci_dev *dev)
* error detected message to all downstream drivers within a hierarchy in
* question and return the returned code.
*/
-static void do_recovery(struct pci_dev *dev, int severity)
+static void pci_do_recovery(struct pci_dev *dev, int severity)
{
pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED;
enum pci_channel_state state;
@@ -569,7 +569,7 @@ static void handle_error_source(struct pcie_device *aerdev,
pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
info->status);
} else
- do_recovery(dev, info->severity);
+ pci_do_recovery(dev, info->severity);
}
#ifdef CONFIG_ACPI_APEI_PCIEAER
@@ -633,7 +633,7 @@ static void aer_recover_work_func(struct work_struct *work)
continue;
}
cper_print_aer(pdev, entry.severity, entry.regs);
- do_recovery(pdev, entry.severity);
+ pci_do_recovery(pdev, entry.severity);
pci_dev_put(pdev);
}
}
--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.,
a Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.
The current DPC driver does not do recovery, e.g. calling the endpoint
driver's callbacks, which sanitize the software.
The DPC driver now implements the reset_link callback and calls pci_do_recovery().
Signed-off-by: Oza Pawandeep <[email protected]>
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 665ff6c..3b79593 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -343,8 +343,12 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev,
void pci_enable_acs(struct pci_dev *dev);
/* PCI error reporting and recovery */
+#define DPC_FATAL 4
+
void pci_do_recovery(struct pci_dev *dev, int severity);
+
+
#ifdef CONFIG_PCIE_PTM
void pci_ptm_init(struct pci_dev *dev);
#else
diff --git a/drivers/pci/pcie/pcie-dpc.c b/drivers/pci/pcie/pcie-dpc.c
index 2d976a6..b5c9fbd 100644
--- a/drivers/pci/pcie/pcie-dpc.c
+++ b/drivers/pci/pcie/pcie-dpc.c
@@ -15,6 +15,9 @@
#include <linux/pci.h>
#include <linux/pcieport_if.h>
#include "../pci.h"
+#include "portdrv.h"
+
+static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev);
struct rp_pio_header_log_regs {
u32 dw0;
@@ -67,6 +70,60 @@ struct dpc_dev {
"Memory Request Completion Timeout", /* Bit Position 18 */
};
+static int find_dpc_dev_iter(struct device *device, void *data)
+{
+ struct pcie_port_service_driver *service_driver;
+ struct device **dev;
+
+ dev = (struct device **) data;
+
+ if (device->bus == &pcie_port_bus_type && device->driver) {
+ service_driver = to_service_driver(device->driver);
+ if (service_driver->service == PCIE_PORT_SERVICE_DPC) {
+ *dev = device;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static struct device *pci_find_dpc_dev(struct pci_dev *pdev)
+{
+ struct device *dev = NULL;
+
+ device_for_each_child(&pdev->dev, &dev, find_dpc_dev_iter);
+
+ return dev;
+}
+
+static int find_dpc_service_iter(struct device *device, void *data)
+{
+ struct pcie_port_service_driver *service_driver, **drv;
+
+ drv = (struct pcie_port_service_driver **) data;
+
+ if (device->bus == &pcie_port_bus_type && device->driver) {
+ service_driver = to_service_driver(device->driver);
+ if (service_driver->service == PCIE_PORT_SERVICE_DPC) {
+ *drv = service_driver;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+struct pcie_port_service_driver *pci_find_dpc_service(struct pci_dev *dev)
+{
+ struct pcie_port_service_driver *drv = NULL;
+
+ device_for_each_child(&dev->dev, &drv, find_dpc_service_iter);
+
+ return drv;
+}
+EXPORT_SYMBOL(pci_find_dpc_service);
+
static int dpc_wait_rp_inactive(struct dpc_dev *dpc)
{
unsigned long timeout = jiffies + HZ;
@@ -104,11 +161,23 @@ static void dpc_wait_link_inactive(struct dpc_dev *dpc)
dev_warn(dev, "Link state not disabled for DPC event\n");
}
-static void interrupt_event_handler(struct work_struct *work)
+/**
+ * dpc_reset_link - reset link DPC routine
+ * @dev: pointer to Root Port's pci_dev data structure
+ *
+ * Invoked by Port Bus driver when performing link reset at Root Port.
+ */
+static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
{
- struct dpc_dev *dpc = container_of(work, struct dpc_dev, work);
- struct pci_dev *dev, *temp, *pdev = dpc->dev->port;
struct pci_bus *parent = pdev->subordinate;
+ struct pci_dev *dev, *temp;
+ struct dpc_dev *dpc;
+ struct pcie_device *pciedev;
+ struct device *devdpc;
+
+ devdpc = pci_find_dpc_dev(pdev);
+ pciedev = to_pcie_device(devdpc);
+ dpc = get_service_data(pciedev);
pci_lock_rescan_remove();
list_for_each_entry_safe_reverse(dev, temp, &parent->devices,
@@ -125,7 +194,7 @@ static void interrupt_event_handler(struct work_struct *work)
dpc_wait_link_inactive(dpc);
if (dpc->rp && dpc_wait_rp_inactive(dpc))
- return;
+ return PCI_ERS_RESULT_DISCONNECT;
if (dpc->rp && dpc->rp_pio_status) {
pci_write_config_dword(pdev,
dpc->cap_pos + PCI_EXP_DPC_RP_PIO_STATUS,
@@ -135,6 +204,17 @@ static void interrupt_event_handler(struct work_struct *work)
pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_STATUS,
PCI_EXP_DPC_STATUS_TRIGGER | PCI_EXP_DPC_STATUS_INTERRUPT);
+
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+static void interrupt_event_handler(struct work_struct *work)
+{
+ struct dpc_dev *dpc = container_of(work, struct dpc_dev, work);
+ struct pci_dev *pdev = dpc->dev->port;
+
+ /* From DPC point of view error is always FATAL. */
+ pci_do_recovery(pdev, DPC_FATAL);
}
static void dpc_rp_pio_print_tlp_header(struct device *dev,
@@ -339,6 +419,7 @@ static void dpc_remove(struct pcie_device *dev)
.service = PCIE_PORT_SERVICE_DPC,
.probe = dpc_probe,
.remove = dpc_remove,
+ .reset_link = dpc_reset_link,
};
static int __init dpc_service_init(void)
diff --git a/drivers/pci/pcie/pcie-err.c b/drivers/pci/pcie/pcie-err.c
index 8318c84..05385c0 100644
--- a/drivers/pci/pcie/pcie-err.c
+++ b/drivers/pci/pcie/pcie-err.c
@@ -19,6 +19,7 @@
#include <linux/aer.h>
#include <linux/pcieport_if.h>
#include "portdrv.h"
+#include "./../pci.h"
static DEFINE_MUTEX(pci_err_recovery_lock);
@@ -181,7 +182,7 @@ static pci_ers_result_t default_reset_link(struct pci_dev *dev)
return PCI_ERS_RESULT_RECOVERED;
}
-static pci_ers_result_t reset_link(struct pci_dev *dev)
+static pci_ers_result_t reset_link(struct pci_dev *dev, int severity)
{
struct pci_dev *udev;
pci_ers_result_t status;
@@ -195,9 +196,17 @@ static pci_ers_result_t reset_link(struct pci_dev *dev)
udev = dev->bus->self;
}
+
+ /* Use the service driver of the component firstly */
+#if IS_ENABLED(CONFIG_PCIE_DPC)
+ if (severity == DPC_FATAL)
+ driver = pci_find_dpc_service(udev);
+#endif
#if IS_ENABLED(CONFIG_PCIEAER)
- /* Use the aer driver of the component firstly */
- driver = pci_find_aer_service(udev);
+ if ((severity == AER_FATAL) ||
+ (severity == AER_NONFATAL) ||
+ (severity == AER_CORRECTABLE))
+ driver = pci_find_aer_service(udev);
#endif
if (driver && driver->reset_link) {
@@ -287,7 +296,8 @@ void pci_do_recovery(struct pci_dev *dev, int severity)
mutex_lock(&pci_err_recovery_lock);
- if (severity == AER_FATAL)
+ if ((severity == AER_FATAL) ||
+ (severity == DPC_FATAL))
state = pci_channel_io_frozen;
else
state = pci_channel_io_normal;
@@ -297,8 +307,9 @@ void pci_do_recovery(struct pci_dev *dev, int severity)
"error_detected",
report_error_detected);
- if (severity == AER_FATAL) {
- result = reset_link(dev);
+ if ((severity == AER_FATAL) ||
+ (severity == DPC_FATAL)) {
+ result = reset_link(dev, severity);
if (result != PCI_ERS_RESULT_RECOVERED)
goto failed;
}
@@ -335,6 +346,6 @@ void pci_do_recovery(struct pci_dev *dev, int severity)
failed:
/* TODO: Should kernel panic here? */
- dev_info(&dev->dev, "Device recovery failed\n");
mutex_unlock(&pci_err_recovery_lock);
+ dev_info(&dev->dev, "Device recovery failed\n");
}
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index 4f1992d..b013e24 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -80,4 +80,5 @@ static inline void pcie_port_platform_notify(struct pci_dev *port, int *mask){}
#endif /* !CONFIG_ACPI */
struct pcie_port_service_driver *pci_find_aer_service(struct pci_dev *dev);
+struct pcie_port_service_driver *pci_find_dpc_service(struct pci_dev *dev);
#endif /* _PORTDRV_H_ */
--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.,
a Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.
This patch protects pci_do_recovery with mutex.
Signed-off-by: Oza Pawandeep <[email protected]>
diff --git a/drivers/pci/pcie/pcie-err.c b/drivers/pci/pcie/pcie-err.c
index a532fe0..8318c84 100644
--- a/drivers/pci/pcie/pcie-err.c
+++ b/drivers/pci/pcie/pcie-err.c
@@ -20,6 +20,8 @@
#include <linux/pcieport_if.h>
#include "portdrv.h"
+static DEFINE_MUTEX(pci_err_recovery_lock);
+
struct aer_broadcast_data {
enum pci_channel_state state;
enum pci_ers_result result;
@@ -283,6 +285,8 @@ void pci_do_recovery(struct pci_dev *dev, int severity)
pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED;
enum pci_channel_state state;
+ mutex_lock(&pci_err_recovery_lock);
+
if (severity == AER_FATAL)
state = pci_channel_io_frozen;
else
@@ -326,9 +330,11 @@ void pci_do_recovery(struct pci_dev *dev, int severity)
report_resume);
dev_info(&dev->dev, "Device recovery successful\n");
+ mutex_unlock(&pci_err_recovery_lock);
return;
failed:
/* TODO: Should kernel panic here? */
dev_info(&dev->dev, "Device recovery failed\n");
+ mutex_unlock(&pci_err_recovery_lock);
}
--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.,
a Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.
This patch factors out error reporting callbacks, which are currently
tightly coupled with AER.
DPC should be able to register callbacks and attempt recovery when a DPC
trigger event occurs.
Signed-off-by: Oza Pawandeep <[email protected]>
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index f6b58b3..665ff6c 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -342,6 +342,9 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev,
void pci_enable_acs(struct pci_dev *dev);
+/* PCI error reporting and recovery */
+void pci_do_recovery(struct pci_dev *dev, int severity);
+
#ifdef CONFIG_PCIE_PTM
void pci_ptm_init(struct pci_dev *dev);
#else
diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
index 223e4c3..d669497 100644
--- a/drivers/pci/pcie/Makefile
+++ b/drivers/pci/pcie/Makefile
@@ -6,7 +6,7 @@
# Build PCI Express ASPM if needed
obj-$(CONFIG_PCIEASPM) += aspm.o
-pcieportdrv-y := portdrv_core.o portdrv_pci.o portdrv_bus.o
+pcieportdrv-y := portdrv_core.o portdrv_pci.o portdrv_bus.o pcie-err.o
pcieportdrv-$(CONFIG_ACPI) += portdrv_acpi.o
obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o
diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h
index 5449e5c..bc9db53 100644
--- a/drivers/pci/pcie/aer/aerdrv.h
+++ b/drivers/pci/pcie/aer/aerdrv.h
@@ -76,36 +76,6 @@ struct aer_rpc {
*/
};
-struct aer_broadcast_data {
- enum pci_channel_state state;
- enum pci_ers_result result;
-};
-
-static inline pci_ers_result_t merge_result(enum pci_ers_result orig,
- enum pci_ers_result new)
-{
- if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
- return PCI_ERS_RESULT_NO_AER_DRIVER;
-
- if (new == PCI_ERS_RESULT_NONE)
- return orig;
-
- switch (orig) {
- case PCI_ERS_RESULT_CAN_RECOVER:
- case PCI_ERS_RESULT_RECOVERED:
- orig = new;
- break;
- case PCI_ERS_RESULT_DISCONNECT:
- if (new == PCI_ERS_RESULT_NEED_RESET)
- orig = PCI_ERS_RESULT_NEED_RESET;
- break;
- default:
- break;
- }
-
- return orig;
-}
-
extern struct bus_type pcie_port_bus_type;
void aer_isr(struct work_struct *work);
void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 6cb1b36..7934de0 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -26,6 +26,7 @@
#include <linux/slab.h>
#include <linux/kfifo.h>
#include "aerdrv.h"
+#include "../../pci.h"
#define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
@@ -234,189 +235,6 @@ static bool find_source_device(struct pci_dev *parent,
return true;
}
-static int report_error_detected(struct pci_dev *dev, void *data)
-{
- pci_ers_result_t vote;
- const struct pci_error_handlers *err_handler;
- struct aer_broadcast_data *result_data;
- result_data = (struct aer_broadcast_data *) data;
-
- device_lock(&dev->dev);
- dev->error_state = result_data->state;
-
- if (!dev->driver ||
- !dev->driver->err_handler ||
- !dev->driver->err_handler->error_detected) {
- if (result_data->state == pci_channel_io_frozen &&
- dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
- /*
- * In case of fatal recovery, if one of down-
- * stream device has no driver. We might be
- * unable to recover because a later insmod
- * of a driver for this device is unaware of
- * its hw state.
- */
- dev_printk(KERN_DEBUG, &dev->dev, "device has %s\n",
- dev->driver ?
- "no AER-aware driver" : "no driver");
- }
-
- /*
- * If there's any device in the subtree that does not
- * have an error_detected callback, returning
- * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of
- * the subsequent mmio_enabled/slot_reset/resume
- * callbacks of "any" device in the subtree. All the
- * devices in the subtree are left in the error state
- * without recovery.
- */
-
- if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
- vote = PCI_ERS_RESULT_NO_AER_DRIVER;
- else
- vote = PCI_ERS_RESULT_NONE;
- } else {
- err_handler = dev->driver->err_handler;
- vote = err_handler->error_detected(dev, result_data->state);
- }
-
- result_data->result = merge_result(result_data->result, vote);
- device_unlock(&dev->dev);
- return 0;
-}
-
-static int report_mmio_enabled(struct pci_dev *dev, void *data)
-{
- pci_ers_result_t vote;
- const struct pci_error_handlers *err_handler;
- struct aer_broadcast_data *result_data;
- result_data = (struct aer_broadcast_data *) data;
-
- device_lock(&dev->dev);
- if (!dev->driver ||
- !dev->driver->err_handler ||
- !dev->driver->err_handler->mmio_enabled)
- goto out;
-
- err_handler = dev->driver->err_handler;
- vote = err_handler->mmio_enabled(dev);
- result_data->result = merge_result(result_data->result, vote);
-out:
- device_unlock(&dev->dev);
- return 0;
-}
-
-static int report_slot_reset(struct pci_dev *dev, void *data)
-{
- pci_ers_result_t vote;
- const struct pci_error_handlers *err_handler;
- struct aer_broadcast_data *result_data;
- result_data = (struct aer_broadcast_data *) data;
-
- device_lock(&dev->dev);
- if (!dev->driver ||
- !dev->driver->err_handler ||
- !dev->driver->err_handler->slot_reset)
- goto out;
-
- err_handler = dev->driver->err_handler;
- vote = err_handler->slot_reset(dev);
- result_data->result = merge_result(result_data->result, vote);
-out:
- device_unlock(&dev->dev);
- return 0;
-}
-
-static int report_resume(struct pci_dev *dev, void *data)
-{
- const struct pci_error_handlers *err_handler;
-
- device_lock(&dev->dev);
- dev->error_state = pci_channel_io_normal;
-
- if (!dev->driver ||
- !dev->driver->err_handler ||
- !dev->driver->err_handler->resume)
- goto out;
-
- err_handler = dev->driver->err_handler;
- err_handler->resume(dev);
-out:
- device_unlock(&dev->dev);
- return 0;
-}
-
-/**
- * broadcast_error_message - handle message broadcast to downstream drivers
- * @dev: pointer to from where in a hierarchy message is broadcasted down
- * @state: error state
- * @error_mesg: message to print
- * @cb: callback to be broadcasted
- *
- * Invoked during error recovery process. Once being invoked, the content
- * of error severity will be broadcasted to all downstream drivers in a
- * hierarchy in question.
- */
-static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
- enum pci_channel_state state,
- char *error_mesg,
- int (*cb)(struct pci_dev *, void *))
-{
- struct aer_broadcast_data result_data;
-
- dev_printk(KERN_DEBUG, &dev->dev, "broadcast %s message\n", error_mesg);
- result_data.state = state;
- if (cb == report_error_detected)
- result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
- else
- result_data.result = PCI_ERS_RESULT_RECOVERED;
-
- if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
- /*
- * If the error is reported by a bridge, we think this error
- * is related to the downstream link of the bridge, so we
- * do error recovery on all subordinates of the bridge instead
- * of the bridge and clear the error status of the bridge.
- */
- if (cb == report_error_detected)
- dev->error_state = state;
- pci_walk_bus(dev->subordinate, cb, &result_data);
- if (cb == report_resume) {
- pci_cleanup_aer_uncorrect_error_status(dev);
- dev->error_state = pci_channel_io_normal;
- }
- } else {
- /*
- * If the error is reported by an end point, we think this
- * error is related to the upstream link of the end point.
- */
- if (state == pci_channel_io_normal)
- /*
- * the error is non fatal so the bus is ok, just invoke
- * the callback for the function that logged the error.
- */
- cb(dev, &result_data);
- else
- pci_walk_bus(dev->bus, cb, &result_data);
- }
-
- return result_data.result;
-}
-
-/**
- * default_reset_link - default reset function
- * @dev: pointer to pci_dev data structure
- *
- * Invoked when performing link reset on a Downstream Port or a
- * Root Port with no aer driver.
- */
-static pci_ers_result_t default_reset_link(struct pci_dev *dev)
-{
- pci_reset_bridge_secondary_bus(dev);
- dev_printk(KERN_DEBUG, &dev->dev, "downstream link has been reset\n");
- return PCI_ERS_RESULT_RECOVERED;
-}
-
static int find_aer_service_iter(struct device *device, void *data)
{
struct pcie_port_service_driver *service_driver, **drv;
@@ -434,7 +252,7 @@ static int find_aer_service_iter(struct device *device, void *data)
return 0;
}
-static struct pcie_port_service_driver *find_aer_service(struct pci_dev *dev)
+struct pcie_port_service_driver *pci_find_aer_service(struct pci_dev *dev)
{
struct pcie_port_service_driver *drv = NULL;
@@ -442,108 +260,7 @@ static struct pcie_port_service_driver *find_aer_service(struct pci_dev *dev)
return drv;
}
-
-static pci_ers_result_t reset_link(struct pci_dev *dev)
-{
- struct pci_dev *udev;
- pci_ers_result_t status;
- struct pcie_port_service_driver *driver;
-
- if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
- /* Reset this port for all subordinates */
- udev = dev;
- } else {
- /* Reset the upstream component (likely downstream port) */
- udev = dev->bus->self;
- }
-
- /* Use the aer driver of the component firstly */
- driver = find_aer_service(udev);
-
- if (driver && driver->reset_link) {
- status = driver->reset_link(udev);
- } else if (udev->has_secondary_link) {
- status = default_reset_link(udev);
- } else {
- dev_printk(KERN_DEBUG, &dev->dev,
- "no link-reset support at upstream device %s\n",
- pci_name(udev));
- return PCI_ERS_RESULT_DISCONNECT;
- }
-
- if (status != PCI_ERS_RESULT_RECOVERED) {
- dev_printk(KERN_DEBUG, &dev->dev,
- "link reset at upstream device %s failed\n",
- pci_name(udev));
- return PCI_ERS_RESULT_DISCONNECT;
- }
-
- return status;
-}
-
-/**
- * pci_do_recovery - handle nonfatal/fatal error recovery process
- * @dev: pointer to a pci_dev data structure of agent detecting an error
- * @severity: error severity type
- *
- * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast
- * error detected message to all downstream drivers within a hierarchy in
- * question and return the returned code.
- */
-static void pci_do_recovery(struct pci_dev *dev, int severity)
-{
- pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED;
- enum pci_channel_state state;
-
- if (severity == AER_FATAL)
- state = pci_channel_io_frozen;
- else
- state = pci_channel_io_normal;
-
- status = broadcast_error_message(dev,
- state,
- "error_detected",
- report_error_detected);
-
- if (severity == AER_FATAL) {
- result = reset_link(dev);
- if (result != PCI_ERS_RESULT_RECOVERED)
- goto failed;
- }
-
- if (status == PCI_ERS_RESULT_CAN_RECOVER)
- status = broadcast_error_message(dev,
- state,
- "mmio_enabled",
- report_mmio_enabled);
-
- if (status == PCI_ERS_RESULT_NEED_RESET) {
- /*
- * TODO: Should call platform-specific
- * functions to reset slot before calling
- * drivers' slot_reset callbacks?
- */
- status = broadcast_error_message(dev,
- state,
- "slot_reset",
- report_slot_reset);
- }
-
- if (status != PCI_ERS_RESULT_RECOVERED)
- goto failed;
-
- broadcast_error_message(dev,
- state,
- "resume",
- report_resume);
-
- dev_info(&dev->dev, "AER: Device recovery successful\n");
- return;
-
-failed:
- /* TODO: Should kernel panic here? */
- dev_info(&dev->dev, "AER: Device recovery failed\n");
-}
+EXPORT_SYMBOL(pci_find_aer_service);
/**
* handle_error_source - handle logging error into an event log
diff --git a/drivers/pci/pcie/pcie-err.c b/drivers/pci/pcie/pcie-err.c
new file mode 100644
index 0000000..a532fe0
--- /dev/null
+++ b/drivers/pci/pcie/pcie-err.c
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file implements the error recovery as a core part of PCIe error reporting.
+ * When a PCIe error is delivered, an error message will be collected and printed
+ * to console, then, an error recovery procedure will be executed by following
+ * the PCI error recovery rules.
+ *
+ * Copyright (C) 2006 Intel Corp.
+ * Tom Long Nguyen ([email protected])
+ * Zhang Yanmin ([email protected])
+ *
+ */
+
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/aer.h>
+#include <linux/pcieport_if.h>
+#include "portdrv.h"
+
+struct aer_broadcast_data {
+ enum pci_channel_state state;
+ enum pci_ers_result result;
+};
+
+static pci_ers_result_t merge_result(enum pci_ers_result orig,
+ enum pci_ers_result new)
+{
+ if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
+ return PCI_ERS_RESULT_NO_AER_DRIVER;
+
+ if (new == PCI_ERS_RESULT_NONE)
+ return orig;
+
+ switch (orig) {
+ case PCI_ERS_RESULT_CAN_RECOVER:
+ case PCI_ERS_RESULT_RECOVERED:
+ orig = new;
+ break;
+ case PCI_ERS_RESULT_DISCONNECT:
+ if (new == PCI_ERS_RESULT_NEED_RESET)
+ orig = PCI_ERS_RESULT_NEED_RESET;
+ break;
+ default:
+ break;
+ }
+
+ return orig;
+}
+
+static int report_mmio_enabled(struct pci_dev *dev, void *data)
+{
+ pci_ers_result_t vote;
+ const struct pci_error_handlers *err_handler;
+ struct aer_broadcast_data *result_data;
+
+ result_data = (struct aer_broadcast_data *) data;
+
+ device_lock(&dev->dev);
+ if (!dev->driver ||
+ !dev->driver->err_handler ||
+ !dev->driver->err_handler->mmio_enabled)
+ goto out;
+
+ err_handler = dev->driver->err_handler;
+ vote = err_handler->mmio_enabled(dev);
+ result_data->result = merge_result(result_data->result, vote);
+out:
+ device_unlock(&dev->dev);
+ return 0;
+}
+
+static int report_slot_reset(struct pci_dev *dev, void *data)
+{
+ pci_ers_result_t vote;
+ const struct pci_error_handlers *err_handler;
+ struct aer_broadcast_data *result_data;
+
+ result_data = (struct aer_broadcast_data *) data;
+
+ device_lock(&dev->dev);
+ if (!dev->driver ||
+ !dev->driver->err_handler ||
+ !dev->driver->err_handler->slot_reset)
+ goto out;
+
+ err_handler = dev->driver->err_handler;
+ vote = err_handler->slot_reset(dev);
+ result_data->result = merge_result(result_data->result, vote);
+out:
+ device_unlock(&dev->dev);
+ return 0;
+}
+
+static int report_resume(struct pci_dev *dev, void *data)
+{
+ const struct pci_error_handlers *err_handler;
+
+ device_lock(&dev->dev);
+ dev->error_state = pci_channel_io_normal;
+
+ if (!dev->driver ||
+ !dev->driver->err_handler ||
+ !dev->driver->err_handler->resume)
+ goto out;
+
+ err_handler = dev->driver->err_handler;
+ err_handler->resume(dev);
+out:
+ device_unlock(&dev->dev);
+ return 0;
+}
+
+static int report_error_detected(struct pci_dev *dev, void *data)
+{
+ pci_ers_result_t vote;
+ const struct pci_error_handlers *err_handler;
+ struct aer_broadcast_data *result_data;
+
+ result_data = (struct aer_broadcast_data *) data;
+
+ device_lock(&dev->dev);
+ dev->error_state = result_data->state;
+
+ if (!dev->driver ||
+ !dev->driver->err_handler ||
+ !dev->driver->err_handler->error_detected) {
+ if (result_data->state == pci_channel_io_frozen &&
+ dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
+ /*
+ * In case of fatal recovery, if one of down-
+ * stream device has no driver. We might be
+ * unable to recover because a later insmod
+ * of a driver for this device is unaware of
+ * its hw state.
+ */
+ dev_printk(KERN_DEBUG, &dev->dev, "device has %s\n",
+ dev->driver ?
+ "no error-aware driver" : "no driver");
+ }
+
+ /*
+ * If there's any device in the subtree that does not
+ * have an error_detected callback, returning
+ * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of
+ * the subsequent mmio_enabled/slot_reset/resume
+ * callbacks of "any" device in the subtree. All the
+ * devices in the subtree are left in the error state
+ * without recovery.
+ */
+
+ if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
+ vote = PCI_ERS_RESULT_NO_AER_DRIVER;
+ else
+ vote = PCI_ERS_RESULT_NONE;
+ } else {
+ err_handler = dev->driver->err_handler;
+ vote = err_handler->error_detected(dev, result_data->state);
+ }
+
+ result_data->result = merge_result(result_data->result, vote);
+ device_unlock(&dev->dev);
+ return 0;
+}
+
+/**
+ * default_reset_link - default reset function
+ * @dev: pointer to pci_dev data structure
+ *
+ * Invoked when performing link reset on a Downstream Port or a
+ * Root Port with no aer driver.
+ */
+static pci_ers_result_t default_reset_link(struct pci_dev *dev)
+{
+ pci_reset_bridge_secondary_bus(dev);
+ dev_printk(KERN_DEBUG, &dev->dev, "downstream link has been reset\n");
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+static pci_ers_result_t reset_link(struct pci_dev *dev)
+{
+ struct pci_dev *udev;
+ pci_ers_result_t status;
+ struct pcie_port_service_driver *driver = NULL;
+
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+ /* Reset this port for all subordinates */
+ udev = dev;
+ } else {
+ /* Reset the upstream component (likely downstream port) */
+ udev = dev->bus->self;
+ }
+
+#if IS_ENABLED(CONFIG_PCIEAER)
+ /* Use the aer driver of the component firstly */
+ driver = pci_find_aer_service(udev);
+#endif
+
+ if (driver && driver->reset_link) {
+ status = driver->reset_link(udev);
+ } else if (udev->has_secondary_link) {
+ status = default_reset_link(udev);
+ } else {
+ dev_printk(KERN_DEBUG, &dev->dev,
+ "no link-reset support at upstream device %s\n",
+ pci_name(udev));
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ if (status != PCI_ERS_RESULT_RECOVERED) {
+ dev_printk(KERN_DEBUG, &dev->dev,
+ "link reset at upstream device %s failed\n",
+ pci_name(udev));
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ return status;
+}
+
+/**
+ * broadcast_error_message - handle message broadcast to downstream drivers
+ * @dev: pointer to from where in a hierarchy message is broadcasted down
+ * @state: error state
+ * @error_mesg: message to print
+ * @cb: callback to be broadcasted
+ *
+ * Invoked during error recovery process. Once being invoked, the content
+ * of error severity will be broadcasted to all downstream drivers in a
+ * hierarchy in question.
+ */
+static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
+ enum pci_channel_state state,
+ char *error_mesg,
+ int (*cb)(struct pci_dev *, void *))
+{
+ struct aer_broadcast_data result_data;
+
+ dev_printk(KERN_DEBUG, &dev->dev, "broadcast %s message\n", error_mesg);
+ result_data.state = state;
+ if (cb == report_error_detected)
+ result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
+ else
+ result_data.result = PCI_ERS_RESULT_RECOVERED;
+
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+ /*
+ * If the error is reported by a bridge, we think this error
+ * is related to the downstream link of the bridge, so we
+ * do error recovery on all subordinates of the bridge instead
+ * of the bridge and clear the error status of the bridge.
+ */
+ if (cb == report_error_detected)
+ dev->error_state = state;
+ pci_walk_bus(dev->subordinate, cb, &result_data);
+ if (cb == report_resume) {
+ pci_cleanup_aer_uncorrect_error_status(dev);
+ dev->error_state = pci_channel_io_normal;
+ }
+ } else {
+ /*
+ * If the error is reported by an end point, we think this
+ * error is related to the upstream link of the end point.
+ */
+ pci_walk_bus(dev->bus, cb, &result_data);
+ }
+
+ return result_data.result;
+}
+
+/**
+ * pci_do_recovery - handle nonfatal/fatal error recovery process
+ * @dev: pointer to a pci_dev data structure of agent detecting an error
+ * @severity: error severity type
+ *
+ * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast
+ * error detected message to all downstream drivers within a hierarchy in
+ * question and return the returned code.
+ */
+void pci_do_recovery(struct pci_dev *dev, int severity)
+{
+ pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED;
+ enum pci_channel_state state;
+
+ if (severity == AER_FATAL)
+ state = pci_channel_io_frozen;
+ else
+ state = pci_channel_io_normal;
+
+ status = broadcast_error_message(dev,
+ state,
+ "error_detected",
+ report_error_detected);
+
+ if (severity == AER_FATAL) {
+ result = reset_link(dev);
+ if (result != PCI_ERS_RESULT_RECOVERED)
+ goto failed;
+ }
+
+ if (status == PCI_ERS_RESULT_CAN_RECOVER)
+ status = broadcast_error_message(dev,
+ state,
+ "mmio_enabled",
+ report_mmio_enabled);
+
+ if (status == PCI_ERS_RESULT_NEED_RESET) {
+ /*
+ * TODO: Should call platform-specific
+ * functions to reset slot before calling
+ * drivers' slot_reset callbacks?
+ */
+ status = broadcast_error_message(dev,
+ state,
+ "slot_reset",
+ report_slot_reset);
+ }
+
+ if (status != PCI_ERS_RESULT_RECOVERED)
+ goto failed;
+
+ broadcast_error_message(dev,
+ state,
+ "resume",
+ report_resume);
+
+ dev_info(&dev->dev, "Device recovery successful\n");
+ return;
+
+failed:
+ /* TODO: Should kernel panic here? */
+ dev_info(&dev->dev, "Device recovery failed\n");
+}
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index a854bc5..4f1992d 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -79,4 +79,5 @@ static inline void pcie_port_platform_notify(struct pci_dev *port, int *mask)
static inline void pcie_port_platform_notify(struct pci_dev *port, int *mask){}
#endif /* !CONFIG_ACPI */
+struct pcie_port_service_driver *pci_find_aer_service(struct pci_dev *dev);
#endif /* _PORTDRV_H_ */
--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.,
a Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.
Clients such as pciehp, dpc are using pcie_wait_link_active, which waits
till the link becomes active or inactive.
Made generic function and moved it to drivers/pci/pci.c
Signed-off-by: Oza Pawandeep <[email protected]>
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 7bab060..26afeff 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -245,25 +245,12 @@ bool pciehp_check_link_active(struct controller *ctrl)
return ret;
}
-static void __pcie_wait_link_active(struct controller *ctrl, bool active)
+static bool pcie_wait_link_active(struct controller *ctrl)
{
- int timeout = 1000;
-
- if (pciehp_check_link_active(ctrl) == active)
- return;
- while (timeout > 0) {
- msleep(10);
- timeout -= 10;
- if (pciehp_check_link_active(ctrl) == active)
- return;
- }
- ctrl_dbg(ctrl, "Data Link Layer Link Active not %s in 1000 msec\n",
- active ? "set" : "cleared");
-}
+ struct pci_dev *pdev = ctrl_dev(ctrl);
+ bool active = true;
-static void pcie_wait_link_active(struct controller *ctrl)
-{
- __pcie_wait_link_active(ctrl, true);
+ return pci_wait_for_link(pdev, active);
}
static bool pci_bus_check_dev(struct pci_bus *bus, int devfn)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 4a7c686..0de83ea 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2805,7 +2805,7 @@ static void pci_std_enable_acs(struct pci_dev *dev)
pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl);
/* Source Validation */
- ctrl |= (cap & PCI_ACS_SV);
+// ctrl |= (cap & PCI_ACS_SV);
/* P2P Request Redirect */
ctrl |= (cap & PCI_ACS_RR);
@@ -4079,6 +4079,43 @@ static int pci_pm_reset(struct pci_dev *dev, int probe)
return 0;
}
+/**
+ * pci_wait_for_link - wait until the link becomes active or inactive
+ * @pdev: bridge device whose Link Status register is polled
+ * @active: true to wait for the link to become active, false to wait for
+ *          it to become inactive
+ *
+ * Polls the Data Link Layer Link Active bit (PCI_EXP_LNKSTA_DLLLA) every
+ * 10 msec until it matches @active or 1000 msec have been spent waiting.
+ *
+ * Return: true if the link reached the requested state, false on timeout.
+ */
+bool pci_wait_for_link(struct pci_dev *pdev, bool active)
+{
+	int timeout = 1000;
+	bool ret;
+	u16 lnk_status;
+
+	pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
+	ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA);
+
+	/* Re-sample the link state until it matches or the budget is spent. */
+	while (ret != active && timeout > 0) {
+		msleep(10);
+		timeout -= 10;
+		pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
+		ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA);
+	}
+
+	if (ret != active) {
+		dev_printk(KERN_DEBUG, &pdev->dev,
+			   "Data Link Layer Link Active not %s in 1000 msec\n",
+			   active ? "set" : "cleared");
+		return false;
+	}
+
+	dev_printk(KERN_DEBUG, &pdev->dev,
+		   "%s: lnk_status = %x\n", __func__, lnk_status);
+	return true;
+}
+EXPORT_SYMBOL(pci_wait_for_link);
+
void pci_reset_secondary_bus(struct pci_dev *dev)
{
u16 ctrl;
diff --git a/drivers/pci/pcie/pcie-dpc.c b/drivers/pci/pcie/pcie-dpc.c
index 1b06a8e..67502a5 100644
--- a/drivers/pci/pcie/pcie-dpc.c
+++ b/drivers/pci/pcie/pcie-dpc.c
@@ -150,37 +150,9 @@ static void dpc_wait_link_inactive(struct dpc_dev *dpc)
struct pci_dev *pdev = dpc->dev->port;
struct device *dev = &dpc->dev->device;
u16 lnk_status;
+ bool active = false;
- pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
- while (lnk_status & PCI_EXP_LNKSTA_DLLLA &&
- !time_after(jiffies, timeout)) {
- msleep(10);
- pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
- }
- if (lnk_status & PCI_EXP_LNKSTA_DLLLA)
- dev_warn(dev, "Link state not disabled for DPC event\n");
-}
-
-static bool dpc_wait_link_active(struct pci_dev *pdev)
-{
- unsigned long timeout = jiffies + HZ;
- u16 lnk_status;
- bool ret = true;
-
- pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
-
- while (!(lnk_status & PCI_EXP_LNKSTA_DLLLA) &&
- !time_after(jiffies, timeout)) {
- msleep(10);
- pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
- }
-
- if (!(lnk_status & PCI_EXP_LNKSTA_DLLLA)) {
- dev_warn(&pdev->dev, "Link state not enabled after DPC event\n");
- ret = false;
- }
-
- return ret;
+ pci_wait_for_link(pdev, active);
}
/**
@@ -191,7 +163,9 @@ static bool dpc_wait_link_active(struct pci_dev *pdev)
*/
static void dpc_error_resume(struct pci_dev *pdev)
{
- if (dpc_wait_link_active(pdev)) {
+ bool active = true;
+
+ if (pci_wait_for_link(pdev, active)) {
pci_lock_rescan_remove();
pci_rescan_bus(pdev->bus);
pci_unlock_rescan_remove();
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c170c92..f9f6a3d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1201,6 +1201,7 @@ int pci_add_ext_cap_save_buffer(struct pci_dev *dev,
int pci_request_selected_regions(struct pci_dev *, int, const char *);
int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *);
void pci_release_selected_regions(struct pci_dev *, int);
+bool pci_wait_for_link(struct pci_dev *pdev, bool active);
/* drivers/pci/bus.c */
struct pci_bus *pci_bus_get(struct pci_bus *bus);
--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.,
a Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.
This patch moves the AER error defines to drivers/pci/pci.h, so that
the error reporting codes are unified in a single place along with DPC.
Signed-off-by: Oza Pawandeep <[email protected]>
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 6402f7f..216ee47 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -56,6 +56,7 @@
#include <ras/ras_event.h>
#include "apei-internal.h"
+#include "../../pci/pci.h"
#define GHES_PFX "GHES: "
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 3b79593..2e19953 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -343,7 +343,11 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev,
void pci_enable_acs(struct pci_dev *dev);
/* PCI error reporting and recovery */
-#define DPC_FATAL 4
+#define AER_NONFATAL 0
+#define AER_FATAL 1
+#define AER_CORRECTABLE 2
+
+#define DPC_FATAL 4
void pci_do_recovery(struct pci_dev *dev, int severity);
diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c
index 54c4b69..cd3ad55 100644
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -22,6 +22,7 @@
#include <linux/cper.h>
#include "aerdrv.h"
+#include "../../pci.h"
#include <ras/ras_event.h>
#define AER_AGENT_RECEIVER 0
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 8f87bbe..3eac8ed 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -11,10 +11,6 @@
#include <linux/errno.h>
#include <linux/types.h>
-#define AER_NONFATAL 0
-#define AER_FATAL 1
-#define AER_CORRECTABLE 2
-
struct pci_dev;
struct aer_header_log_regs {
--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.,
a Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.
Implement the error_resume callback in DPC so that, after a DPC trigger
event, the devices beneath are enumerated.
Signed-off-by: Oza Pawandeep <[email protected]>
diff --git a/drivers/pci/pcie/pcie-dpc.c b/drivers/pci/pcie/pcie-dpc.c
index b5c9fbd..1b06a8e 100644
--- a/drivers/pci/pcie/pcie-dpc.c
+++ b/drivers/pci/pcie/pcie-dpc.c
@@ -161,6 +161,43 @@ static void dpc_wait_link_inactive(struct dpc_dev *dpc)
dev_warn(dev, "Link state not disabled for DPC event\n");
}
+static bool dpc_wait_link_active(struct pci_dev *pdev)
+{
+ unsigned long timeout = jiffies + HZ; /* polling deadline: HZ jiffies from now */
+ u16 lnk_status;
+ bool ret = true; /* cleared below if DLLLA is still unset after the wait */
+
+ pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
+
+ while (!(lnk_status & PCI_EXP_LNKSTA_DLLLA) &&
+ !time_after(jiffies, timeout)) {
+ msleep(10); /* sleep 10 msec, then re-read Link Status */
+ pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
+ }
+
+ if (!(lnk_status & PCI_EXP_LNKSTA_DLLLA)) { /* deadline passed, DLLLA still clear */
+ dev_warn(&pdev->dev, "Link state not enabled after DPC event\n");
+ ret = false;
+ }
+
+ return ret; /* true only if DLLLA was observed set */
+}
+
+/**
+ * dpc_error_resume - enumerate the devices beneath
+ * @pdev: pointer to the pci_dev of the port whose link is waited on
+ *
+ * Waits for the link to become active via dpc_wait_link_active() and, only
+ * if it did, rescans @pdev->bus between pci_lock_rescan_remove() and
+ * pci_unlock_rescan_remove(). Nothing is rescanned when the wait fails.
+ *
+ * NOTE(review): the previous text said "Invoked by Port Bus driver during
+ * nonfatal recovery" - confirm which recovery path actually calls this.
+ */
+static void dpc_error_resume(struct pci_dev *pdev)
+{
+ if (dpc_wait_link_active(pdev)) {
+ pci_lock_rescan_remove();
+ pci_rescan_bus(pdev->bus);
+ pci_unlock_rescan_remove();
+ }
+}
+
/**
* dpc_reset_link - reset link DPC routine
* @dev: pointer to Root Port's pci_dev data structure
@@ -419,6 +456,7 @@ static void dpc_remove(struct pcie_device *dev)
.service = PCIE_PORT_SERVICE_DPC,
.probe = dpc_probe,
.remove = dpc_remove,
+ .error_resume = dpc_error_resume,
.reset_link = dpc_reset_link,
};
diff --git a/drivers/pci/pcie/pcie-err.c b/drivers/pci/pcie/pcie-err.c
index 05385c0..c4876ab 100644
--- a/drivers/pci/pcie/pcie-err.c
+++ b/drivers/pci/pcie/pcie-err.c
@@ -236,6 +236,7 @@ static pci_ers_result_t reset_link(struct pci_dev *dev, int severity)
* @state: error state
* @error_mesg: message to print
* @cb: callback to be broadcasted
+ * @severity: error severity
*
* Invoked during error recovery process. Once being invoked, the content
* of error severity will be broadcasted to all downstream drivers in a
@@ -244,7 +245,8 @@ static pci_ers_result_t reset_link(struct pci_dev *dev, int severity)
static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
enum pci_channel_state state,
char *error_mesg,
- int (*cb)(struct pci_dev *, void *))
+ int (*cb)(struct pci_dev *, void *),
+ int severity)
{
struct aer_broadcast_data result_data;
@@ -256,6 +258,15 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
result_data.result = PCI_ERS_RESULT_RECOVERED;
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+ /* If DPC is triggered, call resume error hanlder
+ * because, at this point we can safely assume that
+ * link recovery has happened.
+ */
+ if ((severity == DPC_FATAL) &&
+ (cb == report_resume)) {
+ cb(dev, NULL);
+ return PCI_ERS_RESULT_RECOVERED;
+ }
/*
* If the error is reported by a bridge, we think this error
* is related to the downstream link of the bridge, so we
@@ -305,7 +316,8 @@ void pci_do_recovery(struct pci_dev *dev, int severity)
status = broadcast_error_message(dev,
state,
"error_detected",
- report_error_detected);
+ report_error_detected,
+ severity);
if ((severity == AER_FATAL) ||
(severity == DPC_FATAL)) {
@@ -318,7 +330,8 @@ void pci_do_recovery(struct pci_dev *dev, int severity)
status = broadcast_error_message(dev,
state,
"mmio_enabled",
- report_mmio_enabled);
+ report_mmio_enabled,
+ severity);
if (status == PCI_ERS_RESULT_NEED_RESET) {
/*
@@ -329,7 +342,8 @@ void pci_do_recovery(struct pci_dev *dev, int severity)
status = broadcast_error_message(dev,
state,
"slot_reset",
- report_slot_reset);
+ report_slot_reset,
+ severity);
}
if (status != PCI_ERS_RESULT_RECOVERED)
@@ -338,7 +352,8 @@ void pci_do_recovery(struct pci_dev *dev, int severity)
broadcast_error_message(dev,
state,
"resume",
- report_resume);
+ report_resume,
+ severity);
dev_info(&dev->dev, "Device recovery successful\n");
mutex_unlock(&pci_err_recovery_lock);
--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.,
a Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.
On 1/19/2018 6:10 AM, Oza Pawandeep wrote:
> +check_link:
> + pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
> + ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA);
> +
> + if (ret == active) {
> + dev_printk(KERN_DEBUG, &pdev->dev,
> + "%s: lnk_status = %x\n", __func__, lnk_status);
> + return true;
> + }
> +
> + while (timeout > 0) {
> + msleep(10);
> + timeout -= 10;
> + goto check_link;
> + }
this is weird.
I think following is a simpler approach.
pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA);
while ((ret != active) && (timeout > 0)) {
msleep(10);
timeout -= 10;
pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA);
}
--
Sinan Kaya
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.
On 1/19/2018 6:10 AM, Oza Pawandeep wrote:
> +++ b/drivers/pci/pcie/pcie-dpc.c
> @@ -150,37 +150,9 @@ static void dpc_wait_link_inactive(struct dpc_dev *dpc)
> struct pci_dev *pdev = dpc->dev->port;
> struct device *dev = &dpc->dev->device;
> u16 lnk_status;
> + bool active = false;
>
> - pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
> - while (lnk_status & PCI_EXP_LNKSTA_DLLLA &&
> - !time_after(jiffies, timeout)) {
> - msleep(10);
> - pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
> - }
> - if (lnk_status & PCI_EXP_LNKSTA_DLLLA)
> - dev_warn(dev, "Link state not disabled for DPC event\n");
> -}
I think you should reorder 6 and 7 to eliminate this.
--
Sinan Kaya
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.
On 19.01.2018 12:10, Oza Pawandeep wrote:
> Clients such as pciehp, dpc are using pcie_wait_link_active, which waits
> till the link becomes active or inactive.
>
> Made generic function and moved it to drivers/pci/pci.c
>
> Signed-off-by: Oza Pawandeep <[email protected]>
>
> diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
> index 7bab060..26afeff 100644
> --- a/drivers/pci/hotplug/pciehp_hpc.c
> +++ b/drivers/pci/hotplug/pciehp_hpc.c
> @@ -245,25 +245,12 @@ bool pciehp_check_link_active(struct controller *ctrl)
> return ret;
> }
>
> -static void __pcie_wait_link_active(struct controller *ctrl, bool active)
> +static bool pcie_wait_link_active(struct controller *ctrl)
> {
> - int timeout = 1000;
> -
> - if (pciehp_check_link_active(ctrl) == active)
> - return;
> - while (timeout > 0) {
> - msleep(10);
> - timeout -= 10;
> - if (pciehp_check_link_active(ctrl) == active)
> - return;
> - }
> - ctrl_dbg(ctrl, "Data Link Layer Link Active not %s in 1000 msec\n",
> - active ? "set" : "cleared");
> -}
> + struct pci_dev *pdev = ctrl_dev(ctrl);
> + bool active = true;
>
> -static void pcie_wait_link_active(struct controller *ctrl)
> -{
> - __pcie_wait_link_active(ctrl, true);
> + return pci_wait_for_link(pdev, active);
> }
>
> static bool pci_bus_check_dev(struct pci_bus *bus, int devfn)
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index 4a7c686..0de83ea 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -2805,7 +2805,7 @@ static void pci_std_enable_acs(struct pci_dev *dev)
> pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl);
>
> /* Source Validation */
> - ctrl |= (cap & PCI_ACS_SV);
> +// ctrl |= (cap & PCI_ACS_SV);
Could it be, that you missed to fix / clean something up here?
Thanks,
Stefan
On 2018-01-19 19:49, Stefan Roese wrote:
> On 19.01.2018 12:10, Oza Pawandeep wrote:
>> Clients such as pciehp, dpc are using pcie_wait_link_active, which
>> waits
>> till the link becomes active or inactive.
>>
>> Made generic function and moved it to drivers/pci/pci.c
>>
>> Signed-off-by: Oza Pawandeep <[email protected]>
>>
>> diff --git a/drivers/pci/hotplug/pciehp_hpc.c
>> b/drivers/pci/hotplug/pciehp_hpc.c
>> index 7bab060..26afeff 100644
>> --- a/drivers/pci/hotplug/pciehp_hpc.c
>> +++ b/drivers/pci/hotplug/pciehp_hpc.c
>> @@ -245,25 +245,12 @@ bool pciehp_check_link_active(struct controller
>> *ctrl)
>> return ret;
>> }
>>
>> -static void __pcie_wait_link_active(struct controller *ctrl, bool
>> active)
>> +static bool pcie_wait_link_active(struct controller *ctrl)
>> {
>> - int timeout = 1000;
>> -
>> - if (pciehp_check_link_active(ctrl) == active)
>> - return;
>> - while (timeout > 0) {
>> - msleep(10);
>> - timeout -= 10;
>> - if (pciehp_check_link_active(ctrl) == active)
>> - return;
>> - }
>> - ctrl_dbg(ctrl, "Data Link Layer Link Active not %s in 1000 msec\n",
>> - active ? "set" : "cleared");
>> -}
>> + struct pci_dev *pdev = ctrl_dev(ctrl);
>> + bool active = true;
>>
>> -static void pcie_wait_link_active(struct controller *ctrl)
>> -{
>> - __pcie_wait_link_active(ctrl, true);
>> + return pci_wait_for_link(pdev, active);
>> }
>>
>> static bool pci_bus_check_dev(struct pci_bus *bus, int devfn)
>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>> index 4a7c686..0de83ea 100644
>> --- a/drivers/pci/pci.c
>> +++ b/drivers/pci/pci.c
>> @@ -2805,7 +2805,7 @@ static void pci_std_enable_acs(struct pci_dev
>> *dev)
>> pci_read_config_word(dev, pos + PCI_ACS_CTRL, &ctrl);
>>
>> /* Source Validation */
>> - ctrl |= (cap & PCI_ACS_SV);
>> +// ctrl |= (cap & PCI_ACS_SV);
>
> Could it be, that you missed to fix / clean something up here?
>
> Thanks,
> Stefan
yes, will correct this and re-post.
Regards,
Oza.
On 2018-01-19 19:47, Sinan Kaya wrote:
> On 1/19/2018 6:10 AM, Oza Pawandeep wrote:
>> +++ b/drivers/pci/pcie/pcie-dpc.c
>> @@ -150,37 +150,9 @@ static void dpc_wait_link_inactive(struct dpc_dev
>> *dpc)
>> struct pci_dev *pdev = dpc->dev->port;
>> struct device *dev = &dpc->dev->device;
>> u16 lnk_status;
>> + bool active = false;
>>
>> - pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
>> - while (lnk_status & PCI_EXP_LNKSTA_DLLLA &&
>> - !time_after(jiffies, timeout)) {
>> - msleep(10);
>> - pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
>> - }
>> - if (lnk_status & PCI_EXP_LNKSTA_DLLLA)
>> - dev_warn(dev, "Link state not disabled for DPC event\n");
>> -}
>
> I think you should reorder 6 and 7 to eliminate this.
Sure will reorder them
Regards,
Oza.