From: Ira Weiny <[email protected]>
The CDAT read may fail for a number of reasons but mainly it is possible
to get different parts of a valid state. The checksum in the CDAT table
protects against this.
Now that the CDAT data is validated, issue a retry if the CDAT read
fails. For now 5 retries are implemented.
Cc: Alison Schofield <[email protected]>
Cc: Davidlohr Bueso <[email protected]>
Signed-off-by: Ira Weiny <[email protected]>
---
Changes from V9
Alison Schofield/Davidlohr Bueso
Print debug on each iteration and error only after failure
Changes from V8
Move code to cxl/core/pci.c
Changes from V6
Move to pci.c
Fix retries count
Change to 5 retries
Changes from V5:
New patch -- easy to push off or drop.
---
drivers/cxl/core/pci.c | 34 +++++++++++++++++++++++-----------
1 file changed, 23 insertions(+), 11 deletions(-)
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 73e28b82ffcf..e68f13e66fcf 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -631,20 +631,18 @@ static int cxl_cdat_read_table(struct cxl_port *port,
return rc;
}
-void read_cdat_data(struct cxl_port *port)
+static int __read_cdat_data(struct cxl_port *port)
{
struct device *dev = &port->dev;
size_t cdat_length;
int ret;
if (cxl_cdat_get_length(port, &cdat_length))
- return;
+ return 0;
port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL);
- if (!port->cdat.table) {
- ret = -ENOMEM;
- goto error;
- }
+ if (!port->cdat.table)
+ return -ENOMEM;
port->cdat.length = cdat_length;
ret = cxl_cdat_read_table(port, &port->cdat);
@@ -652,12 +650,26 @@ void read_cdat_data(struct cxl_port *port)
devm_kfree(dev, port->cdat.table);
port->cdat.table = NULL;
port->cdat.length = 0;
- ret = -EIO;
- goto error;
+ return -EIO;
}
- return;
-error:
- dev_err(dev, "CDAT data read error (%d)\n", ret);
+ return 0;
+}
+
+void read_cdat_data(struct cxl_port *port)
+{
+ int retries = 5;
+ int rc;
+
+ while (retries--) {
+ rc = __read_cdat_data(port);
+ if (!rc)
+ return;
+ dev_dbg(&port->dev,
+ "CDAT data read error rc=%d (retries %d)\n",
+ rc, retries);
+ }
+ dev_err(&port->dev, "CDAT data read failed after %d retries\n",
+ retries);
}
EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
--
2.35.1
On 22-06-04 17:50:48, [email protected] wrote:
> From: Ira Weiny <[email protected]>
>
> The CDAT read may fail for a number of reasons but mainly it is possible
> to get different parts of a valid state. The checksum in the CDAT table
> protects against this.
>
> Now that the cdat data is validated issue a retries if the CDAT read
s/validated issue a retries/validated, issue a retry/
> fails. For now 5 retries are implemented.
>
> Cc: Alison Schofield <[email protected]>
> Cc: Davidlohr Bueso <[email protected]>
> Signed-off-by: Ira Weiny <[email protected]>
>
> ---
> Changes from V9
> Alison Schofield/Davidlohr Bueso
> Print debug on each iteration and error only after failure
>
> Changes from V8
> Move code to cxl/core/pci.c
>
> Changes from V6
> Move to pci.c
> Fix retries count
> Change to 5 retries
>
> Changes from V5:
> New patch -- easy to push off or drop.
> ---
> drivers/cxl/core/pci.c | 34 +++++++++++++++++++++++-----------
> 1 file changed, 23 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index 73e28b82ffcf..e68f13e66fcf 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -631,20 +631,18 @@ static int cxl_cdat_read_table(struct cxl_port *port,
> return rc;
> }
>
> -void read_cdat_data(struct cxl_port *port)
> +static int __read_cdat_data(struct cxl_port *port)
> {
> struct device *dev = &port->dev;
> size_t cdat_length;
> int ret;
>
> if (cxl_cdat_get_length(port, &cdat_length))
> - return;
> + return 0;
>
> port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL);
> - if (!port->cdat.table) {
> - ret = -ENOMEM;
> - goto error;
> - }
> + if (!port->cdat.table)
> + return -ENOMEM;
>
> port->cdat.length = cdat_length;
> ret = cxl_cdat_read_table(port, &port->cdat);
> @@ -652,12 +650,26 @@ void read_cdat_data(struct cxl_port *port)
> devm_kfree(dev, port->cdat.table);
> port->cdat.table = NULL;
> port->cdat.length = 0;
> - ret = -EIO;
> - goto error;
> + return -EIO;
> }
>
> - return;
> -error:
> - dev_err(dev, "CDAT data read error (%d)\n", ret);
> + return 0;
> +}
> +
> +void read_cdat_data(struct cxl_port *port)
> +{
> + int retries = 5;
> + int rc;
> +
> + while (retries--) {
> + rc = __read_cdat_data(port);
> + if (!rc)
> + return;
> + dev_dbg(&port->dev,
> + "CDAT data read error rc=%d (retries %d)\n",
> + rc, retries);
Out of curiosity, what is the purpose of the dev_dbg? To diagnose delays or
something?
> + }
> + dev_err(&port->dev, "CDAT data read failed after %d retries\n",
> + retries);
Reviewed-by: Ben Widawsky <[email protected]>
> }
> EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
> --
> 2.35.1
>
On Mon, Jun 06, 2022 at 11:52:03AM -0700, Ben Widawsky wrote:
> On 22-06-04 17:50:48, [email protected] wrote:
> > From: Ira Weiny <[email protected]>
> >
> > The CDAT read may fail for a number of reasons but mainly it is possible
> > to get different parts of a valid state. The checksum in the CDAT table
> > protects against this.
> >
> > Now that the cdat data is validated issue a retries if the CDAT read
> s/validated issue a retries/validated, issue a retry/
Thanks!
> > fails. For now 5 retries are implemented.
> >
> > Cc: Alison Schofield <[email protected]>
> > Cc: Davidlohr Bueso <[email protected]>
> > Signed-off-by: Ira Weiny <[email protected]>
> >
> > ---
> > Changes from V9
> > Alison Schofield/Davidlohr Bueso
> > Print debug on each iteration and error only after failure
> >
> > Changes from V8
> > Move code to cxl/core/pci.c
> >
> > Changes from V6
> > Move to pci.c
> > Fix retries count
> > Change to 5 retries
> >
> > Changes from V5:
> > New patch -- easy to push off or drop.
> > ---
> > drivers/cxl/core/pci.c | 34 +++++++++++++++++++++++-----------
> > 1 file changed, 23 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> > index 73e28b82ffcf..e68f13e66fcf 100644
> > --- a/drivers/cxl/core/pci.c
> > +++ b/drivers/cxl/core/pci.c
> > @@ -631,20 +631,18 @@ static int cxl_cdat_read_table(struct cxl_port *port,
> > return rc;
> > }
> >
> > -void read_cdat_data(struct cxl_port *port)
> > +static int __read_cdat_data(struct cxl_port *port)
> > {
> > struct device *dev = &port->dev;
> > size_t cdat_length;
> > int ret;
> >
> > if (cxl_cdat_get_length(port, &cdat_length))
> > - return;
> > + return 0;
> >
> > port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL);
> > - if (!port->cdat.table) {
> > - ret = -ENOMEM;
> > - goto error;
> > - }
> > + if (!port->cdat.table)
> > + return -ENOMEM;
> >
> > port->cdat.length = cdat_length;
> > ret = cxl_cdat_read_table(port, &port->cdat);
> > @@ -652,12 +650,26 @@ void read_cdat_data(struct cxl_port *port)
> > devm_kfree(dev, port->cdat.table);
> > port->cdat.table = NULL;
> > port->cdat.length = 0;
> > - ret = -EIO;
> > - goto error;
> > + return -EIO;
> > }
> >
> > - return;
> > -error:
> > - dev_err(dev, "CDAT data read error (%d)\n", ret);
> > + return 0;
> > +}
> > +
> > +void read_cdat_data(struct cxl_port *port)
> > +{
> > + int retries = 5;
> > + int rc;
> > +
> > + while (retries--) {
> > + rc = __read_cdat_data(port);
> > + if (!rc)
> > + return;
> > + dev_dbg(&port->dev,
> > + "CDAT data read error rc=%d (retries %d)\n",
> > + rc, retries);
>
> Out of curiousity, what is the purpose of the dev_dbg? To diagnose delays or
> something?
Yes
>
> > + }
> > + dev_err(&port->dev, "CDAT data read failed after %d retries\n",
> > + retries);
>
>
> Reviewed-by: Ben Widawsky <[email protected]>
Thanks!
Ira
>
> > }
> > EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
> > --
> > 2.35.1
> >