2020-05-19 10:47:48

by Robert Richter

[permalink] [raw]
Subject: [PATCH v3 0/5] EDAC/ghes: Cleanup and reworks

This series contains a general cleanup and rework of the edac ghes
driver:

* Some small code improvements (patches #1, #2).

* Code in functions ghes_edac_{register,unregister}() is move to new
functions ghes_mc_{create,destroy}() and ghes_mc_{add,del}() (patch
#3).

* Separated 'fake' controller code path (patches #4, #5).

Tested on a Marvell/Cavium ThunderX2 Sabre (dual socket) system.

v3:
* Rebased onto dc63e28efa19 ("Merge branch 'edac-i10nm' into
edac-for-next") plus patch "EDAC/ghes: Setup DIMM label from DMI
and use it in error reports" applied from
https://lore.kernel.org/patchwork/patch/1243203/
* Removed v2 patches 01/10 and 02/10 for edac_mc driver from this
series, both are unrelated.
* Dropped v2 patch 04/10 "EDAC/ghes: Make SMBIOS handle private data
to ghes", there is no consent with the maintainer to the code
introduced to get a private ghes_dimm data structure, nor there was
any feasible alternative suggested.
* Taken v2 patch 05/10 "EDAC/ghes: Setup DIMM label from DMI and use
it in error reports" out of this series and submitted separately
(see above patchwork link).
* Dropped v2 patch 06/10, keep rdr_mask variable.
* Fixed subject of v2 patch 07/10 to 'EDAC/ghes: Cleanup struct
dimm_fill'.
* Reworked function interface, there is now
ghes_mc_{create,destroy}() and ghes_mc_{add,del}().
* Aligned arguments on the opening brace (ghes_mc_*()).
* Remove ghes_ prefix from ghes_dimm_* definitions.
* Use sizeof(struct ghes_pvt) in edac_mc_alloc().
* Rename struct ghes_mci to struct ghes_pvt.

v2:
* https://lore.kernel.org/patchwork/cover/1229380/
* reordered patches to have fixes and struct changes first, code
refactoring patches last,
* dropped v1 patches #9 to #11 (multiple conrollers) to handle them
in a separate series,
* rewrote patch to remove smbios_handle (based on v1 #9): EDAC/ghes:
Move smbios_handle from struct dimm_info to ghes private data,
* added lockdep_assert_held() checkers,
* renamed struct ghes_dimm_fill to struct dimm_fill,
* renamed local variable dimms to dimm_list to avoid conflict with
the global variable,
* removed dimm list for "fake" controller,
* fixed return code check to use (rc < 0),
* added: EDAC/mc: Fix usage of snprintf() and dimm location setup

v1:
* https://lore.kernel.org/patchwork/cover/1205901/


Robert Richter (5):
EDAC/ghes: Remove unused members of struct ghes_edac_pvt, rename it to
ghes_pvt
EDAC/ghes: Cleanup struct dimm_fill
EDAC/ghes: Carve out MC device handling into separate functions
EDAC/ghes: Have a separate code path for creating the fake MC
EDAC/ghes: Carve out code into ghes_edac_register_{one,fake}()

drivers/edac/ghes_edac.c | 254 ++++++++++++++++++++++++---------------
1 file changed, 159 insertions(+), 95 deletions(-)

--
2.20.1


2020-05-19 10:47:50

by Robert Richter

[permalink] [raw]
Subject: [PATCH v3 4/5] EDAC/ghes: Have a separate code path for creating the fake MC

The code in ghes_edac_register() switches back and forth between
standard and fake controller creation. Do one thing only and separate
the code path that creates the fake MC.

Note: For better review the code is not yet carved out in separate
functions.

Signed-off-by: Robert Richter <[email protected]>
---
drivers/edac/ghes_edac.c | 73 +++++++++++++++++++++++++---------------
1 file changed, 46 insertions(+), 27 deletions(-)

diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 8329af037fbe..47b99e0fea6d 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -536,7 +536,6 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
struct dimm_fill dimm_fill;
int rc = 0, num_dimm = 0;
struct mem_ctl_info *mci;
- bool fake = false;
int idx;

if (IS_ENABLED(CONFIG_X86)) {
@@ -560,24 +559,44 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
/* Get the number of DIMMs */
dmi_walk(ghes_edac_count_dimms, &num_dimm);

- /* Check if we've got a bogus BIOS */
- if (num_dimm == 0) {
- fake = true;
- num_dimm = 1;
- }
+ if (!num_dimm) {
+ /*
+ * Bogus BIOS: Ignore DMI topology and use a single MC
+ * with only one DIMM for the whole address range to
+ * catch all errros.
+ */
+ struct dimm_info *dimm;

- mci = ghes_mc_create(dev, 0, num_dimm);
- if (!mci) {
- pr_info("Can't allocate memory for EDAC data\n");
- rc = -ENOMEM;
- goto unlock;
- }
+ mci = ghes_mc_create(dev, 0, 1);
+ if (!mci) {
+ rc = -ENOMEM;
+ goto unlock;
+ }
+
+ dimm = edac_get_dimm_by_index(mci, 0);
+
+ dimm->nr_pages = 1;
+ dimm->grain = 128;
+ dimm->mtype = MEM_UNKNOWN;
+ dimm->dtype = DEV_UNKNOWN;
+ dimm->edac_mode = EDAC_SECDED;
+
+ snprintf(dimm->label, sizeof(dimm->label), "unknown memory");
+
+ rc = ghes_mc_add(mci);
+ if (rc) {
+ ghes_mc_destroy(mci);
+ goto unlock;
+ }

- if (fake) {
pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n");
pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n");
pr_info("work on such system. Use this driver with caution\n");
- } else if (idx < 0) {
+
+ goto out;
+ }
+
+ if (idx < 0) {
pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n");
pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n");
pr_info("So, the end result of using this driver varies from vendor to vendor.\n");
@@ -586,31 +605,31 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
pr_info("This system has %d DIMM sockets.\n", num_dimm);
}

- if (!fake) {
- dimm_fill.index = 0;
- dimm_fill.mci = mci;
- dmi_walk(ghes_edac_dmidecode, &dimm_fill);
- } else {
- struct dimm_info *dimm = edac_get_dimm_by_index(mci, 0);
-
- dimm->nr_pages = 1;
- dimm->grain = 128;
- dimm->mtype = MEM_UNKNOWN;
- dimm->dtype = DEV_UNKNOWN;
- dimm->edac_mode = EDAC_SECDED;
+ mci = ghes_mc_create(dev, 0, num_dimm);
+ if (!mci) {
+ rc = -ENOMEM;
+ goto unlock;
}

+ dimm_fill.index = 0;
+ dimm_fill.mci = mci;
+
+ dmi_walk(ghes_edac_dmidecode, &dimm_fill);
+
rc = ghes_mc_add(mci);
if (rc < 0) {
- pr_info("Can't register at EDAC core\n");
ghes_mc_destroy(mci);
goto unlock;
}

+out:
/* only set on success */
refcount_set(&ghes_refcount, 1);

unlock:
+ if (rc < 0)
+ pr_info("Can't register at EDAC core\n");
+
mutex_unlock(&ghes_reg_mutex);

return rc;
--
2.20.1

2020-05-19 10:47:58

by Robert Richter

[permalink] [raw]
Subject: [PATCH v3 2/5] EDAC/ghes: Cleanup struct dimm_fill

The struct is used to store temporary data for the dmidecode callback.
Clean this up a bit:

1) Rename member count to index since this is what it is used for.

2) Move code close to ghes_edac_dmidecode() where it is used.

3) While at it, use edac_get_dimm_by_index().

Signed-off-by: Robert Richter <[email protected]>
---
drivers/edac/ghes_edac.c | 26 +++++++++++++-------------
1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 2ed48a5d48d6..b72fe10b84d4 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -72,11 +72,6 @@ struct memdev_dmi_entry {
u16 conf_mem_clk_speed;
} __attribute__((__packed__));

-struct ghes_edac_dimm_fill {
- struct mem_ctl_info *mci;
- unsigned int count;
-};
-
static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
{
int *num_dimm = arg;
@@ -112,19 +107,24 @@ static void dimm_setup_label(struct dimm_info *dimm, u16 handle)
"unknown memory (handle: 0x%.4x)", handle);
}

+struct dimm_fill {
+ struct mem_ctl_info *mci;
+ int index;
+};
+
static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
{
- struct ghes_edac_dimm_fill *dimm_fill = arg;
+ struct dimm_fill *dimm_fill = arg;
struct mem_ctl_info *mci = dimm_fill->mci;

if (dh->type == DMI_ENTRY_MEM_DEVICE) {
struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh;
- struct dimm_info *dimm = edac_get_dimm(mci, dimm_fill->count, 0, 0);
+ struct dimm_info *dimm = edac_get_dimm_by_index(mci, dimm_fill->index);
u16 rdr_mask = BIT(7) | BIT(13);

if (entry->size == 0xffff) {
pr_info("Can't get DIMM%i size\n",
- dimm_fill->count);
+ dimm_fill->index);
dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */
} else if (entry->size == 0x7fff) {
dimm->nr_pages = MiB_TO_PAGES(entry->extended_size);
@@ -196,7 +196,7 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)

if (dimm->nr_pages) {
edac_dbg(1, "DIMM%i: %s size = %d MB%s\n",
- dimm_fill->count, edac_mem_types[dimm->mtype],
+ dimm_fill->index, edac_mem_types[dimm->mtype],
PAGES_TO_MiB(dimm->nr_pages),
(dimm->edac_mode != EDAC_NONE) ? "(ECC)" : "");
edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n",
@@ -206,7 +206,7 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)

dimm->smbios_handle = entry->handle;

- dimm_fill->count++;
+ dimm_fill->index++;
}
}

@@ -468,7 +468,7 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
struct mem_ctl_info *mci;
struct ghes_pvt *pvt;
struct edac_mc_layer layers[1];
- struct ghes_edac_dimm_fill dimm_fill;
+ struct dimm_fill dimm_fill;
unsigned long flags;
int idx = -1;

@@ -535,11 +535,11 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
}

if (!fake) {
- dimm_fill.count = 0;
+ dimm_fill.index = 0;
dimm_fill.mci = mci;
dmi_walk(ghes_edac_dmidecode, &dimm_fill);
} else {
- struct dimm_info *dimm = edac_get_dimm(mci, 0, 0, 0);
+ struct dimm_info *dimm = edac_get_dimm_by_index(mci, 0);

dimm->nr_pages = 1;
dimm->grain = 128;
--
2.20.1

2020-05-19 10:48:30

by Robert Richter

[permalink] [raw]
Subject: [PATCH v3 5/5] EDAC/ghes: Carve out code into ghes_edac_register_{one,fake}()

Factor out code to register a memory controller including DIMMs. Do
this for standard and fake memory controller in the two functions
ghes_edac_register_one() and ghes_edac_register_fake().

Function ghes_edac_register_one() could be reused to register multiple
*mci structs.

Signed-off-by: Robert Richter <[email protected]>
---
drivers/edac/ghes_edac.c | 94 +++++++++++++++++++++++-----------------
1 file changed, 55 insertions(+), 39 deletions(-)

diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 47b99e0fea6d..b68cd22e68bf 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -531,11 +531,60 @@ static struct mem_ctl_info *ghes_mc_del(void)
return mci;
}

-int ghes_edac_register(struct ghes *ghes, struct device *dev)
+static int ghes_edac_register_fake(struct device *dev)
+{
+ struct mem_ctl_info *mci;
+ struct dimm_info *dimm;
+ int rc;
+
+ mci = ghes_mc_create(dev, 0, 1);
+ if (!mci)
+ return -ENOMEM;
+
+ dimm = edac_get_dimm_by_index(mci, 0);
+
+ dimm->nr_pages = 1;
+ dimm->grain = 128;
+ dimm->mtype = MEM_UNKNOWN;
+ dimm->dtype = DEV_UNKNOWN;
+ dimm->edac_mode = EDAC_SECDED;
+
+ snprintf(dimm->label, sizeof(dimm->label), "unknown memory");
+
+ rc = ghes_mc_add(mci);
+
+ if (rc < 0)
+ ghes_mc_destroy(mci);
+
+ return rc;
+}
+
+static int ghes_edac_register_one(struct device *dev, int mc_idx, int num_dimm)
{
struct dimm_fill dimm_fill;
- int rc = 0, num_dimm = 0;
struct mem_ctl_info *mci;
+ int rc;
+
+ mci = ghes_mc_create(dev, mc_idx, num_dimm);
+ if (!mci)
+ return -ENOMEM;
+
+ dimm_fill.index = 0;
+ dimm_fill.mci = mci;
+
+ dmi_walk(ghes_edac_dmidecode, &dimm_fill);
+
+ rc = ghes_mc_add(mci);
+
+ if (rc < 0)
+ ghes_mc_destroy(mci);
+
+ return rc;
+}
+
+int ghes_edac_register(struct ghes *ghes, struct device *dev)
+{
+ int rc = 0, num_dimm = 0;
int idx;

if (IS_ENABLED(CONFIG_X86)) {
@@ -565,29 +614,9 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
* with only one DIMM for the whole address range to
* catch all errros.
*/
- struct dimm_info *dimm;
-
- mci = ghes_mc_create(dev, 0, 1);
- if (!mci) {
- rc = -ENOMEM;
+ rc = ghes_edac_register_fake(dev);
+ if (rc < 0)
goto unlock;
- }
-
- dimm = edac_get_dimm_by_index(mci, 0);
-
- dimm->nr_pages = 1;
- dimm->grain = 128;
- dimm->mtype = MEM_UNKNOWN;
- dimm->dtype = DEV_UNKNOWN;
- dimm->edac_mode = EDAC_SECDED;
-
- snprintf(dimm->label, sizeof(dimm->label), "unknown memory");
-
- rc = ghes_mc_add(mci);
- if (rc) {
- ghes_mc_destroy(mci);
- goto unlock;
- }

pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n");
pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n");
@@ -605,22 +634,9 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
pr_info("This system has %d DIMM sockets.\n", num_dimm);
}

- mci = ghes_mc_create(dev, 0, num_dimm);
- if (!mci) {
- rc = -ENOMEM;
- goto unlock;
- }
-
- dimm_fill.index = 0;
- dimm_fill.mci = mci;
-
- dmi_walk(ghes_edac_dmidecode, &dimm_fill);
-
- rc = ghes_mc_add(mci);
- if (rc < 0) {
- ghes_mc_destroy(mci);
+ rc = ghes_edac_register_one(dev, 0, num_dimm);
+ if (rc < 0)
goto unlock;
- }

out:
/* only set on success */
--
2.20.1

2020-05-19 10:48:39

by Robert Richter

[permalink] [raw]
Subject: [PATCH v3 1/5] EDAC/ghes: Remove unused members of struct ghes_edac_pvt, rename it to ghes_pvt

The struct members list and ghes of struct ghes_edac_pvt are unused,
remove them. On that occasion, rename it to the shorter name struct
ghes_pvt.

Signed-off-by: Robert Richter <[email protected]>
---
drivers/edac/ghes_edac.c | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index c7d404629863..2ed48a5d48d6 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -15,9 +15,7 @@
#include "edac_module.h"
#include <ras/ras_event.h>

-struct ghes_edac_pvt {
- struct list_head list;
- struct ghes *ghes;
+struct ghes_pvt {
struct mem_ctl_info *mci;

/* Buffers for the error handling routine */
@@ -32,7 +30,7 @@ static refcount_t ghes_refcount = REFCOUNT_INIT(0);
* also provides the necessary (implicit) memory barrier for the SMP
* case to make the pointer visible on another CPU.
*/
-static struct ghes_edac_pvt *ghes_pvt;
+static struct ghes_pvt *ghes_pvt;

/* GHES registration mutex */
static DEFINE_MUTEX(ghes_reg_mutex);
@@ -216,7 +214,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
{
struct edac_raw_error_desc *e;
struct mem_ctl_info *mci;
- struct ghes_edac_pvt *pvt;
+ struct ghes_pvt *pvt;
unsigned long flags;
char *p;

@@ -468,7 +466,7 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
bool fake = false;
int rc = 0, num_dimm = 0;
struct mem_ctl_info *mci;
- struct ghes_edac_pvt *pvt;
+ struct ghes_pvt *pvt;
struct edac_mc_layer layers[1];
struct ghes_edac_dimm_fill dimm_fill;
unsigned long flags;
@@ -505,7 +503,7 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
layers[0].size = num_dimm;
layers[0].is_virt_csrow = true;

- mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(struct ghes_edac_pvt));
+ mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(struct ghes_pvt));
if (!mci) {
pr_info("Can't allocate memory for EDAC data\n");
rc = -ENOMEM;
@@ -513,7 +511,6 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
}

pvt = mci->pvt_info;
- pvt->ghes = ghes;
pvt->mci = mci;

mci->pdev = dev;
--
2.20.1