This patch set is part two of a rework of the ghes_edac and edac_mc
driver. It addresses issues found during code review and while working
with the code. Part one has been included to v5.5, see:
https://lore.kernel.org/patchwork/cover/1093488/
The changes of this series include:
* add helper functions and factor out code (#1, #2, #5)
* improve function interfaces and data structures to decrease
complexity such as number of function arguments, unused data, etc.
(#3, #4, #7, #8, #9, #10),
* minor functional fixes (#6)
* improve code readability (#9)
Changes compared to part one:
* rebased onto 5781823fd0d3 ("EDAC/altera: Use the Altera System
Manager driver")
* reworded patch subjects
* reordered patches
* collected Mauro's Reviewed-by-tags (note: I kept them though there
has been small conflicts but dropped it when reworked)
* dropped: "EDAC/mc: Rework edac_raw_mc_handle_error() to use struct
dimm_info"
* split "EDAC/mc: Remove per layer counters" into smaller changes
* added:
"EDAC/mc: Report "unknown memory" on too many DIMM labels found"
"EDAC/mc: Remove enable_per_layer_report function arguments"
"EDAC/mc: Pass the error descriptor to error reporting functions"
"EDAC/mc: Remove detail[] string and cleanup error string
generation"
* moved to the end:
"EDAC/mc: Remove per layer counters"
Robert Richter (10):
EDAC/mc: Split edac_mc_alloc() into smaller functions
EDAC/mc: Reorder functions edac_mc_alloc*()
EDAC: Store error type in struct edac_raw_error_desc
EDAC/mc: Determine mci pointer from the error descriptor
EDAC/mc: Create new function edac_inc_csrow()
EDAC/mc: Report "unknown memory" on too many DIMM labels found
EDAC/mc: Remove enable_per_layer_report function arguments
EDAC/mc: Pass the error descriptor to error reporting functions
EDAC/mc: Remove detail[] string and cleanup error string generation
EDAC/mc: Remove per layer counters
drivers/edac/edac_mc.c | 496 ++++++++++++++++-------------------
drivers/edac/edac_mc.h | 6 +-
drivers/edac/edac_mc_sysfs.c | 20 +-
drivers/edac/ghes_edac.c | 16 +-
include/linux/edac.h | 8 +-
5 files changed, 248 insertions(+), 298 deletions(-)
--
2.20.1
Looking at how mci->{ue,ce}_per_layer[EDAC_MAX_LAYERS] is used, it
turns out that only the leaves in the memory hierarchy are consumed
(in sysfs), but not the intermediate layers, e.g.:
count = dimm->mci->ce_per_layer[dimm->mci->n_layers-1][dimm->idx];
These unused counters only add complexity, remove them. The error
counter values are directly stored in struct dimm_info now.
Signed-off-by: Robert Richter <[email protected]>
---
drivers/edac/edac_mc.c | 65 +++++++++---------------------------
drivers/edac/edac_mc_sysfs.c | 20 +++++------
include/linux/edac.h | 4 ++-
3 files changed, 26 insertions(+), 63 deletions(-)
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 5ea834fceb50..3d43b9fe8171 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -445,11 +445,9 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
{
struct mem_ctl_info *mci;
struct edac_mc_layer *layer;
- u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
- unsigned int idx, size, tot_dimms = 1, count = 1;
- unsigned int tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
+ unsigned int idx, size, tot_dimms = 1;
+ unsigned int tot_csrows = 1, tot_channels = 1;
void *pvt, *ptr = NULL;
- int i;
bool per_rank = false;
if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0))
@@ -476,19 +474,10 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
* stringent as what the compiler would provide if we could simply
* hardcode everything into a single struct.
*/
- mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
- layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
- for (i = 0; i < n_layers; i++) {
- count *= layers[i].size;
- edac_dbg(4, "errcount layer %d size %d\n", i, count);
- ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
- ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
- tot_errcount += 2 * count;
- }
-
- edac_dbg(4, "allocating %d error counters\n", tot_errcount);
- pvt = edac_align_ptr(&ptr, sz_pvt, 1);
- size = ((unsigned long)pvt) + sz_pvt;
+ mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
+ layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
+ pvt = edac_align_ptr(&ptr, sz_pvt, 1);
+ size = ((unsigned long)pvt) + sz_pvt;
edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
size,
@@ -504,10 +493,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
* rather than an imaginary chunk of memory located at address 0.
*/
layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
- for (i = 0; i < n_layers; i++) {
- mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
- mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
- }
pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
/* setup index and various internal pointers */
@@ -949,48 +934,28 @@ static void edac_inc_ce_error(struct edac_raw_error_desc *e)
{
struct mem_ctl_info *mci = error_desc_to_mci(e);
int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
- int i, index = 0;
+ struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);
mci->ce_mc += e->error_count;
- if (pos[0] < 0) {
+ if (dimm)
+ dimm->ce_count += e->error_count;
+ else
mci->ce_noinfo_count += e->error_count;
- return;
- }
-
- for (i = 0; i < mci->n_layers; i++) {
- if (pos[i] < 0)
- break;
- index += pos[i];
- mci->ce_per_layer[i][index] += e->error_count;
-
- if (i < mci->n_layers - 1)
- index *= mci->layers[i + 1].size;
- }
}
static void edac_inc_ue_error(struct edac_raw_error_desc *e)
{
struct mem_ctl_info *mci = error_desc_to_mci(e);
int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
- int i, index = 0;
+ struct dimm_info *dimm = edac_get_dimm(mci, pos[0], pos[1], pos[2]);
mci->ue_mc += e->error_count;
- if (pos[0] < 0) {
+ if (dimm)
+ dimm->ue_count += e->error_count;
+ else
mci->ue_noinfo_count += e->error_count;
- return;
- }
-
- for (i = 0; i < mci->n_layers; i++) {
- if (pos[i] < 0)
- break;
- index += pos[i];
- mci->ue_per_layer[i][index] += e->error_count;
-
- if (i < mci->n_layers - 1)
- index *= mci->layers[i + 1].size;
- }
}
static void edac_ce_error(struct edac_raw_error_desc *e)
@@ -1137,7 +1102,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
/*
* Check if the event report is consistent and if the memory
* location is known. If it is known, the DIMM(s) label info
- * will be filled and the per-layer error counters will be
+ * will be filled and the DIMM's error counters will be
* incremented.
*/
for (i = 0; i < mci->n_layers; i++) {
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 0367554e7437..8682df2f7f4f 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -556,10 +556,8 @@ static ssize_t dimmdev_ce_count_show(struct device *dev,
char *data)
{
struct dimm_info *dimm = to_dimm(dev);
- u32 count;
- count = dimm->mci->ce_per_layer[dimm->mci->n_layers-1][dimm->idx];
- return sprintf(data, "%u\n", count);
+ return sprintf(data, "%u\n", dimm->ce_count);
}
static ssize_t dimmdev_ue_count_show(struct device *dev,
@@ -567,10 +565,8 @@ static ssize_t dimmdev_ue_count_show(struct device *dev,
char *data)
{
struct dimm_info *dimm = to_dimm(dev);
- u32 count;
- count = dimm->mci->ue_per_layer[dimm->mci->n_layers-1][dimm->idx];
- return sprintf(data, "%u\n", count);
+ return sprintf(data, "%u\n", dimm->ue_count);
}
/* dimm/rank attribute files */
@@ -666,7 +662,9 @@ static ssize_t mci_reset_counters_store(struct device *dev,
const char *data, size_t count)
{
struct mem_ctl_info *mci = to_mci(dev);
- int cnt, row, chan, i;
+ struct dimm_info *dimm;
+ int row, chan;
+
mci->ue_mc = 0;
mci->ce_mc = 0;
mci->ue_noinfo_count = 0;
@@ -682,11 +680,9 @@ static ssize_t mci_reset_counters_store(struct device *dev,
ri->channels[chan]->ce_count = 0;
}
- cnt = 1;
- for (i = 0; i < mci->n_layers; i++) {
- cnt *= mci->layers[i].size;
- memset(mci->ce_per_layer[i], 0, cnt * sizeof(u32));
- memset(mci->ue_per_layer[i], 0, cnt * sizeof(u32));
+ mci_for_each_dimm(mci, dimm) {
+ dimm->ue_count = 0;
+ dimm->ce_count = 0;
}
mci->start_time = jiffies;
diff --git a/include/linux/edac.h b/include/linux/edac.h
index d441026eeb1c..76e2c099463b 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -383,6 +383,9 @@ struct dimm_info {
unsigned int csrow, cschannel; /* Points to the old API data */
u16 smbios_handle; /* Handle for SMBIOS type 17 */
+
+ u32 ce_count;
+ u32 ue_count;
};
/**
@@ -558,7 +561,6 @@ struct mem_ctl_info {
*/
u32 ce_noinfo_count, ue_noinfo_count;
u32 ue_mc, ce_mc;
- u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
struct completion complete;
--
2.20.1
Have a separate function to count errors in csrow/channel. This better
separates code and reduces the indentation level. No functional
changes.
Signed-off-by: Robert Richter <[email protected]>
Reviewed-by: Mauro Carvalho Chehab <[email protected]>
---
drivers/edac/edac_mc.c | 40 +++++++++++++++++++++++++---------------
1 file changed, 25 insertions(+), 15 deletions(-)
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index aff0630c02fc..e81d33960a0c 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -1089,6 +1089,26 @@ static void edac_ue_error(struct mem_ctl_info *mci,
edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
}
+static void edac_inc_csrow(struct edac_raw_error_desc *e, int row, int chan)
+{
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
+ u16 count = e->error_count;
+ enum hw_event_mc_err_type type = e->type;
+
+ if (row < 0)
+ return;
+
+ edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
+
+ if (type == HW_EVENT_ERR_CORRECTED) {
+ mci->csrows[row]->ce_count += count;
+ if (chan >= 0)
+ mci->csrows[row]->channels[chan]->ce_count += count;
+ } else {
+ mci->csrows[row]->ue_count += count;
+ }
+}
+
void edac_raw_mc_handle_error(struct edac_raw_error_desc *e)
{
struct mem_ctl_info *mci = error_desc_to_mci(e);
@@ -1256,22 +1276,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
chan = -2;
}
- if (!e->enable_per_layer_report) {
+ if (!e->enable_per_layer_report)
strcpy(e->label, "any memory");
- } else {
- edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
- if (p == e->label)
- strcpy(e->label, "unknown memory");
- if (type == HW_EVENT_ERR_CORRECTED) {
- if (row >= 0) {
- mci->csrows[row]->ce_count += error_count;
- if (chan >= 0)
- mci->csrows[row]->channels[chan]->ce_count += error_count;
- }
- } else
- if (row >= 0)
- mci->csrows[row]->ue_count += error_count;
- }
+ else if (!*e->label)
+ strcpy(e->label, "unknown memory");
+
+ edac_inc_csrow(e, row, chan);
/* Fill the RAM location data */
p = e->location;
--
2.20.1
Most arguments of error reporting functions are already stored in
struct edac_raw_error_desc error descriptor. Pass the error descriptor
to the functions and reduce the functions' arg list.
Signed-off-by: Robert Richter <[email protected]>
---
drivers/edac/edac_mc.c | 100 +++++++++++++++++------------------------
1 file changed, 42 insertions(+), 58 deletions(-)
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 38a9a68eebfe..545d25c8654e 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -945,16 +945,16 @@ const char *edac_layer_name[] = {
};
EXPORT_SYMBOL_GPL(edac_layer_name);
-static void edac_inc_ce_error(struct mem_ctl_info *mci,
- const int pos[EDAC_MAX_LAYERS],
- const u16 count)
+static void edac_inc_ce_error(struct edac_raw_error_desc *e)
{
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
+ int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
int i, index = 0;
- mci->ce_mc += count;
+ mci->ce_mc += e->error_count;
if (pos[0] < 0) {
- mci->ce_noinfo_count += count;
+ mci->ce_noinfo_count += e->error_count;
return;
}
@@ -962,23 +962,23 @@ static void edac_inc_ce_error(struct mem_ctl_info *mci,
if (pos[i] < 0)
break;
index += pos[i];
- mci->ce_per_layer[i][index] += count;
+ mci->ce_per_layer[i][index] += e->error_count;
if (i < mci->n_layers - 1)
index *= mci->layers[i + 1].size;
}
}
-static void edac_inc_ue_error(struct mem_ctl_info *mci,
- const int pos[EDAC_MAX_LAYERS],
- const u16 count)
+static void edac_inc_ue_error(struct edac_raw_error_desc *e)
{
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
+ int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
int i, index = 0;
- mci->ue_mc += count;
+ mci->ue_mc += e->error_count;
if (pos[0] < 0) {
- mci->ue_noinfo_count += count;
+ mci->ue_noinfo_count += e->error_count;
return;
}
@@ -986,44 +986,37 @@ static void edac_inc_ue_error(struct mem_ctl_info *mci,
if (pos[i] < 0)
break;
index += pos[i];
- mci->ue_per_layer[i][index] += count;
+ mci->ue_per_layer[i][index] += e->error_count;
if (i < mci->n_layers - 1)
index *= mci->layers[i + 1].size;
}
}
-static void edac_ce_error(struct mem_ctl_info *mci,
- const u16 error_count,
- const int pos[EDAC_MAX_LAYERS],
- const char *msg,
- const char *location,
- const char *label,
- const char *detail,
- const char *other_detail,
- const unsigned long page_frame_number,
- const unsigned long offset_in_page,
- long grain)
+static void edac_ce_error(struct edac_raw_error_desc *e,
+ const char *detail)
{
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
unsigned long remapped_page;
char *msg_aux = "";
- if (*msg)
+ if (*e->msg)
msg_aux = " ";
if (edac_mc_get_log_ce()) {
- if (other_detail && *other_detail)
+ if (e->other_detail && *e->other_detail)
edac_mc_printk(mci, KERN_WARNING,
"%d CE %s%son %s (%s %s - %s)\n",
- error_count, msg, msg_aux, label,
- location, detail, other_detail);
+ e->error_count, e->msg, msg_aux, e->label,
+ e->location, detail, e->other_detail);
else
edac_mc_printk(mci, KERN_WARNING,
"%d CE %s%son %s (%s %s)\n",
- error_count, msg, msg_aux, label,
- location, detail);
+ e->error_count, e->msg, msg_aux, e->label,
+ e->location, detail);
}
- edac_inc_ce_error(mci, pos, error_count);
+
+ edac_inc_ce_error(e);
if (mci->scrub_mode == SCRUB_SW_SRC) {
/*
@@ -1038,51 +1031,46 @@ static void edac_ce_error(struct mem_ctl_info *mci,
* be scrubbed.
*/
remapped_page = mci->ctl_page_to_phys ?
- mci->ctl_page_to_phys(mci, page_frame_number) :
- page_frame_number;
+ mci->ctl_page_to_phys(mci, e->page_frame_number) :
+ e->page_frame_number;
- edac_mc_scrub_block(remapped_page,
- offset_in_page, grain);
+ edac_mc_scrub_block(remapped_page, e->offset_in_page, e->grain);
}
}
-static void edac_ue_error(struct mem_ctl_info *mci,
- const u16 error_count,
- const int pos[EDAC_MAX_LAYERS],
- const char *msg,
- const char *location,
- const char *label,
- const char *detail,
- const char *other_detail)
+static void edac_ue_error(struct edac_raw_error_desc *e,
+ const char *detail)
{
+ struct mem_ctl_info *mci = error_desc_to_mci(e);
char *msg_aux = "";
- if (*msg)
+ if (*e->msg)
msg_aux = " ";
if (edac_mc_get_log_ue()) {
- if (other_detail && *other_detail)
+ if (e->other_detail && *e->other_detail)
edac_mc_printk(mci, KERN_WARNING,
"%d UE %s%son %s (%s %s - %s)\n",
- error_count, msg, msg_aux, label,
- location, detail, other_detail);
+ e->error_count, e->msg, msg_aux, e->label,
+ e->location, detail, e->other_detail);
else
edac_mc_printk(mci, KERN_WARNING,
"%d UE %s%son %s (%s %s)\n",
- error_count, msg, msg_aux, label,
- location, detail);
+ e->error_count, e->msg, msg_aux, e->label,
+ e->location, detail);
}
if (edac_mc_get_panic_on_ue()) {
- if (other_detail && *other_detail)
+ if (e->other_detail && *e->other_detail)
panic("UE %s%son %s (%s%s - %s)\n",
- msg, msg_aux, label, location, detail, other_detail);
+ e->msg, msg_aux, e->label, e->location, detail,
+ e->other_detail);
else
panic("UE %s%son %s (%s%s)\n",
- msg, msg_aux, label, location, detail);
+ e->msg, msg_aux, e->label, e->location, detail);
}
- edac_inc_ue_error(mci, pos, error_count);
+ edac_inc_ue_error(e);
}
static void edac_inc_csrow(struct edac_raw_error_desc *e, int row, int chan)
@@ -1109,7 +1097,6 @@ void edac_raw_mc_handle_error(struct edac_raw_error_desc *e)
{
struct mem_ctl_info *mci = error_desc_to_mci(e);
char detail[80];
- int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
u8 grain_bits;
/* Sanity-check driver-supplied grain value. */
@@ -1132,16 +1119,13 @@ void edac_raw_mc_handle_error(struct edac_raw_error_desc *e)
"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
e->page_frame_number, e->offset_in_page,
e->grain, e->syndrome);
- edac_ce_error(mci, e->error_count, pos, e->msg, e->location,
- e->label, detail, e->other_detail,
- e->page_frame_number, e->offset_in_page, e->grain);
+ edac_ce_error(e, detail);
} else {
snprintf(detail, sizeof(detail),
"page:0x%lx offset:0x%lx grain:%ld",
e->page_frame_number, e->offset_in_page, e->grain);
- edac_ue_error(mci, e->error_count, pos, e->msg, e->location,
- e->label, detail, e->other_detail);
+ edac_ue_error(e, detail);
}
--
2.20.1
There is a limitation to report only EDAC_MAX_LABELS in e->label of
the error descriptor. This is to prevent a possible string overflow.
Current implementation falls back to "any memory" in this case and
also stops all further processing to find a unique row and channel of
the possible error location. Reporting "any memory" is wrong as the
memory controller reported an error location for one of the layers.
Instead, report "unknown memory" and also do not break early in the
loop to further check row and channel for uniqueness.
Signed-off-by: Robert Richter <[email protected]>
---
drivers/edac/edac_mc.c | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index e81d33960a0c..2b12320ce2f1 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -1243,20 +1243,21 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
* channel/memory controller/... may be affected.
* Also, don't show errors for empty DIMM slots.
*/
- if (!e->enable_per_layer_report || !dimm->nr_pages)
+ if (!dimm->nr_pages)
continue;
- if (n_labels >= EDAC_MAX_LABELS) {
- e->enable_per_layer_report = false;
- break;
- }
n_labels++;
- if (p != e->label) {
- strcpy(p, OTHER_LABEL);
- p += strlen(OTHER_LABEL);
+ if (n_labels > EDAC_MAX_LABELS) {
+ p = e->label;
+ *p = '\0';
+ } else {
+ if (p != e->label) {
+ strcpy(p, OTHER_LABEL);
+ p += strlen(OTHER_LABEL);
+ }
+ strcpy(p, dimm->label);
+ p += strlen(p);
}
- strcpy(p, dimm->label);
- p += strlen(p);
/*
* get csrow/channel of the DIMM, in order to allow
--
2.20.1
edac_mc_alloc() is huge. Factor out code by moving it to the two new
functions edac_mc_alloc_csrows() and edac_mc_alloc_dimms(). Do not
move code yet for better review.
Signed-off-by: Robert Richter <[email protected]>
Reviewed-by: Mauro Carvalho Chehab <[email protected]>
---
drivers/edac/edac_mc.c | 105 +++++++++++++++++++++++++++--------------
1 file changed, 70 insertions(+), 35 deletions(-)
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 7243b88f81d8..9068287604dd 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -305,6 +305,9 @@ static void _edac_mc_free(struct mem_ctl_info *mci)
kfree(mci);
}
+static int edac_mc_alloc_csrows(struct mem_ctl_info *mci);
+static int edac_mc_alloc_dimms(struct mem_ctl_info *mci);
+
struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
unsigned int n_layers,
struct edac_mc_layer *layers,
@@ -312,15 +315,11 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
{
struct mem_ctl_info *mci;
struct edac_mc_layer *layer;
- struct csrow_info *csr;
- struct rank_info *chan;
- struct dimm_info *dimm;
u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
- unsigned int pos[EDAC_MAX_LAYERS];
unsigned int idx, size, tot_dimms = 1, count = 1;
unsigned int tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
- void *pvt, *p, *ptr = NULL;
- int i, j, row, chn, n, len;
+ void *pvt, *ptr = NULL;
+ int i;
bool per_rank = false;
if (WARN_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0))
@@ -392,16 +391,43 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
mci->num_cschannel = tot_channels;
mci->csbased = per_rank;
+ if (edac_mc_alloc_csrows(mci))
+ goto error;
+
+ if (edac_mc_alloc_dimms(mci))
+ goto error;
+
+ mci->op_state = OP_ALLOC;
+
+ return mci;
+
+error:
+ _edac_mc_free(mci);
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(edac_mc_alloc);
+
+static int edac_mc_alloc_csrows(struct mem_ctl_info *mci)
+{
+ unsigned int tot_csrows = mci->nr_csrows;
+ unsigned int tot_channels = mci->num_cschannel;
+ unsigned int row, chn;
+
/*
* Alocate and fill the csrow/channels structs
*/
mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
if (!mci->csrows)
- goto error;
+ return -ENOMEM;
+
for (row = 0; row < tot_csrows; row++) {
+ struct csrow_info *csr;
+
csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
if (!csr)
- goto error;
+ return -ENOMEM;
+
mci->csrows[row] = csr;
csr->csrow_idx = row;
csr->mci = mci;
@@ -409,34 +435,51 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
GFP_KERNEL);
if (!csr->channels)
- goto error;
+ return -ENOMEM;
for (chn = 0; chn < tot_channels; chn++) {
+ struct rank_info *chan;
+
chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
if (!chan)
- goto error;
+ return -ENOMEM;
+
csr->channels[chn] = chan;
chan->chan_idx = chn;
chan->csrow = csr;
}
}
+ return 0;
+}
+
+static int edac_mc_alloc_dimms(struct mem_ctl_info *mci)
+{
+ void *p;
+ unsigned int pos[EDAC_MAX_LAYERS];
+ unsigned int row, chn, idx;
+ int layer;
+
/*
* Allocate and fill the dimm structs
*/
- mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
+ mci->dimms = kcalloc(mci->tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
if (!mci->dimms)
- goto error;
+ return -ENOMEM;
memset(&pos, 0, sizeof(pos));
row = 0;
chn = 0;
- for (idx = 0; idx < tot_dimms; idx++) {
+ for (idx = 0; idx < mci->tot_dimms; idx++) {
+ struct dimm_info *dimm;
+ struct rank_info *chan;
+ int n, len;
+
chan = mci->csrows[row]->channels[chn];
dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
if (!dimm)
- goto error;
+ return -ENOMEM;
mci->dimms[idx] = dimm;
dimm->mci = mci;
dimm->idx = idx;
@@ -446,16 +489,16 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
*/
len = sizeof(dimm->label);
p = dimm->label;
- n = snprintf(p, len, "mc#%u", mc_num);
+ n = snprintf(p, len, "mc#%u", mci->mc_idx);
p += n;
len -= n;
- for (j = 0; j < n_layers; j++) {
+ for (layer = 0; layer < mci->n_layers; layer++) {
n = snprintf(p, len, "%s#%u",
- edac_layer_name[layers[j].type],
- pos[j]);
+ edac_layer_name[mci->layers[layer].type],
+ pos[layer]);
p += n;
len -= n;
- dimm->location[j] = pos[j];
+ dimm->location[layer] = pos[layer];
if (len <= 0)
break;
@@ -467,39 +510,31 @@ struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
dimm->cschannel = chn;
/* Increment csrow location */
- if (layers[0].is_virt_csrow) {
+ if (mci->layers[0].is_virt_csrow) {
chn++;
- if (chn == tot_channels) {
+ if (chn == mci->num_cschannel) {
chn = 0;
row++;
}
} else {
row++;
- if (row == tot_csrows) {
+ if (row == mci->nr_csrows) {
row = 0;
chn++;
}
}
/* Increment dimm location */
- for (j = n_layers - 1; j >= 0; j--) {
- pos[j]++;
- if (pos[j] < layers[j].size)
+ for (layer = mci->n_layers - 1; layer >= 0; layer--) {
+ pos[layer]++;
+ if (pos[layer] < mci->layers[layer].size)
break;
- pos[j] = 0;
+ pos[layer] = 0;
}
}
- mci->op_state = OP_ALLOC;
-
- return mci;
-
-error:
- _edac_mc_free(mci);
-
- return NULL;
+ return 0;
}
-EXPORT_SYMBOL_GPL(edac_mc_alloc);
void edac_mc_free(struct mem_ctl_info *mci)
{
--
2.20.1