2023-09-20 22:38:21

by Serge Semin

[permalink] [raw]
Subject: [PATCH v4 12/18] EDAC/synopsys: Read full data+ecc pattern on errors

DW uMCTL2 DDRC calculates ECC for the Full DQ-bus word. If a non-Full bus
width mode is activated, the leftover DQ-bits are padded with zeros, but
the ECC code is calculated for the whole width anyway [1]. For some reason
the DW uMCTL2 DDRC driver currently doesn't read the whole SDRAM word when
an ECC error happens, even though the 64-bit DQ-bus has been supported for
a long time. Moreover, the Full ECC value is also available in the
ECC(C|U)SYN2 register. In setups with a DQ-bus narrower than 64 bits the
higher ECC bits are just unused.

So update the error handler to read the entire data+ecc pattern: extend
the data field of the ECC error info structure since it may contain 64-bit
data; add a new ECC field there since it is a part of the erroneous data
pattern; read the upper 32-bit part of the data pattern only if an ECC
error happens and the DDR controller has been configured with the 64-bit
DQ bus; read the full ECC value from the ECC(C|U)SYN2 register. The
data+ecc pair will be printed as a part of the custom error message which
is then passed to the edac_mc_handle_error() method.

Note that since the full data+ecc info is now always logged into the EDAC
core, there is no longer any need for the debug print of the Syndrome
Registers content. Drop it then.

[1] DesignWare® Cores Enhanced Universal DDR Memory Controller (uMCTL2)
Databook, Version 3.91a, October 2020, p.424-425

Signed-off-by: Serge Semin <[email protected]>

---

Changelog v4:
- Retrieve ECC too.
---
drivers/edac/synopsys_edac.c | 24 ++++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
index fbf1f8af9788..7376a0fc6394 100644
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -305,6 +305,7 @@ struct snps_ddrc_info {
* @syndrome: Error syndrome.
* @bitpos: Bit position.
* @data: Data causing the error.
+ * @ecc: Data ECC.
*/
struct snps_ecc_error_info {
u32 row;
@@ -313,7 +314,8 @@ struct snps_ecc_error_info {
u32 bankgrp;
u32 syndrome;
u32 bitpos;
- u32 data;
+ u64 data;
+ u32 ecc;
};

/**
@@ -422,10 +424,10 @@ static int snps_get_error_info(struct snps_edac_priv *priv)
p->ceinfo.col = FIELD_GET(ECC_CEADDR1_COL_MASK, regval);

p->ceinfo.data = readl(base + ECC_CSYND0_OFST);
+ if (priv->info.dq_width == SNPS_DQ_64)
+ p->ceinfo.data |= (u64)readl(base + ECC_CSYND1_OFST) << 32;

- edac_dbg(2, "ECCCSYN0: 0x%08X ECCCSYN1: 0x%08X ECCCSYN2: 0x%08X\n",
- readl(base + ECC_CSYND0_OFST), readl(base + ECC_CSYND1_OFST),
- readl(base + ECC_CSYND2_OFST));
+ p->ceinfo.ecc = readl(base + ECC_CSYND2_OFST);

ue_err:
if (!p->ue_cnt)
@@ -440,6 +442,11 @@ static int snps_get_error_info(struct snps_edac_priv *priv)
p->ueinfo.col = FIELD_GET(ECC_CEADDR1_COL_MASK, regval);

p->ueinfo.data = readl(base + ECC_UESYND0_OFST);
+ if (priv->info.dq_width == SNPS_DQ_64)
+ p->ueinfo.data |= (u64)readl(base + ECC_UESYND1_OFST) << 32;
+
+ p->ueinfo.ecc = readl(base + ECC_UESYND2_OFST);
+
out:
spin_lock_irqsave(&priv->reglock, flags);

@@ -469,9 +476,9 @@ static void snps_handle_error(struct mem_ctl_info *mci, struct snps_ecc_status *
pinf = &p->ceinfo;

snprintf(priv->message, SNPS_EDAC_MSG_SIZE,
- "Row %d Col %d Bank %d Bank Group %d Bit %d Data 0x%08x",
+ "Row %d Col %d Bank %d Bank Group %d Bit %d Data 0x%08llx:0x%02x",
pinf->row, pinf->col, pinf->bank, pinf->bankgrp,
- pinf->bitpos, pinf->data);
+ pinf->bitpos, pinf->data, pinf->ecc);

edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
p->ce_cnt, 0, 0, pinf->syndrome, 0, 0, -1,
@@ -482,8 +489,9 @@ static void snps_handle_error(struct mem_ctl_info *mci, struct snps_ecc_status *
pinf = &p->ueinfo;

snprintf(priv->message, SNPS_EDAC_MSG_SIZE,
- "Row %d Col %d Bank %d Bank Group %d",
- pinf->row, pinf->col, pinf->bank, pinf->bankgrp);
+ "Row %d Col %d Bank %d Bank Group %d Data 0x%08llx:0x%02x",
+ pinf->row, pinf->col, pinf->bank, pinf->bankgrp,
+ pinf->data, pinf->ecc);

edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
p->ue_cnt, 0, 0, 0, 0, 0, -1,
--
2.41.0