2020-08-19 14:37:56

by Alex Kluver

[permalink] [raw]
Subject: [PATCH v2 0/2] UEFI v2.8 Memory Error Record Updates

The UEFI Specification v2.8, Table 299, Memory Error Record has
several changes from previous versions. Bits 18 through 21 have been
added to the memory validation bits to include an extended version
of row, an option to print bank address and group separately, and chip id.
These patches implement bits 18 through 21 into the Memory Error Record.

Change the reserved field to an extended field in the cper_sec_mem_err
structure and add the extended field to the cper_mem_err_compact structure.

Print correct versions of row, bank, and chip ID.
---
v1 -> v2:
* Add static inline cper_get_mem_extension to make
it more readable, as suggested by Borislav Petkov

* Add second patch for bank field, bank group, and chip id.
---
Alex Kluver (2):
edac,ghes,cper: Add Row Extension to Memory Error Record
cper,edac,efi: Memory Error Record: bank group/address and chip id

drivers/edac/ghes_edac.c | 17 +++++++++++++++--
drivers/firmware/efi/cper.c | 18 ++++++++++++++++--
include/linux/cper.h | 24 ++++++++++++++++++++++--
3 files changed, 53 insertions(+), 6 deletions(-)

--
2.26.2


2020-08-19 14:38:32

by Alex Kluver

[permalink] [raw]
Subject: [PATCH v2 2/2] cper,edac,efi: Memory Error Record: bank group/address and chip id

Updates to the UEFI 2.8 Memory Error Record allow splitting the bank field
into bank address and bank group, and using the last 3 bits of the extended
field as a chip identifier.

When needed, print correct version of bank field, bank group, and chip
identification.

Based on UEFI 2.8 Table 299. Memory Error Record.

Reviewed-by: Steve Wahl <[email protected]>
Reviewed-by: Kyle Meyer <[email protected]>
Reviewed-by: Russ Anderson <[email protected]>
Signed-off-by: Alex Kluver <[email protected]>
---

v1 -> v2:
* Add static inline cper_get_mem_extension() to make it
more readable, as suggested by Borislav Petkov.

* Add second patch for bank field, bank group, and chip id.

---
drivers/edac/ghes_edac.c | 9 +++++++++
drivers/firmware/efi/cper.c | 9 +++++++++
include/linux/cper.h | 8 ++++++++
3 files changed, 26 insertions(+)

diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 98fcdaf72a09..31eb72b67265 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -337,6 +337,12 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
p += sprintf(p, "rank:%d ", mem_err->rank);
if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
p += sprintf(p, "bank:%d ", mem_err->bank);
+ if (mem_err->validation_bits & CPER_MEM_VALID_BANK_GROUP)
+ p += sprintf(p, "bank_group:%d ",
+ mem_err->bank >> CPER_MEM_BANK_GROUP_SHIFT);
+ if (mem_err->validation_bits & CPER_MEM_VALID_BANK_ADDRESS)
+ p += sprintf(p, "bank_address:%d ",
+ mem_err->bank & CPER_MEM_BANK_ADDRESS_MASK);
if (mem_err->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
u32 row = mem_err->row;

@@ -362,6 +368,9 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
if (index >= 0)
e->top_layer = index;
}
+ if (mem_err->validation_bits & CPER_MEM_VALID_CHIP_ID)
+ p += sprintf(p, "chipID: %d ",
+ mem_err->extended >> CPER_MEM_CHIP_ID_SHIFT);
if (p > e->location)
*(p - 1) = '\0';

diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index a60acd17bcaa..e15d484b6a5a 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -232,6 +232,12 @@ static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
if (mem->validation_bits & CPER_MEM_VALID_BANK)
n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
+ if (mem->validation_bits & CPER_MEM_VALID_BANK_GROUP)
+ n += scnprintf(msg + n, len - n, "bank_group: %d ",
+ mem->bank >> CPER_MEM_BANK_GROUP_SHIFT);
+ if (mem->validation_bits & CPER_MEM_VALID_BANK_ADDRESS)
+ n += scnprintf(msg + n, len - n, "bank_address: %d ",
+ mem->bank & CPER_MEM_BANK_ADDRESS_MASK);
if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
if (mem->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
@@ -254,6 +260,9 @@ static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
mem->target_id);
+ if (mem->validation_bits & CPER_MEM_VALID_CHIP_ID)
+ scnprintf(msg + n, len - n, "chip_id: %d ",
+ mem->extended >> CPER_MEM_CHIP_ID_SHIFT);

msg[n] = '\0';
return n;
diff --git a/include/linux/cper.h b/include/linux/cper.h
index bd2d8a77a784..6a511a1078ca 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -231,10 +231,18 @@ enum {
#define CPER_MEM_VALID_CARD_HANDLE 0x10000
#define CPER_MEM_VALID_MODULE_HANDLE 0x20000
#define CPER_MEM_VALID_ROW_EXT 0x40000
+#define CPER_MEM_VALID_BANK_GROUP 0x80000
+#define CPER_MEM_VALID_BANK_ADDRESS 0x100000
+#define CPER_MEM_VALID_CHIP_ID 0x200000

#define CPER_MEM_EXT_ROW_MASK 0x3
#define CPER_MEM_EXT_ROW_SHIFT 16

+#define CPER_MEM_BANK_ADDRESS_MASK 0xff
+#define CPER_MEM_BANK_GROUP_SHIFT 8
+
+#define CPER_MEM_CHIP_ID_SHIFT 5
+
#define CPER_PCIE_VALID_PORT_TYPE 0x0001
#define CPER_PCIE_VALID_VERSION 0x0002
#define CPER_PCIE_VALID_COMMAND_STATUS 0x0004
--
2.26.2

2020-08-19 14:40:17

by Alex Kluver

[permalink] [raw]
Subject: [PATCH v2 1/2] edac,ghes,cper: Add Row Extension to Memory Error Record

Memory errors could be printed with incorrect row values since the DIMM
size has outgrown the 16 bit row field in the CPER structure. UEFI
Specification Version 2.8 has increased the size of row by allowing it to
use the first 2 bits from a previously reserved space within the structure.

When needed, add the extension bits to the row value printed.

Based on UEFI 2.8 Table 299. Memory Error Record.

Reviewed-by: Kyle Meyer <[email protected]>
Reviewed-by: Steve Wahl <[email protected]>
Tested-by: Russ Anderson <[email protected]>
Signed-off-by: Alex Kluver <[email protected]>
---

v1 -> v2:
* Add static inline cper_get_mem_extension() to make it
more readable, as suggested by Borislav Petkov.

* Add second patch for bank field, bank group, and chip id.

---
drivers/edac/ghes_edac.c | 8 ++++++--
drivers/firmware/efi/cper.c | 9 +++++++--
include/linux/cper.h | 16 ++++++++++++++--
3 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index cb3dab56a875..98fcdaf72a09 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -337,8 +337,12 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
p += sprintf(p, "rank:%d ", mem_err->rank);
if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
p += sprintf(p, "bank:%d ", mem_err->bank);
- if (mem_err->validation_bits & CPER_MEM_VALID_ROW)
- p += sprintf(p, "row:%d ", mem_err->row);
+ if (mem_err->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
+ u32 row = mem_err->row;
+
+ row |= cper_get_mem_extension(mem_err->validation_bits, mem_err->extended);
+ p += sprintf(p, "row:%d ", row);
+ }
if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN)
p += sprintf(p, "col:%d ", mem_err->column);
if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION)
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index f564e15fbc7e..a60acd17bcaa 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -234,8 +234,12 @@ static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
- if (mem->validation_bits & CPER_MEM_VALID_ROW)
- n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
+ if (mem->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
+ u32 row = mem->row;
+
+ row |= cper_get_mem_extension(mem->validation_bits, mem->extended);
+ n += scnprintf(msg + n, len - n, "row: %d ", row);
+ }
if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
@@ -292,6 +296,7 @@ void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
cmem->requestor_id = mem->requestor_id;
cmem->responder_id = mem->responder_id;
cmem->target_id = mem->target_id;
+ cmem->extended = mem->extended;
cmem->rank = mem->rank;
cmem->mem_array_handle = mem->mem_array_handle;
cmem->mem_dev_handle = mem->mem_dev_handle;
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 8537e9282a65..bd2d8a77a784 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -230,6 +230,10 @@ enum {
#define CPER_MEM_VALID_RANK_NUMBER 0x8000
#define CPER_MEM_VALID_CARD_HANDLE 0x10000
#define CPER_MEM_VALID_MODULE_HANDLE 0x20000
+#define CPER_MEM_VALID_ROW_EXT 0x40000
+
+#define CPER_MEM_EXT_ROW_MASK 0x3
+#define CPER_MEM_EXT_ROW_SHIFT 16

#define CPER_PCIE_VALID_PORT_TYPE 0x0001
#define CPER_PCIE_VALID_VERSION 0x0002
@@ -443,7 +447,7 @@ struct cper_sec_mem_err_old {
u8 error_type;
};

-/* Memory Error Section (UEFI >= v2.3), UEFI v2.7 sec N.2.5 */
+/* Memory Error Section (UEFI >= v2.3), UEFI v2.8 sec N.2.5 */
struct cper_sec_mem_err {
u64 validation_bits;
u64 error_status;
@@ -461,7 +465,7 @@ struct cper_sec_mem_err {
u64 responder_id;
u64 target_id;
u8 error_type;
- u8 reserved;
+ u8 extended;
u16 rank;
u16 mem_array_handle; /* "card handle" in UEFI 2.4 */
u16 mem_dev_handle; /* "module handle" in UEFI 2.4 */
@@ -483,8 +487,16 @@ struct cper_mem_err_compact {
u16 rank;
u16 mem_array_handle;
u16 mem_dev_handle;
+ u8 extended;
};

+static inline u32 cper_get_mem_extension(u64 mem_valid, u8 mem_extended)
+{
+ if (!(mem_valid & CPER_MEM_VALID_ROW_EXT))
+ return 0;
+ return (mem_extended & CPER_MEM_EXT_ROW_MASK) << CPER_MEM_EXT_ROW_SHIFT;
+}
+
/* PCI Express Error Section, UEFI v2.7 sec N.2.7 */
struct cper_sec_pcie {
u64 validation_bits;
--
2.26.2

2020-09-14 16:49:17

by Russ Anderson

[permalink] [raw]
Subject: Re: [PATCH v2 0/2] UEFI v2.8 Memory Error Record Updates

On Wed, Aug 19, 2020 at 09:35:42AM -0500, Alex Kluver wrote:
> The UEFI Specification v2.8, Table 299, Memory Error Record has
> several changes from previous versions. Bits 18 through 21 have been
> added to the memory validation bits to include an extended version
> of row, an option to print bank address and group separately, and chip id.
> These patches implement bits 18 through 21 into the Memory Error Record.
>
> Change reserved field to extended field in cper_sec_mem_err structure
> and added the extended field to the cper_mem_err_compact structure.
>
> Print correct versions of row, bank, and chip ID.

Are there any community comments on this patch set?
Questions/comments/concerns?

Thanks.

> ---
> v1 -> v2:
> * Add static inline cper_get_mem_extension to make
> it more readable, as suggested by Borislav Petkov
>
> * Add second patch for bank field, bank group, and chip id.
> ---
> Alex Kluver (2):
> edac,ghes,cper: Add Row Extension to Memory Error Record
> cper,edac,efi: Memory Error Record: bank group/address and chip id
>
> drivers/edac/ghes_edac.c | 17 +++++++++++++++--
> drivers/firmware/efi/cper.c | 18 ++++++++++++++++--
> include/linux/cper.h | 24 ++++++++++++++++++++++--
> 3 files changed, 53 insertions(+), 6 deletions(-)
>
> --
> 2.26.2
>

--
Russ Anderson, SuperDome Flex Linux Kernel Group Manager
HPE - Hewlett Packard Enterprise (formerly SGI) [email protected]

2020-09-15 17:40:42

by Ard Biesheuvel

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] edac,ghes,cper: Add Row Extension to Memory Error Record

On Tue, 15 Sep 2020 at 20:07, Ard Biesheuvel <[email protected]> wrote:
>
> On Tue, 15 Sep 2020 at 19:33, Borislav Petkov <[email protected]> wrote:
> >
> > On Wed, Aug 19, 2020 at 09:35:43AM -0500, Alex Kluver wrote:
> > > Memory errors could be printed with incorrect row values since the DIMM
> > > size has outgrown the 16 bit row field in the CPER structure. UEFI
> > > Specification Version 2.8 has increased the size of row by allowing it to
> > > use the first 2 bits from a previously reserved space within the structure.
> > >
> > > When needed, add the extension bits to the row value printed.
> > >
> > > Based on UEFI 2.8 Table 299. Memory Error Record
> > >
> > > Reviewed-by: Kyle Meyer <[email protected]>
> > > Reviewed-by: Steve Wahl <[email protected]>
> > > Tested-by: Russ Anderson <[email protected]>
> > > Signed-off-by: Alex Kluver <[email protected]>
> > > ---
> > >
> > > v1 -> v2:
> > > * Add static inline cper_get_mem_extension() to make it
> > > more readable, as suggested by Borislav Petkov.
> > >
> > > * Add second patch for bank field, bank group, and chip id.
> > >
> > > ---
> > > drivers/edac/ghes_edac.c | 8 ++++++--
> > > drivers/firmware/efi/cper.c | 9 +++++++--
> > > include/linux/cper.h | 16 ++++++++++++++--
> > > 3 files changed, 27 insertions(+), 6 deletions(-)
> >
> > For the EDAC bits:
> >
> > Acked-by: Borislav Petkov <[email protected]>
> >
> > Also, I could take both through the EDAC tree, if people prefer.
> >
>
> I'll take this via the EFI tree - I was just preparing the branch for
> a PR anyways.

Alex - these patches do not apply cleanly. Could you please respin
them on top of the next branch in
https://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git?

Boris - do you anticipate any conflicts? If so, please take these via
the EDAC tree - the CPER code is mostly self contained so I don't
expect any conflicts with the EFI tree in that case.

2020-09-15 17:44:15

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] edac,ghes,cper: Add Row Extension to Memory Error Record

On Tue, Sep 15, 2020 at 08:12:31PM +0300, Ard Biesheuvel wrote:
> Boris - do you anticipate any conflicts? If so, please take these via
> the EDAC tree - the CPER code is mostly self contained so I don't
> expect any conflicts with the EFI tree in that case.

None so far, and I applied them for testing on top of my EDAC queue for
5.10 so it should be all good. But if you want me to, I can test-merge your
branch once ready, just in case...

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2020-09-15 19:55:22

by Ard Biesheuvel

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] edac,ghes,cper: Add Row Extension to Memory Error Record

On Tue, 15 Sep 2020 at 19:33, Borislav Petkov <[email protected]> wrote:
>
> On Wed, Aug 19, 2020 at 09:35:43AM -0500, Alex Kluver wrote:
> > Memory errors could be printed with incorrect row values since the DIMM
> > size has outgrown the 16 bit row field in the CPER structure. UEFI
> > Specification Version 2.8 has increased the size of row by allowing it to
> > use the first 2 bits from a previously reserved space within the structure.
> >
> > When needed, add the extension bits to the row value printed.
> >
> > Based on UEFI 2.8 Table 299. Memory Error Record
> >
> > Reviewed-by: Kyle Meyer <[email protected]>
> > Reviewed-by: Steve Wahl <[email protected]>
> > Tested-by: Russ Anderson <[email protected]>
> > Signed-off-by: Alex Kluver <[email protected]>
> > ---
> >
> > v1 -> v2:
> > * Add static inline cper_get_mem_extension() to make it
> > more readable, as suggested by Borislav Petkov.
> >
> > * Add second patch for bank field, bank group, and chip id.
> >
> > ---
> > drivers/edac/ghes_edac.c | 8 ++++++--
> > drivers/firmware/efi/cper.c | 9 +++++++--
> > include/linux/cper.h | 16 ++++++++++++++--
> > 3 files changed, 27 insertions(+), 6 deletions(-)
>
> For the EDAC bits:
>
> Acked-by: Borislav Petkov <[email protected]>
>
> Also, I could take both through the EDAC tree, if people prefer.
>

I'll take this via the EFI tree - I was just preparing the branch for
a PR anyways.

2020-09-15 22:15:38

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] cper,edac,efi: Memory Error Record: bank group/address and chip id

On Wed, Aug 19, 2020 at 09:35:44AM -0500, Alex Kluver wrote:
> Updates to the UEFI 2.8 Memory Error Record allow splitting the bank field
> into bank address and bank group, and using the last 3 bits of the extended
> field as a chip identifier.
>
> When needed, print correct version of bank field, bank group, and chip
> identification
>
> Based on UEFI 2.8 Table 299. Memory Error Record

Whoever commits this - those last two sentences need fullstops.

> Reviewed-by: Steve Wahl <[email protected]>
> Reviewed-by: Kyle Meyer <[email protected]>
> Reviewed-by: Russ Anderson <[email protected]>
> Signed-off-by: Alex Kluver <[email protected]>
> ---
>
> v1 -> v2:
> * Add static inline cper_get_mem_extension() to make it
> more readable, as suggested by Borislav Petkov.
>
> * Add second patch for bank field, bank group, and chip id.
>
> ---
> drivers/edac/ghes_edac.c | 9 +++++++++
> drivers/firmware/efi/cper.c | 9 +++++++++
> include/linux/cper.h | 8 ++++++++
> 3 files changed, 26 insertions(+)

For the EDAC bits:

Acked-by: Borislav Petkov <[email protected]>

Thx.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2020-09-15 22:20:42

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] edac,ghes,cper: Add Row Extension to Memory Error Record

On Wed, Aug 19, 2020 at 09:35:43AM -0500, Alex Kluver wrote:
> Memory errors could be printed with incorrect row values since the DIMM
> size has outgrown the 16 bit row field in the CPER structure. UEFI
> Specification Version 2.8 has increased the size of row by allowing it to
> use the first 2 bits from a previously reserved space within the structure.
>
> When needed, add the extension bits to the row value printed.
>
> Based on UEFI 2.8 Table 299. Memory Error Record
>
> Reviewed-by: Kyle Meyer <[email protected]>
> Reviewed-by: Steve Wahl <[email protected]>
> Tested-by: Russ Anderson <[email protected]>
> Signed-off-by: Alex Kluver <[email protected]>
> ---
>
> v1 -> v2:
> * Add static inline cper_get_mem_extension() to make it
> more readable, as suggested by Borislav Petkov.
>
> * Add second patch for bank field, bank group, and chip id.
>
> ---
> drivers/edac/ghes_edac.c | 8 ++++++--
> drivers/firmware/efi/cper.c | 9 +++++++--
> include/linux/cper.h | 16 ++++++++++++++--
> 3 files changed, 27 insertions(+), 6 deletions(-)

For the EDAC bits:

Acked-by: Borislav Petkov <[email protected]>

Also, I could take both through the EDAC tree, if people prefer.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2020-09-15 23:00:14

by Ard Biesheuvel

[permalink] [raw]
Subject: Re: [PATCH v2 0/2] UEFI v2.8 Memory Error Record Updates

On Wed, 19 Aug 2020 at 17:36, Alex Kluver <[email protected]> wrote:
>
> The UEFI Specification v2.8, Table 299, Memory Error Record has
> several changes from previous versions. Bits 18 through 21 have been
> added to the memory validation bits to include an extended version
> of row, an option to print bank address and group separately, and chip id.
> These patches implement bits 18 through 21 into the Memory Error Record.
>
> Change reserved field to extended field in cper_sec_mem_err structure
> and added the extended field to the cper_mem_err_compact structure.
>
> Print correct versions of row, bank, and chip ID.
> ---
> v1 -> v2:
> * Add static inline cper_get_mem_extension to make
> it more readable, as suggested by Borislav Petkov
>
> * Add second patch for bank field, bank group, and chip id.
> ---
> Alex Kluver (2):
> edac,ghes,cper: Add Row Extension to Memory Error Record
> cper,edac,efi: Memory Error Record: bank group/address and chip id
>
> drivers/edac/ghes_edac.c | 17 +++++++++++++++--
> drivers/firmware/efi/cper.c | 18 ++++++++++++++++--
> include/linux/cper.h | 24 ++++++++++++++++++++++--
> 3 files changed, 53 insertions(+), 6 deletions(-)
>

For the series,

Acked-by: Ard Biesheuvel <[email protected]>

2020-09-16 18:12:23

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] edac,ghes,cper: Add Row Extension to Memory Error Record

On Wed, Sep 16, 2020 at 04:09:36PM +0300, Ard Biesheuvel wrote:
> git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git next

Looks good and no conflicts, builds fine too.

[boris@zn: ~/kernel/linux> git fetch efi
remote: Enumerating objects: 85, done.
remote: Counting objects: 100% (85/85), done.
remote: Compressing objects: 100% (14/14), done.
remote: Total 131 (delta 71), reused 85 (delta 71), pack-reused 46
Receiving objects: 100% (131/131), 113.14 KiB | 1.69 MiB/s, done.
Resolving deltas: 100% (89/89), completed with 33 local objects.
From git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi
+ 84780c5438ef...744de4180a43 next -> efi/next (forced update)
fb1201aececc..46908326c6b8 urgent -> efi/urgent
* [new tag] efi-next-for-v5.10 -> efi-next-for-v5.10
* [new tag] efi-urgent-for-v5.9-rc5 -> efi-urgent-for-v5.9-rc5
* [new tag] efi-riscv-shared-for-v5.10 -> efi-riscv-shared-for-v5.10
[boris@zn: ~/kernel/linux> git checkout -b test-merge ras/edac-for-next
Branch 'test-merge' set up to track remote branch 'edac-for-next' from 'ras'.
Switched to a new branch 'test-merge'
[boris@zn: ~/kernel/linux> git merge efi/next
Auto-merging drivers/firmware/efi/libstub/efi-stub-helper.c
Auto-merging drivers/firmware/efi/efi.c
Auto-merging drivers/edac/ghes_edac.c
Auto-merging arch/x86/platform/efi/efi.c
Merge made by the 'recursive' strategy.
arch/arm/include/asm/efi.h | 23 +++--
arch/arm64/include/asm/efi.h | 5 +-
arch/x86/kernel/setup.c | 1 +
arch/x86/platform/efi/efi.c | 3 +
drivers/edac/ghes_edac.c | 17 +++-
drivers/firmware/efi/Makefile | 3 +-
drivers/firmware/efi/cper.c | 18 +++-
drivers/firmware/efi/{arm-init.c => efi-init.c} | 1 +
drivers/firmware/efi/efi.c | 6 ++
drivers/firmware/efi/libstub/arm32-stub.c | 178 +++++++---------------------------
drivers/firmware/efi/libstub/arm64-stub.c | 1 -
drivers/firmware/efi/libstub/efi-stub-helper.c | 101 +++++++++++++++++++-
drivers/firmware/efi/libstub/efi-stub.c | 48 +---------
drivers/firmware/efi/libstub/efistub.h | 61 +++++++++++-
drivers/firmware/efi/libstub/file.c | 5 +-
drivers/firmware/efi/libstub/relocate.c | 4 +-
drivers/firmware/efi/libstub/vsprintf.c | 2 +-
drivers/firmware/efi/mokvar-table.c | 360 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/cper.h | 24 ++++-
include/linux/efi.h | 34 +++++++
include/linux/pe.h | 3 +
security/integrity/platform_certs/load_uefi.c | 85 +++++++++++++----
22 files changed, 746 insertions(+), 237 deletions(-)
rename drivers/firmware/efi/{arm-init.c => efi-init.c} (99%)
create mode 100644 drivers/firmware/efi/mokvar-table.c

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2020-09-16 18:15:26

by Russ Anderson

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] edac,ghes,cper: Add Row Extension to Memory Error Record

On Wed, Sep 16, 2020 at 08:10:30PM +0200, Borislav Petkov wrote:
> On Wed, Sep 16, 2020 at 04:09:36PM +0300, Ard Biesheuvel wrote:
> > git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git next
>
> Looks good and no conflicts, builds fine too.

Excellent.
Thanks!

> [boris@zn: ~/kernel/linux> git fetch efi
> remote: Enumerating objects: 85, done.
> remote: Counting objects: 100% (85/85), done.
> remote: Compressing objects: 100% (14/14), done.
> remote: Total 131 (delta 71), reused 85 (delta 71), pack-reused 46
> Receiving objects: 100% (131/131), 113.14 KiB | 1.69 MiB/s, done.
> Resolving deltas: 100% (89/89), completed with 33 local objects.
> From git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi
> + 84780c5438ef...744de4180a43 next -> efi/next (forced update)
> fb1201aececc..46908326c6b8 urgent -> efi/urgent
> * [new tag] efi-next-for-v5.10 -> efi-next-for-v5.10
> * [new tag] efi-urgent-for-v5.9-rc5 -> efi-urgent-for-v5.9-rc5
> * [new tag] efi-riscv-shared-for-v5.10 -> efi-riscv-shared-for-v5.10
> [boris@zn: ~/kernel/linux> git checkout -b test-merge ras/edac-for-next
> Branch 'test-merge' set up to track remote branch 'edac-for-next' from 'ras'.
> Switched to a new branch 'test-merge'
> [boris@zn: ~/kernel/linux> git merge efi/next
> Auto-merging drivers/firmware/efi/libstub/efi-stub-helper.c
> Auto-merging drivers/firmware/efi/efi.c
> Auto-merging drivers/edac/ghes_edac.c
> Auto-merging arch/x86/platform/efi/efi.c
> Merge made by the 'recursive' strategy.
> arch/arm/include/asm/efi.h | 23 +++--
> arch/arm64/include/asm/efi.h | 5 +-
> arch/x86/kernel/setup.c | 1 +
> arch/x86/platform/efi/efi.c | 3 +
> drivers/edac/ghes_edac.c | 17 +++-
> drivers/firmware/efi/Makefile | 3 +-
> drivers/firmware/efi/cper.c | 18 +++-
> drivers/firmware/efi/{arm-init.c => efi-init.c} | 1 +
> drivers/firmware/efi/efi.c | 6 ++
> drivers/firmware/efi/libstub/arm32-stub.c | 178 +++++++---------------------------
> drivers/firmware/efi/libstub/arm64-stub.c | 1 -
> drivers/firmware/efi/libstub/efi-stub-helper.c | 101 +++++++++++++++++++-
> drivers/firmware/efi/libstub/efi-stub.c | 48 +---------
> drivers/firmware/efi/libstub/efistub.h | 61 +++++++++++-
> drivers/firmware/efi/libstub/file.c | 5 +-
> drivers/firmware/efi/libstub/relocate.c | 4 +-
> drivers/firmware/efi/libstub/vsprintf.c | 2 +-
> drivers/firmware/efi/mokvar-table.c | 360 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> include/linux/cper.h | 24 ++++-
> include/linux/efi.h | 34 +++++++
> include/linux/pe.h | 3 +
> security/integrity/platform_certs/load_uefi.c | 85 +++++++++++++----
> 22 files changed, 746 insertions(+), 237 deletions(-)
> rename drivers/firmware/efi/{arm-init.c => efi-init.c} (99%)
> create mode 100644 drivers/firmware/efi/mokvar-table.c
>
> --
> Regards/Gruss,
> Boris.
>
> https://people.kernel.org/tglx/notes-about-netiquette

--
Russ Anderson, SuperDome Flex Linux Kernel Group Manager
HPE - Hewlett Packard Enterprise (formerly SGI) [email protected]

2020-09-16 19:07:21

by Ard Biesheuvel

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] edac,ghes,cper: Add Row Extension to Memory Error Record

On Tue, 15 Sep 2020 at 20:19, Borislav Petkov <[email protected]> wrote:
>
> On Tue, Sep 15, 2020 at 08:12:31PM +0300, Ard Biesheuvel wrote:
> > Boris - do you anticipate any conflicts? If so, please take these via
> > the EDAC tree - the CPER code is mostly self contained so I don't
> > expect any conflicts with the EFI tree in that case.
>
> None so far, and I applied them for testing ontop of my EDAC queue for
> 5.10 so it should be all good. But if you want me, I can test-merge your
> branch once ready, just in case...
>

I managed to apply these patches by using a different base and
cherry-picking them into efi/next.

I expect to send out a couple of PRs tomorrow, once the bots have had
a go at building the branches. In the meantime, you can take a look at

git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git next

Subject: [tip: efi/core] cper,edac,efi: Memory Error Record: bank group/address and chip id

The following commit has been merged into the efi/core branch of tip:

Commit-ID: 744de4180a43cb5264c1ba39d98c9eadcb228491
Gitweb: https://git.kernel.org/tip/744de4180a43cb5264c1ba39d98c9eadcb228491
Author: Alex Kluver <[email protected]>
AuthorDate: Wed, 19 Aug 2020 09:35:44 -05:00
Committer: Ard Biesheuvel <[email protected]>
CommitterDate: Wed, 16 Sep 2020 18:53:42 +03:00

cper,edac,efi: Memory Error Record: bank group/address and chip id

Updates to the UEFI 2.8 Memory Error Record allow splitting the bank field
into bank address and bank group, and using the last 3 bits of the extended
field as a chip identifier.

When needed, print correct version of bank field, bank group, and chip
identification.

Based on UEFI 2.8 Table 299. Memory Error Record.

Signed-off-by: Alex Kluver <[email protected]>
Reviewed-by: Russ Anderson <[email protected]>
Reviewed-by: Kyle Meyer <[email protected]>
Reviewed-by: Steve Wahl <[email protected]>
Acked-by: Borislav Petkov <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Ard Biesheuvel <[email protected]>
---
drivers/edac/ghes_edac.c | 9 +++++++++
drivers/firmware/efi/cper.c | 9 +++++++++
include/linux/cper.h | 8 ++++++++
3 files changed, 26 insertions(+)

diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 741e760..8a44f32 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -372,6 +372,12 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
p += sprintf(p, "rank:%d ", mem_err->rank);
if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
p += sprintf(p, "bank:%d ", mem_err->bank);
+ if (mem_err->validation_bits & CPER_MEM_VALID_BANK_GROUP)
+ p += sprintf(p, "bank_group:%d ",
+ mem_err->bank >> CPER_MEM_BANK_GROUP_SHIFT);
+ if (mem_err->validation_bits & CPER_MEM_VALID_BANK_ADDRESS)
+ p += sprintf(p, "bank_address:%d ",
+ mem_err->bank & CPER_MEM_BANK_ADDRESS_MASK);
if (mem_err->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
u32 row = mem_err->row;

@@ -399,6 +405,9 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
strcpy(e->label, dimm->label);
}
}
+ if (mem_err->validation_bits & CPER_MEM_VALID_CHIP_ID)
+ p += sprintf(p, "chipID: %d ",
+ mem_err->extended >> CPER_MEM_CHIP_ID_SHIFT);
if (p > e->location)
*(p - 1) = '\0';

diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index a60acd1..e15d484 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -232,6 +232,12 @@ static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
if (mem->validation_bits & CPER_MEM_VALID_BANK)
n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
+ if (mem->validation_bits & CPER_MEM_VALID_BANK_GROUP)
+ n += scnprintf(msg + n, len - n, "bank_group: %d ",
+ mem->bank >> CPER_MEM_BANK_GROUP_SHIFT);
+ if (mem->validation_bits & CPER_MEM_VALID_BANK_ADDRESS)
+ n += scnprintf(msg + n, len - n, "bank_address: %d ",
+ mem->bank & CPER_MEM_BANK_ADDRESS_MASK);
if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
if (mem->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
@@ -254,6 +260,9 @@ static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
mem->target_id);
+ if (mem->validation_bits & CPER_MEM_VALID_CHIP_ID)
+ scnprintf(msg + n, len - n, "chip_id: %d ",
+ mem->extended >> CPER_MEM_CHIP_ID_SHIFT);

msg[n] = '\0';
return n;
diff --git a/include/linux/cper.h b/include/linux/cper.h
index bd2d8a7..6a511a1 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -231,10 +231,18 @@ enum {
#define CPER_MEM_VALID_CARD_HANDLE 0x10000
#define CPER_MEM_VALID_MODULE_HANDLE 0x20000
#define CPER_MEM_VALID_ROW_EXT 0x40000
+#define CPER_MEM_VALID_BANK_GROUP 0x80000
+#define CPER_MEM_VALID_BANK_ADDRESS 0x100000
+#define CPER_MEM_VALID_CHIP_ID 0x200000

#define CPER_MEM_EXT_ROW_MASK 0x3
#define CPER_MEM_EXT_ROW_SHIFT 16

+#define CPER_MEM_BANK_ADDRESS_MASK 0xff
+#define CPER_MEM_BANK_GROUP_SHIFT 8
+
+#define CPER_MEM_CHIP_ID_SHIFT 5
+
#define CPER_PCIE_VALID_PORT_TYPE 0x0001
#define CPER_PCIE_VALID_VERSION 0x0002
#define CPER_PCIE_VALID_COMMAND_STATUS 0x0004

Subject: [tip: efi/core] edac,ghes,cper: Add Row Extension to Memory Error Record

The following commit has been merged into the efi/core branch of tip:

Commit-ID: 3c029b01da60906878870a00fbf2a98709f71f3f
Gitweb: https://git.kernel.org/tip/3c029b01da60906878870a00fbf2a98709f71f3f
Author: Alex Kluver <[email protected]>
AuthorDate: Wed, 19 Aug 2020 09:35:43 -05:00
Committer: Ard Biesheuvel <[email protected]>
CommitterDate: Wed, 16 Sep 2020 18:53:42 +03:00

edac,ghes,cper: Add Row Extension to Memory Error Record

Memory errors could be printed with incorrect row values since the DIMM
size has outgrown the 16-bit row field in the CPER structure. UEFI
Specification Version 2.8 has increased the size of the row field by
allowing it to use the low 2 bits of a previously reserved byte within
the structure.

When needed, add the extension bits to the row value printed.

Based on UEFI 2.8 Table 299. Memory Error Record

Signed-off-by: Alex Kluver <[email protected]>
Tested-by: Russ Anderson <[email protected]>
Reviewed-by: Steve Wahl <[email protected]>
Reviewed-by: Kyle Meyer <[email protected]>
Acked-by: Borislav Petkov <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Ard Biesheuvel <[email protected]>
---
drivers/edac/ghes_edac.c | 8 ++++++--
drivers/firmware/efi/cper.c | 9 +++++++--
include/linux/cper.h | 16 ++++++++++++++--
3 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index da60c29..741e760 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -372,8 +372,12 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
p += sprintf(p, "rank:%d ", mem_err->rank);
if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
p += sprintf(p, "bank:%d ", mem_err->bank);
- if (mem_err->validation_bits & CPER_MEM_VALID_ROW)
- p += sprintf(p, "row:%d ", mem_err->row);
+ if (mem_err->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
+ u32 row = mem_err->row;
+
+ row |= cper_get_mem_extension(mem_err->validation_bits, mem_err->extended);
+ p += sprintf(p, "row:%d ", row);
+ }
if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN)
p += sprintf(p, "col:%d ", mem_err->column);
if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION)
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index f564e15..a60acd1 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -234,8 +234,12 @@ static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
- if (mem->validation_bits & CPER_MEM_VALID_ROW)
- n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
+ if (mem->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
+ u32 row = mem->row;
+
+ row |= cper_get_mem_extension(mem->validation_bits, mem->extended);
+ n += scnprintf(msg + n, len - n, "row: %d ", row);
+ }
if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
@@ -292,6 +296,7 @@ void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
cmem->requestor_id = mem->requestor_id;
cmem->responder_id = mem->responder_id;
cmem->target_id = mem->target_id;
+ cmem->extended = mem->extended;
cmem->rank = mem->rank;
cmem->mem_array_handle = mem->mem_array_handle;
cmem->mem_dev_handle = mem->mem_dev_handle;
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 8537e92..bd2d8a7 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -230,6 +230,10 @@ enum {
#define CPER_MEM_VALID_RANK_NUMBER 0x8000
#define CPER_MEM_VALID_CARD_HANDLE 0x10000
#define CPER_MEM_VALID_MODULE_HANDLE 0x20000
+#define CPER_MEM_VALID_ROW_EXT 0x40000
+
+#define CPER_MEM_EXT_ROW_MASK 0x3
+#define CPER_MEM_EXT_ROW_SHIFT 16

#define CPER_PCIE_VALID_PORT_TYPE 0x0001
#define CPER_PCIE_VALID_VERSION 0x0002
@@ -443,7 +447,7 @@ struct cper_sec_mem_err_old {
u8 error_type;
};

-/* Memory Error Section (UEFI >= v2.3), UEFI v2.7 sec N.2.5 */
+/* Memory Error Section (UEFI >= v2.3), UEFI v2.8 sec N.2.5 */
struct cper_sec_mem_err {
u64 validation_bits;
u64 error_status;
@@ -461,7 +465,7 @@ struct cper_sec_mem_err {
u64 responder_id;
u64 target_id;
u8 error_type;
- u8 reserved;
+ u8 extended;
u16 rank;
u16 mem_array_handle; /* "card handle" in UEFI 2.4 */
u16 mem_dev_handle; /* "module handle" in UEFI 2.4 */
@@ -483,8 +487,16 @@ struct cper_mem_err_compact {
u16 rank;
u16 mem_array_handle;
u16 mem_dev_handle;
+ u8 extended;
};

+static inline u32 cper_get_mem_extension(u64 mem_valid, u8 mem_extended)
+{
+ if (!(mem_valid & CPER_MEM_VALID_ROW_EXT))
+ return 0;
+ return (mem_extended & CPER_MEM_EXT_ROW_MASK) << CPER_MEM_EXT_ROW_SHIFT;
+}
+
/* PCI Express Error Section, UEFI v2.7 sec N.2.7 */
struct cper_sec_pcie {
u64 validation_bits;

Subject: [tip: efi/core] edac,ghes,cper: Add Row Extension to Memory Error Record

The following commit has been merged into the efi/core branch of tip:

Commit-ID: 9baf68cc4544056f33797b78ec09388f54ecc8f0
Gitweb: https://git.kernel.org/tip/9baf68cc4544056f33797b78ec09388f54ecc8f0
Author: Alex Kluver <[email protected]>
AuthorDate: Wed, 19 Aug 2020 09:35:43 -05:00
Committer: Ard Biesheuvel <[email protected]>
CommitterDate: Thu, 17 Sep 2020 10:19:52 +03:00

edac,ghes,cper: Add Row Extension to Memory Error Record

Memory errors could be printed with incorrect row values since the DIMM
size has outgrown the 16-bit row field in the CPER structure. UEFI
Specification Version 2.8 has increased the size of the row field by
allowing it to use the low 2 bits of a previously reserved byte within
the structure.

When needed, add the extension bits to the row value printed.

Based on UEFI 2.8 Table 299. Memory Error Record

Signed-off-by: Alex Kluver <[email protected]>
Tested-by: Russ Anderson <[email protected]>
Reviewed-by: Steve Wahl <[email protected]>
Reviewed-by: Kyle Meyer <[email protected]>
Acked-by: Borislav Petkov <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Ard Biesheuvel <[email protected]>
---
drivers/edac/ghes_edac.c | 8 ++++++--
drivers/firmware/efi/cper.c | 9 +++++++--
include/linux/cper.h | 16 ++++++++++++++--
3 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index da60c29..741e760 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -372,8 +372,12 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
p += sprintf(p, "rank:%d ", mem_err->rank);
if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
p += sprintf(p, "bank:%d ", mem_err->bank);
- if (mem_err->validation_bits & CPER_MEM_VALID_ROW)
- p += sprintf(p, "row:%d ", mem_err->row);
+ if (mem_err->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
+ u32 row = mem_err->row;
+
+ row |= cper_get_mem_extension(mem_err->validation_bits, mem_err->extended);
+ p += sprintf(p, "row:%d ", row);
+ }
if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN)
p += sprintf(p, "col:%d ", mem_err->column);
if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION)
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index f564e15..a60acd1 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -234,8 +234,12 @@ static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
- if (mem->validation_bits & CPER_MEM_VALID_ROW)
- n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
+ if (mem->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
+ u32 row = mem->row;
+
+ row |= cper_get_mem_extension(mem->validation_bits, mem->extended);
+ n += scnprintf(msg + n, len - n, "row: %d ", row);
+ }
if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
@@ -292,6 +296,7 @@ void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
cmem->requestor_id = mem->requestor_id;
cmem->responder_id = mem->responder_id;
cmem->target_id = mem->target_id;
+ cmem->extended = mem->extended;
cmem->rank = mem->rank;
cmem->mem_array_handle = mem->mem_array_handle;
cmem->mem_dev_handle = mem->mem_dev_handle;
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 8537e92..bd2d8a7 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -230,6 +230,10 @@ enum {
#define CPER_MEM_VALID_RANK_NUMBER 0x8000
#define CPER_MEM_VALID_CARD_HANDLE 0x10000
#define CPER_MEM_VALID_MODULE_HANDLE 0x20000
+#define CPER_MEM_VALID_ROW_EXT 0x40000
+
+#define CPER_MEM_EXT_ROW_MASK 0x3
+#define CPER_MEM_EXT_ROW_SHIFT 16

#define CPER_PCIE_VALID_PORT_TYPE 0x0001
#define CPER_PCIE_VALID_VERSION 0x0002
@@ -443,7 +447,7 @@ struct cper_sec_mem_err_old {
u8 error_type;
};

-/* Memory Error Section (UEFI >= v2.3), UEFI v2.7 sec N.2.5 */
+/* Memory Error Section (UEFI >= v2.3), UEFI v2.8 sec N.2.5 */
struct cper_sec_mem_err {
u64 validation_bits;
u64 error_status;
@@ -461,7 +465,7 @@ struct cper_sec_mem_err {
u64 responder_id;
u64 target_id;
u8 error_type;
- u8 reserved;
+ u8 extended;
u16 rank;
u16 mem_array_handle; /* "card handle" in UEFI 2.4 */
u16 mem_dev_handle; /* "module handle" in UEFI 2.4 */
@@ -483,8 +487,16 @@ struct cper_mem_err_compact {
u16 rank;
u16 mem_array_handle;
u16 mem_dev_handle;
+ u8 extended;
};

+static inline u32 cper_get_mem_extension(u64 mem_valid, u8 mem_extended)
+{
+ if (!(mem_valid & CPER_MEM_VALID_ROW_EXT))
+ return 0;
+ return (mem_extended & CPER_MEM_EXT_ROW_MASK) << CPER_MEM_EXT_ROW_SHIFT;
+}
+
/* PCI Express Error Section, UEFI v2.7 sec N.2.7 */
struct cper_sec_pcie {
u64 validation_bits;

Subject: [tip: efi/core] cper,edac,efi: Memory Error Record: bank group/address and chip id

The following commit has been merged into the efi/core branch of tip:

Commit-ID: 612b5d506d066cdf0a739963e7cd28642d500ec1
Gitweb: https://git.kernel.org/tip/612b5d506d066cdf0a739963e7cd28642d500ec1
Author: Alex Kluver <[email protected]>
AuthorDate: Wed, 19 Aug 2020 09:35:44 -05:00
Committer: Ard Biesheuvel <[email protected]>
CommitterDate: Thu, 17 Sep 2020 10:19:52 +03:00

cper,edac,efi: Memory Error Record: bank group/address and chip id

Updates to the UEFI 2.8 Memory Error Record allow splitting the bank field
into bank address and bank group, and using the upper 3 bits of the extended
field as a chip identifier.

When needed, print the correct version of the bank field, the bank group,
and the chip identification.

Based on UEFI 2.8 Table 299. Memory Error Record.

Signed-off-by: Alex Kluver <[email protected]>
Reviewed-by: Russ Anderson <[email protected]>
Reviewed-by: Kyle Meyer <[email protected]>
Reviewed-by: Steve Wahl <[email protected]>
Acked-by: Borislav Petkov <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Ard Biesheuvel <[email protected]>
---
drivers/edac/ghes_edac.c | 9 +++++++++
drivers/firmware/efi/cper.c | 9 +++++++++
include/linux/cper.h | 8 ++++++++
3 files changed, 26 insertions(+)

diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 741e760..8a44f32 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -372,6 +372,12 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
p += sprintf(p, "rank:%d ", mem_err->rank);
if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
p += sprintf(p, "bank:%d ", mem_err->bank);
+ if (mem_err->validation_bits & CPER_MEM_VALID_BANK_GROUP)
+ p += sprintf(p, "bank_group:%d ",
+ mem_err->bank >> CPER_MEM_BANK_GROUP_SHIFT);
+ if (mem_err->validation_bits & CPER_MEM_VALID_BANK_ADDRESS)
+ p += sprintf(p, "bank_address:%d ",
+ mem_err->bank & CPER_MEM_BANK_ADDRESS_MASK);
if (mem_err->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
u32 row = mem_err->row;

@@ -399,6 +405,9 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
strcpy(e->label, dimm->label);
}
}
+ if (mem_err->validation_bits & CPER_MEM_VALID_CHIP_ID)
+ p += sprintf(p, "chipID: %d ",
+ mem_err->extended >> CPER_MEM_CHIP_ID_SHIFT);
if (p > e->location)
*(p - 1) = '\0';

diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index a60acd1..e15d484 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -232,6 +232,12 @@ static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
if (mem->validation_bits & CPER_MEM_VALID_BANK)
n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
+ if (mem->validation_bits & CPER_MEM_VALID_BANK_GROUP)
+ n += scnprintf(msg + n, len - n, "bank_group: %d ",
+ mem->bank >> CPER_MEM_BANK_GROUP_SHIFT);
+ if (mem->validation_bits & CPER_MEM_VALID_BANK_ADDRESS)
+ n += scnprintf(msg + n, len - n, "bank_address: %d ",
+ mem->bank & CPER_MEM_BANK_ADDRESS_MASK);
if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
if (mem->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
@@ -254,6 +260,9 @@ static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
mem->target_id);
+ if (mem->validation_bits & CPER_MEM_VALID_CHIP_ID)
+ scnprintf(msg + n, len - n, "chip_id: %d ",
+ mem->extended >> CPER_MEM_CHIP_ID_SHIFT);

msg[n] = '\0';
return n;
diff --git a/include/linux/cper.h b/include/linux/cper.h
index bd2d8a7..6a511a1 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -231,10 +231,18 @@ enum {
#define CPER_MEM_VALID_CARD_HANDLE 0x10000
#define CPER_MEM_VALID_MODULE_HANDLE 0x20000
#define CPER_MEM_VALID_ROW_EXT 0x40000
+#define CPER_MEM_VALID_BANK_GROUP 0x80000
+#define CPER_MEM_VALID_BANK_ADDRESS 0x100000
+#define CPER_MEM_VALID_CHIP_ID 0x200000

#define CPER_MEM_EXT_ROW_MASK 0x3
#define CPER_MEM_EXT_ROW_SHIFT 16

+#define CPER_MEM_BANK_ADDRESS_MASK 0xff
+#define CPER_MEM_BANK_GROUP_SHIFT 8
+
+#define CPER_MEM_CHIP_ID_SHIFT 5
+
#define CPER_PCIE_VALID_PORT_TYPE 0x0001
#define CPER_PCIE_VALID_VERSION 0x0002
#define CPER_PCIE_VALID_COMMAND_STATUS 0x0004