2020-12-15 11:09:27

by Borislav Petkov

[permalink] [raw]
Subject: [PATCH 2/2] EDAC/amd64: Merge error injection sysfs facilities

From: Borislav Petkov <[email protected]>

Merge them into the main driver and put them inside an EDAC_DEBUG
ifdeffery to simplify the driver and have all debugging/injection stuff
behind a debug build-time switch.

No functional changes.

Signed-off-by: Borislav Petkov <[email protected]>
---
drivers/edac/Kconfig | 7 +-
drivers/edac/Makefile | 6 +-
drivers/edac/amd64_edac.c | 237 +++++++++++++++++++++++++++++++++-
drivers/edac/amd64_edac.h | 8 --
drivers/edac/amd64_edac_inj.c | 235 ---------------------------------
5 files changed, 236 insertions(+), 257 deletions(-)
delete mode 100644 drivers/edac/amd64_edac_inj.c

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 7a47680d6f07..9c2e719cb86a 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -81,10 +81,9 @@ config EDAC_AMD64
Support for error detection and correction of DRAM ECC errors on
the AMD64 families (>= K8) of memory controllers.

-config EDAC_AMD64_ERROR_INJECTION
- bool "Sysfs HW Error injection facilities"
- depends on EDAC_AMD64
- help
+ When EDAC_DEBUG is enabled, hardware error injection facilities
+ through sysfs are available:
+
Recent Opterons (Family 10h and later) provide for Memory Error
Injection into the ECC detection circuits. The amd64_edac module
allows the operator/user to inject Uncorrectable and Correctable
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 195e851652b6..b8133cd32059 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -43,11 +43,7 @@ obj-$(CONFIG_EDAC_IE31200) += ie31200_edac.o
obj-$(CONFIG_EDAC_X38) += x38_edac.o
obj-$(CONFIG_EDAC_I82860) += i82860_edac.o
obj-$(CONFIG_EDAC_R82600) += r82600_edac.o
-
-amd64_edac_mod-y := amd64_edac.o
-amd64_edac_mod-$(CONFIG_EDAC_AMD64_ERROR_INJECTION) += amd64_edac_inj.o
-
-obj-$(CONFIG_EDAC_AMD64) += amd64_edac_mod.o
+obj-$(CONFIG_EDAC_AMD64) += amd64_edac.o

obj-$(CONFIG_EDAC_PASEMI) += pasemi_edac.o

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index b793ccd6c6bd..f5de5857a84e 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -601,11 +601,240 @@ static struct attribute *dbg_attrs[] = {
NULL
};

-const struct attribute_group dbg_group = {
+static const struct attribute_group dbg_group = {
.attrs = dbg_attrs,
};
-#endif /* CONFIG_EDAC_DEBUG */

+static ssize_t inject_section_show(struct device *dev,
+ struct device_attribute *mattr, char *buf)
+{
+ struct mem_ctl_info *mci = to_mci(dev);
+ struct amd64_pvt *pvt = mci->pvt_info;
+ return sprintf(buf, "0x%x\n", pvt->injection.section);
+}
+
+/*
+ * store error injection section value which refers to one of 4 16-byte sections
+ * within a 64-byte cacheline
+ *
+ * range: 0..3
+ */
+static ssize_t inject_section_store(struct device *dev,
+ struct device_attribute *mattr,
+ const char *data, size_t count)
+{
+ struct mem_ctl_info *mci = to_mci(dev);
+ struct amd64_pvt *pvt = mci->pvt_info;
+ unsigned long value;
+ int ret;
+
+ ret = kstrtoul(data, 10, &value);
+ if (ret < 0)
+ return ret;
+
+ if (value > 3) {
+ amd64_warn("%s: invalid section 0x%lx\n", __func__, value);
+ return -EINVAL;
+ }
+
+ pvt->injection.section = (u32) value;
+ return count;
+}
+
+static ssize_t inject_word_show(struct device *dev,
+ struct device_attribute *mattr, char *buf)
+{
+ struct mem_ctl_info *mci = to_mci(dev);
+ struct amd64_pvt *pvt = mci->pvt_info;
+ return sprintf(buf, "0x%x\n", pvt->injection.word);
+}
+
+/*
+ * store error injection word value which refers to one of 9 16-bit word of the
+ * 16-byte (128-bit + ECC bits) section
+ *
+ * range: 0..8
+ */
+static ssize_t inject_word_store(struct device *dev,
+ struct device_attribute *mattr,
+ const char *data, size_t count)
+{
+ struct mem_ctl_info *mci = to_mci(dev);
+ struct amd64_pvt *pvt = mci->pvt_info;
+ unsigned long value;
+ int ret;
+
+ ret = kstrtoul(data, 10, &value);
+ if (ret < 0)
+ return ret;
+
+ if (value > 8) {
+ amd64_warn("%s: invalid word 0x%lx\n", __func__, value);
+ return -EINVAL;
+ }
+
+ pvt->injection.word = (u32) value;
+ return count;
+}
+
+static ssize_t inject_ecc_vector_show(struct device *dev,
+ struct device_attribute *mattr,
+ char *buf)
+{
+ struct mem_ctl_info *mci = to_mci(dev);
+ struct amd64_pvt *pvt = mci->pvt_info;
+ return sprintf(buf, "0x%x\n", pvt->injection.bit_map);
+}
+
+/*
+ * store 16 bit error injection vector which enables injecting errors to the
+ * corresponding bit within the error injection word above. When used during a
+ * DRAM ECC read, it holds the contents of the of the DRAM ECC bits.
+ */
+static ssize_t inject_ecc_vector_store(struct device *dev,
+ struct device_attribute *mattr,
+ const char *data, size_t count)
+{
+ struct mem_ctl_info *mci = to_mci(dev);
+ struct amd64_pvt *pvt = mci->pvt_info;
+ unsigned long value;
+ int ret;
+
+ ret = kstrtoul(data, 16, &value);
+ if (ret < 0)
+ return ret;
+
+ if (value & 0xFFFF0000) {
+ amd64_warn("%s: invalid EccVector: 0x%lx\n", __func__, value);
+ return -EINVAL;
+ }
+
+ pvt->injection.bit_map = (u32) value;
+ return count;
+}
+
+/*
+ * Do a DRAM ECC read. Assemble staged values in the pvt area, format into
+ * fields needed by the injection registers and read the NB Array Data Port.
+ */
+static ssize_t inject_read_store(struct device *dev,
+ struct device_attribute *mattr,
+ const char *data, size_t count)
+{
+ struct mem_ctl_info *mci = to_mci(dev);
+ struct amd64_pvt *pvt = mci->pvt_info;
+ unsigned long value;
+ u32 section, word_bits;
+ int ret;
+
+ ret = kstrtoul(data, 10, &value);
+ if (ret < 0)
+ return ret;
+
+ /* Form value to choose 16-byte section of cacheline */
+ section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
+
+ amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
+
+ word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection);
+
+ /* Issue 'word' and 'bit' along with the READ request */
+ amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
+
+ edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
+
+ return count;
+}
+
+/*
+ * Do a DRAM ECC write. Assemble staged values in the pvt area and format into
+ * fields needed by the injection registers.
+ */
+static ssize_t inject_write_store(struct device *dev,
+ struct device_attribute *mattr,
+ const char *data, size_t count)
+{
+ struct mem_ctl_info *mci = to_mci(dev);
+ struct amd64_pvt *pvt = mci->pvt_info;
+ u32 section, word_bits, tmp;
+ unsigned long value;
+ int ret;
+
+ ret = kstrtoul(data, 10, &value);
+ if (ret < 0)
+ return ret;
+
+ /* Form value to choose 16-byte section of cacheline */
+ section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
+
+ amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
+
+ word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection);
+
+ pr_notice_once("Don't forget to decrease MCE polling interval in\n"
+ "/sys/bus/machinecheck/devices/machinecheck<CPUNUM>/check_interval\n"
+ "so that you can get the error report faster.\n");
+
+ on_each_cpu(disable_caches, NULL, 1);
+
+ /* Issue 'word' and 'bit' along with the READ request */
+ amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
+
+ retry:
+ /* wait until injection happens */
+ amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp);
+ if (tmp & F10_NB_ARR_ECC_WR_REQ) {
+ cpu_relax();
+ goto retry;
+ }
+
+ on_each_cpu(enable_caches, NULL, 1);
+
+ edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
+
+ return count;
+}
+
+/*
+ * update NUM_INJ_ATTRS in case you add new members
+ */
+
+static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR,
+ inject_section_show, inject_section_store);
+static DEVICE_ATTR(inject_word, S_IRUGO | S_IWUSR,
+ inject_word_show, inject_word_store);
+static DEVICE_ATTR(inject_ecc_vector, S_IRUGO | S_IWUSR,
+ inject_ecc_vector_show, inject_ecc_vector_store);
+static DEVICE_ATTR(inject_write, S_IWUSR,
+ NULL, inject_write_store);
+static DEVICE_ATTR(inject_read, S_IWUSR,
+ NULL, inject_read_store);
+
+static struct attribute *inj_attrs[] = {
+ &dev_attr_inject_section.attr,
+ &dev_attr_inject_word.attr,
+ &dev_attr_inject_ecc_vector.attr,
+ &dev_attr_inject_write.attr,
+ &dev_attr_inject_read.attr,
+ NULL
+};
+
+static umode_t inj_is_visible(struct kobject *kobj, struct attribute *attr, int idx)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
+ struct amd64_pvt *pvt = mci->pvt_info;
+
+ if (pvt->fam < 0x10)
+ return 0;
+ return attr->mode;
+}
+
+static const struct attribute_group inj_group = {
+ .attrs = inj_attrs,
+ .is_visible = inj_is_visible,
+};
+#endif /* CONFIG_EDAC_DEBUG */

/*
* Return the DramAddr that the SysAddr given by @sys_addr maps to. It is
@@ -3465,9 +3694,7 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
static const struct attribute_group *amd64_edac_attr_groups[] = {
#ifdef CONFIG_EDAC_DEBUG
&dbg_group,
-#endif
-#ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION
- &amd64_edac_inj_group,
+ &inj_group,
#endif
NULL
};
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 7c9f8c0b46d7..85aa820bc165 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -462,14 +462,6 @@ struct ecc_settings {
} flags;
};

-#ifdef CONFIG_EDAC_DEBUG
-extern const struct attribute_group amd64_edac_dbg_group;
-#endif
-
-#ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION
-extern const struct attribute_group amd64_edac_inj_group;
-#endif
-
/*
* Each of the PCI Device IDs types have their own set of hardware accessor
* functions and per device encoding/decoding logic.
diff --git a/drivers/edac/amd64_edac_inj.c b/drivers/edac/amd64_edac_inj.c
deleted file mode 100644
index d96d6116f0fb..000000000000
--- a/drivers/edac/amd64_edac_inj.c
+++ /dev/null
@@ -1,235 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include "amd64_edac.h"
-
-static ssize_t amd64_inject_section_show(struct device *dev,
- struct device_attribute *mattr,
- char *buf)
-{
- struct mem_ctl_info *mci = to_mci(dev);
- struct amd64_pvt *pvt = mci->pvt_info;
- return sprintf(buf, "0x%x\n", pvt->injection.section);
-}
-
-/*
- * store error injection section value which refers to one of 4 16-byte sections
- * within a 64-byte cacheline
- *
- * range: 0..3
- */
-static ssize_t amd64_inject_section_store(struct device *dev,
- struct device_attribute *mattr,
- const char *data, size_t count)
-{
- struct mem_ctl_info *mci = to_mci(dev);
- struct amd64_pvt *pvt = mci->pvt_info;
- unsigned long value;
- int ret;
-
- ret = kstrtoul(data, 10, &value);
- if (ret < 0)
- return ret;
-
- if (value > 3) {
- amd64_warn("%s: invalid section 0x%lx\n", __func__, value);
- return -EINVAL;
- }
-
- pvt->injection.section = (u32) value;
- return count;
-}
-
-static ssize_t amd64_inject_word_show(struct device *dev,
- struct device_attribute *mattr,
- char *buf)
-{
- struct mem_ctl_info *mci = to_mci(dev);
- struct amd64_pvt *pvt = mci->pvt_info;
- return sprintf(buf, "0x%x\n", pvt->injection.word);
-}
-
-/*
- * store error injection word value which refers to one of 9 16-bit word of the
- * 16-byte (128-bit + ECC bits) section
- *
- * range: 0..8
- */
-static ssize_t amd64_inject_word_store(struct device *dev,
- struct device_attribute *mattr,
- const char *data, size_t count)
-{
- struct mem_ctl_info *mci = to_mci(dev);
- struct amd64_pvt *pvt = mci->pvt_info;
- unsigned long value;
- int ret;
-
- ret = kstrtoul(data, 10, &value);
- if (ret < 0)
- return ret;
-
- if (value > 8) {
- amd64_warn("%s: invalid word 0x%lx\n", __func__, value);
- return -EINVAL;
- }
-
- pvt->injection.word = (u32) value;
- return count;
-}
-
-static ssize_t amd64_inject_ecc_vector_show(struct device *dev,
- struct device_attribute *mattr,
- char *buf)
-{
- struct mem_ctl_info *mci = to_mci(dev);
- struct amd64_pvt *pvt = mci->pvt_info;
- return sprintf(buf, "0x%x\n", pvt->injection.bit_map);
-}
-
-/*
- * store 16 bit error injection vector which enables injecting errors to the
- * corresponding bit within the error injection word above. When used during a
- * DRAM ECC read, it holds the contents of the of the DRAM ECC bits.
- */
-static ssize_t amd64_inject_ecc_vector_store(struct device *dev,
- struct device_attribute *mattr,
- const char *data, size_t count)
-{
- struct mem_ctl_info *mci = to_mci(dev);
- struct amd64_pvt *pvt = mci->pvt_info;
- unsigned long value;
- int ret;
-
- ret = kstrtoul(data, 16, &value);
- if (ret < 0)
- return ret;
-
- if (value & 0xFFFF0000) {
- amd64_warn("%s: invalid EccVector: 0x%lx\n", __func__, value);
- return -EINVAL;
- }
-
- pvt->injection.bit_map = (u32) value;
- return count;
-}
-
-/*
- * Do a DRAM ECC read. Assemble staged values in the pvt area, format into
- * fields needed by the injection registers and read the NB Array Data Port.
- */
-static ssize_t amd64_inject_read_store(struct device *dev,
- struct device_attribute *mattr,
- const char *data, size_t count)
-{
- struct mem_ctl_info *mci = to_mci(dev);
- struct amd64_pvt *pvt = mci->pvt_info;
- unsigned long value;
- u32 section, word_bits;
- int ret;
-
- ret = kstrtoul(data, 10, &value);
- if (ret < 0)
- return ret;
-
- /* Form value to choose 16-byte section of cacheline */
- section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
-
- amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
-
- word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection);
-
- /* Issue 'word' and 'bit' along with the READ request */
- amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
-
- edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
-
- return count;
-}
-
-/*
- * Do a DRAM ECC write. Assemble staged values in the pvt area and format into
- * fields needed by the injection registers.
- */
-static ssize_t amd64_inject_write_store(struct device *dev,
- struct device_attribute *mattr,
- const char *data, size_t count)
-{
- struct mem_ctl_info *mci = to_mci(dev);
- struct amd64_pvt *pvt = mci->pvt_info;
- u32 section, word_bits, tmp;
- unsigned long value;
- int ret;
-
- ret = kstrtoul(data, 10, &value);
- if (ret < 0)
- return ret;
-
- /* Form value to choose 16-byte section of cacheline */
- section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
-
- amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
-
- word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection);
-
- pr_notice_once("Don't forget to decrease MCE polling interval in\n"
- "/sys/bus/machinecheck/devices/machinecheck<CPUNUM>/check_interval\n"
- "so that you can get the error report faster.\n");
-
- on_each_cpu(disable_caches, NULL, 1);
-
- /* Issue 'word' and 'bit' along with the READ request */
- amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
-
- retry:
- /* wait until injection happens */
- amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp);
- if (tmp & F10_NB_ARR_ECC_WR_REQ) {
- cpu_relax();
- goto retry;
- }
-
- on_each_cpu(enable_caches, NULL, 1);
-
- edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
-
- return count;
-}
-
-/*
- * update NUM_INJ_ATTRS in case you add new members
- */
-
-static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR,
- amd64_inject_section_show, amd64_inject_section_store);
-static DEVICE_ATTR(inject_word, S_IRUGO | S_IWUSR,
- amd64_inject_word_show, amd64_inject_word_store);
-static DEVICE_ATTR(inject_ecc_vector, S_IRUGO | S_IWUSR,
- amd64_inject_ecc_vector_show, amd64_inject_ecc_vector_store);
-static DEVICE_ATTR(inject_write, S_IWUSR,
- NULL, amd64_inject_write_store);
-static DEVICE_ATTR(inject_read, S_IWUSR,
- NULL, amd64_inject_read_store);
-
-static struct attribute *amd64_edac_inj_attrs[] = {
- &dev_attr_inject_section.attr,
- &dev_attr_inject_word.attr,
- &dev_attr_inject_ecc_vector.attr,
- &dev_attr_inject_write.attr,
- &dev_attr_inject_read.attr,
- NULL
-};
-
-static umode_t amd64_edac_inj_is_visible(struct kobject *kobj,
- struct attribute *attr, int idx)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
- struct amd64_pvt *pvt = mci->pvt_info;
-
- if (pvt->fam < 0x10)
- return 0;
- return attr->mode;
-}
-
-const struct attribute_group amd64_edac_inj_group = {
- .attrs = amd64_edac_inj_attrs,
- .is_visible = amd64_edac_inj_is_visible,
-};
--
2.29.2


2020-12-15 16:15:04

by Yazen Ghannam

[permalink] [raw]
Subject: Re: [PATCH 2/2] EDAC/amd64: Merge error injection sysfs facilities

On Tue, Dec 15, 2020 at 12:05:17PM +0100, Borislav Petkov wrote:
> From: Borislav Petkov <[email protected]>
>
> Merge them into the main driver and put them inside an EDAC_DEBUG
> ifdeffery to simplify the driver and have all debugging/injection stuff
> behind a debug build-time switch.
>
> No functional changes.
>
> Signed-off-by: Borislav Petkov <[email protected]>
> ---
> drivers/edac/Kconfig | 7 +-
> drivers/edac/Makefile | 6 +-
> drivers/edac/amd64_edac.c | 237 +++++++++++++++++++++++++++++++++-
> drivers/edac/amd64_edac.h | 8 --
> drivers/edac/amd64_edac_inj.c | 235 ---------------------------------
> 5 files changed, 236 insertions(+), 257 deletions(-)
> delete mode 100644 drivers/edac/amd64_edac_inj.c
>
> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
> index 7a47680d6f07..9c2e719cb86a 100644
> --- a/drivers/edac/Kconfig
> +++ b/drivers/edac/Kconfig
> @@ -81,10 +81,9 @@ config EDAC_AMD64
> Support for error detection and correction of DRAM ECC errors on
> the AMD64 families (>= K8) of memory controllers.
>
> -config EDAC_AMD64_ERROR_INJECTION
> - bool "Sysfs HW Error injection facilities"
> - depends on EDAC_AMD64
> - help
> + When EDAC_DEBUG is enabled, hardware error injection facilities
> + through sysfs are available:
> +
> Recent Opterons (Family 10h and later) provide for Memory Error

Can we say "Opterons (Family 10h to Family 15h)"? It may also apply to
Family 16h, but I don't know if they were branded as Opterons.

The injection code in this module doesn't apply to Family 17h and later.

Also, Family 17h and later doesn't allow the OS direct access to the error
injection registers. They're locked down by security policy, etc.

> Injection into the ECC detection circuits. The amd64_edac module
> allows the operator/user to inject Uncorrectable and Correctable

...

> +
> +static umode_t inj_is_visible(struct kobject *kobj, struct attribute *attr, int idx)
> +{
> + struct device *dev = kobj_to_dev(kobj);
> + struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
> + struct amd64_pvt *pvt = mci->pvt_info;
> +
> + if (pvt->fam < 0x10)

Related to the comment above, can this be changed to the following?

if (pvt->fam < 0x10 || pvt->fam >= 0x17)

> + return 0;
> + return attr->mode;
> +}
> +

Everything else looks good to me.

Reviewed-by: Yazen Ghannam <[email protected]>

Thanks,
Yazen

2020-12-15 19:02:15

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 2/2] EDAC/amd64: Merge error injection sysfs facilities

On Tue, Dec 15, 2020 at 10:11:20AM -0600, Yazen Ghannam wrote:
> Can we say "Opterons (Family 10h to Family 15h)"? It may also apply to
> Family 16h, but I don't know if they were branded as Opterons.
>
> The injection code in this module doesn't apply to Family 17h and later.
>
> Also, Family 17h and later doesn't allow the OS direct access to the error
> injection registers. They're locked down by security policy, etc.

Yeah, figured as much after I started getting all 0s while poking at
them with setpci...

Ok, I'll fix that ontop - this patch should be only code movement and
trivial cleanups, functionality changes ontop.

> Related to the comment above, can this be changed to the following?
>
> if (pvt->fam < 0x10 || pvt->fam >= 0x17)

Right.

> Everything else looks good to me.
>
> Reviewed-by: Yazen Ghannam <[email protected]>

Thx.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2020-12-22 18:03:19

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 2/2] EDAC/amd64: Merge error injection sysfs facilities

On Tue, Dec 15, 2020 at 10:11:20AM -0600, Yazen Ghannam wrote:
> Related to the comment above, can this be changed to the following?
>
> if (pvt->fam < 0x10 || pvt->fam >= 0x17)

I made that a "positive" list so that it is explicit which do support
it out of the box:

---
From: Borislav Petkov <[email protected]>
Date: Tue, 22 Dec 2020 18:55:06 +0100
Subject: [PATCH] EDAC/amd64: Limit error injection functionality to supported hw

Families up to and including 0x16 allow access to the injection
hardware. Starting with family 0x17, access to those registers is
blocked by security policy.

Limit that only on the families which support it.

Suggested-by: Yazen Ghannam <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
---
drivers/edac/Kconfig | 8 ++++----
drivers/edac/amd64_edac.c | 8 +++++---
2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 47953b06d6c8..27d0c4cdc58d 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -84,10 +84,10 @@ config EDAC_AMD64
When EDAC_DEBUG is enabled, hardware error injection facilities
through sysfs are available:

- Recent Opterons (Family 10h and later) provide for Memory Error
- Injection into the ECC detection circuits. The amd64_edac module
- allows the operator/user to inject Uncorrectable and Correctable
- errors into DRAM.
+ AMD CPUs up to and excluding family 0x17 provide for Memory
+ Error Injection into the ECC detection circuits. The amd64_edac
+ module allows the operator/user to inject Uncorrectable and
+ Correctable errors into DRAM.

When enabled, in each of the respective memory controller directories
(/sys/devices/system/edac/mc/mcX), there are 3 input files:
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index d55f8ef2240c..9868f95a5622 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -828,9 +828,11 @@ static umode_t inj_is_visible(struct kobject *kobj, struct attribute *attr, int
struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
struct amd64_pvt *pvt = mci->pvt_info;

- if (pvt->fam < 0x10)
- return 0;
- return attr->mode;
+ /* Families which have that injection hw */
+ if (pvt->fam >= 0x10 && pvt->fam <= 0x16)
+ return attr->mode;
+
+ return 0;
}

static const struct attribute_group inj_group = {
--
2.29.2

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette