2013-04-15 15:19:26

by Aravind Gopalakrishnan

[permalink] [raw]
Subject: [PATCH] edac: Handle EDAC ECC errors for Family 16h

Add code to handle ECC decoding for fam16h. Support exists for
previous families already, so code has been reused werever applicable
and some code has been added to handle fam16h specific operations.

The patch was tested on Fam16h with ECC turned on
using the mce_amd_inj facility and works fine.

Signed-off-by: Aravind Gopalakrishnan <[email protected]>
---
arch/x86/kernel/amd_nb.c | 4 +-
drivers/edac/amd64_edac.c | 96 +++++++++++++++++++++++++++++++++++++++++++--
drivers/edac/amd64_edac.h | 12 +++++-
include/linux/pci_ids.h | 2 +
4 files changed, 109 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 4c39baa..9df1034 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -16,12 +16,14 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
{}
};
EXPORT_SYMBOL(amd_nb_misc_ids);

static struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
{}
};

@@ -79,7 +81,7 @@ int amd_cache_northbridges(void)

/* some CPU families (e.g. family 0x11) do not support GART */
if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
- boot_cpu_data.x86 == 0x15)
+ boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16)
amd_northbridges.flags |= AMD_NB_GART;

/*
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 9a8bebc..f43b608 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -98,6 +98,7 @@ int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
*
* F15h: we select which DCT we access using F1x10C[DctCfgSel]
*
+ * F16h has only 1 DCT
*/
static int k8_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
const char *func)
@@ -133,6 +134,15 @@ static int f15_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
}

+static int f16_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
+ const char *func)
+{
+ if (addr >= 0x100)
+ return -EINVAL;
+
+ return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
+}
+
/*
* Memory scrubber control interface. For K8, memory scrubbing is handled by
* hardware and can involve L2 cache, dcache as well as the main memory. With
@@ -320,13 +330,48 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
u64 csbase, csmask, base_bits, mask_bits;
u8 addr_shift;

+ /* variables specific to F16h */
+ u64 base_bits_f16_low, base_bits_f16_high;
+ u64 mask_bits_f16_low, mask_bits_f16_high;
+ u8 addr_shift_f16_low, addr_shift_f16_high;
+
if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) {
csbase = pvt->csels[dct].csbases[csrow];
csmask = pvt->csels[dct].csmasks[csrow];
base_bits = GENMASK(21, 31) | GENMASK(9, 15);
mask_bits = GENMASK(21, 29) | GENMASK(9, 15);
addr_shift = 4;
- } else {
+ }
+
+ /*
+ * there are two addr_shift values for F16h.
+ * addr_shift of 8 for high bits and addr_shift of 6
+ * for low bits. So handle it differently.
+ */
+ else if (boot_cpu_data.x86 == 0x16) {
+ csbase = pvt->csels[dct].csbases[csrow];
+ csmask = pvt->csels[dct].csmasks[csrow >> 1];
+ base_bits_f16_low = mask_bits_f16_low = GENMASK(5 , 15);
+ base_bits_f16_high = mask_bits_f16_high = GENMASK(19 , 30);
+ addr_shift_f16_low = 6;
+ addr_shift_f16_high = 8;
+ *base = (((csbase & base_bits_f16_high)
+ << addr_shift_f16_high) |
+ ((csbase & base_bits_f16_low)
+ << addr_shift_f16_low));
+ *mask = ~0ULL;
+ *mask &= ~((mask_bits_f16_high
+ << addr_shift_f16_high) |
+ (mask_bits_f16_low
+ << addr_shift_f16_low));
+ *mask |= (((csmask & mask_bits_f16_high)
+ << addr_shift_f16_high) |
+ ((csmask & mask_bits_f16_low)
+ << addr_shift_f16_low));
+ return;
+ }
+
+ else {
csbase = pvt->csels[dct].csbases[csrow];
csmask = pvt->csels[dct].csmasks[csrow >> 1];
addr_shift = 8;
@@ -1224,6 +1269,23 @@ static int f15_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
return ddr3_cs_size(cs_mode, false);
}

+/*
+ * F16h supports 64 bit DCT interfaces
+ * and has only limited cs_modes
+ */
+static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
+ unsigned cs_mode)
+{
+ WARN_ON(cs_mode > 12);
+
+ if (cs_mode == 0 || cs_mode == 3 || cs_mode == 4
+ || cs_mode == 6 || cs_mode == 8 || cs_mode == 9
+ || cs_mode == 12)
+ return -1;
+ else
+ return ddr3_cs_size(cs_mode, false);
+}
+
static void read_dram_ctl_register(struct amd64_pvt *pvt)
{

@@ -1681,6 +1743,17 @@ static struct amd64_family_type amd64_family_types[] = {
.read_dct_pci_cfg = f15_read_dct_pci_cfg,
}
},
+ [F16_CPUS] = {
+ .ctl_name = "F16h",
+ .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
+ .f3_id = PCI_DEVICE_ID_AMD_16H_NB_F3,
+ .ops = {
+ .early_channel_count = f1x_early_channel_count,
+ .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
+ .dbam_to_cs = f16_dbam_to_chip_select,
+ .read_dct_pci_cfg = f16_read_dct_pci_cfg,
+ }
+ },
};

static struct pci_dev *pci_get_related_function(unsigned int vendor,
@@ -2071,8 +2144,12 @@ static void read_mc_regs(struct amd64_pvt *pvt)
pvt->ecc_sym_sz = 4;

if (c->x86 >= 0x10) {
- amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
- amd64_read_dct_pci_cfg(pvt, DBAM1, &pvt->dbam1);
+
+ /* F16h has only one DCT, so don't take the branch if f16h */
+ if (c->x86 != 0x16) {
+ amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
+ amd64_read_dct_pci_cfg(pvt, DBAM1, &pvt->dbam1);
+ }

/* F10h, revD and later can do x8 ECC too */
if ((c->x86 > 0x10 || c->x86_model > 7) && tmp & BIT(25))
@@ -2483,6 +2560,11 @@ static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt)
pvt->ops = &amd64_family_types[F15_CPUS].ops;
break;

+ case 0x16:
+ fam_type = &amd64_family_types[F16_CPUS];
+ pvt->ops = &amd64_family_types[F16_CPUS].ops;
+ break;
+
default:
amd64_err("Unsupported family!\n");
return NULL;
@@ -2695,6 +2777,14 @@ static const struct pci_device_id amd64_pci_table[] __devinitdata = {
.class = 0,
.class_mask = 0,
},
+ {
+ .vendor = PCI_VENDOR_ID_AMD,
+ .device = PCI_DEVICE_ID_AMD_16H_NB_F2,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .class = 0,
+ .class_mask = 0,
+ },

{0, }
};
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 9a666cb..36e8c6b 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -36,6 +36,10 @@
* Changes/Fixes by Borislav Petkov <[email protected]>:
* - misc fixes and code cleanups
*
+ * Changes by Aravind Gopalakrishnan <[email protected]>
+ * - family 16h support added. New PCI Device IDs and some code
+ * to perform fam 16h specific ops.
+ *
* This module is based on the following documents
* (available from http://www.amd.com/):
*
@@ -172,7 +176,12 @@
*/
#define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601
#define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602
-
+#define PCI_DEVICE_ID_AMD_16H_NB_F0 0x1530
+#define PCI_DEVICE_ID_AMD_16H_NB_F1 0x1531
+#define PCI_DEVICE_ID_AMD_16H_NB_F2 0x1532
+#define PCI_DEVICE_ID_AMD_16H_NB_F3 0x1533
+#define PCI_DEVICE_ID_AMD_16H_NB_F4 0x1534
+#define PCI_DEVICE_ID_AMD_16H_NB_F5 0x1535

/*
* Function 1 - Address Map
@@ -300,6 +309,7 @@ enum amd_families {
K8_CPUS = 0,
F10_CPUS,
F15_CPUS,
+ F16_CPUS,
NUM_FAMILIES,
};

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 1679ff6..59f732f 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -519,6 +519,8 @@
#define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304
#define PCI_DEVICE_ID_AMD_15H_NB_F3 0x1603
#define PCI_DEVICE_ID_AMD_15H_NB_F4 0x1604
+#define PCI_DEVICE_ID_AMD_16H_NB_F3 0x1533
+#define PCI_DEVICE_ID_AMD_16H_NB_F4 0x1534
#define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703
#define PCI_DEVICE_ID_AMD_LANCE 0x2000
#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
--
1.7.10.4


2013-04-15 15:56:32

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH] edac: Handle EDAC ECC errors for Family 16h

On Mon, Apr 15, 2013 at 9:17 AM, Aravind Gopalakrishnan
<[email protected]> wrote:
> Add code to handle ECC decoding for fam16h. Support exists for
> previous families already, so code has been reused werever applicable
> and some code has been added to handle fam16h specific operations.
>
> The patch was tested on Fam16h with ECC turned on
> using the mce_amd_inj facility and works fine.
>
> Signed-off-by: Aravind Gopalakrishnan <[email protected]>
> ---
> arch/x86/kernel/amd_nb.c | 4 +-
> drivers/edac/amd64_edac.c | 96 +++++++++++++++++++++++++++++++++++++++++++--
> drivers/edac/amd64_edac.h | 12 +++++-
> include/linux/pci_ids.h | 2 +
> 4 files changed, 109 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
> index 4c39baa..9df1034 100644
> --- a/arch/x86/kernel/amd_nb.c
> +++ b/arch/x86/kernel/amd_nb.c
> @@ -16,12 +16,14 @@ const struct pci_device_id amd_nb_misc_ids[] = {
> { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
> { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
> { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
> + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
> {}
> };
> EXPORT_SYMBOL(amd_nb_misc_ids);
>
> static struct pci_device_id amd_nb_link_ids[] = {
> { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
> + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
> {}
> };
>
> @@ -79,7 +81,7 @@ int amd_cache_northbridges(void)
>
> /* some CPU families (e.g. family 0x11) do not support GART */
> if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
> - boot_cpu_data.x86 == 0x15)
> + boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16)
> amd_northbridges.flags |= AMD_NB_GART;
>
> /*
> diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
> index 9a8bebc..f43b608 100644
> --- a/drivers/edac/amd64_edac.c
> +++ b/drivers/edac/amd64_edac.c
> @@ -98,6 +98,7 @@ int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
> *
> * F15h: we select which DCT we access using F1x10C[DctCfgSel]
> *
> + * F16h has only 1 DCT
> */
> static int k8_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
> const char *func)
> @@ -133,6 +134,15 @@ static int f15_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
> return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
> }
>
> +static int f16_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
> + const char *func)
> +{
> + if (addr >= 0x100)
> + return -EINVAL;
> +
> + return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
> +}
> +
> /*
> * Memory scrubber control interface. For K8, memory scrubbing is handled by
> * hardware and can involve L2 cache, dcache as well as the main memory. With
> @@ -320,13 +330,48 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
> u64 csbase, csmask, base_bits, mask_bits;
> u8 addr_shift;
>
> + /* variables specific to F16h */
> + u64 base_bits_f16_low, base_bits_f16_high;
> + u64 mask_bits_f16_low, mask_bits_f16_high;
> + u8 addr_shift_f16_low, addr_shift_f16_high;
> +
> if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) {
> csbase = pvt->csels[dct].csbases[csrow];
> csmask = pvt->csels[dct].csmasks[csrow];
> base_bits = GENMASK(21, 31) | GENMASK(9, 15);
> mask_bits = GENMASK(21, 29) | GENMASK(9, 15);
> addr_shift = 4;
> - } else {
> + }
> +
> + /*
> + * there are two addr_shift values for F16h.
> + * addr_shift of 8 for high bits and addr_shift of 6
> + * for low bits. So handle it differently.
> + */
> + else if (boot_cpu_data.x86 == 0x16) {
> + csbase = pvt->csels[dct].csbases[csrow];
> + csmask = pvt->csels[dct].csmasks[csrow >> 1];
> + base_bits_f16_low = mask_bits_f16_low = GENMASK(5 , 15);
> + base_bits_f16_high = mask_bits_f16_high = GENMASK(19 , 30);
> + addr_shift_f16_low = 6;
> + addr_shift_f16_high = 8;
> + *base = (((csbase & base_bits_f16_high)
> + << addr_shift_f16_high) |
> + ((csbase & base_bits_f16_low)
> + << addr_shift_f16_low));
> + *mask = ~0ULL;
> + *mask &= ~((mask_bits_f16_high
> + << addr_shift_f16_high) |
> + (mask_bits_f16_low
> + << addr_shift_f16_low));
> + *mask |= (((csmask & mask_bits_f16_high)
> + << addr_shift_f16_high) |
> + ((csmask & mask_bits_f16_low)
> + << addr_shift_f16_low));
> + return;
> + }
> +
> + else {
> csbase = pvt->csels[dct].csbases[csrow];
> csmask = pvt->csels[dct].csmasks[csrow >> 1];
> addr_shift = 8;
> @@ -1224,6 +1269,23 @@ static int f15_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
> return ddr3_cs_size(cs_mode, false);
> }
>
> +/*
> + * F16h supports 64 bit DCT interfaces
> + * and has only limited cs_modes
> + */
> +static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
> + unsigned cs_mode)
> +{
> + WARN_ON(cs_mode > 12);
> +
> + if (cs_mode == 0 || cs_mode == 3 || cs_mode == 4
> + || cs_mode == 6 || cs_mode == 8 || cs_mode == 9
> + || cs_mode == 12)
> + return -1;
> + else
> + return ddr3_cs_size(cs_mode, false);
> +}
> +
> static void read_dram_ctl_register(struct amd64_pvt *pvt)
> {
>
> @@ -1681,6 +1743,17 @@ static struct amd64_family_type amd64_family_types[] = {
> .read_dct_pci_cfg = f15_read_dct_pci_cfg,
> }
> },
> + [F16_CPUS] = {
> + .ctl_name = "F16h",
> + .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
> + .f3_id = PCI_DEVICE_ID_AMD_16H_NB_F3,
> + .ops = {
> + .early_channel_count = f1x_early_channel_count,
> + .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
> + .dbam_to_cs = f16_dbam_to_chip_select,
> + .read_dct_pci_cfg = f16_read_dct_pci_cfg,
> + }
> + },
> };
>
> static struct pci_dev *pci_get_related_function(unsigned int vendor,
> @@ -2071,8 +2144,12 @@ static void read_mc_regs(struct amd64_pvt *pvt)
> pvt->ecc_sym_sz = 4;
>
> if (c->x86 >= 0x10) {
> - amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
> - amd64_read_dct_pci_cfg(pvt, DBAM1, &pvt->dbam1);
> +
> + /* F16h has only one DCT, so don't take the branch if f16h */
> + if (c->x86 != 0x16) {
> + amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
> + amd64_read_dct_pci_cfg(pvt, DBAM1, &pvt->dbam1);
> + }
>
> /* F10h, revD and later can do x8 ECC too */
> if ((c->x86 > 0x10 || c->x86_model > 7) && tmp & BIT(25))
> @@ -2483,6 +2560,11 @@ static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt)
> pvt->ops = &amd64_family_types[F15_CPUS].ops;
> break;
>
> + case 0x16:
> + fam_type = &amd64_family_types[F16_CPUS];
> + pvt->ops = &amd64_family_types[F16_CPUS].ops;
> + break;
> +
> default:
> amd64_err("Unsupported family!\n");
> return NULL;
> @@ -2695,6 +2777,14 @@ static const struct pci_device_id amd64_pci_table[] __devinitdata = {
> .class = 0,
> .class_mask = 0,
> },
> + {
> + .vendor = PCI_VENDOR_ID_AMD,
> + .device = PCI_DEVICE_ID_AMD_16H_NB_F2,
> + .subvendor = PCI_ANY_ID,
> + .subdevice = PCI_ANY_ID,
> + .class = 0,
> + .class_mask = 0,
> + },
>
> {0, }
> };
> diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
> index 9a666cb..36e8c6b 100644
> --- a/drivers/edac/amd64_edac.h
> +++ b/drivers/edac/amd64_edac.h
> @@ -36,6 +36,10 @@
> * Changes/Fixes by Borislav Petkov <[email protected]>:
> * - misc fixes and code cleanups
> *
> + * Changes by Aravind Gopalakrishnan <[email protected]>
> + * - family 16h support added. New PCI Device IDs and some code
> + * to perform fam 16h specific ops.
> + *
> * This module is based on the following documents
> * (available from http://www.amd.com/):
> *
> @@ -172,7 +176,12 @@
> */
> #define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601
> #define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602
> -
> +#define PCI_DEVICE_ID_AMD_16H_NB_F0 0x1530
> +#define PCI_DEVICE_ID_AMD_16H_NB_F1 0x1531
> +#define PCI_DEVICE_ID_AMD_16H_NB_F2 0x1532
> +#define PCI_DEVICE_ID_AMD_16H_NB_F3 0x1533
> +#define PCI_DEVICE_ID_AMD_16H_NB_F4 0x1534
> +#define PCI_DEVICE_ID_AMD_16H_NB_F5 0x1535
>
> /*
> * Function 1 - Address Map
> @@ -300,6 +309,7 @@ enum amd_families {
> K8_CPUS = 0,
> F10_CPUS,
> F15_CPUS,
> + F16_CPUS,
> NUM_FAMILIES,
> };
>
> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
> index 1679ff6..59f732f 100644
> --- a/include/linux/pci_ids.h
> +++ b/include/linux/pci_ids.h
> @@ -519,6 +519,8 @@
> #define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304
> #define PCI_DEVICE_ID_AMD_15H_NB_F3 0x1603
> #define PCI_DEVICE_ID_AMD_15H_NB_F4 0x1604
> +#define PCI_DEVICE_ID_AMD_16H_NB_F3 0x1533
> +#define PCI_DEVICE_ID_AMD_16H_NB_F4 0x1534

What is the point of adding identical #defines both here and in
amd64_edac.h? Also, read the note at the top of
include/linux/pci_ids.h.

> #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703
> #define PCI_DEVICE_ID_AMD_LANCE 0x2000
> #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
> --
> 1.7.10.4
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2013-04-15 16:11:26

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH] edac: Handle EDAC ECC errors for Family 16h

On Mon, Apr 15, 2013 at 09:56:08AM -0600, Bjorn Helgaas wrote:
> On Mon, Apr 15, 2013 at 9:17 AM, Aravind Gopalakrishnan
> > @@ -172,7 +176,12 @@
> > */
> > #define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601
> > #define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602
> > -
> > +#define PCI_DEVICE_ID_AMD_16H_NB_F0 0x1530
> > +#define PCI_DEVICE_ID_AMD_16H_NB_F1 0x1531
> > +#define PCI_DEVICE_ID_AMD_16H_NB_F2 0x1532
> > +#define PCI_DEVICE_ID_AMD_16H_NB_F3 0x1533
> > +#define PCI_DEVICE_ID_AMD_16H_NB_F4 0x1534
> > +#define PCI_DEVICE_ID_AMD_16H_NB_F5 0x1535
> >
> > /*
> > * Function 1 - Address Map
> > @@ -300,6 +309,7 @@ enum amd_families {
> > K8_CPUS = 0,
> > F10_CPUS,
> > F15_CPUS,
> > + F16_CPUS,
> > NUM_FAMILIES,
> > };
> >
> > diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
> > index 1679ff6..59f732f 100644
> > --- a/include/linux/pci_ids.h
> > +++ b/include/linux/pci_ids.h
> > @@ -519,6 +519,8 @@
> > #define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304
> > #define PCI_DEVICE_ID_AMD_15H_NB_F3 0x1603
> > #define PCI_DEVICE_ID_AMD_15H_NB_F4 0x1604
> > +#define PCI_DEVICE_ID_AMD_16H_NB_F3 0x1533
> > +#define PCI_DEVICE_ID_AMD_16H_NB_F4 0x1534
>
> What is the point of adding identical #defines both here and in
> amd64_edac.h? Also, read the note at the top of
> include/linux/pci_ids.h.

Yeah, they should be in pci_ids.h since they're used in amd_nb.c and
amd64_edac.c.

But, what shouldn't be is adding unused defines (*_F0 and *_F5).
Aravind, please drop them.

I'll review the rest of the patch when I get around to it this week.

--
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

2013-04-15 16:43:19

by Aravind Gopalakrishnan

[permalink] [raw]
Subject: Re: [PATCH] edac: Handle EDAC ECC errors for Family 16h

On 04/15/2013 11:11 AM, Borislav Petkov wrote:
> On Mon, Apr 15, 2013 at 09:56:08AM -0600, Bjorn Helgaas wrote:
>> On Mon, Apr 15, 2013 at 9:17 AM, Aravind Gopalakrishnan
>>> @@ -172,7 +176,12 @@
>>> */
>>> #define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601
>>> #define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602
>>> -
>>> +#define PCI_DEVICE_ID_AMD_16H_NB_F0 0x1530
>>> +#define PCI_DEVICE_ID_AMD_16H_NB_F1 0x1531
>>> +#define PCI_DEVICE_ID_AMD_16H_NB_F2 0x1532
>>> +#define PCI_DEVICE_ID_AMD_16H_NB_F3 0x1533
>>> +#define PCI_DEVICE_ID_AMD_16H_NB_F4 0x1534
>>> +#define PCI_DEVICE_ID_AMD_16H_NB_F5 0x1535
>>>
>>> /*
>>> * Function 1 - Address Map
>>> @@ -300,6 +309,7 @@ enum amd_families {
>>> K8_CPUS = 0,
>>> F10_CPUS,
>>> F15_CPUS,
>>> + F16_CPUS,
>>> NUM_FAMILIES,
>>> };
>>>
>>> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
>>> index 1679ff6..59f732f 100644
>>> --- a/include/linux/pci_ids.h
>>> +++ b/include/linux/pci_ids.h
>>> @@ -519,6 +519,8 @@
>>> #define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304
>>> #define PCI_DEVICE_ID_AMD_15H_NB_F3 0x1603
>>> #define PCI_DEVICE_ID_AMD_15H_NB_F4 0x1604
>>> +#define PCI_DEVICE_ID_AMD_16H_NB_F3 0x1533
>>> +#define PCI_DEVICE_ID_AMD_16H_NB_F4 0x1534
>> What is the point of adding identical #defines both here and in
>> amd64_edac.h? Also, read the note at the top of
>> include/linux/pci_ids.h.
> Yeah, they should be in pci_ids.h since they're used in amd_nb.c and
> amd64_edac.c.
>
> But, what shouldn't be is adding unused defines (*_F0 and *_F5).
> Aravind, please drop them.

Yes, I have made the changes and sent it out as version 2 of the PATCH.
Do have a look and let me know if I need to make any changes.


> I'll review the rest of the patch when I get around to it this week.
>

2013-04-15 18:57:24

by Aravind Gopalakrishnan

[permalink] [raw]
Subject: Re: [PATCH] edac: Handle EDAC ECC errors for Family 16h

On 04/15/2013 11:45 AM, Aravind wrote:
> On 04/15/2013 11:11 AM, Borislav Petkov wrote:
>> On Mon, Apr 15, 2013 at 09:56:08AM -0600, Bjorn Helgaas wrote:
>>> On Mon, Apr 15, 2013 at 9:17 AM, Aravind Gopalakrishnan
>>>> @@ -172,7 +176,12 @@
>>>> */
>>>> #define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601
>>>> #define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602
>>>> -
>>>> +#define PCI_DEVICE_ID_AMD_16H_NB_F0 0x1530
>>>> +#define PCI_DEVICE_ID_AMD_16H_NB_F1 0x1531
>>>> +#define PCI_DEVICE_ID_AMD_16H_NB_F2 0x1532
>>>> +#define PCI_DEVICE_ID_AMD_16H_NB_F3 0x1533
>>>> +#define PCI_DEVICE_ID_AMD_16H_NB_F4 0x1534
>>>> +#define PCI_DEVICE_ID_AMD_16H_NB_F5 0x1535
>>>>
>>>> /*
>>>> * Function 1 - Address Map
>>>> @@ -300,6 +309,7 @@ enum amd_families {
>>>> K8_CPUS = 0,
>>>> F10_CPUS,
>>>> F15_CPUS,
>>>> + F16_CPUS,
>>>> NUM_FAMILIES,
>>>> };
>>>>
>>>> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
>>>> index 1679ff6..59f732f 100644
>>>> --- a/include/linux/pci_ids.h
>>>> +++ b/include/linux/pci_ids.h
>>>> @@ -519,6 +519,8 @@
>>>> #define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304
>>>> #define PCI_DEVICE_ID_AMD_15H_NB_F3 0x1603
>>>> #define PCI_DEVICE_ID_AMD_15H_NB_F4 0x1604
>>>> +#define PCI_DEVICE_ID_AMD_16H_NB_F3 0x1533
>>>> +#define PCI_DEVICE_ID_AMD_16H_NB_F4 0x1534
>>> What is the point of adding identical #defines both here and in
>>> amd64_edac.h? Also, read the note at the top of
>>> include/linux/pci_ids.h.
>> Yeah, they should be in pci_ids.h since they're used in amd_nb.c and
>> amd64_edac.c.
>>
>> But, what shouldn't be is adding unused defines (*_F0 and *_F5).
>> Aravind, please drop them.
>
> Yes, I have made the changes and sent it out as version 2 of the
> PATCH.
> Do have a look and let me know if I need to make any changes.
>
>
>> I'll review the rest of the patch when I get around to it this week.
>>
>

Correction: Fam16 does not support GART. I had mistakenly included
it in amd_nb.c (Apologies)
I have fixed it up and made few other cosmetic changes to the
patch. I have tested it once again to
make certain it works fine and it does..
Sending it out as V3 of the patch..