Subject: [PATCH v5 0/5] x86/edac/amd64: Add heterogeneous node support

On newer heterogeneous systems with AMD CPUs the data fabrics of GPUs
can be connected directly via custom links.

This series of patchset does the following
1. amd_nb.c:
a. Add support for northbridges on Aldebaran GPU nodes
b. export AMD node map details to be used by edac and mce modules

2. mce_amd module:
a. Identify the node ID where the error occurred and map the node
id to linux enumerated node id.

3. amd64_edac module
a. Add new family op routines
b. Enumerate UMCs and HBMs on the GPU nodes
c. Move fam_type structure into amd64_pvt struct

This patchset is rebased on top of
"
commit 07416cadfdfa38283b840e700427ae3782c76f6b
Author: Yazen Ghannam <[email protected]>
Date: Tue Oct 5 15:44:19 2021 +0000

EDAC/amd64: Handle three rank interleaving mode
"

Muralidhara M K (3):
x86/amd_nb: Add support for northbridges on Aldebaran
EDAC/amd64: Extend family ops functions
EDAC/amd64: Move struct fam_type into amd64_pvt structure

Naveen Krishna Chatradhi (2):
EDAC/mce_amd: Extract node id from MCA_IPID
EDAC/amd64: Enumerate memory on Aldebaran GPU nodes

arch/x86/include/asm/amd_nb.h | 9 +
arch/x86/kernel/amd_nb.c | 150 +++++++--
drivers/edac/amd64_edac.c | 592 +++++++++++++++++++++++++---------
drivers/edac/amd64_edac.h | 39 ++-
drivers/edac/mce_amd.c | 24 +-
include/linux/pci_ids.h | 1 +
6 files changed, 630 insertions(+), 185 deletions(-)

--
2.25.1


Subject: [PATCH v5 4/5] EDAC/amd64: Move struct fam_type into amd64_pvt structure

From: Muralidhara M K <[email protected]>

On heterogeneous systems, the GPU nodes are probed after the CPU
nodes and will overwrites the family type set by CPU nodes.

Moving struct fam_type to struct amd64_pvt, instead of using fam_type
as a global variable.

Signed-off-by: Muralidhara M K <[email protected]>
Signed-off-by: Naveen Krishna Chatradhi <[email protected]>
---
Changes since v4:
New patch, created based on a comment.

drivers/edac/amd64_edac.c | 58 +++++++++++++++++++--------------------
drivers/edac/amd64_edac.h | 2 ++
2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 88d38dbb4e7e..fbb1284f3c18 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -13,8 +13,6 @@ module_param(ecc_enable_override, int, 0644);

static struct msr __percpu *msrs;

-static struct amd64_family_type *fam_type;
-
/* Per-node stuff */
static struct ecc_settings **ecc_stngs;

@@ -448,7 +446,7 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
for (i = 0; i < pvt->csels[dct].m_cnt; i++)

#define for_each_umc(i) \
- for (i = 0; i < fam_type->max_mcs; i++)
+ for (i = 0; i < pvt->fam_type->max_mcs; i++)

/*
* @input_addr is an InputAddr associated with the node given by mci. Return the
@@ -3637,7 +3635,7 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci)

mci->edac_cap = determine_edac_cap(pvt);
mci->mod_name = EDAC_MOD_STR;
- mci->ctl_name = fam_type->ctl_name;
+ mci->ctl_name = pvt->fam_type->ctl_name;
mci->dev_name = pci_name(pvt->F3);
mci->ctl_page_to_phys = NULL;

@@ -3658,64 +3656,64 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)

switch (pvt->fam) {
case 0xf:
- fam_type = &family_types[K8_CPUS];
+ pvt->fam_type = &family_types[K8_CPUS];
pvt->ops = &family_types[K8_CPUS].ops;
break;

case 0x10:
- fam_type = &family_types[F10_CPUS];
+ pvt->fam_type = &family_types[F10_CPUS];
pvt->ops = &family_types[F10_CPUS].ops;
break;

case 0x15:
if (pvt->model == 0x30) {
- fam_type = &family_types[F15_M30H_CPUS];
+ pvt->fam_type = &family_types[F15_M30H_CPUS];
pvt->ops = &family_types[F15_M30H_CPUS].ops;
break;
} else if (pvt->model == 0x60) {
- fam_type = &family_types[F15_M60H_CPUS];
+ pvt->fam_type = &family_types[F15_M60H_CPUS];
pvt->ops = &family_types[F15_M60H_CPUS].ops;
break;
/* Richland is only client */
} else if (pvt->model == 0x13) {
return NULL;
} else {
- fam_type = &family_types[F15_CPUS];
+ pvt->fam_type = &family_types[F15_CPUS];
pvt->ops = &family_types[F15_CPUS].ops;
}
break;

case 0x16:
if (pvt->model == 0x30) {
- fam_type = &family_types[F16_M30H_CPUS];
+ pvt->fam_type = &family_types[F16_M30H_CPUS];
pvt->ops = &family_types[F16_M30H_CPUS].ops;
break;
}
- fam_type = &family_types[F16_CPUS];
+ pvt->fam_type = &family_types[F16_CPUS];
pvt->ops = &family_types[F16_CPUS].ops;
break;

case 0x17:
if (pvt->model >= 0x10 && pvt->model <= 0x2f) {
- fam_type = &family_types[F17_M10H_CPUS];
+ pvt->fam_type = &family_types[F17_M10H_CPUS];
pvt->ops = &family_types[F17_M10H_CPUS].ops;
break;
} else if (pvt->model >= 0x30 && pvt->model <= 0x3f) {
- fam_type = &family_types[F17_M30H_CPUS];
+ pvt->fam_type = &family_types[F17_M30H_CPUS];
pvt->ops = &family_types[F17_M30H_CPUS].ops;
break;
} else if (pvt->model >= 0x60 && pvt->model <= 0x6f) {
- fam_type = &family_types[F17_M60H_CPUS];
+ pvt->fam_type = &family_types[F17_M60H_CPUS];
pvt->ops = &family_types[F17_M60H_CPUS].ops;
break;
} else if (pvt->model >= 0x70 && pvt->model <= 0x7f) {
- fam_type = &family_types[F17_M70H_CPUS];
+ pvt->fam_type = &family_types[F17_M70H_CPUS];
pvt->ops = &family_types[F17_M70H_CPUS].ops;
break;
}
fallthrough;
case 0x18:
- fam_type = &family_types[F17_CPUS];
+ pvt->fam_type = &family_types[F17_CPUS];
pvt->ops = &family_types[F17_CPUS].ops;

if (pvt->fam == 0x18)
@@ -3724,12 +3722,12 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)

case 0x19:
if (pvt->model >= 0x20 && pvt->model <= 0x2f) {
- fam_type = &family_types[F17_M70H_CPUS];
+ pvt->fam_type = &family_types[F17_M70H_CPUS];
pvt->ops = &family_types[F17_M70H_CPUS].ops;
- fam_type->ctl_name = "F19h_M20h";
+ pvt->fam_type->ctl_name = "F19h_M20h";
break;
}
- fam_type = &family_types[F19_CPUS];
+ pvt->fam_type = &family_types[F19_CPUS];
pvt->ops = &family_types[F19_CPUS].ops;
family_types[F19_CPUS].ctl_name = "F19h";
break;
@@ -3753,7 +3751,7 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
return NULL;
}

- return fam_type;
+ return pvt->fam_type;
}

static const struct attribute_group *amd64_edac_attr_groups[] = {
@@ -3770,15 +3768,15 @@ static int hw_info_get(struct amd64_pvt *pvt)
int ret;

if (pvt->fam >= 0x17) {
- pvt->umc = kcalloc(fam_type->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL);
+ pvt->umc = kcalloc(pvt->fam_type->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL);
if (!pvt->umc)
return -ENOMEM;

- pci_id1 = fam_type->f0_id;
- pci_id2 = fam_type->f6_id;
+ pci_id1 = pvt->fam_type->f0_id;
+ pci_id2 = pvt->fam_type->f6_id;
} else {
- pci_id1 = fam_type->f1_id;
- pci_id2 = fam_type->f2_id;
+ pci_id1 = pvt->fam_type->f1_id;
+ pci_id2 = pvt->fam_type->f2_id;
}

ret = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2);
@@ -3824,7 +3822,7 @@ static int init_one_instance(struct amd64_pvt *pvt)
* only one channel. Also, this simplifies handling later for the price
* of a couple of KBs tops.
*/
- layers[1].size = fam_type->max_mcs;
+ layers[1].size = pvt->fam_type->max_mcs;
layers[1].is_virt_csrow = false;

mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0);
@@ -3854,7 +3852,7 @@ static bool instance_has_memory(struct amd64_pvt *pvt)
bool cs_enabled = false;
int cs = 0, dct = 0;

- for (dct = 0; dct < fam_type->max_mcs; dct++) {
+ for (dct = 0; dct < pvt->fam_type->max_mcs; dct++) {
for_each_chip_select(cs, dct, pvt)
cs_enabled |= csrow_enabled(cs, dct, pvt);
}
@@ -3884,8 +3882,8 @@ static int probe_one_instance(unsigned int nid)
pvt->F3 = F3;

ret = -ENODEV;
- fam_type = per_family_init(pvt);
- if (!fam_type)
+ pvt->fam_type = per_family_init(pvt);
+ if (!pvt->fam_type)
goto err_enable;

ret = hw_info_get(pvt);
@@ -3924,7 +3922,7 @@ static int probe_one_instance(unsigned int nid)
goto err_enable;
}

- amd64_info("%s %sdetected (node %d).\n", fam_type->ctl_name,
+ amd64_info("%s %sdetected (node %d).\n", pvt->fam_type->ctl_name,
(pvt->fam == 0xf ?
(pvt->ext_model >= K8_REV_F ? "revF or later "
: "revE or earlier ")
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index c7aeb2c46dc3..d307df58ea1b 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -389,6 +389,8 @@ struct amd64_pvt {
enum mem_type dram_type;

struct amd64_umc *umc; /* UMC registers */
+
+ struct amd64_family_type *fam_type;
};

enum err_codes {
--
2.25.1

Subject: [PATCH v5 5/5] EDAC/amd64: Enumerate memory on Aldebaran GPU nodes

On newer heterogeneous systems with AMD CPUs, the data fabrics of the GPUs
are connected directly via custom links.

One such system, where Aldebaran GPU nodes are connected to the
Family 19h, model 30h family of CPU nodes, the Aldebaran GPUs can report
memory errors via SMCA banks.

Aldebaran GPU support was added to DRM framework
https://lists.freedesktop.org/archives/amd-gfx/2021-February/059694.html

The GPU nodes comes with HBM2 memory in-built, ECC support is enabled by
default and the UMCs on GPU node are different from the UMCs on CPU nodes.

GPU specific ops routines are defined to extend the amd64_edac
module to enumerate HBM memory leveraging the existing edac and the
amd64 specific data structures.

Note: The UMC Phys on GPU nodes are enumerated as csrows and the UMC
channels connected to HBM banks are enumerated as ranks.

Cc: Yazen Ghannam <[email protected]>
Co-developed-by: Muralidhara M K <[email protected]>
Signed-off-by: Muralidhara M K <[email protected]>
Signed-off-by: Naveen Krishna Chatradhi <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
Changes since v4:
Split "f17_addr_mask_to_cs_size" instead as did in 3rd patch earlier

Changes since v3:
1. Bifurcated the GPU code from v2

Changes since v2:
1. Restored line deletions and handled minor comments
2. Modified commit message and some of the function comments
3. variable df_inst_id is introduced instead of umc_num

Changes since v1:
1. Modifed the commit message
2. Change the edac_cap
3. kept sizes of both cpu and noncpu together
4. return success if the !F3 condition true and remove unnecessary validation

drivers/edac/amd64_edac.c | 302 +++++++++++++++++++++++++++++++++-----
drivers/edac/amd64_edac.h | 27 ++++
2 files changed, 293 insertions(+), 36 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index fbb1284f3c18..a0dc7ffe63d6 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1121,6 +1121,20 @@ static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl)
}
}

+static void debug_display_dimm_sizes_gpu(struct amd64_pvt *pvt, u8 ctrl)
+{
+ int size, cs = 0, cs_mode;
+
+ edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl);
+
+ cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY;
+
+ for_each_chip_select(cs, ctrl, pvt) {
+ size = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs);
+ amd64_info(EDAC_MC ": %d: %5dMB\n", cs, size);
+ }
+}
+
static void __dump_misc_regs_df(struct amd64_pvt *pvt)
{
struct amd64_umc *umc;
@@ -1165,6 +1179,27 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt)
pvt->dhar, dhar_base(pvt));
}

+static void __dump_misc_regs_gpu(struct amd64_pvt *pvt)
+{
+ struct amd64_umc *umc;
+ u32 i, umc_base;
+
+ for_each_umc(i) {
+ umc_base = get_umc_base(i);
+ umc = &pvt->umc[i];
+
+ edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, umc->umc_cfg);
+ edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, umc->sdp_ctrl);
+ edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, umc->ecc_ctrl);
+ edac_dbg(1, "UMC%d All HBMs support ECC: yes\n", i);
+
+ debug_display_dimm_sizes_gpu(pvt, i);
+ }
+
+ edac_dbg(1, "F0x104 (DRAM Hole Address): 0x%08x, base: 0x%08x\n",
+ pvt->dhar, dhar_base(pvt));
+}
+
/* Display and decode various NB registers for debug purposes. */
static void __dump_misc_regs(struct amd64_pvt *pvt)
{
@@ -1245,6 +1280,43 @@ static void f17_prep_chip_selects(struct amd64_pvt *pvt)
}
}

+static void gpu_prep_chip_selects(struct amd64_pvt *pvt)
+{
+ int umc;
+
+ for_each_umc(umc) {
+ pvt->csels[umc].b_cnt = 8;
+ pvt->csels[umc].m_cnt = 8;
+ }
+}
+
+static void read_umc_base_mask_gpu(struct amd64_pvt *pvt)
+{
+ u32 base_reg, mask_reg;
+ u32 *base, *mask;
+ int umc, cs;
+
+ for_each_umc(umc) {
+ for_each_chip_select(cs, umc, pvt) {
+ base_reg = get_umc_base_gpu(umc, cs) + UMCCH_BASE_ADDR;
+ base = &pvt->csels[umc].csbases[cs];
+
+ if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) {
+ edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *base, base_reg);
+ }
+
+ mask_reg = get_umc_base_gpu(umc, cs) + UMCCH_ADDR_MASK;
+ mask = &pvt->csels[umc].csmasks[cs];
+
+ if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) {
+ edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *mask, mask_reg);
+ }
+ }
+ }
+}
+
static void read_umc_base_mask(struct amd64_pvt *pvt)
{
u32 umc_base_reg, umc_base_reg_sec;
@@ -1743,6 +1815,19 @@ static int f17_early_channel_count(struct amd64_pvt *pvt)
return channels;
}

+static int gpu_early_channel_count(struct amd64_pvt *pvt)
+{
+ int i, channels = 0;
+
+ /* The memory channels in case of GPUs are fully populated */
+ for_each_umc(i)
+ channels += pvt->csels[i].b_cnt;
+
+ amd64_info("MCT channel count: %d\n", channels);
+
+ return channels;
+}
+
static int ddr3_cs_size(unsigned i, bool dct_width)
{
unsigned shift = 0;
@@ -1870,11 +1955,46 @@ static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
return ddr3_cs_size(cs_mode, false);
}

+static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
+ int csrow_nr, int dimm)
+{
+ u32 msb, weight, num_zero_bits;
+ u32 addr_mask_deinterleaved;
+ int size = 0;
+
+ /*
+ * The number of zero bits in the mask is equal to the number of bits
+ * in a full mask minus the number of bits in the current mask.
+ *
+ * The MSB is the number of bits in the full mask because BIT[0] is
+ * always 0.
+ *
+ * In the special 3 Rank interleaving case, a single bit is flipped
+ * without swapping with the most significant bit. This can be handled
+ * by keeping the MSB where it is and ignoring the single zero bit.
+ */
+ msb = fls(addr_mask_orig) - 1;
+ weight = hweight_long(addr_mask_orig);
+ num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
+
+ /* Take the number of zero bits off from the top of the mask. */
+ addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
+
+ edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
+ edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
+ edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
+
+ /* Register [31:1] = Address [39:9]. Size is in kBs here. */
+ size = (addr_mask_deinterleaved >> 2) + 1;
+
+ /* Return size in MBs. */
+ return size >> 10;
+}
+
static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
- u32 addr_mask_orig, addr_mask_deinterleaved;
- u32 msb, weight, num_zero_bits;
+ u32 addr_mask_orig;
int dimm, size = 0;

/* No Chip Selects are enabled. */
@@ -1902,33 +2022,15 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
else
addr_mask_orig = pvt->csels[umc].csmasks[dimm];

- /*
- * The number of zero bits in the mask is equal to the number of bits
- * in a full mask minus the number of bits in the current mask.
- *
- * The MSB is the number of bits in the full mask because BIT[0] is
- * always 0.
- *
- * In the special 3 Rank interleaving case, a single bit is flipped
- * without swapping with the most significant bit. This can be handled
- * by keeping the MSB where it is and ignoring the single zero bit.
- */
- msb = fls(addr_mask_orig) - 1;
- weight = hweight_long(addr_mask_orig);
- num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
-
- /* Take the number of zero bits off from the top of the mask. */
- addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
-
- edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
- edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
- edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
+ return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, dimm);
+}

- /* Register [31:1] = Address [39:9]. Size is in kBs here. */
- size = (addr_mask_deinterleaved >> 2) + 1;
+static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
+ unsigned int cs_mode, int csrow_nr)
+{
+ u32 addr_mask_orig = pvt->csels[umc].csmasks[csrow_nr];

- /* Return size in MBs. */
- return size >> 10;
+ return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, csrow_nr >> 1);
}

static void read_dram_ctl_register(struct amd64_pvt *pvt)
@@ -2516,8 +2618,11 @@ static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
/* Prototypes for family specific ops routines */
static int init_csrows(struct mem_ctl_info *mci);
static int init_csrows_df(struct mem_ctl_info *mci);
+static int init_csrows_gpu(struct mem_ctl_info *mci);
static void __read_mc_regs_df(struct amd64_pvt *pvt);
+static void __read_mc_regs_gpu(struct amd64_pvt *pvt);
static void update_umc_err_info(struct mce *m, struct err_info *err);
+static void update_umc_err_info_gpu(struct mce *m, struct err_info *err);

static const struct low_ops k8_ops = {
.early_channel_count = k8_early_channel_count,
@@ -2590,6 +2695,17 @@ static const struct low_ops f17_ops = {
.get_umc_err_info = update_umc_err_info,
};

+static const struct low_ops gpu_ops = {
+ .early_channel_count = gpu_early_channel_count,
+ .dbam_to_cs = gpu_addr_mask_to_cs_size,
+ .prep_chip_select = gpu_prep_chip_selects,
+ .get_base_mask = read_umc_base_mask_gpu,
+ .display_misc_regs = __dump_misc_regs_gpu,
+ .get_mc_regs = __read_mc_regs_gpu,
+ .populate_csrows = init_csrows_gpu,
+ .get_umc_err_info = update_umc_err_info_gpu,
+};
+
static struct amd64_family_type family_types[] = {
[K8_CPUS] = {
.ctl_name = "K8",
@@ -2682,6 +2798,14 @@ static struct amd64_family_type family_types[] = {
.max_mcs = 8,
.ops = f17_ops,
},
+ [ALDEBARAN_GPUS] = {
+ .ctl_name = "ALDEBARAN",
+ .f0_id = PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F0,
+ .f6_id = PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F6,
+ .max_mcs = 4,
+ .ops = gpu_ops,
+ },
+
};

/*
@@ -2940,12 +3064,38 @@ static void update_umc_err_info(struct mce *m, struct err_info *err)
err->csrow = m->synd & 0x7;
}

+/*
+ * The CPUs have one channel per UMC, So UMC number is equivalent to a
+ * channel number. The GPUs have 8 channels per UMC, so the UMC number no
+ * longer works as a channel number.
+ * The channel number within a GPU UMC is given in MCA_IPID[15:12].
+ * However, the IDs are split such that two UMC values go to one UMC, and
+ * the channel numbers are split in two groups of four.
+ *
+ * Refer comment on get_umc_base_gpu() from amd64_edac.h
+ *
+ * For example,
+ * UMC0 CH[3:0] = 0x0005[3:0]000
+ * UMC0 CH[7:4] = 0x0015[3:0]000
+ * UMC1 CH[3:0] = 0x0025[3:0]000
+ * UMC1 CH[7:4] = 0x0035[3:0]000
+ */
+static void update_umc_err_info_gpu(struct mce *m, struct err_info *err)
+{
+ u8 ch = (m->ipid & GENMASK(31, 0)) >> 20;
+ u8 phy = ((m->ipid >> 12) & 0xf);
+
+ err->channel = ch % 2 ? phy + 4 : phy;
+ err->csrow = phy;
+}
+
static void decode_umc_error(int node_id, struct mce *m)
{
u8 ecc_type = (m->status >> 45) & 0x3;
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
struct err_info err;
+ u8 df_inst_id;
u64 sys_addr;

mci = edac_mc_find(node_id);
@@ -2975,7 +3125,17 @@ static void decode_umc_error(int node_id, struct mce *m)

pvt->ops->get_umc_err_info(m, &err);

- if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
+ /*
+ * GPU node has #phys[X] which has #channels[Y] each.
+ * On GPUs, df_inst_id = [X] * num_ch_per_phy + [Y].
+ * On CPUs, "Channel"="UMC Number"="DF Instance ID".
+ */
+ if (pvt->is_gpu)
+ df_inst_id = (err.csrow * pvt->channel_count / mci->nr_csrows) + err.channel;
+ else
+ df_inst_id = err.channel;
+
+ if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, df_inst_id, &sys_addr)) {
err.err_code = ERR_NORM_ADDR;
goto log_error;
}
@@ -3114,6 +3274,23 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
}
}

+static void __read_mc_regs_gpu(struct amd64_pvt *pvt)
+{
+ u8 nid = pvt->mc_node_id;
+ struct amd64_umc *umc;
+ u32 i, umc_base;
+
+ /* Read registers from each UMC */
+ for_each_umc(i) {
+ umc_base = get_umc_base_gpu(i, 0);
+ umc = &pvt->umc[i];
+
+ amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
+ amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl);
+ amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
+ }
+}
+
/*
* Retrieve the hardware registers of the memory controller (this includes the
* 'Address Map' and 'Misc' device regs)
@@ -3196,7 +3373,9 @@ static void read_mc_regs(struct amd64_pvt *pvt)
determine_memory_type(pvt);
edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]);

- determine_ecc_sym_sz(pvt);
+ /* ECC symbol size is not available on GPU nodes */
+ if (!pvt->is_gpu)
+ determine_ecc_sym_sz(pvt);
}

/*
@@ -3243,7 +3422,10 @@ static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig)
csrow_nr >>= 1;
cs_mode = DBAM_DIMM(csrow_nr, dbam);
} else {
- cs_mode = f17_get_cs_mode(csrow_nr >> 1, dct, pvt);
+ if (pvt->is_gpu)
+ cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY;
+ else
+ cs_mode = f17_get_cs_mode(csrow_nr >> 1, dct, pvt);
}

nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr);
@@ -3300,6 +3482,35 @@ static int init_csrows_df(struct mem_ctl_info *mci)
return empty;
}

+static int init_csrows_gpu(struct mem_ctl_info *mci)
+{
+ struct amd64_pvt *pvt = mci->pvt_info;
+ struct dimm_info *dimm;
+ int empty = 1;
+ u8 umc, cs;
+
+ for_each_umc(umc) {
+ for_each_chip_select(cs, umc, pvt) {
+ if (!csrow_enabled(cs, umc, pvt))
+ continue;
+
+ empty = 0;
+ dimm = mci->csrows[umc]->channels[cs]->dimm;
+
+ edac_dbg(1, "MC node: %d, csrow: %d\n",
+ pvt->mc_node_id, cs);
+
+ dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs);
+ dimm->mtype = MEM_HBM2;
+ dimm->edac_mode = EDAC_SECDED;
+ dimm->dtype = DEV_X16;
+ dimm->grain = 64;
+ }
+ }
+
+ return empty;
+}
+
/*
* Initialize the array of csrow attribute instances, based on the values
* from pci config hardware registers.
@@ -3541,6 +3752,10 @@ static bool ecc_enabled(struct amd64_pvt *pvt)
u8 ecc_en = 0, i;
u32 value;

+ /* ECC is enabled by default on GPU nodes */
+ if (pvt->is_gpu)
+ return true;
+
if (boot_cpu_data.x86 >= 0x17) {
u8 umc_en_mask = 0, ecc_en_mask = 0;
struct amd64_umc *umc;
@@ -3624,7 +3839,10 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci)
mci->edac_ctl_cap = EDAC_FLAG_NONE;

if (pvt->umc) {
- f17h_determine_edac_ctl_cap(mci, pvt);
+ if (pvt->is_gpu)
+ mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
+ else
+ f17h_determine_edac_ctl_cap(mci, pvt);
} else {
if (pvt->nbcap & NBCAP_SECDED)
mci->edac_ctl_cap |= EDAC_FLAG_SECDED;
@@ -3726,10 +3944,21 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
pvt->ops = &family_types[F17_M70H_CPUS].ops;
pvt->fam_type->ctl_name = "F19h_M20h";
break;
+ } else if (pvt->model >= 0x30 && pvt->model <= 0x3f) {
+ if (pvt->mc_node_id >= amd_cpu_node_count()) {
+ pvt->fam_type = &family_types[ALDEBARAN_GPUS];
+ pvt->ops = &family_types[ALDEBARAN_GPUS].ops;
+ pvt->is_gpu = true;
+ } else {
+ pvt->fam_type = &family_types[F19_CPUS];
+ pvt->ops = &family_types[F19_CPUS].ops;
+ family_types[F19_CPUS].ctl_name = "F19h_M30h";
+ }
+ } else {
+ pvt->fam_type = &family_types[F19_CPUS];
+ pvt->ops = &family_types[F19_CPUS].ops;
+ family_types[F19_CPUS].ctl_name = "F19h";
}
- pvt->fam_type = &family_types[F19_CPUS];
- pvt->ops = &family_types[F19_CPUS].ops;
- family_types[F19_CPUS].ctl_name = "F19h";
break;

default:
@@ -3811,9 +4040,10 @@ static int init_one_instance(struct amd64_pvt *pvt)
if (pvt->channel_count < 0)
return ret;

+ /* Define layers for CPU and GPU nodes */
ret = -ENOMEM;
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
- layers[0].size = pvt->csels[0].b_cnt;
+ layers[0].size = pvt->is_gpu ? pvt->fam_type->max_mcs : pvt->csels[0].b_cnt;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;

@@ -3822,7 +4052,7 @@ static int init_one_instance(struct amd64_pvt *pvt)
* only one channel. Also, this simplifies handling later for the price
* of a couple of KBs tops.
*/
- layers[1].size = pvt->fam_type->max_mcs;
+ layers[1].size = pvt->is_gpu ? pvt->csels[0].b_cnt : pvt->fam_type->max_mcs;
layers[1].is_virt_csrow = false;

mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0);
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index d307df58ea1b..82723ae88d2e 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -126,6 +126,8 @@
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446
#define PCI_DEVICE_ID_AMD_19H_DF_F0 0x1650
#define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656
+#define PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F0 0x14d0
+#define PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F6 0x14d6

/*
* Function 1 - Address Map
@@ -298,6 +300,7 @@ enum amd_families {
F17_M60H_CPUS,
F17_M70H_CPUS,
F19_CPUS,
+ ALDEBARAN_GPUS,
NUM_FAMILIES,
};

@@ -391,6 +394,8 @@ struct amd64_pvt {
struct amd64_umc *umc; /* UMC registers */

struct amd64_family_type *fam_type;
+
+ bool is_gpu;
};

enum err_codes {
@@ -412,6 +417,28 @@ struct err_info {
u32 offset;
};

+static inline u32 get_umc_base_gpu(u8 umc, u8 channel)
+{
+ /*
+ * On CPUs, there is one channel per UMC, so UMC numbering equals
+ * channel numbering. On GPUs, there are eight channels per UMC,
+ * so the channel numbering is different from UMC numbering.
+ *
+ * On CPU nodes channels are selected in 6th nibble
+ * UMC chY[3:0]= [(chY*2 + 1) : (chY*2)]50000;
+ *
+ * On GPU nodes channels are selected in 3rd nibble
+ * HBM chX[3:0]= [Y ]5X[3:0]000;
+ * HBM chX[7:4]= [Y+1]5X[3:0]000
+ */
+ umc *= 2;
+
+ if (channel >= 4)
+ umc++;
+
+ return 0x50000 + (umc << 20) + ((channel % 4) << 12);
+}
+
static inline u32 get_umc_base(u8 channel)
{
/* chY: 0xY50000 */
--
2.25.1

Subject: [PATCH v5 3/5] EDAC/amd64: Extend family ops functions

From: Muralidhara M K <[email protected]>

Create new family operation routines and define them respectively.
This would simplify adding support for future platforms.

Signed-off-by: Muralidhara M K <[email protected]>
Signed-off-by: Naveen Krishna Chatradhi <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
Changes since v4:
1. Modified k8_prep_chip_selects for ext_model checks
2. Add read_dct_base_mask to ops
3. Renamed find_umc_channel and addressed minor comments

Changes since v3:
1. Defined new family operation routines

Changs since v2:
1. new patch

drivers/edac/amd64_edac.c | 236 +++++++++++++++++++++++---------------
drivers/edac/amd64_edac.h | 10 +-
2 files changed, 151 insertions(+), 95 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 4fce75013674..88d38dbb4e7e 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1204,10 +1204,7 @@ static void __dump_misc_regs(struct amd64_pvt *pvt)
/* Display and decode various NB registers for debug purposes. */
static void dump_misc_regs(struct amd64_pvt *pvt)
{
- if (pvt->umc)
- __dump_misc_regs_df(pvt);
- else
- __dump_misc_regs(pvt);
+ pvt->ops->display_misc_regs(pvt);

edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no");

@@ -1217,28 +1214,39 @@ static void dump_misc_regs(struct amd64_pvt *pvt)
/*
* See BKDG, F2x[1,0][5C:40], F2[1,0][6C:60]
*/
-static void prep_chip_selects(struct amd64_pvt *pvt)
+static void k8_prep_chip_selects(struct amd64_pvt *pvt)
{
if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) {
pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 8;
- } else if (pvt->fam == 0x15 && pvt->model == 0x30) {
- pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4;
- pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2;
- } else if (pvt->fam >= 0x17) {
- int umc;
-
- for_each_umc(umc) {
- pvt->csels[umc].b_cnt = 4;
- pvt->csels[umc].m_cnt = 2;
- }
-
- } else {
+ } else if (pvt->fam == 0xf && pvt->ext_model >= K8_REV_F) {
pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4;
}
}

+static void f15m30_prep_chip_selects(struct amd64_pvt *pvt)
+{
+ pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4;
+ pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2;
+}
+
+static void default_prep_chip_selects(struct amd64_pvt *pvt)
+{
+ pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8;
+ pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4;
+}
+
+static void f17_prep_chip_selects(struct amd64_pvt *pvt)
+{
+ int umc;
+
+ for_each_umc(umc) {
+ pvt->csels[umc].b_cnt = 4;
+ pvt->csels[umc].m_cnt = 2;
+ }
+}
+
static void read_umc_base_mask(struct amd64_pvt *pvt)
{
u32 umc_base_reg, umc_base_reg_sec;
@@ -1297,11 +1305,6 @@ static void read_dct_base_mask(struct amd64_pvt *pvt)
{
int cs;

- prep_chip_selects(pvt);
-
- if (pvt->umc)
- return read_umc_base_mask(pvt);
-
for_each_chip_select(cs, 0, pvt) {
int reg0 = DCSB0 + (cs * 4);
int reg1 = DCSB1 + (cs * 4);
@@ -2512,143 +2515,174 @@ static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
}
}

+/* Prototypes for family specific ops routines */
+static int init_csrows(struct mem_ctl_info *mci);
+static int init_csrows_df(struct mem_ctl_info *mci);
+static void __read_mc_regs_df(struct amd64_pvt *pvt);
+static void update_umc_err_info(struct mce *m, struct err_info *err);
+
+static const struct low_ops k8_ops = {
+ .early_channel_count = k8_early_channel_count,
+ .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow,
+ .dbam_to_cs = k8_dbam_to_chip_select,
+ .prep_chip_select = k8_prep_chip_selects,
+ .get_base_mask = read_dct_base_mask,
+ .display_misc_regs = __dump_misc_regs,
+ .populate_csrows = init_csrows,
+};
+
+static const struct low_ops f10_ops = {
+ .early_channel_count = f1x_early_channel_count,
+ .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
+ .dbam_to_cs = f10_dbam_to_chip_select,
+ .prep_chip_select = default_prep_chip_selects,
+ .get_base_mask = read_dct_base_mask,
+ .display_misc_regs = __dump_misc_regs,
+ .populate_csrows = init_csrows,
+};
+
+static const struct low_ops f15_ops = {
+ .early_channel_count = f1x_early_channel_count,
+ .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
+ .dbam_to_cs = f15_dbam_to_chip_select,
+ .prep_chip_select = default_prep_chip_selects,
+ .get_base_mask = read_dct_base_mask,
+ .display_misc_regs = __dump_misc_regs,
+ .populate_csrows = init_csrows,
+};
+
+static const struct low_ops f15m30_ops = {
+ .early_channel_count = f1x_early_channel_count,
+ .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
+ .dbam_to_cs = f16_dbam_to_chip_select,
+ .prep_chip_select = f15m30_prep_chip_selects,
+ .get_base_mask = read_dct_base_mask,
+ .display_misc_regs = __dump_misc_regs,
+ .populate_csrows = init_csrows,
+};
+
+static const struct low_ops f15m60_ops = {
+ .early_channel_count = f1x_early_channel_count,
+ .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
+ .dbam_to_cs = f15_m60h_dbam_to_chip_select,
+ .prep_chip_select = default_prep_chip_selects,
+ .get_base_mask = read_dct_base_mask,
+ .display_misc_regs = __dump_misc_regs,
+ .populate_csrows = init_csrows,
+};
+
+static const struct low_ops f16_ops = {
+ .early_channel_count = f1x_early_channel_count,
+ .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
+ .dbam_to_cs = f16_dbam_to_chip_select,
+ .prep_chip_select = default_prep_chip_selects,
+ .get_base_mask = read_dct_base_mask,
+ .display_misc_regs = __dump_misc_regs,
+ .populate_csrows = init_csrows,
+};
+
+static const struct low_ops f17_ops = {
+ .early_channel_count = f17_early_channel_count,
+ .dbam_to_cs = f17_addr_mask_to_cs_size,
+ .prep_chip_select = f17_prep_chip_selects,
+ .get_base_mask = read_umc_base_mask,
+ .display_misc_regs = __dump_misc_regs_df,
+ .get_mc_regs = __read_mc_regs_df,
+ .populate_csrows = init_csrows_df,
+ .get_umc_err_info = update_umc_err_info,
+};
+
static struct amd64_family_type family_types[] = {
[K8_CPUS] = {
.ctl_name = "K8",
.f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
.f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
.max_mcs = 2,
- .ops = {
- .early_channel_count = k8_early_channel_count,
- .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow,
- .dbam_to_cs = k8_dbam_to_chip_select,
- }
+ .ops = k8_ops,
},
[F10_CPUS] = {
.ctl_name = "F10h",
.f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
.f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f1x_early_channel_count,
- .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
- .dbam_to_cs = f10_dbam_to_chip_select,
- }
+ .ops = f10_ops,
},
[F15_CPUS] = {
.ctl_name = "F15h",
.f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
.f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f1x_early_channel_count,
- .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
- .dbam_to_cs = f15_dbam_to_chip_select,
- }
+ .ops = f15_ops,
},
[F15_M30H_CPUS] = {
.ctl_name = "F15h_M30h",
.f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1,
.f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f1x_early_channel_count,
- .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
- .dbam_to_cs = f16_dbam_to_chip_select,
- }
+ .ops = f15m30_ops,
},
[F15_M60H_CPUS] = {
.ctl_name = "F15h_M60h",
.f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1,
.f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f1x_early_channel_count,
- .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
- .dbam_to_cs = f15_m60h_dbam_to_chip_select,
- }
+ .ops = f15m60_ops,
},
[F16_CPUS] = {
.ctl_name = "F16h",
.f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
.f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f1x_early_channel_count,
- .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
- .dbam_to_cs = f16_dbam_to_chip_select,
- }
+ .ops = f16_ops,
},
[F16_M30H_CPUS] = {
.ctl_name = "F16h_M30h",
.f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1,
.f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f1x_early_channel_count,
- .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
- .dbam_to_cs = f16_dbam_to_chip_select,
- }
+ .ops = f16_ops,
},
[F17_CPUS] = {
.ctl_name = "F17h",
.f0_id = PCI_DEVICE_ID_AMD_17H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
+ .ops = f17_ops,
},
[F17_M10H_CPUS] = {
.ctl_name = "F17h_M10h",
.f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
+ .ops = f17_ops,
},
[F17_M30H_CPUS] = {
.ctl_name = "F17h_M30h",
.f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6,
.max_mcs = 8,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
+ .ops = f17_ops,
},
[F17_M60H_CPUS] = {
.ctl_name = "F17h_M60h",
.f0_id = PCI_DEVICE_ID_AMD_17H_M60H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_17H_M60H_DF_F6,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
+ .ops = f17_ops,
},
[F17_M70H_CPUS] = {
.ctl_name = "F17h_M70h",
.f0_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F6,
.max_mcs = 2,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
+ .ops = f17_ops,
},
[F19_CPUS] = {
.ctl_name = "F19h",
.f0_id = PCI_DEVICE_ID_AMD_19H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_19H_DF_F6,
.max_mcs = 8,
- .ops = {
- .early_channel_count = f17_early_channel_count,
- .dbam_to_cs = f17_addr_mask_to_cs_size,
- }
+ .ops = f17_ops,
},
};

@@ -2899,10 +2933,13 @@ static inline void decode_bus_error(int node_id, struct mce *m)
* Currently, we can derive the channel number by looking at the 6th nibble in
* the instance_id. For example, instance_id=0xYXXXXX where Y is the channel
* number.
+ *
+ * csrow can be derived from the lower 3 bits of MCA_SYND value.
*/
-static int find_umc_channel(struct mce *m)
+static void update_umc_err_info(struct mce *m, struct err_info *err)
{
- return (m->ipid & GENMASK(31, 0)) >> 20;
+ err->channel = (m->ipid & GENMASK(31, 0)) >> 20;
+ err->csrow = m->synd & 0x7;
}

static void decode_umc_error(int node_id, struct mce *m)
@@ -2924,8 +2961,6 @@ static void decode_umc_error(int node_id, struct mce *m)
if (m->status & MCI_STATUS_DEFERRED)
ecc_type = 3;

- err.channel = find_umc_channel(m);
-
if (!(m->status & MCI_STATUS_SYNDV)) {
err.err_code = ERR_SYND;
goto log_error;
@@ -2940,7 +2975,7 @@ static void decode_umc_error(int node_id, struct mce *m)
err.err_code = ERR_CHANNEL;
}

- err.csrow = m->synd & 0x7;
+ pvt->ops->get_umc_err_info(m, &err);

if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
err.err_code = ERR_NORM_ADDR;
@@ -3106,8 +3141,9 @@ static void read_mc_regs(struct amd64_pvt *pvt)
edac_dbg(0, " TOP_MEM2 disabled\n");
}

- if (pvt->umc) {
- __read_mc_regs_df(pvt);
+ if (pvt->ops->get_mc_regs) {
+ pvt->ops->get_mc_regs(pvt);
+
amd64_read_pci_cfg(pvt->F0, DF_DHAR, &pvt->dhar);

goto skip;
@@ -3154,7 +3190,10 @@ static void read_mc_regs(struct amd64_pvt *pvt)
}

skip:
- read_dct_base_mask(pvt);
+ pvt->ops->prep_chip_select(pvt);
+
+ if (pvt->ops->get_base_mask)
+ pvt->ops->get_base_mask(pvt);

determine_memory_type(pvt);
edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]);
@@ -3277,9 +3316,6 @@ static int init_csrows(struct mem_ctl_info *mci)
int nr_pages = 0;
u32 val;

- if (pvt->umc)
- return init_csrows_df(mci);
-
amd64_read_pci_cfg(pvt->F3, NBCFG, &val);

pvt->nbcfg = val;
@@ -3703,6 +3739,20 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
return NULL;
}

+ /* ops required for all the families */
+ if (!pvt->ops->early_channel_count || !pvt->ops->dbam_to_cs ||
+ !pvt->ops->prep_chip_select || !pvt->ops->get_base_mask ||
+ !pvt->ops->display_misc_regs || !pvt->ops->populate_csrows) {
+ edac_dbg(1, "Common helper routines not defined.\n");
+ return NULL;
+ }
+
+ /* ops required for families 17h and later */
+ if (pvt->fam >= 0x17 && (!pvt->ops->get_umc_err_info || !pvt->ops->get_mc_regs)) {
+ edac_dbg(1, "Platform specific helper routines not defined.\n");
+ return NULL;
+ }
+
return fam_type;
}

@@ -3786,7 +3836,7 @@ static int init_one_instance(struct amd64_pvt *pvt)

setup_mci_misc_attrs(mci);

- if (init_csrows(mci))
+ if (pvt->ops->populate_csrows(mci))
mci->edac_cap = EDAC_FLAG_NONE;

ret = -ENODEV;
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 85aa820bc165..c7aeb2c46dc3 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -467,11 +467,17 @@ struct ecc_settings {
* functions and per device encoding/decoding logic.
*/
struct low_ops {
- int (*early_channel_count) (struct amd64_pvt *pvt);
+ int (*early_channel_count) (struct amd64_pvt *pvt);
void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr,
struct err_info *);
- int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct,
+ int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct,
unsigned cs_mode, int cs_mask_nr);
+ void (*prep_chip_select) (struct amd64_pvt *pvt);
+ void (*get_base_mask) (struct amd64_pvt *pvt);
+ void (*display_misc_regs) (struct amd64_pvt *pvt);
+ void (*get_mc_regs) (struct amd64_pvt *pvt);
+ int (*populate_csrows) (struct mem_ctl_info *mci);
+ void (*get_umc_err_info) (struct mce *m, struct err_info *err);
};

struct amd64_family_type {
--
2.25.1

Subject: [PATCH v5 1/5] x86/amd_nb: Add support for northbridges on Aldebaran

From: Muralidhara M K <[email protected]>

On newer systems the CPUs manage MCA errors reported from the GPUs.
Enumerate the GPU nodes with the AMD NB framework to support EDAC.

GPU nodes are enumerated in sequential order based on the PCI hierarchy,
and the first GPU node is assumed to have an "AMD Node ID" value after
CPU Nodes are fully populated.

Aldebaran is an AMD GPU, GPU drivers are part of the DRM framework
https://lists.freedesktop.org/archives/amd-gfx/2021-February/059694.html

Each Aldebaran GPU has 2 Data Fabrics, which are enumerated as 2 nodes.
With this implementation detail, the Data Fabric on the GPU nodes can be
accessed the same way as the Data Fabric on CPU nodes.

Special handling was necessary in northbridge enumeration as the
roots_per_misc value is different for GPU and CPU nodes.

Signed-off-by: Muralidhara M K <[email protected]>
Co-developed-by: Naveen Krishna Chatradhi <[email protected]>
Signed-off-by: Naveen Krishna Chatradhi <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
Changes since v4:
1. renamed struct name from nmap to nodemap
2. split amd_get_node_map and addressed minor comments

Changes since v3:
1. Use word "gpu" instead of "noncpu" in the patch
2. Do not create pci_dev_ids arrays for gpu nodes
3. Identify the gpu node start index from DF18F1 registers on the GPU nodes.
a. Export cpu node count and gpu start node id

Changes since v2:
1. Added Reviewed-by Yazen Ghannam

Changes since v1:
1. Modified the commit message and comments in the code
2. Squashed patch 1/7: "x86/amd_nb: Add Aldebaran device to PCI IDs"

arch/x86/include/asm/amd_nb.h | 9 ++
arch/x86/kernel/amd_nb.c | 150 ++++++++++++++++++++++++++++------
include/linux/pci_ids.h | 1 +
3 files changed, 136 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 455066a06f60..a78d088dae40 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -68,10 +68,17 @@ struct amd_northbridge {
struct threshold_bank *bank4;
};

+/* heterogeneous system node type map variables */
+struct amd_node_map {
+ u16 gpu_node_start_id;
+ u16 cpu_node_count;
+};
+
struct amd_northbridge_info {
u16 num;
u64 flags;
struct amd_northbridge *nb;
+ struct amd_node_map *nodemap;
};

#define AMD_NB_GART BIT(0)
@@ -83,6 +90,8 @@ struct amd_northbridge_info {
u16 amd_nb_num(void);
bool amd_nb_has_feature(unsigned int feature);
struct amd_northbridge *node_to_amd_nb(int node);
+u16 amd_gpu_node_start_id(void);
+u16 amd_cpu_node_count(void);

static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev)
{
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index c92c9c774c0e..0b495406ab9c 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -19,6 +19,7 @@
#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0
#define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480
#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630
+#define PCI_DEVICE_ID_AMD_ALDEBARAN_ROOT 0x14bb
#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
@@ -28,6 +29,7 @@
#define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5
#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d
#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e
+#define PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F4 0x14d4

/* Protect the PCI config register pairs used for SMN and DF indirect access. */
static DEFINE_MUTEX(smn_mutex);
@@ -40,6 +42,7 @@ static const struct pci_device_id amd_root_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_ROOT) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_ALDEBARAN_ROOT) },
{}
};

@@ -63,6 +66,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F3) },
{}
};

@@ -81,6 +85,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F4) },
{}
};

@@ -126,6 +131,66 @@ struct amd_northbridge *node_to_amd_nb(int node)
}
EXPORT_SYMBOL_GPL(node_to_amd_nb);

+/*
+ * GPU start index and CPU count values on an heterogeneous system,
+ * these values will be used by the AMD EDAC and MCE modules.
+ */
+u16 amd_gpu_node_start_id(void)
+{
+ return (amd_northbridges.nodemap) ?
+ amd_northbridges.nodemap->gpu_node_start_id : 0;
+}
+EXPORT_SYMBOL_GPL(amd_gpu_node_start_id);
+
+u16 amd_cpu_node_count(void)
+{
+ return (amd_northbridges.nodemap) ?
+ amd_northbridges.nodemap->cpu_node_count : amd_northbridges.num;
+}
+EXPORT_SYMBOL_GPL(amd_cpu_node_count);
+
+/* GPU Data Fabric ID Device 24 Function 1 */
+#define PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F1 0x14d1
+
+static struct pci_dev *get_gpu_df_f1(void)
+{
+ return pci_get_device(PCI_VENDOR_ID_AMD,
+ PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F1, NULL);
+}
+
+/* DF18xF1 registers on Aldebaran GPU */
+#define REG_LOCAL_NODE_TYPE_MAP 0x144
+#define REG_RMT_NODE_TYPE_MAP 0x148
+
+/*
+ * Newer AMD CPUs and GPUs whose data fabrics can be connected via custom xGMI
+ * links, comes with registers to gather local and remote node type map info.
+ *
+ * "Local Node Type" refers to nodes with the same type as that from which the
+ * register is read, and "Remote Node Type" refers to nodes with a different type.
+ *
+ * This function, reads the registers from GPU DF function 1.
+ * Hence, local nodes are GPU and remote nodes are CPUs.
+ */
+static int amd_get_node_map(struct pci_dev *pdev)
+{
+ struct amd_node_map *nodemap;
+ u32 tmp;
+
+ nodemap = kmalloc(sizeof(*nodemap), GFP_KERNEL);
+ if (!nodemap)
+ return -ENOMEM;
+
+ pci_read_config_dword(pdev, REG_LOCAL_NODE_TYPE_MAP, &tmp);
+ nodemap->gpu_node_start_id = tmp & 0xFFF;
+
+ pci_read_config_dword(pdev, REG_RMT_NODE_TYPE_MAP, &tmp);
+ nodemap->cpu_node_count = tmp >> 16 & 0xFFF;
+
+ amd_northbridges.nodemap = nodemap;
+ return 0;
+}
+
static struct pci_dev *next_northbridge(struct pci_dev *dev,
const struct pci_device_id *ids)
{
@@ -230,17 +295,38 @@ int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo)
}
EXPORT_SYMBOL_GPL(amd_df_indirect_read);

+struct pci_dev *get_root_devs(struct pci_dev *root,
+ const struct pci_device_id *root_ids,
+ u16 roots_per_misc)
+{
+ u16 j;
+
+ /*
+ * If there are more PCI root devices than data fabric/
+ * system management network interfaces, then the (N)
+ * PCI roots per DF/SMN interface are functionally the
+ * same (for DF/SMN access) and N-1 are redundant. N-1
+ * PCI roots should be skipped per DF/SMN interface so
+ * the following DF/SMN interfaces get mapped to
+ * correct PCI roots.
+ */
+ for (j = 0; j < roots_per_misc; j++)
+ root = next_northbridge(root, root_ids);
+
+ return root;
+}
+
int amd_cache_northbridges(void)
{
const struct pci_device_id *misc_ids = amd_nb_misc_ids;
const struct pci_device_id *link_ids = amd_nb_link_ids;
const struct pci_device_id *root_ids = amd_root_ids;
- struct pci_dev *root, *misc, *link;
+ struct pci_dev *root, *misc, *link, *dff1;
struct amd_northbridge *nb;
- u16 roots_per_misc = 0;
- u16 misc_count = 0;
- u16 root_count = 0;
- u16 i, j;
+ u16 roots_per_misc = 0, gpu_roots_per_misc = 0;
+ u16 misc_count = 0, gpu_misc_count = 0;
+ u16 root_count = 0, gpu_root_count = 0;
+ u16 i;

if (amd_northbridges.num)
return 0;
@@ -252,15 +338,23 @@ int amd_cache_northbridges(void)
}

misc = NULL;
- while ((misc = next_northbridge(misc, misc_ids)) != NULL)
- misc_count++;
+ while ((misc = next_northbridge(misc, misc_ids)) != NULL) {
+ if (misc->device == PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F3)
+ gpu_misc_count++;
+ else
+ misc_count++;
+ }

if (!misc_count)
return -ENODEV;

root = NULL;
- while ((root = next_northbridge(root, root_ids)) != NULL)
- root_count++;
+ while ((root = next_northbridge(root, root_ids)) != NULL) {
+ if (root->device == PCI_DEVICE_ID_AMD_ALDEBARAN_ROOT)
+ gpu_root_count++;
+ else
+ root_count++;
+ }

if (root_count) {
roots_per_misc = root_count / misc_count;
@@ -275,33 +369,41 @@ int amd_cache_northbridges(void)
}
}

- nb = kcalloc(misc_count, sizeof(struct amd_northbridge), GFP_KERNEL);
+ /*
+ * The number of miscs, roots and roots_per_misc might vary on different
+ * nodes of a heterogeneous system.
+ * Calculate roots_per_misc accordingly in order to skip the redundant
+ * roots and map the DF/SMN interfaces to correct PCI roots.
+ */
+ if (gpu_root_count && gpu_misc_count) {
+ dff1 = get_gpu_df_f1();
+ if (!dff1) {
+ pr_debug("Failed to gather GPU node info.\n");
+ return -ENODEV;
+ }
+
+ if (amd_get_node_map(dff1))
+ return -ENOMEM;
+
+ gpu_roots_per_misc = gpu_root_count / gpu_misc_count;
+ }
+
+ amd_northbridges.num = misc_count + gpu_misc_count;
+ nb = kcalloc(amd_northbridges.num, sizeof(struct amd_northbridge), GFP_KERNEL);
if (!nb)
return -ENOMEM;

amd_northbridges.nb = nb;
- amd_northbridges.num = misc_count;

link = misc = root = NULL;
for (i = 0; i < amd_northbridges.num; i++) {
+ u16 misc_roots = i < misc_count ? roots_per_misc : gpu_roots_per_misc;
node_to_amd_nb(i)->root = root =
- next_northbridge(root, root_ids);
+ get_root_devs(root, root_ids, misc_roots);
node_to_amd_nb(i)->misc = misc =
next_northbridge(misc, misc_ids);
node_to_amd_nb(i)->link = link =
next_northbridge(link, link_ids);
-
- /*
- * If there are more PCI root devices than data fabric/
- * system management network interfaces, then the (N)
- * PCI roots per DF/SMN interface are functionally the
- * same (for DF/SMN access) and N-1 are redundant. N-1
- * PCI roots should be skipped per DF/SMN interface so
- * the following DF/SMN interfaces get mapped to
- * correct PCI roots.
- */
- for (j = 1; j < roots_per_misc; j++)
- root = next_northbridge(root, root_ids);
}

if (amd_gart_present())
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 011f2f1ea5bb..b3a0ec29dbd6 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -557,6 +557,7 @@
#define PCI_DEVICE_ID_AMD_19H_DF_F3 0x1653
#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F3 0x167c
#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F3 0x166d
+#define PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F3 0x14d3
#define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703
#define PCI_DEVICE_ID_AMD_LANCE 0x2000
#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
--
2.25.1

Subject: [PATCH v5 2/5] EDAC/mce_amd: Extract node id from MCA_IPID

On SMCA banks of the GPU nodes, the node id information is
available in register MCA_IPID[47:44](InstanceIdHi).

Convert the hardware node ID to a value used by Linux
where GPU nodes are sequencially after the CPU nodes.

Co-developed-by: Muralidhara M K <[email protected]>
Signed-off-by: Muralidhara M K <[email protected]>
Signed-off-by: Naveen Krishna Chatradhi <[email protected]>
Reviewed-by: Yazen Ghannam <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
Changes since v4:
Add reviewed by Yazen

Changes since v3:
1. Use APIs from amd_nb to identify the gpu_node_start_id and cpu_node_count.
Which is required to map the hardware node id to node id enumerated by Linux.

Changes since v2:
1. Modified subject and commit message
2. Added Reviewed by Yazen Ghannam

Changes since v1:
1. Modified the commit message
2. rearranged the conditions before calling decode_dram_ecc()

drivers/edac/mce_amd.c | 24 ++++++++++++++++++++++--
1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 67dbf4c31271..af6caa76adc7 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -2,6 +2,7 @@
#include <linux/module.h>
#include <linux/slab.h>

+#include <asm/amd_nb.h>
#include <asm/cpu.h>

#include "mce_amd.h"
@@ -1072,8 +1073,27 @@ static void decode_smca_error(struct mce *m)
if (xec < smca_mce_descs[bank_type].num_descs)
pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);

- if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
- decode_dram_ecc(topology_die_id(m->extcpu), m);
+ if (xec == 0 && decode_dram_ecc) {
+ int node_id = 0;
+
+ if (bank_type == SMCA_UMC) {
+ node_id = topology_die_id(m->extcpu);
+ } else if (bank_type == SMCA_UMC_V2) {
+ /*
+ * SMCA_UMC_V2 exists on GPU nodes, extract the node id
+ * from register MCA_IPID[47:44](InstanceIdHi).
+ * The InstanceIdHi field represents the instance ID of the GPU.
+ * Which needs to be mapped to a value used by Linux,
+ * where GPU nodes are simply numerically after the CPU nodes.
+ */
+ node_id = ((m->ipid >> 44) & 0xF) -
+ amd_gpu_node_start_id() + amd_cpu_node_count();
+ } else {
+ return;
+ }
+
+ decode_dram_ecc(node_id, m);
+ }
}

static inline void amd_decode_err_code(u16 ec)
--
2.25.1

2021-10-27 21:34:38

by Yazen Ghannam

[permalink] [raw]
Subject: Re: [PATCH v5 1/5] x86/amd_nb: Add support for northbridges on Aldebaran

On Mon, Oct 25, 2021 at 08:20:14PM +0530, Naveen Krishna Chatradhi wrote:
...
> +/* GPU Data Fabric ID Device 24 Function 1 */
> +#define PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F1 0x14d1
> +
> +static struct pci_dev *get_gpu_df_f1(void)
> +{
> + return pci_get_device(PCI_VENDOR_ID_AMD,
> + PCI_DEVICE_ID_AMD_ALDEBARAN_DF_F1, NULL);
> +}
> +
> +/* DF18xF1 registers on Aldebaran GPU */
> +#define REG_LOCAL_NODE_TYPE_MAP 0x144
> +#define REG_RMT_NODE_TYPE_MAP 0x148
> +
> +/*
> + * Newer AMD CPUs and GPUs whose data fabrics can be connected via custom xGMI
> + * links, comes with registers to gather local and remote node type map info.
> + *
> + * "Local Node Type" refers to nodes with the same type as that from which the
> + * register is read, and "Remote Node Type" refers to nodes with a different type.
> + *
> + * This function, reads the registers from GPU DF function 1.
> + * Hence, local nodes are GPU and remote nodes are CPUs.
> + */
> +static int amd_get_node_map(struct pci_dev *pdev)
> +{
> + struct amd_node_map *nodemap;
> + u32 tmp;
> +
> + nodemap = kmalloc(sizeof(*nodemap), GFP_KERNEL);
> + if (!nodemap)
> + return -ENOMEM;
> +
> + pci_read_config_dword(pdev, REG_LOCAL_NODE_TYPE_MAP, &tmp);
> + nodemap->gpu_node_start_id = tmp & 0xFFF;
> +
> + pci_read_config_dword(pdev, REG_RMT_NODE_TYPE_MAP, &tmp);
> + nodemap->cpu_node_count = tmp >> 16 & 0xFFF;
> +
> + amd_northbridges.nodemap = nodemap;
> + return 0;
> +}
> +
...
> int amd_cache_northbridges(void)
> {
> const struct pci_device_id *misc_ids = amd_nb_misc_ids;
> const struct pci_device_id *link_ids = amd_nb_link_ids;
> const struct pci_device_id *root_ids = amd_root_ids;
> - struct pci_dev *root, *misc, *link;
> + struct pci_dev *root, *misc, *link, *dff1;
...
> + /*
> + * The number of miscs, roots and roots_per_misc might vary on different
> + * nodes of a heterogeneous system.
> + * Calculate roots_per_misc accordingly in order to skip the redundant
> + * roots and map the DF/SMN interfaces to correct PCI roots.
> + */
> + if (gpu_root_count && gpu_misc_count) {
> + dff1 = get_gpu_df_f1();

dff1 is only used in this section, so it can be declared here.

> + if (!dff1) {
> + pr_debug("Failed to gather GPU node info.\n");

This message doesn't make sense to me. The failure is that a particular PCI
device was not found. Gathering node info hasn't even been attempted yet.

> + return -ENODEV;
> + }
> +
> + if (amd_get_node_map(dff1))
> + return -ENOMEM;
> +

Also, why do these functions need to be split up? If it was because of the
return values, then you could have done the following.

int ret = amd_get_node_map();

if (ret)
return ret;

Thanks,
Yazen

2021-10-27 21:35:57

by Yazen Ghannam

[permalink] [raw]
Subject: Re: [PATCH v5 4/5] EDAC/amd64: Move struct fam_type into amd64_pvt structure

On Mon, Oct 25, 2021 at 08:20:17PM +0530, Naveen Krishna Chatradhi wrote:
> From: Muralidhara M K <[email protected]>
>
> On heterogeneous systems, the GPU nodes are probed after the CPU
> nodes and will overwrites the family type set by CPU nodes.
>
> Moving struct fam_type to struct amd64_pvt, instead of using fam_type
> as a global variable.
>
> Signed-off-by: Muralidhara M K <[email protected]>
> Signed-off-by: Naveen Krishna Chatradhi <[email protected]>
> ---

This looks good to me.

Reviewed-by: Yazen Ghannam <[email protected]>

Thanks,
Yazen

2021-10-27 22:14:16

by Yazen Ghannam

[permalink] [raw]
Subject: Re: [PATCH v5 3/5] EDAC/amd64: Extend family ops functions

On Mon, Oct 25, 2021 at 08:20:16PM +0530, Naveen Krishna Chatradhi wrote:
...
> @@ -3106,8 +3141,9 @@ static void read_mc_regs(struct amd64_pvt *pvt)
> edac_dbg(0, " TOP_MEM2 disabled\n");
> }
>
> - if (pvt->umc) {
> - __read_mc_regs_df(pvt);
> + if (pvt->ops->get_mc_regs) {
> + pvt->ops->get_mc_regs(pvt);

This entire read_mc_regs() function can be split up like how you've done the
others. So get_mc_regs() is set for all family types. The common code can be
split out into another function.

> +
> amd64_read_pci_cfg(pvt->F0, DF_DHAR, &pvt->dhar);
>
> goto skip;
> @@ -3154,7 +3190,10 @@ static void read_mc_regs(struct amd64_pvt *pvt)
> }
>
> skip:
> - read_dct_base_mask(pvt);
> + pvt->ops->prep_chip_select(pvt);
> +
> + if (pvt->ops->get_base_mask)

This check is redundant since it's done below in per_family_init().

...
> @@ -3703,6 +3739,20 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
> return NULL;
> }
>
> + /* ops required for all the families */
> + if (!pvt->ops->early_channel_count || !pvt->ops->dbam_to_cs ||
> + !pvt->ops->prep_chip_select || !pvt->ops->get_base_mask ||
> + !pvt->ops->display_misc_regs || !pvt->ops->populate_csrows) {
> + edac_dbg(1, "Common helper routines not defined.\n");
> + return NULL;
> + }
> +
> + /* ops required for families 17h and later */
> + if (pvt->fam >= 0x17 && (!pvt->ops->get_umc_err_info || !pvt->ops->get_mc_regs)) {
> + edac_dbg(1, "Platform specific helper routines not defined.\n");
> + return NULL;
> + }
> +
> return fam_type;
> }
>

Thanks,
Yazen

2021-10-27 22:34:48

by Yazen Ghannam

[permalink] [raw]
Subject: Re: [PATCH v5 5/5] EDAC/amd64: Enumerate memory on Aldebaran GPU nodes

On Mon, Oct 25, 2021 at 08:20:18PM +0530, Naveen Krishna Chatradhi wrote:
...
> @@ -3726,10 +3944,21 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
> pvt->ops = &family_types[F17_M70H_CPUS].ops;
> pvt->fam_type->ctl_name = "F19h_M20h";
> break;
> + } else if (pvt->model >= 0x30 && pvt->model <= 0x3f) {
> + if (pvt->mc_node_id >= amd_cpu_node_count()) {
> + pvt->fam_type = &family_types[ALDEBARAN_GPUS];
> + pvt->ops = &family_types[ALDEBARAN_GPUS].ops;
> + pvt->is_gpu = true;
> + } else {
> + pvt->fam_type = &family_types[F19_CPUS];
> + pvt->ops = &family_types[F19_CPUS].ops;
> + family_types[F19_CPUS].ctl_name = "F19h_M30h";
> + }
> + } else {
> + pvt->fam_type = &family_types[F19_CPUS];
> + pvt->ops = &family_types[F19_CPUS].ops;
> + family_types[F19_CPUS].ctl_name = "F19h";
> }
> - pvt->fam_type = &family_types[F19_CPUS];
> - pvt->ops = &family_types[F19_CPUS].ops;
> - family_types[F19_CPUS].ctl_name = "F19h";
> break;
>

Why move these lines into the else clause above?

Thanks,
Yazen