2023-05-15 11:54:06

by M K, Muralidhara

[permalink] [raw]
Subject: [PATCH 0/5] AMD64 EDAC GPU Updates

From: Muralidhara M K <[email protected]>

This set adds GPU support to AMD64 EDAC starting with the MI200
(Aldebaran) series.
The AMD Instinct™ MI200 series accelerators are the data center GPUs.

Patch 1:
Adds PCI IDs without changing the existing AMD NB code.

Patch 2:
Recognizes the new UMC SMCA bank type in the decoder module as just
another UMC bank. Decode details are done in the AMD64 EDAC module.

Patch 3:
Adds code documentation on how the GPU memory is laid out in EDAC.

Patch 4:
Adds the bulk of the GPU-specific code.

Patch 5:
Handle differences in "AMD Node" enumeration for GPU dies.

Muralidhara M K (2):
EDAC/amd64: Document heterogeneous system enumeration
EDAC/amd64: Add support for AMD heterogeneous Family 19h Model 30h-3Fh

Yazen Ghannam (3):
x86/amd_nb: Add MI200 PCI IDs
x86/MCE/AMD, EDAC/mce_amd: Decode UMC_V2 ECC errors
EDAC/amd64: Cache and use GPU node map

Documentation/driver-api/edac.rst | 120 ++++++++++
arch/x86/kernel/amd_nb.c | 5 +
arch/x86/kernel/cpu/mce/amd.c | 6 +-
drivers/edac/amd64_edac.c | 386 +++++++++++++++++++++++++++---
drivers/edac/amd64_edac.h | 1 +
drivers/edac/mce_amd.c | 3 +-
include/linux/pci_ids.h | 1 +
7 files changed, 488 insertions(+), 34 deletions(-)

--
2.25.1



2023-05-15 11:54:55

by M K, Muralidhara

[permalink] [raw]
Subject: [PATCH 4/5] EDAC/amd64: Add support for AMD heterogeneous Family 19h Model 30h-3Fh

From: Muralidhara M K <[email protected]>

AMD Family 19h Model 30h-3Fh systems can be connected to AMD MI200
accelerator/GPU devices such that the CPU and GPU data fabrics are
connected together. In this configuration, the CPU manages error logging
and reporting for MCA banks located on the GPUs. This includes HBM memory
errors reported from Unified Memory Controllers (UMCs) on the GPUs.
The GPU memory errors are handled like CPU memory errors.

AMD CPU UMC support in EDAC can be re-used for GPU UMC support. However,
keeping them separate means drastic changes in one path (e.g. to support
newer products) should have less impact on the other path.

Also, simplify the "gpu_" helper functions where possible. GPU product
configuration, like memory type and channel count, is fixed compared to
CPU products.

GPU UMCs each have four physical connections (phys) connected to eight
channels. There is a single "chip select". This differs from CPUs where
each UMC has one physical connection connected to one channel, and each
channel has up to four "chip selects".

Enumerate each UMC "phy" as an EDAC CSROW, since there is only a single
chip select for each physical connection. This is similar to how a CPU
UMC "phy" is enumerated as an EDAC CHANNEL, since there is only a single
channel for each physical connection.

Signed-off-by: Muralidhara M K <[email protected]>
Co-developed-by: Naveen Krishna Chatradhi <[email protected]>
Signed-off-by: Naveen Krishna Chatradhi <[email protected]>
Co-developed-by: Yazen Ghannam <[email protected]>
Signed-off-by: Yazen Ghannam <[email protected]>
---
drivers/edac/amd64_edac.c | 310 ++++++++++++++++++++++++++++++++++----
1 file changed, 279 insertions(+), 31 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 5c4292e65b96..28155b01f144 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1426,12 +1426,47 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
return cs_mode;
}

+static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
+ int csrow_nr, int dimm)
+{
+ u32 msb, weight, num_zero_bits;
+ u32 addr_mask_deinterleaved;
+ int size = 0;
+
+ /*
+ * The number of zero bits in the mask is equal to the number of bits
+ * in a full mask minus the number of bits in the current mask.
+ *
+ * The MSB is the number of bits in the full mask because BIT[0] is
+ * always 0.
+ *
+ * In the special 3 Rank interleaving case, a single bit is flipped
+ * without swapping with the most significant bit. This can be handled
+ * by keeping the MSB where it is and ignoring the single zero bit.
+ */
+ msb = fls(addr_mask_orig) - 1;
+ weight = hweight_long(addr_mask_orig);
+ num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
+
+ /* Take the number of zero bits off from the top of the mask. */
+ addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
+
+ edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
+ edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
+ edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
+
+ /* Register [31:1] = Address [39:9]. Size is in kBs here. */
+ size = (addr_mask_deinterleaved >> 2) + 1;
+
+ /* Return size in MBs. */
+ return size >> 10;
+}
+
static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
- u32 addr_mask_orig, addr_mask_deinterleaved;
- u32 msb, weight, num_zero_bits;
int cs_mask_nr = csrow_nr;
+ u32 addr_mask_orig;
int dimm, size = 0;

/* No Chip Selects are enabled. */
@@ -1475,33 +1510,7 @@ static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
else
addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr];

- /*
- * The number of zero bits in the mask is equal to the number of bits
- * in a full mask minus the number of bits in the current mask.
- *
- * The MSB is the number of bits in the full mask because BIT[0] is
- * always 0.
- *
- * In the special 3 Rank interleaving case, a single bit is flipped
- * without swapping with the most significant bit. This can be handled
- * by keeping the MSB where it is and ignoring the single zero bit.
- */
- msb = fls(addr_mask_orig) - 1;
- weight = hweight_long(addr_mask_orig);
- num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
-
- /* Take the number of zero bits off from the top of the mask. */
- addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
-
- edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
- edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
- edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
-
- /* Register [31:1] = Address [39:9]. Size is in kBs here. */
- size = (addr_mask_deinterleaved >> 2) + 1;
-
- /* Return size in MBs. */
- return size >> 10;
+ return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, dimm);
}

static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
@@ -3675,6 +3684,221 @@ static int umc_hw_info_get(struct amd64_pvt *pvt)
return 0;
}

+/*
+ * The CPUs have one channel per UMC, so UMC number is equivalent to a
+ * channel number. The GPUs have 8 channels per UMC, so the UMC number no
+ * longer works as a channel number.
+ *
+ * The channel number within a GPU UMC is given in MCA_IPID[15:12].
+ * However, the IDs are split such that two UMC values go to one UMC, and
+ * the channel numbers are split in two groups of four.
+ *
+ * Refer to comment on gpu_get_umc_base().
+ *
+ * For example,
+ * UMC0 CH[3:0] = 0x0005[3:0]000
+ * UMC0 CH[7:4] = 0x0015[3:0]000
+ * UMC1 CH[3:0] = 0x0025[3:0]000
+ * UMC1 CH[7:4] = 0x0035[3:0]000
+ */
+static void gpu_get_err_info(struct mce *m, struct err_info *err)
+{
+ u8 ch = (m->ipid & GENMASK(31, 0)) >> 20;
+ u8 phy = ((m->ipid >> 12) & 0xf);
+
+ err->channel = ch % 2 ? phy + 4 : phy;
+ err->csrow = phy;
+}
+
+static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
+ unsigned int cs_mode, int csrow_nr)
+{
+ u32 addr_mask_orig = pvt->csels[umc].csmasks[csrow_nr];
+
+ return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, csrow_nr >> 1);
+}
+
+static void gpu_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
+{
+ int size, cs_mode, cs = 0;
+
+ edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl);
+
+ cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY;
+
+ for_each_chip_select(cs, ctrl, pvt) {
+ size = gpu_addr_mask_to_cs_size(pvt, ctrl, cs_mode, cs);
+ amd64_info(EDAC_MC ": %d: %5dMB\n", cs, size);
+ }
+}
+
+static void gpu_dump_misc_regs(struct amd64_pvt *pvt)
+{
+ struct amd64_umc *umc;
+ u32 i;
+
+ for_each_umc(i) {
+ umc = &pvt->umc[i];
+
+ edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, umc->umc_cfg);
+ edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, umc->sdp_ctrl);
+ edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, umc->ecc_ctrl);
+ edac_dbg(1, "UMC%d All HBMs support ECC: yes\n", i);
+
+ gpu_debug_display_dimm_sizes(pvt, i);
+ }
+}
+
+static u32 gpu_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
+{
+ u32 nr_pages;
+ int cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY;
+
+ nr_pages = gpu_addr_mask_to_cs_size(pvt, dct, cs_mode, csrow_nr);
+ nr_pages <<= 20 - PAGE_SHIFT;
+
+ edac_dbg(0, "csrow: %d, channel: %d\n", csrow_nr, dct);
+ edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);
+
+ return nr_pages;
+}
+
+static void gpu_init_csrows(struct mem_ctl_info *mci)
+{
+ struct amd64_pvt *pvt = mci->pvt_info;
+ struct dimm_info *dimm;
+ u8 umc, cs;
+
+ for_each_umc(umc) {
+ for_each_chip_select(cs, umc, pvt) {
+ if (!csrow_enabled(cs, umc, pvt))
+ continue;
+
+ dimm = mci->csrows[umc]->channels[cs]->dimm;
+
+ edac_dbg(1, "MC node: %d, csrow: %d\n",
+ pvt->mc_node_id, cs);
+
+ dimm->nr_pages = gpu_get_csrow_nr_pages(pvt, umc, cs);
+ dimm->edac_mode = EDAC_SECDED;
+ dimm->mtype = MEM_HBM2;
+ dimm->dtype = DEV_X16;
+ dimm->grain = 64;
+ }
+ }
+}
+
+static void gpu_setup_mci_misc_attrs(struct mem_ctl_info *mci)
+{
+ struct amd64_pvt *pvt = mci->pvt_info;
+
+ mci->mtype_cap = MEM_FLAG_HBM2;
+ mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+
+ mci->edac_cap = EDAC_FLAG_EC;
+ mci->mod_name = EDAC_MOD_STR;
+ mci->ctl_name = pvt->ctl_name;
+ mci->dev_name = pci_name(pvt->F3);
+ mci->ctl_page_to_phys = NULL;
+
+ gpu_init_csrows(mci);
+}
+
+/* ECC is enabled by default on GPU nodes */
+static bool gpu_ecc_enabled(struct amd64_pvt *pvt)
+{
+ return true;
+}
+
+static inline u32 gpu_get_umc_base(u8 umc, u8 channel)
+{
+ /*
+ * On CPUs, there is one channel per UMC, so UMC numbering equals
+ * channel numbering. On GPUs, there are eight channels per UMC,
+ * so the channel numbering is different from UMC numbering.
+ *
+ * On CPU nodes channels are selected in 6th nibble
+ * UMC chY[3:0]= [(chY*2 + 1) : (chY*2)]50000;
+ *
+ * On GPU nodes channels are selected in 3rd nibble
+ * HBM chX[3:0]= [Y ]5X[3:0]000;
+ * HBM chX[7:4]= [Y+1]5X[3:0]000
+ */
+ umc *= 2;
+
+ if (channel >= 4)
+ umc++;
+
+ return 0x50000 + (umc << 20) + ((channel % 4) << 12);
+}
+
+static void gpu_read_mc_regs(struct amd64_pvt *pvt)
+{
+ u8 nid = pvt->mc_node_id;
+ struct amd64_umc *umc;
+ u32 i, umc_base;
+
+ /* Read registers from each UMC */
+ for_each_umc(i) {
+ umc_base = gpu_get_umc_base(i, 0);
+ umc = &pvt->umc[i];
+
+ amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
+ amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl);
+ amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
+ }
+}
+
+static void gpu_read_base_mask(struct amd64_pvt *pvt)
+{
+ u32 base_reg, mask_reg;
+ u32 *base, *mask;
+ int umc, cs;
+
+ for_each_umc(umc) {
+ for_each_chip_select(cs, umc, pvt) {
+ base_reg = gpu_get_umc_base(umc, cs) + UMCCH_BASE_ADDR;
+ base = &pvt->csels[umc].csbases[cs];
+
+ if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) {
+ edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *base, base_reg);
+ }
+
+ mask_reg = gpu_get_umc_base(umc, cs) + UMCCH_ADDR_MASK;
+ mask = &pvt->csels[umc].csmasks[cs];
+
+ if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) {
+ edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *mask, mask_reg);
+ }
+ }
+ }
+}
+
+static void gpu_prep_chip_selects(struct amd64_pvt *pvt)
+{
+ int umc;
+
+ for_each_umc(umc) {
+ pvt->csels[umc].b_cnt = 8;
+ pvt->csels[umc].m_cnt = 8;
+ }
+}
+
+static int gpu_hw_info_get(struct amd64_pvt *pvt)
+{
+ pvt->umc = kcalloc(pvt->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL);
+ if (!pvt->umc)
+ return -ENOMEM;
+
+ gpu_prep_chip_selects(pvt);
+ gpu_read_base_mask(pvt);
+ gpu_read_mc_regs(pvt);
+
+ return 0;
+}
+
static void hw_info_put(struct amd64_pvt *pvt)
{
pci_dev_put(pvt->F1);
@@ -3690,6 +3914,14 @@ static struct low_ops umc_ops = {
.get_err_info = umc_get_err_info,
};

+static struct low_ops gpu_ops = {
+ .hw_info_get = gpu_hw_info_get,
+ .ecc_enabled = gpu_ecc_enabled,
+ .setup_mci_misc_attrs = gpu_setup_mci_misc_attrs,
+ .dump_misc_regs = gpu_dump_misc_regs,
+ .get_err_info = gpu_get_err_info,
+};
+
/* Use Family 16h versions for defaults and adjust as needed below. */
static struct low_ops dct_ops = {
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -3813,6 +4045,16 @@ static int per_family_init(struct amd64_pvt *pvt)
case 0x20 ... 0x2f:
pvt->ctl_name = "F19h_M20h";
break;
+ case 0x30 ... 0x3f:
+ if (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) {
+ pvt->ctl_name = "MI200";
+ pvt->max_mcs = 4;
+ pvt->ops = &gpu_ops;
+ } else {
+ pvt->ctl_name = "F19h_M30h";
+ pvt->max_mcs = 8;
+ }
+ break;
case 0x50 ... 0x5f:
pvt->ctl_name = "F19h_M50h";
break;
@@ -3846,11 +4088,17 @@ static int init_one_instance(struct amd64_pvt *pvt)
struct edac_mc_layer layers[2];
int ret = -ENOMEM;

+ /*
+ * For Heterogeneous family EDAC CHIP_SELECT and CHANNEL layers should
+ * be swapped to fit into the layers.
+ */
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
- layers[0].size = pvt->csels[0].b_cnt;
+ layers[0].size = (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) ?
+ pvt->max_mcs : pvt->csels[0].b_cnt;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
- layers[1].size = pvt->max_mcs;
+ layers[1].size = (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) ?
+ pvt->csels[0].b_cnt : pvt->max_mcs;
layers[1].is_virt_csrow = false;

mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0);
--
2.25.1


2023-05-15 11:55:08

by M K, Muralidhara

[permalink] [raw]
Subject: [PATCH 3/5] EDAC/amd64: Document heterogeneous system enumeration

From: Muralidhara M K <[email protected]>

Document High Bandwidth Memory (HBM) and AMD heterogeneous system
topology and enumeration.

[ bp: Simplify and de-marketize, unify, massage. ]

Signed-off-by: Muralidhara M K <[email protected]>
Co-developed-by: Naveen Krishna Chatradhi <[email protected]>
Signed-off-by: Naveen Krishna Chatradhi <[email protected]>
Signed-off-by: Yazen Ghannam <[email protected]>
Signed-off-by: Borislav Petkov (AMD) <[email protected]>
---
Documentation/driver-api/edac.rst | 120 ++++++++++++++++++++++++++++++
1 file changed, 120 insertions(+)

diff --git a/Documentation/driver-api/edac.rst b/Documentation/driver-api/edac.rst
index b8c742aa0a71..f4f044b95c4f 100644
--- a/Documentation/driver-api/edac.rst
+++ b/Documentation/driver-api/edac.rst
@@ -106,6 +106,16 @@ will occupy those chip-select rows.
This term is avoided because it is unclear when needing to distinguish
between chip-select rows and socket sets.

+* High Bandwidth Memory (HBM)
+
+HBM is a new memory type with low power consumption and ultra-wide
+communication lanes. It uses vertically stacked memory chips (DRAM dies)
+interconnected by microscopic wires called "through-silicon vias," or
+TSVs.
+
+Several stacks of HBM chips connect to the CPU or GPU through an ultra-fast
+interconnect called the "interposer". Therefore, HBM's characteristics
+are nearly indistinguishable from on-chip integrated RAM.

Memory Controllers
------------------
@@ -176,3 +186,113 @@ nodes::
the L1 and L2 directories would be "edac_device_block's"

.. kernel-doc:: drivers/edac/edac_device.h
+
+
+Heterogeneous system support
+----------------------------
+
+An AMD heterogeneous system is built by connecting the data fabrics of
+both CPUs and GPUs via custom xGMI links. Thus, the data fabric on the
+GPU nodes can be accessed the same way as the data fabric on CPU nodes.
+
+The MI200 accelerators are data center GPUs. They have 2 data fabrics,
+and each GPU data fabric contains four Unified Memory Controllers (UMC).
+Each UMC contains eight channels. Each UMC channel controls one 128-bit
+HBM2e (2GB) channel (equivalent to 8 X 2GB ranks). This creates a total
+of 4096-bits of DRAM data bus.
+
+While the UMC is interfacing a 16GB (8high X 2GB DRAM) HBM stack, each UMC
+channel is interfacing 2GB of DRAM (represented as rank).
+
+Memory controllers on AMD GPU nodes can be represented in EDAC thusly:
+
+ GPU DF / GPU Node -> EDAC MC
+ GPU UMC -> EDAC CSROW
+ GPU UMC channel -> EDAC CHANNEL
+
+For example: a heterogeneous system with 1 AMD CPU is connected to
+4 MI200 (Aldebaran) GPUs using xGMI.
+
+Some more heterogeneous hardware details:
+
+- The CPU UMC (Unified Memory Controller) is mostly the same as the GPU UMC.
+ They have chip selects (csrows) and channels. However, the layouts are different
+ for performance, physical layout, or other reasons.
+- CPU UMCs use 1 channel, In this case UMC = EDAC channel. This follows the
+ marketing speak. CPU has X memory channels, etc.
+- CPU UMCs use up to 4 chip selects, So UMC chip select = EDAC CSROW.
+- GPU UMCs use 1 chip select, So UMC = EDAC CSROW.
+- GPU UMCs use 8 channels, So UMC channel = EDAC channel.
+
+The EDAC subsystem provides a mechanism to handle AMD heterogeneous
+systems by calling system specific ops for both CPUs and GPUs.
+
+AMD GPU nodes are enumerated in sequential order based on the PCI
+hierarchy, and the first GPU node is assumed to have a Node ID value
+following those of the CPU nodes after latter are fully populated::
+
+ $ ls /sys/devices/system/edac/mc/
+ mc0 - CPU MC node 0
+ mc1 |
+ mc2 |- GPU card[0] => node 0(mc1), node 1(mc2)
+ mc3 |
+ mc4 |- GPU card[1] => node 0(mc3), node 1(mc4)
+ mc5 |
+ mc6 |- GPU card[2] => node 0(mc5), node 1(mc6)
+ mc7 |
+ mc8 |- GPU card[3] => node 0(mc7), node 1(mc8)
+
+For example, a heterogeneous system with one AMD CPU is connected to
+four MI200 (Aldebaran) GPUs using xGMI. This topology can be represented
+via the following sysfs entries::
+
+ /sys/devices/system/edac/mc/..
+
+ CPU # CPU node
+ ├── mc 0
+
+ GPU Nodes are enumerated sequentially after CPU nodes have been populated
+ GPU card 1 # Each MI200 GPU has 2 nodes/mcs
+ ├── mc 1 # GPU node 0 == mc1, Each MC node has 4 UMCs/CSROWs
+ │   ├── csrow 0 # UMC 0
+ │   │   ├── channel 0 # Each UMC has 8 channels
+ │   │   ├── channel 1 # size of each channel is 2 GB, so each UMC has 16 GB
+ │   │   ├── channel 2
+ │   │   ├── channel 3
+ │   │   ├── channel 4
+ │   │   ├── channel 5
+ │   │   ├── channel 6
+ │   │   ├── channel 7
+ │   ├── csrow 1 # UMC 1
+ │   │   ├── channel 0
+ │   │   ├── ..
+ │   │   ├── channel 7
+ │   ├── .. ..
+ │   ├── csrow 3 # UMC 3
+ │   │   ├── channel 0
+ │   │   ├── ..
+ │   │   ├── channel 7
+ │   ├── rank 0
+ │   ├── .. ..
+ │   ├── rank 31 # total 32 ranks/dimms from 4 UMCs
+ ├
+ ├── mc 2 # GPU node 1 == mc2
+ │   ├── .. # each GPU has total 64 GB
+
+ GPU card 2
+ ├── mc 3
+ │   ├── ..
+ ├── mc 4
+ │   ├── ..
+
+ GPU card 3
+ ├── mc 5
+ │   ├── ..
+ ├── mc 6
+ │   ├── ..
+
+ GPU card 4
+ ├── mc 7
+ │   ├── ..
+ ├── mc 8
+ │   ├── ..
--
2.25.1


2023-05-15 11:55:11

by M K, Muralidhara

[permalink] [raw]
Subject: [PATCH 2/5] x86/MCE/AMD, EDAC/mce_amd: Decode UMC_V2 ECC errors

From: Yazen Ghannam <[email protected]>

The MI200 (Aldebaran) series of devices introduced a new SMCA bank type
for Unified Memory Controllers. The MCE subsystem already has support
for this new type. The MCE decoder module will decode the common MCA
error information for the new bank type, but it will not pass the
information to the AMD64 EDAC module for detailed memory error decoding.

Have the MCE decoder module recognize the new bank type as an SMCA UMC
memory error and pass the MCA information to AMD64 EDAC.

Signed-off-by: Yazen Ghannam <[email protected]>
Co-developed-by: Muralidhara M K <[email protected]>
Signed-off-by: Muralidhara M K <[email protected]>
---
arch/x86/kernel/cpu/mce/amd.c | 6 ++++--
drivers/edac/mce_amd.c | 3 ++-
2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 0b971f974096..5e74610b39e7 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -715,11 +715,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)

bool amd_mce_is_memory_error(struct mce *m)
{
+ enum smca_bank_types bank_type;
/* ErrCodeExt[20:16] */
u8 xec = (m->status >> 16) & 0x1f;

+ bank_type = smca_get_bank_type(m->extcpu, m->bank);
if (mce_flags.smca)
- return smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC && xec == 0x0;
+ return (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && xec == 0x0;

return m->bank == 4 && xec == 0x8;
}
@@ -1050,7 +1052,7 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol
if (bank_type >= N_SMCA_BANK_TYPES)
return NULL;

- if (b && bank_type == SMCA_UMC) {
+ if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) {
if (b->block < ARRAY_SIZE(smca_umc_block_names))
return smca_umc_block_names[b->block];
return NULL;
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index cc5c63feb26a..9215c06783df 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1186,7 +1186,8 @@ static void decode_smca_error(struct mce *m)
if (xec < smca_mce_descs[bank_type].num_descs)
pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);

- if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
+ if ((bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) &&
+ xec == 0 && decode_dram_ecc)
decode_dram_ecc(topology_die_id(m->extcpu), m);
}

--
2.25.1


Subject: [tip: ras/core] EDAC/amd64: Document heterogeneous system enumeration

The following commit has been merged into the ras/core branch of tip:

Commit-ID: 4f3fa571a48feb56e7ed1978a27983b89dd2107a
Gitweb: https://git.kernel.org/tip/4f3fa571a48feb56e7ed1978a27983b89dd2107a
Author: Muralidhara M K <[email protected]>
AuthorDate: Mon, 15 May 2023 11:35:35
Committer: Borislav Petkov (AMD) <[email protected]>
CommitterDate: Mon, 05 Jun 2023 12:27:15 +02:00

EDAC/amd64: Document heterogeneous system enumeration

Document High Bandwidth Memory (HBM) and AMD heterogeneous system
topology and enumeration.

[ bp: Simplify and de-marketize, unify, massage. ]

Signed-off-by: Muralidhara M K <[email protected]>
Co-developed-by: Naveen Krishna Chatradhi <[email protected]>
Signed-off-by: Naveen Krishna Chatradhi <[email protected]>
Signed-off-by: Yazen Ghannam <[email protected]>
Signed-off-by: Borislav Petkov (AMD) <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
Documentation/driver-api/edac.rst | 120 +++++++++++++++++++++++++++++-
1 file changed, 120 insertions(+)

diff --git a/Documentation/driver-api/edac.rst b/Documentation/driver-api/edac.rst
index b8c742a..f4f044b 100644
--- a/Documentation/driver-api/edac.rst
+++ b/Documentation/driver-api/edac.rst
@@ -106,6 +106,16 @@ will occupy those chip-select rows.
This term is avoided because it is unclear when needing to distinguish
between chip-select rows and socket sets.

+* High Bandwidth Memory (HBM)
+
+HBM is a new memory type with low power consumption and ultra-wide
+communication lanes. It uses vertically stacked memory chips (DRAM dies)
+interconnected by microscopic wires called "through-silicon vias," or
+TSVs.
+
+Several stacks of HBM chips connect to the CPU or GPU through an ultra-fast
+interconnect called the "interposer". Therefore, HBM's characteristics
+are nearly indistinguishable from on-chip integrated RAM.

Memory Controllers
------------------
@@ -176,3 +186,113 @@ nodes::
the L1 and L2 directories would be "edac_device_block's"

.. kernel-doc:: drivers/edac/edac_device.h
+
+
+Heterogeneous system support
+----------------------------
+
+An AMD heterogeneous system is built by connecting the data fabrics of
+both CPUs and GPUs via custom xGMI links. Thus, the data fabric on the
+GPU nodes can be accessed the same way as the data fabric on CPU nodes.
+
+The MI200 accelerators are data center GPUs. They have 2 data fabrics,
+and each GPU data fabric contains four Unified Memory Controllers (UMC).
+Each UMC contains eight channels. Each UMC channel controls one 128-bit
+HBM2e (2GB) channel (equivalent to 8 X 2GB ranks). This creates a total
+of 4096-bits of DRAM data bus.
+
+While the UMC is interfacing a 16GB (8high X 2GB DRAM) HBM stack, each UMC
+channel is interfacing 2GB of DRAM (represented as rank).
+
+Memory controllers on AMD GPU nodes can be represented in EDAC thusly:
+
+ GPU DF / GPU Node -> EDAC MC
+ GPU UMC -> EDAC CSROW
+ GPU UMC channel -> EDAC CHANNEL
+
+For example: a heterogeneous system with 1 AMD CPU is connected to
+4 MI200 (Aldebaran) GPUs using xGMI.
+
+Some more heterogeneous hardware details:
+
+- The CPU UMC (Unified Memory Controller) is mostly the same as the GPU UMC.
+ They have chip selects (csrows) and channels. However, the layouts are different
+ for performance, physical layout, or other reasons.
+- CPU UMCs use 1 channel, In this case UMC = EDAC channel. This follows the
+ marketing speak. CPU has X memory channels, etc.
+- CPU UMCs use up to 4 chip selects, So UMC chip select = EDAC CSROW.
+- GPU UMCs use 1 chip select, So UMC = EDAC CSROW.
+- GPU UMCs use 8 channels, So UMC channel = EDAC channel.
+
+The EDAC subsystem provides a mechanism to handle AMD heterogeneous
+systems by calling system specific ops for both CPUs and GPUs.
+
+AMD GPU nodes are enumerated in sequential order based on the PCI
+hierarchy, and the first GPU node is assumed to have a Node ID value
+following those of the CPU nodes after latter are fully populated::
+
+ $ ls /sys/devices/system/edac/mc/
+ mc0 - CPU MC node 0
+ mc1 |
+ mc2 |- GPU card[0] => node 0(mc1), node 1(mc2)
+ mc3 |
+ mc4 |- GPU card[1] => node 0(mc3), node 1(mc4)
+ mc5 |
+ mc6 |- GPU card[2] => node 0(mc5), node 1(mc6)
+ mc7 |
+ mc8 |- GPU card[3] => node 0(mc7), node 1(mc8)
+
+For example, a heterogeneous system with one AMD CPU is connected to
+four MI200 (Aldebaran) GPUs using xGMI. This topology can be represented
+via the following sysfs entries::
+
+ /sys/devices/system/edac/mc/..
+
+ CPU # CPU node
+ ├── mc 0
+
+ GPU Nodes are enumerated sequentially after CPU nodes have been populated
+ GPU card 1 # Each MI200 GPU has 2 nodes/mcs
+ ├── mc 1 # GPU node 0 == mc1, Each MC node has 4 UMCs/CSROWs
+ │   ├── csrow 0 # UMC 0
+ │   │   ├── channel 0 # Each UMC has 8 channels
+ │   │   ├── channel 1 # size of each channel is 2 GB, so each UMC has 16 GB
+ │   │   ├── channel 2
+ │   │   ├── channel 3
+ │   │   ├── channel 4
+ │   │   ├── channel 5
+ │   │   ├── channel 6
+ │   │   ├── channel 7
+ │   ├── csrow 1 # UMC 1
+ │   │   ├── channel 0
+ │   │   ├── ..
+ │   │   ├── channel 7
+ │   ├── .. ..
+ │   ├── csrow 3 # UMC 3
+ │   │   ├── channel 0
+ │   │   ├── ..
+ │   │   ├── channel 7
+ │   ├── rank 0
+ │   ├── .. ..
+ │   ├── rank 31 # total 32 ranks/dimms from 4 UMCs
+ ├
+ ├── mc 2 # GPU node 1 == mc2
+ │   ├── .. # each GPU has total 64 GB
+
+ GPU card 2
+ ├── mc 3
+ │   ├── ..
+ ├── mc 4
+ │   ├── ..
+
+ GPU card 3
+ ├── mc 5
+ │   ├── ..
+ ├── mc 6
+ │   ├── ..
+
+ GPU card 4
+ ├── mc 7
+ │   ├── ..
+ ├── mc 8
+ │   ├── ..

Subject: [tip: ras/core] x86/MCE/AMD, EDAC/mce_amd: Decode UMC_V2 ECC errors

The following commit has been merged into the ras/core branch of tip:

Commit-ID: c35977b00fa76ce5f3fe9afdb9cffda970c943d5
Gitweb: https://git.kernel.org/tip/c35977b00fa76ce5f3fe9afdb9cffda970c943d5
Author: Yazen Ghannam <[email protected]>
AuthorDate: Mon, 15 May 2023 11:35:34
Committer: Borislav Petkov (AMD) <[email protected]>
CommitterDate: Mon, 05 Jun 2023 12:27:11 +02:00

x86/MCE/AMD, EDAC/mce_amd: Decode UMC_V2 ECC errors

The MI200 (Aldebaran) series of devices introduced a new SMCA bank type
for Unified Memory Controllers. The MCE subsystem already has support
for this new type. The MCE decoder module will decode the common MCA
error information for the new bank type, but it will not pass the
information to the AMD64 EDAC module for detailed memory error decoding.

Have the MCE decoder module recognize the new bank type as an SMCA UMC
memory error and pass the MCA information to AMD64 EDAC.

Signed-off-by: Yazen Ghannam <[email protected]>
Co-developed-by: Muralidhara M K <[email protected]>
Signed-off-by: Muralidhara M K <[email protected]>
Signed-off-by: Borislav Petkov (AMD) <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
arch/x86/kernel/cpu/mce/amd.c | 6 ++++--
drivers/edac/mce_amd.c | 3 ++-
2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 0b971f9..5e74610 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -715,11 +715,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)

bool amd_mce_is_memory_error(struct mce *m)
{
+ enum smca_bank_types bank_type;
/* ErrCodeExt[20:16] */
u8 xec = (m->status >> 16) & 0x1f;

+ bank_type = smca_get_bank_type(m->extcpu, m->bank);
if (mce_flags.smca)
- return smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC && xec == 0x0;
+ return (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) && xec == 0x0;

return m->bank == 4 && xec == 0x8;
}
@@ -1050,7 +1052,7 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol
if (bank_type >= N_SMCA_BANK_TYPES)
return NULL;

- if (b && bank_type == SMCA_UMC) {
+ if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) {
if (b->block < ARRAY_SIZE(smca_umc_block_names))
return smca_umc_block_names[b->block];
return NULL;
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index cc5c63f..9215c06 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1186,7 +1186,8 @@ static void decode_smca_error(struct mce *m)
if (xec < smca_mce_descs[bank_type].num_descs)
pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);

- if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
+ if ((bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2) &&
+ xec == 0 && decode_dram_ecc)
decode_dram_ecc(topology_die_id(m->extcpu), m);
}


Subject: [tip: ras/core] EDAC/amd64: Add support for AMD heterogeneous Family 19h Model 30h-3Fh

The following commit has been merged into the ras/core branch of tip:

Commit-ID: 9c42edd571aa4f8b2125b71e3924eeb0f6a54af1
Gitweb: https://git.kernel.org/tip/9c42edd571aa4f8b2125b71e3924eeb0f6a54af1
Author: Muralidhara M K <[email protected]>
AuthorDate: Mon, 15 May 2023 11:35:36
Committer: Borislav Petkov (AMD) <[email protected]>
CommitterDate: Mon, 05 Jun 2023 12:27:18 +02:00

EDAC/amd64: Add support for AMD heterogeneous Family 19h Model 30h-3Fh

AMD Family 19h Model 30h-3Fh systems can be connected to AMD MI200
accelerator/GPU devices such that the CPU and GPU data fabrics are
connected together. In this configuration, the CPU manages error logging
and reporting for MCA banks located on the GPUs. This includes HBM memory
errors reported from Unified Memory Controllers (UMCs) on the GPUs.
The GPU memory errors are handled like CPU memory errors.

AMD CPU UMC support in EDAC can be re-used for GPU UMC support. However,
keeping them separate means drastic changes in one path (e.g. to support
newer products) should have less impact on the other path.

Also, simplify the "gpu_" helper functions where possible. GPU product
configuration, like memory type and channel count, is fixed compared to
CPU products.

GPU UMCs each have four physical connections (phys) connected to eight
channels. There is a single "chip select". This differs from CPUs where
each UMC has one physical connection connected to one channel, and each
channel has up to four "chip selects".

Enumerate each UMC "phy" as an EDAC CSROW, since there is only a single
chip select for each physical connection. This is similar to how a CPU
UMC "phy" is enumerated as an EDAC CHANNEL, since there is only a single
channel for each physical connection.

Signed-off-by: Muralidhara M K <[email protected]>
Co-developed-by: Naveen Krishna Chatradhi <[email protected]>
Signed-off-by: Naveen Krishna Chatradhi <[email protected]>
Co-developed-by: Yazen Ghannam <[email protected]>
Signed-off-by: Yazen Ghannam <[email protected]>
Signed-off-by: Borislav Petkov (AMD) <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
drivers/edac/amd64_edac.c | 310 +++++++++++++++++++++++++++++++++----
1 file changed, 279 insertions(+), 31 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 5c4292e..28155b0 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1426,12 +1426,47 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
return cs_mode;
}

+static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
+ int csrow_nr, int dimm)
+{
+ u32 msb, weight, num_zero_bits;
+ u32 addr_mask_deinterleaved;
+ int size = 0;
+
+ /*
+ * The number of zero bits in the mask is equal to the number of bits
+ * in a full mask minus the number of bits in the current mask.
+ *
+ * The MSB is the number of bits in the full mask because BIT[0] is
+ * always 0.
+ *
+ * In the special 3 Rank interleaving case, a single bit is flipped
+ * without swapping with the most significant bit. This can be handled
+ * by keeping the MSB where it is and ignoring the single zero bit.
+ */
+ msb = fls(addr_mask_orig) - 1;
+ weight = hweight_long(addr_mask_orig);
+ num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
+
+ /* Take the number of zero bits off from the top of the mask. */
+ addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
+
+ edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
+ edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
+ edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
+
+ /* Register [31:1] = Address [39:9]. Size is in kBs here. */
+ size = (addr_mask_deinterleaved >> 2) + 1;
+
+ /* Return size in MBs. */
+ return size >> 10;
+}
+
static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
- u32 addr_mask_orig, addr_mask_deinterleaved;
- u32 msb, weight, num_zero_bits;
int cs_mask_nr = csrow_nr;
+ u32 addr_mask_orig;
int dimm, size = 0;

/* No Chip Selects are enabled. */
@@ -1475,33 +1510,7 @@ static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
else
addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr];

- /*
- * The number of zero bits in the mask is equal to the number of bits
- * in a full mask minus the number of bits in the current mask.
- *
- * The MSB is the number of bits in the full mask because BIT[0] is
- * always 0.
- *
- * In the special 3 Rank interleaving case, a single bit is flipped
- * without swapping with the most significant bit. This can be handled
- * by keeping the MSB where it is and ignoring the single zero bit.
- */
- msb = fls(addr_mask_orig) - 1;
- weight = hweight_long(addr_mask_orig);
- num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
-
- /* Take the number of zero bits off from the top of the mask. */
- addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
-
- edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
- edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
- edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
-
- /* Register [31:1] = Address [39:9]. Size is in kBs here. */
- size = (addr_mask_deinterleaved >> 2) + 1;
-
- /* Return size in MBs. */
- return size >> 10;
+ return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, dimm);
}

static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
@@ -3675,6 +3684,221 @@ static int umc_hw_info_get(struct amd64_pvt *pvt)
return 0;
}

+/*
+ * The CPUs have one channel per UMC, so UMC number is equivalent to a
+ * channel number. The GPUs have 8 channels per UMC, so the UMC number no
+ * longer works as a channel number.
+ *
+ * The channel number within a GPU UMC is given in MCA_IPID[15:12].
+ * However, the IDs are split such that two UMC values go to one UMC, and
+ * the channel numbers are split in two groups of four.
+ *
+ * Refer to comment on gpu_get_umc_base().
+ *
+ * For example,
+ * UMC0 CH[3:0] = 0x0005[3:0]000
+ * UMC0 CH[7:4] = 0x0015[3:0]000
+ * UMC1 CH[3:0] = 0x0025[3:0]000
+ * UMC1 CH[7:4] = 0x0035[3:0]000
+ */
+static void gpu_get_err_info(struct mce *m, struct err_info *err)
+{
+ u8 ch = (m->ipid & GENMASK(31, 0)) >> 20;
+ u8 phy = ((m->ipid >> 12) & 0xf);
+
+ err->channel = ch % 2 ? phy + 4 : phy;
+ err->csrow = phy;
+}
+
+static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
+ unsigned int cs_mode, int csrow_nr)
+{
+ u32 addr_mask_orig = pvt->csels[umc].csmasks[csrow_nr];
+
+ return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, csrow_nr >> 1);
+}
+
+static void gpu_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
+{
+ int size, cs_mode, cs = 0;
+
+ edac_printk(KERN_DEBUG, EDAC_MC, "UMC%d chip selects:\n", ctrl);
+
+ cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY;
+
+ for_each_chip_select(cs, ctrl, pvt) {
+ size = gpu_addr_mask_to_cs_size(pvt, ctrl, cs_mode, cs);
+ amd64_info(EDAC_MC ": %d: %5dMB\n", cs, size);
+ }
+}
+
+static void gpu_dump_misc_regs(struct amd64_pvt *pvt)
+{
+ struct amd64_umc *umc;
+ u32 i;
+
+ for_each_umc(i) {
+ umc = &pvt->umc[i];
+
+ edac_dbg(1, "UMC%d UMC cfg: 0x%x\n", i, umc->umc_cfg);
+ edac_dbg(1, "UMC%d SDP ctrl: 0x%x\n", i, umc->sdp_ctrl);
+ edac_dbg(1, "UMC%d ECC ctrl: 0x%x\n", i, umc->ecc_ctrl);
+ edac_dbg(1, "UMC%d All HBMs support ECC: yes\n", i);
+
+ gpu_debug_display_dimm_sizes(pvt, i);
+ }
+}
+
+static u32 gpu_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
+{
+ u32 nr_pages;
+ int cs_mode = CS_EVEN_PRIMARY | CS_ODD_PRIMARY;
+
+ nr_pages = gpu_addr_mask_to_cs_size(pvt, dct, cs_mode, csrow_nr);
+ nr_pages <<= 20 - PAGE_SHIFT;
+
+ edac_dbg(0, "csrow: %d, channel: %d\n", csrow_nr, dct);
+ edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);
+
+ return nr_pages;
+}
+
+static void gpu_init_csrows(struct mem_ctl_info *mci)
+{
+ struct amd64_pvt *pvt = mci->pvt_info;
+ struct dimm_info *dimm;
+ u8 umc, cs;
+
+ for_each_umc(umc) {
+ for_each_chip_select(cs, umc, pvt) {
+ if (!csrow_enabled(cs, umc, pvt))
+ continue;
+
+ dimm = mci->csrows[umc]->channels[cs]->dimm;
+
+ edac_dbg(1, "MC node: %d, csrow: %d\n",
+ pvt->mc_node_id, cs);
+
+ dimm->nr_pages = gpu_get_csrow_nr_pages(pvt, umc, cs);
+ dimm->edac_mode = EDAC_SECDED;
+ dimm->mtype = MEM_HBM2;
+ dimm->dtype = DEV_X16;
+ dimm->grain = 64;
+ }
+ }
+}
+
+static void gpu_setup_mci_misc_attrs(struct mem_ctl_info *mci)
+{
+ struct amd64_pvt *pvt = mci->pvt_info;
+
+ mci->mtype_cap = MEM_FLAG_HBM2;
+ mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+
+ mci->edac_cap = EDAC_FLAG_EC;
+ mci->mod_name = EDAC_MOD_STR;
+ mci->ctl_name = pvt->ctl_name;
+ mci->dev_name = pci_name(pvt->F3);
+ mci->ctl_page_to_phys = NULL;
+
+ gpu_init_csrows(mci);
+}
+
+/* ECC is enabled by default on GPU nodes */
+static bool gpu_ecc_enabled(struct amd64_pvt *pvt)
+{
+ return true;
+}
+
+static inline u32 gpu_get_umc_base(u8 umc, u8 channel)
+{
+ /*
+ * On CPUs, there is one channel per UMC, so UMC numbering equals
+ * channel numbering. On GPUs, there are eight channels per UMC,
+ * so the channel numbering is different from UMC numbering.
+ *
+ * On CPU nodes channels are selected in 6th nibble
+ * UMC chY[3:0]= [(chY*2 + 1) : (chY*2)]50000;
+ *
+ * On GPU nodes channels are selected in 3rd nibble
+ * HBM chX[3:0]= [Y ]5X[3:0]000;
+ * HBM chX[7:4]= [Y+1]5X[3:0]000
+ */
+ umc *= 2;
+
+ if (channel >= 4)
+ umc++;
+
+ return 0x50000 + (umc << 20) + ((channel % 4) << 12);
+}
+
+static void gpu_read_mc_regs(struct amd64_pvt *pvt)
+{
+ u8 nid = pvt->mc_node_id;
+ struct amd64_umc *umc;
+ u32 i, umc_base;
+
+ /* Read registers from each UMC */
+ for_each_umc(i) {
+ umc_base = gpu_get_umc_base(i, 0);
+ umc = &pvt->umc[i];
+
+ amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
+ amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl);
+ amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
+ }
+}
+
+static void gpu_read_base_mask(struct amd64_pvt *pvt)
+{
+ u32 base_reg, mask_reg;
+ u32 *base, *mask;
+ int umc, cs;
+
+ for_each_umc(umc) {
+ for_each_chip_select(cs, umc, pvt) {
+ base_reg = gpu_get_umc_base(umc, cs) + UMCCH_BASE_ADDR;
+ base = &pvt->csels[umc].csbases[cs];
+
+ if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) {
+ edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *base, base_reg);
+ }
+
+ mask_reg = gpu_get_umc_base(umc, cs) + UMCCH_ADDR_MASK;
+ mask = &pvt->csels[umc].csmasks[cs];
+
+ if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) {
+ edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n",
+ umc, cs, *mask, mask_reg);
+ }
+ }
+ }
+}
+
+static void gpu_prep_chip_selects(struct amd64_pvt *pvt)
+{
+ int umc;
+
+ for_each_umc(umc) {
+ pvt->csels[umc].b_cnt = 8;
+ pvt->csels[umc].m_cnt = 8;
+ }
+}
+
+static int gpu_hw_info_get(struct amd64_pvt *pvt)
+{
+ pvt->umc = kcalloc(pvt->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL);
+ if (!pvt->umc)
+ return -ENOMEM;
+
+ gpu_prep_chip_selects(pvt);
+ gpu_read_base_mask(pvt);
+ gpu_read_mc_regs(pvt);
+
+ return 0;
+}
+
static void hw_info_put(struct amd64_pvt *pvt)
{
pci_dev_put(pvt->F1);
@@ -3690,6 +3914,14 @@ static struct low_ops umc_ops = {
.get_err_info = umc_get_err_info,
};

+static struct low_ops gpu_ops = {
+ .hw_info_get = gpu_hw_info_get,
+ .ecc_enabled = gpu_ecc_enabled,
+ .setup_mci_misc_attrs = gpu_setup_mci_misc_attrs,
+ .dump_misc_regs = gpu_dump_misc_regs,
+ .get_err_info = gpu_get_err_info,
+};
+
/* Use Family 16h versions for defaults and adjust as needed below. */
static struct low_ops dct_ops = {
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -3813,6 +4045,16 @@ static int per_family_init(struct amd64_pvt *pvt)
case 0x20 ... 0x2f:
pvt->ctl_name = "F19h_M20h";
break;
+ case 0x30 ... 0x3f:
+ if (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) {
+ pvt->ctl_name = "MI200";
+ pvt->max_mcs = 4;
+ pvt->ops = &gpu_ops;
+ } else {
+ pvt->ctl_name = "F19h_M30h";
+ pvt->max_mcs = 8;
+ }
+ break;
case 0x50 ... 0x5f:
pvt->ctl_name = "F19h_M50h";
break;
@@ -3846,11 +4088,17 @@ static int init_one_instance(struct amd64_pvt *pvt)
struct edac_mc_layer layers[2];
int ret = -ENOMEM;

+ /*
+ * For Heterogeneous family EDAC CHIP_SELECT and CHANNEL layers should
+ * be swapped to fit into the layers.
+ */
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
- layers[0].size = pvt->csels[0].b_cnt;
+ layers[0].size = (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) ?
+ pvt->max_mcs : pvt->csels[0].b_cnt;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
- layers[1].size = pvt->max_mcs;
+ layers[1].size = (pvt->F3->device == PCI_DEVICE_ID_AMD_MI200_DF_F3) ?
+ pvt->csels[0].b_cnt : pvt->max_mcs;
layers[1].is_virt_csrow = false;

mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0);