2022-02-03 20:52:54

by William Roche

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] EDAC/amd64: Add new register offset support and related changes

This version is clearer according to me, and the dimm value can continue
to be used in the debug messages at the end of the
f17_addr_mask_to_cs_size function.
Thank you.

Reviewed-by: William Roche <[email protected]>

W.


On 02/02/2022 15:43, Yazen Ghannam wrote:
> Introduce a "family flags" bitmask that can be used to indicate any
> special behavior needed on a per-family basis.
>
> Add a flag to indicate a system uses the new register offsets introduced
> with Family 19h Model 10h.
>
> Use this flag to account for register offset changes, a new bitfield
> indicating DDR5 use on a memory controller, and to set the proper number
> of chip select masks.
>
> Rework f17_addr_mask_to_cs_size() to properly handle the change in chip
> select masks. And update code comments to reflect the updated Chip
> Select, DIMM, and Mask relationships.
>
> [uninitiliazed variable warning]
> Reported-by: kernel test robot <[email protected]>
> Signed-off-by: Yazen Ghannam <[email protected]>
> ---
> Link:
> https://lore.kernel.org/r/[email protected]
>
> v3->v4:
> * Use a single helper function for new register offsets.
> * Fix an uninitialized variable warning.
>
> v2->v3:
> * Adjust variable names to explicitly show what they represent.
> * Update code comment to give more detail on CS/MASK/DIMM layout.
>
> v1->v2:
> * Was patch 4 in v1.
> * Change "has_ddr5" flag to "zn_regs_v2".
> * Drop flag check helper function.
> * Update determine_memory_type() to check bitfield for DDR5.
> * Update code comments.
>
> drivers/edac/amd64_edac.c | 80 +++++++++++++++++++++++++++++++--------
> drivers/edac/amd64_edac.h | 14 +++++++
> 2 files changed, 78 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
> index 49e384207ce0..5806fe657373 100644
> --- a/drivers/edac/amd64_edac.c
> +++ b/drivers/edac/amd64_edac.c
> @@ -15,6 +15,21 @@ static struct msr __percpu *msrs;
>
> static struct amd64_family_type *fam_type;
>
> +static inline u32 get_umc_reg(u32 reg)
> +{
> + if (!fam_type->flags.zn_regs_v2)
> + return reg;
> +
> + switch (reg) {
> + case UMCCH_ADDR_CFG: return UMCCH_ADDR_CFG_DDR5;
> + case UMCCH_ADDR_MASK_SEC: return UMCCH_ADDR_MASK_SEC_DDR5;
> + case UMCCH_DIMM_CFG: return UMCCH_DIMM_CFG_DDR5;
> + }
> +
> + WARN_ONCE(1, "%s: unknown register 0x%x", __func__, reg);
> + return 0;
> +}
> +
> /* Per-node stuff */
> static struct ecc_settings **ecc_stngs;
>
> @@ -1429,8 +1444,10 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt)
> edac_dbg(1, "UMC%d x16 DIMMs present: %s\n",
> i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no");
>
> - if (umc->dram_type == MEM_LRDDR4) {
> - amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ADDR_CFG, &tmp);
> + if (umc->dram_type == MEM_LRDDR4 || umc->dram_type == MEM_LRDDR5) {
> + amd_smn_read(pvt->mc_node_id,
> + umc_base + get_umc_reg(UMCCH_ADDR_CFG),
> + &tmp);
> edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n",
> i, 1 << ((tmp >> 4) & 0x3));
> }
> @@ -1505,7 +1522,7 @@ static void prep_chip_selects(struct amd64_pvt *pvt)
>
> for_each_umc(umc) {
> pvt->csels[umc].b_cnt = 4;
> - pvt->csels[umc].m_cnt = 2;
> + pvt->csels[umc].m_cnt = fam_type->flags.zn_regs_v2 ? 4 : 2;
> }
>
> } else {
> @@ -1545,7 +1562,7 @@ static void read_umc_base_mask(struct amd64_pvt *pvt)
> }
>
> umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK;
> - umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC;
> + umc_mask_reg_sec = get_umc_base(umc) + get_umc_reg(UMCCH_ADDR_MASK_SEC);
>
> for_each_chip_select_mask(cs, umc, pvt) {
> mask = &pvt->csels[umc].csmasks[cs];
> @@ -1623,12 +1640,25 @@ static void _determine_memory_type_df(struct amd64_umc *umc)
> return;
> }
>
> - if (umc->dimm_cfg & BIT(5))
> - umc->dram_type = MEM_LRDDR4;
> - else if (umc->dimm_cfg & BIT(4))
> - umc->dram_type = MEM_RDDR4;
> - else
> - umc->dram_type = MEM_DDR4;
> + /*
> + * Check if the system supports the "DDR Type" field in UMC Config
> + * and has DDR5 DIMMs in use.
> + */
> + if (fam_type->flags.zn_regs_v2 && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) {
> + if (umc->dimm_cfg & BIT(5))
> + umc->dram_type = MEM_LRDDR5;
> + else if (umc->dimm_cfg & BIT(4))
> + umc->dram_type = MEM_RDDR5;
> + else
> + umc->dram_type = MEM_DDR5;
> + } else {
> + if (umc->dimm_cfg & BIT(5))
> + umc->dram_type = MEM_LRDDR4;
> + else if (umc->dimm_cfg & BIT(4))
> + umc->dram_type = MEM_RDDR4;
> + else
> + umc->dram_type = MEM_DDR4;
> + }
> }
>
> static void determine_memory_type_df(struct amd64_pvt *pvt)
> @@ -2170,6 +2200,7 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
> {
> u32 addr_mask_orig, addr_mask_deinterleaved;
> u32 msb, weight, num_zero_bits;
> + int cs_mask_nr = csrow_nr;
> int dimm, size = 0;
>
> /* No Chip Selects are enabled. */
> @@ -2185,17 +2216,33 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
> return size;
>
> /*
> - * There is one mask per DIMM, and two Chip Selects per DIMM.
> - * CS0 and CS1 -> DIMM0
> - * CS2 and CS3 -> DIMM1
> + * Family 17h introduced systems with one mask per DIMM,
> + * and two Chip Selects per DIMM.
> + *
> + * CS0 and CS1 -> MASK0 / DIMM0
> + * CS2 and CS3 -> MASK1 / DIMM1
> + *
> + * Family 19h Model 10h introduced systems with one mask per Chip Select,
> + * and two Chip Selects per DIMM.
> + *
> + * CS0 -> MASK0 -> DIMM0
> + * CS1 -> MASK1 -> DIMM0
> + * CS2 -> MASK2 -> DIMM1
> + * CS3 -> MASK3 -> DIMM1
> + *
> + * Keep the mask number equal to the Chip Select number for newer systems,
> + * and shift the mask number for older systems.
> */
> dimm = csrow_nr >> 1;
>
> + if (!fam_type->flags.zn_regs_v2)
> + cs_mask_nr >>= 1;
> +
> /* Asymmetric dual-rank DIMM support. */
> if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY))
> - addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm];
> + addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr];
> else
> - addr_mask_orig = pvt->csels[umc].csmasks[dimm];
> + addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr];
>
> /*
> * The number of zero bits in the mask is equal to the number of bits
> @@ -2951,6 +2998,7 @@ static struct amd64_family_type family_types[] = {
> .f0_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F0,
> .f6_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F6,
> .max_mcs = 12,
> + .flags.zn_regs_v2 = 1,
> .ops = {
> .early_channel_count = f17_early_channel_count,
> .dbam_to_cs = f17_addr_mask_to_cs_size,
> @@ -3389,7 +3437,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt)
> umc_base = get_umc_base(i);
> umc = &pvt->umc[i];
>
> - amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg);
> + amd_smn_read(nid, umc_base + get_umc_reg(UMCCH_DIMM_CFG), &umc->dimm_cfg);
> amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg);
> amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl);
> amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl);
> diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
> index 09ad28299c57..6f8147abfa71 100644
> --- a/drivers/edac/amd64_edac.h
> +++ b/drivers/edac/amd64_edac.h
> @@ -273,8 +273,11 @@
> #define UMCCH_BASE_ADDR_SEC 0x10
> #define UMCCH_ADDR_MASK 0x20
> #define UMCCH_ADDR_MASK_SEC 0x28
> +#define UMCCH_ADDR_MASK_SEC_DDR5 0x30
> #define UMCCH_ADDR_CFG 0x30
> +#define UMCCH_ADDR_CFG_DDR5 0x40
> #define UMCCH_DIMM_CFG 0x80
> +#define UMCCH_DIMM_CFG_DDR5 0x90
> #define UMCCH_UMC_CFG 0x100
> #define UMCCH_SDP_CTRL 0x104
> #define UMCCH_ECC_CTRL 0x14C
> @@ -483,11 +486,22 @@ struct low_ops {
> unsigned cs_mode, int cs_mask_nr);
> };
>
> +struct amd64_family_flags {
> + /*
> + * Indicates that the system supports the new register offsets, etc.
> + * first introduced with Family 19h Model 10h.
> + */
> + __u64 zn_regs_v2 : 1,
> +
> + __reserved : 63;
> +};
> +
> struct amd64_family_type {
> const char *ctl_name;
> u16 f0_id, f1_id, f2_id, f6_id;
> /* Maximum number of memory controllers per die/node. */
> u8 max_mcs;
> + struct amd64_family_flags flags;
> struct low_ops ops;
> };
>