2023-10-25 07:38:58

by M K, Muralidhara

[permalink] [raw]
Subject: [PATCH 1/7] RAS: Add Address Translation support for MI200

From: Muralidhara M K <[email protected]>

Add support for address translation on Data Fabric version 3.5.
Add new interleaving modes for heterogeneous model support and
adjust how the DRAM address maps are found early in the
translation for certain cases.

Signed-off-by: Muralidhara M K <[email protected]>
Co-developed-by: Yazen Ghannam <[email protected]>
Signed-off-by: Yazen Ghannam <[email protected]>
---
drivers/ras/amd/atl/dehash.c | 60 +++++++++++++++++
drivers/ras/amd/atl/denormalize.c | 11 +++-
drivers/ras/amd/atl/internal.h | 15 ++++-
drivers/ras/amd/atl/map.c | 105 +++++++++++++++++++++++++++++-
drivers/ras/amd/atl/reg_fields.h | 29 +++++++++
drivers/ras/amd/atl/system.c | 1 +
6 files changed, 217 insertions(+), 4 deletions(-)

diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c
index e501f2e918d7..5760e6bca194 100644
--- a/drivers/ras/amd/atl/dehash.c
+++ b/drivers/ras/amd/atl/dehash.c
@@ -395,6 +395,61 @@ static int df4p5_dehash_addr(struct addr_ctx *ctx)
return 0;
}

+/*
+ * Undo the MI200 channel hash in ctx->ret_addr. Returns 0, or -EINVAL.
+ * Hash inputs per CS select bit; the 64K, 2M and 1G terms are gated by map.ctl:
+ * CSSelect[0] = XOR of addr{8, 16, 21, 30};
+ * CSSelect[1] = XOR of addr{9, 17, 22, 31};
+ * CSSelect[2] = XOR of addr{10, 18, 23, 32};
+ * CSSelect[3] = XOR of addr{11, 19, 24, 33}; - 16 and 32 channel only
+ * CSSelect[4] = XOR of addr{12, 20, 25, 34}; - 32 channel only
+ */
+static int mi200_dehash_addr(struct addr_ctx *ctx)
+{
+ u8 num_intlv_bits = ctx->map.total_intlv_bits;
+ bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G;
+ u8 hashed_bit, intlv_bit, i;
+
+ /* Assert that interleave bit is 8. */
+ if (ctx->map.intlv_bit_pos != 8) {
+ pr_warn("%s: Invalid interleave bit: %u\n",
+ __func__, ctx->map.intlv_bit_pos);
+ return -EINVAL;
+ }
+
+ /* Assert that die interleaving is disabled. */
+ if (ctx->map.num_intlv_dies > 1) {
+ pr_warn("%s: Invalid number of interleave dies: %u\n",
+ __func__, ctx->map.num_intlv_dies);
+ return -EINVAL;
+ }
+
+ /* Assert that socket interleaving is disabled. */
+ if (ctx->map.num_intlv_sockets > 1) {
+ pr_warn("%s: Invalid number of interleave sockets: %u\n",
+ __func__, ctx->map.num_intlv_sockets);
+ return -EINVAL;
+ }
+
+ hash_ctl_64k = FIELD_GET(DF3_HASH_CTL_64K, ctx->map.ctl);
+ hash_ctl_2M = FIELD_GET(DF3_HASH_CTL_2M, ctx->map.ctl);
+ hash_ctl_1G = FIELD_GET(DF3_HASH_CTL_1G, ctx->map.ctl);
+
+ for (i = 0; i < num_intlv_bits; i++) {
+ intlv_bit = atl_get_bit(8 + i, ctx->ret_addr);
+
+ /* Bit 8+i holds CSSelect[i]; XOR in only the other hash inputs to recover addr[8+i]. */
+ hashed_bit = intlv_bit;
+ hashed_bit ^= atl_get_bit(16 + i, ctx->ret_addr) & hash_ctl_64k;
+ hashed_bit ^= atl_get_bit(21 + i, ctx->ret_addr) & hash_ctl_2M;
+ hashed_bit ^= atl_get_bit(30 + i, ctx->ret_addr) & hash_ctl_1G;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(8 + i);
+ }
+ return 0;
+}
+
int dehash_address(struct addr_ctx *ctx)
{
switch (ctx->map.intlv_mode) {
@@ -452,6 +507,11 @@ int dehash_address(struct addr_ctx *ctx)
case DF4p5_NPS1_16CHAN_2K_HASH:
return df4p5_dehash_addr(ctx);

+ case MI2_HASH_8CHAN:
+ case MI2_HASH_16CHAN:
+ case MI2_HASH_32CHAN:
+ return mi200_dehash_addr(ctx);
+
default:
ATL_BAD_INTLV_MODE(ctx->map.intlv_mode);
return -EINVAL;
diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c
index fe1480c8e0d8..03eb1eea68f9 100644
--- a/drivers/ras/amd/atl/denormalize.c
+++ b/drivers/ras/amd/atl/denormalize.c
@@ -16,7 +16,7 @@
* Returns the Destination Fabric ID. This is the first (lowest)
* CS Fabric ID used within a DRAM Address map.
*/
-static u16 get_dst_fabric_id(struct addr_ctx *ctx)
+u16 get_dst_fabric_id(struct addr_ctx *ctx)
{
switch (df_cfg.rev) {
case DF2:
@@ -97,6 +97,9 @@ static u64 make_space_for_cs_id(struct addr_ctx *ctx)
case NOHASH_8CHAN:
case NOHASH_16CHAN:
case NOHASH_32CHAN:
+ case MI2_HASH_8CHAN:
+ case MI2_HASH_16CHAN:
+ case MI2_HASH_32CHAN:
case DF2_2CHAN_HASH:
return make_space_for_cs_id_at_intlv_bit(ctx);

@@ -233,6 +236,9 @@ static u16 calculate_cs_id(struct addr_ctx *ctx)
case DF3_COD4_2CHAN_HASH:
case DF3_COD2_4CHAN_HASH:
case DF3_COD1_8CHAN_HASH:
+ case MI2_HASH_8CHAN:
+ case MI2_HASH_16CHAN:
+ case MI2_HASH_32CHAN:
case DF2_2CHAN_HASH:
return get_cs_id_df2(ctx);

@@ -296,6 +302,9 @@ static u64 insert_cs_id(struct addr_ctx *ctx, u64 denorm_addr, u16 cs_id)
case NOHASH_8CHAN:
case NOHASH_16CHAN:
case NOHASH_32CHAN:
+ case MI2_HASH_8CHAN:
+ case MI2_HASH_16CHAN:
+ case MI2_HASH_32CHAN:
case DF2_2CHAN_HASH:
return insert_cs_id_at_intlv_bit(ctx, denorm_addr, cs_id);

diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
index f3888c8fd02d..33905933e31e 100644
--- a/drivers/ras/amd/atl/internal.h
+++ b/drivers/ras/amd/atl/internal.h
@@ -30,6 +30,12 @@
/* Shift needed for adjusting register values to true values. */
#define DF_DRAM_BASE_LIMIT_LSB 28

+/* Cache Coherent Moderator Instance Type value on register */
+#define DF_INST_TYPE_CCM 0
+
+/* Maximum possible number of DRAM maps within a Data Fabric. */
+#define DF_NUM_DRAM_MAPS_AVAILABLE 16
+
/*
* Glossary of acronyms used in address translation for Zen-based systems
*
@@ -68,6 +74,9 @@ enum intlv_modes {
DF4_NPS1_12CHAN_HASH = 0x15,
DF4_NPS2_5CHAN_HASH = 0x16,
DF4_NPS1_10CHAN_HASH = 0x17,
+ MI2_HASH_8CHAN = 0x1C,
+ MI2_HASH_16CHAN = 0x1D,
+ MI2_HASH_32CHAN = 0x1E,
DF2_2CHAN_HASH = 0x21,
/* DF4.5 modes are all IntLvNumChan + 0x20 */
DF4p5_NPS1_16CHAN_1K_HASH = 0x2C,
@@ -94,8 +103,9 @@ enum intlv_modes {

struct df_flags {
__u8 legacy_ficaa : 1,
+ heterogeneous : 1,
genoa_quirk : 1,
- __reserved_0 : 6;
+ __reserved_0 : 5;
};

struct df_config {
@@ -220,6 +230,9 @@ int determine_node_id(struct addr_ctx *ctx, u8 socket_num, u8 die_num);
int get_address_map(struct addr_ctx *ctx);

int denormalize_address(struct addr_ctx *ctx);
+
+u16 get_dst_fabric_id(struct addr_ctx *ctx);
+
int dehash_address(struct addr_ctx *ctx);

int norm_to_sys_addr(u8 socket_id, u8 die_id, u8 cs_inst_id, u64 *addr);
diff --git a/drivers/ras/amd/atl/map.c b/drivers/ras/amd/atl/map.c
index 05141da27028..9326f6a6b6c3 100644
--- a/drivers/ras/amd/atl/map.c
+++ b/drivers/ras/amd/atl/map.c
@@ -355,6 +355,101 @@ static int get_dram_addr_map(struct addr_ctx *ctx)
}
}

+static int find_moderator_instance_id(struct addr_ctx *ctx)
+{
+ u16 num_df_instances;
+ u32 reg;
+
+ /* Read D18F0x40 (FabricBlockInstanceCount) to bound the instance scan below. */
+ if (df_indirect_read_broadcast(0, 0, 0x40, &reg))
+ return -EINVAL;
+
+ num_df_instances = FIELD_GET(DF_BLOCK_INSTANCE_COUNT, reg);
+
+ for (ctx->inst_id = 0; ctx->inst_id < num_df_instances; ctx->inst_id++) {
+ /* Read D18F0x44 (FabricBlockInstanceInformation0) of instance ctx->inst_id. */
+ if (df_indirect_read_instance(0, 0, 0x44, ctx->inst_id, &reg))
+ return -EINVAL;
+
+ if (!reg) /* register reads as all zeroes: skip this instance */
+ continue;
+
+ /* Match on the first CCM instance; ctx->inst_id is left holding its ID. */
+ if (FIELD_GET(DF_INSTANCE_TYPE, reg) == DF_INST_TYPE_CCM)
+ return 0;
+ }
+
+ return -EINVAL; /* no CCM instance found */
+}
+
+static int find_map_by_dst_fabric_id(struct addr_ctx *ctx)
+{
+ u64 mask = df_cfg.node_id_mask;
+
+ for (ctx->map.num = 0; ctx->map.num < DF_NUM_DRAM_MAPS_AVAILABLE ; ctx->map.num++) {
+ if (get_dram_addr_map(ctx))
+ return -EINVAL; /* get_dram_addr_map() failed for this map */
+
+ /*
+ * Match if the Destination Fabric ID in this map is the same as
+ * the Fabric ID for the target memory device (node ID bits only).
+ */
+ if ((get_dst_fabric_id(ctx) & mask) == (ctx->cs_fabric_id & mask))
+ return 0; /* ctx->map.num stays at the matching map */
+ }
+
+ return -EINVAL; /* none of the DF_NUM_DRAM_MAPS_AVAILABLE maps matched */
+}
+
+/* UMC to CS mapping for MI200 die[0]s; indexed by UMC instance ID */
+static const u8 umc_to_cs_mapping_mi200_die0[] = { 28, 20, 24, 16, 12, 4, 8, 0,
+ 6, 30, 2, 26, 22, 14, 18, 10,
+ 19, 11, 15, 7, 3, 27, 31, 23,
+ 9, 1, 5, 29, 25, 17, 21, 13};
+
+/* UMC to CS mapping for MI200 die[1]s; indexed by UMC instance ID */
+static const u8 umc_to_cs_mapping_mi200_die1[] = { 19, 11, 15, 7, 3, 27, 31, 23,
+ 9, 1, 5, 29, 25, 17, 21, 13,
+ 28, 20, 24, 16, 12, 4, 8, 0,
+ 6, 30, 2, 26, 22, 14, 18, 10};
+
+/*
+ * Translate ctx->inst_id from a UMC instance to a CS instance, in place.
+ * MI200 has 2 dies which are enumerated alternately: die1s are the odd
+ * nodes (1, 3, 5, 7) and die0s are the even nodes (2, 4, 6, 8).
+ * Returns 0 on success, -EINVAL if inst_id is outside the tables.
+ */
+static int get_umc_to_cs_mapping(struct addr_ctx *ctx)
+{
+ if (ctx->inst_id >= sizeof(umc_to_cs_mapping_mi200_die0)) /* u8 entries: sizeof is the count */
+ return -EINVAL;
+
+ if (ctx->node_id % 2)
+ ctx->inst_id = umc_to_cs_mapping_mi200_die1[ctx->inst_id];
+ else
+ ctx->inst_id = umc_to_cs_mapping_mi200_die0[ctx->inst_id];
+ return 0;
+}
+
+static int get_address_map_heterogeneous(struct addr_ctx *ctx)
+{
+ if (ctx->node_id >= amd_nb_num()) { /* NOTE(review): presumably the MI200 (GPU) nodes - confirm */
+ if (get_umc_to_cs_mapping(ctx)) /* remaps ctx->inst_id through the MI200 UMC-to-CS tables */
+ return -EINVAL;
+ }
+
+ ctx->cs_fabric_id = ctx->inst_id; /* must be built before inst_id is overwritten below */
+ ctx->cs_fabric_id |= ctx->node_id << df_cfg.node_id_shift;
+
+ if (find_moderator_instance_id(ctx)) /* overwrites ctx->inst_id with the first CCM instance */
+ return -EINVAL;
+
+ if (find_map_by_dst_fabric_id(ctx)) /* leaves ctx->map.num at the map matching cs_fabric_id */
+ return -EINVAL;
+
+ return 0;
+}
+
static int lookup_cs_fabric_id(struct addr_ctx *ctx)
{
u32 reg;
@@ -482,6 +577,7 @@ static u8 get_num_intlv_chan(enum intlv_modes intlv_mode)
case NOHASH_8CHAN:
case DF3_COD1_8CHAN_HASH:
case DF4_NPS1_8CHAN_HASH:
+ case MI2_HASH_8CHAN:
case DF4p5_NPS1_8CHAN_1K_HASH:
case DF4p5_NPS1_8CHAN_2K_HASH:
return 8;
@@ -494,6 +590,7 @@ static u8 get_num_intlv_chan(enum intlv_modes intlv_mode)
case DF4p5_NPS1_12CHAN_2K_HASH:
return 12;
case NOHASH_16CHAN:
+ case MI2_HASH_16CHAN:
case DF4p5_NPS1_16CHAN_1K_HASH:
case DF4p5_NPS1_16CHAN_2K_HASH:
return 16;
@@ -501,6 +598,7 @@ static u8 get_num_intlv_chan(enum intlv_modes intlv_mode)
case DF4p5_NPS0_24CHAN_2K_HASH:
return 24;
case NOHASH_32CHAN:
+ case MI2_HASH_32CHAN:
return 32;
default:
ATL_BAD_INTLV_MODE(intlv_mode);
@@ -645,8 +743,11 @@ int get_address_map(struct addr_ctx *ctx)
{
int ret = 0;

- /* TODO: Add special path for DF3.5 heterogeneous systems. */
- ret = get_address_map_common(ctx);
+ /* Add special path for DF3.5 heterogeneous systems. */
+ if (df_cfg.flags.heterogeneous && df_cfg.rev == DF3p5)
+ ret = get_address_map_heterogeneous(ctx);
+ else
+ ret = get_address_map_common(ctx);
if (ret)
return ret;

diff --git a/drivers/ras/amd/atl/reg_fields.h b/drivers/ras/amd/atl/reg_fields.h
index d48470e12498..b85ab157773e 100644
--- a/drivers/ras/amd/atl/reg_fields.h
+++ b/drivers/ras/amd/atl/reg_fields.h
@@ -601,3 +601,32 @@
#define DF2_SOCKET_ID_SHIFT GENMASK(31, 28)
#define DF3_SOCKET_ID_SHIFT GENMASK(9, 8)
#define DF4_SOCKET_ID_SHIFT GENMASK(11, 8)
+
+/*
+ * Total number of instances of all the blocks in DF
+ *
+ * Access type: Broadcast
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F0x040 [Fabric Block Instance Count]
+ * DF3 BlkInstCount [7:0]
+ * DF3p5 BlkInstCount [7:0]
+ * DF4 BlkInstCount [7:0]
+ * DF4p5 BlkInstCount [9:0]
+ */
+#define DF_BLOCK_INSTANCE_COUNT GENMASK(9, 0)
+
+/*
+ * Information on the block capabilities
+ *
+ * Access type: Instance
+ *
+ * Register
+ * Rev Fieldname Bits
+ *
+ * D18F0x044 [Fabric Block Instance Information 0]
+ * DF3p5 InstanceType [3:0]
+ */
+#define DF_INSTANCE_TYPE GENMASK(3, 0)
diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c
index 86488138e120..656aac3f6c59 100644
--- a/drivers/ras/amd/atl/system.c
+++ b/drivers/ras/amd/atl/system.c
@@ -144,6 +144,7 @@ static int determine_df_rev_legacy(void)

if (FIELD_GET(DF4_COMPONENT_ID_MASK, fabric_id_mask0)) {
df_cfg.rev = DF3p5;
+ df_cfg.flags.heterogeneous = 1;

/* Read D18F1x154 (SystemFabricIdMask1) */
if (df_indirect_read_broadcast(0, 1, 0x154, &fabric_id_mask1))
--
2.25.1


2023-10-26 08:49:59

by Yujie Liu

[permalink] [raw]
Subject: Re: [PATCH 1/7] RAS: Add Address Translation support for MI200

Hi Muralidhara,

kernel test robot noticed the following build warnings:

[auto build test WARNING on ras/edac-for-next]
[also build test WARNING on tip/master linus/master tip/auto-latest v6.6-rc7 next-20231025]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Muralidhara-M-K/RAS-Add-Address-Translation-support-for-MI200/20231025-154756
base: https://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git edac-for-next
patch link: https://lore.kernel.org/r/20231025073339.630093-2-muralimk%40amd.com
patch subject: [PATCH 1/7] RAS: Add Address Translation support for MI200
config: x86_64-randconfig-071-20231026 (https://download.01.org/0day-ci/archive/20231026/[email protected]/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231026/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/r/[email protected]/

All warnings (new ones prefixed by >>):

>> drivers/ras/amd/atl/map.c:416:5: warning: no previous prototype for 'get_umc_to_cs_mapping' [-Wmissing-prototypes]
416 | int get_umc_to_cs_mapping(struct addr_ctx *ctx)
| ^~~~~~~~~~~~~~~~~~~~~


vim +/get_umc_to_cs_mapping +416 drivers/ras/amd/atl/map.c

ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 403
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 404 /* UMC to CS mapping for MI200 die[0]s */
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 405 u8 umc_to_cs_mapping_mi200_die0[] = { 28, 20, 24, 16, 12, 4, 8, 0,
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 406 6, 30, 2, 26, 22, 14, 18, 10,
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 407 19, 11, 15, 7, 3, 27, 31, 23,
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 408 9, 1, 5, 29, 25, 17, 21, 13};
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 409
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 410 /* UMC to CS mapping for MI200 die[1]s */
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 411 u8 umc_to_cs_mapping_mi200_die1[] = { 19, 11, 15, 7, 3, 27, 31, 23,
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 412 9, 1, 5, 29, 25, 17, 21, 13,
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 413 28, 20, 24, 16, 12, 4, 8, 0,
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 414 6, 30, 2, 26, 22, 14, 18, 10};
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 415
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 @416 int get_umc_to_cs_mapping(struct addr_ctx *ctx)
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 417 {
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 418 if (ctx->inst_id >= sizeof(umc_to_cs_mapping_mi200_die0))
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 419 return -EINVAL;
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 420
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 421 /*
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 422 * MI200 has 2 dies and are enumerated alternatively
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 423 * die0's are enumerated as node 2, 4, 6 and 8
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 424 * die1's are enumerated as node 1, 3, 5 and 7
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 425 */
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 426 if (ctx->node_id % 2)
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 427 ctx->inst_id = umc_to_cs_mapping_mi200_die1[ctx->inst_id];
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 428 else
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 429 ctx->inst_id = umc_to_cs_mapping_mi200_die0[ctx->inst_id];
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 430
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 431 return 0;
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 432 }
ea0e1d9c3eaf5d Muralidhara M K 2023-10-25 433

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki