2024-03-27 18:52:47

by John Allen

[permalink] [raw]
Subject: [PATCH v2 0/4] RAS: ATL: DF 4.5 NP2 Denormalization

Implement non-power-of-two denormalization for Data Fabric 4.5 in the
AMD address translation library.

Tree: git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git
Base commit: bd17b7c34fadef645becde1245b9394f69f31702 (origin/edac-amd-atl)

v2:
- Fix compilation error.
- Make remove_base_and_hole the inverse of add_base_and_hole.
- Move all map validation checks to validate_address_map at the beginning
of translation

John Allen (4):
RAS/AMD/ATL: Read DRAM hole base early
RAS/AMD/ATL: Expand helpers for adding and removing base and hole
RAS/AMD/ATL: Validate address map when information is gathered
RAS/AMD/ATL: Implement DF 4.5 NP2 denormalization

drivers/ras/amd/atl/core.c | 48 +--
drivers/ras/amd/atl/dehash.c | 43 ---
drivers/ras/amd/atl/denormalize.c | 530 ++++++++++++++++++++++++++++++
drivers/ras/amd/atl/internal.h | 45 +++
drivers/ras/amd/atl/map.c | 105 ++++++
drivers/ras/amd/atl/system.c | 21 ++
6 files changed, 726 insertions(+), 66 deletions(-)

--
2.34.1



2024-03-27 18:53:02

by John Allen

[permalink] [raw]
Subject: [PATCH v2 1/4] RAS/AMD/ATL: Read DRAM hole base early

Read DRAM hole base when constructing the address map as the value will
not change during run time.

Signed-off-by: John Allen <[email protected]>
---
v2:
- Fix compilation error. ctx->addr should read ctx->ret_addr.
- Improve commit description.
---
drivers/ras/amd/atl/core.c | 15 ++-------------
drivers/ras/amd/atl/internal.h | 2 ++
drivers/ras/amd/atl/system.c | 21 +++++++++++++++++++++
3 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c
index 6dc4e06305f7..63513d972c07 100644
--- a/drivers/ras/amd/atl/core.c
+++ b/drivers/ras/amd/atl/core.c
@@ -51,22 +51,11 @@ static bool legacy_hole_en(struct addr_ctx *ctx)

static int add_legacy_hole(struct addr_ctx *ctx)
{
- u32 dram_hole_base;
- u8 func = 0;
-
if (!legacy_hole_en(ctx))
return 0;

- if (df_cfg.rev >= DF4)
- func = 7;
-
- if (df_indirect_read_broadcast(ctx->node_id, func, 0x104, &dram_hole_base))
- return -EINVAL;
-
- dram_hole_base &= DF_DRAM_HOLE_BASE_MASK;
-
- if (ctx->ret_addr >= dram_hole_base)
- ctx->ret_addr += (BIT_ULL(32) - dram_hole_base);
+ if (ctx->ret_addr >= df_cfg.dram_hole_base)
+ ctx->ret_addr += (BIT_ULL(32) - df_cfg.dram_hole_base);

return 0;
}
diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
index 5de69e0bb0f9..1413c8ddc6c5 100644
--- a/drivers/ras/amd/atl/internal.h
+++ b/drivers/ras/amd/atl/internal.h
@@ -132,6 +132,8 @@ struct df_config {
/* Number of DRAM Address maps visible in a Coherent Station. */
u8 num_coh_st_maps;

+ u32 dram_hole_base;
+
/* Global flags to handle special cases. */
struct df_flags flags;
};
diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c
index 701349e84942..6f6fe24dec81 100644
--- a/drivers/ras/amd/atl/system.c
+++ b/drivers/ras/amd/atl/system.c
@@ -223,6 +223,21 @@ static int determine_df_rev(void)
return -EINVAL;
}

+static int get_dram_hole_base(void)
+{
+ u8 func = 0;
+
+ if (df_cfg.rev >= DF4)
+ func = 7;
+
+ if (df_indirect_read_broadcast(0, func, 0x104, &df_cfg.dram_hole_base))
+ return -EINVAL;
+
+ df_cfg.dram_hole_base &= DF_DRAM_HOLE_BASE_MASK;
+
+ return 0;
+}
+
static void get_num_maps(void)
{
switch (df_cfg.rev) {
@@ -266,6 +281,7 @@ static void dump_df_cfg(void)

pr_debug("num_coh_st_maps=%u", df_cfg.num_coh_st_maps);

+ pr_debug("dram_hole_base=%x", df_cfg.dram_hole_base);
pr_debug("flags.legacy_ficaa=%u", df_cfg.flags.legacy_ficaa);
pr_debug("flags.socket_id_shift_quirk=%u", df_cfg.flags.socket_id_shift_quirk);
}
@@ -282,6 +298,11 @@ int get_df_system_info(void)

get_num_maps();

+ if (get_dram_hole_base()) {
+ pr_warn("amd_atl: Failed to read DRAM hole base\n");
+ return -EINVAL;
+ }
+
dump_df_cfg();

return 0;
--
2.34.1


2024-03-27 18:53:14

by John Allen

[permalink] [raw]
Subject: [PATCH v2 2/4] RAS/AMD/ATL: Expand helpers for adding and removing base and hole

The existing helpers for adding and removing the base and legacy MMIO
hole operate on the ret_addr stored in the addr_ctx struct. This works
for the existing use case as adding and removing the base and hole is
only done once for an address that is already stored in ret_addr as part
of translation.

However, in the Data Fabric 4.5 non-power-of-2 cases, we will add and
remove the base and hole temporarily to check for correctness on
addresses other than ret_addr multiple times throughout translation.

Modify the helpers to take any address as a parameter and return the
resulting address after adding or removing the hole. This makes it
simpler to add and remove the base and hole for any address without
having to store intermediate values in the ret_addr field of the
addr_ctx struct.

Signed-off-by: John Allen <[email protected]>
---
v2:
- Make remove_base_and_hole the inverse of add_base_and_hole, removing
the hole first and then removing the base.
- Improve commit description.
---
drivers/ras/amd/atl/core.c | 43 ++++++++++++++++++++++------------
drivers/ras/amd/atl/internal.h | 3 +++
2 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c
index 63513d972c07..d45f9948c0ab 100644
--- a/drivers/ras/amd/atl/core.c
+++ b/drivers/ras/amd/atl/core.c
@@ -49,15 +49,26 @@ static bool legacy_hole_en(struct addr_ctx *ctx)
return FIELD_GET(DF_LEGACY_MMIO_HOLE_EN, reg);
}

-static int add_legacy_hole(struct addr_ctx *ctx)
+static u64 add_legacy_hole(struct addr_ctx *ctx, u64 addr)
{
if (!legacy_hole_en(ctx))
- return 0;
+ return addr;

- if (ctx->ret_addr >= df_cfg.dram_hole_base)
- ctx->ret_addr += (BIT_ULL(32) - df_cfg.dram_hole_base);
+ if (addr >= df_cfg.dram_hole_base)
+ addr += (BIT_ULL(32) - df_cfg.dram_hole_base);

- return 0;
+ return addr;
+}
+
+static u64 remove_legacy_hole(struct addr_ctx *ctx, u64 addr)
+{
+ if (!legacy_hole_en(ctx))
+ return addr;
+
+ if (addr >= df_cfg.dram_hole_base)
+ addr -= (BIT_ULL(32) - df_cfg.dram_hole_base);
+
+ return addr;
}

static u64 get_base_addr(struct addr_ctx *ctx)
@@ -72,14 +83,16 @@ static u64 get_base_addr(struct addr_ctx *ctx)
return base_addr << DF_DRAM_BASE_LIMIT_LSB;
}

-static int add_base_and_hole(struct addr_ctx *ctx)
+u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr)
{
- ctx->ret_addr += get_base_addr(ctx);
-
- if (add_legacy_hole(ctx))
- return -EINVAL;
+ addr += get_base_addr(ctx);
+ return add_legacy_hole(ctx, addr);
+}

- return 0;
+u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr)
+{
+ addr = remove_legacy_hole(ctx, addr);
+ return addr - get_base_addr(ctx);
}

static bool late_hole_remove(struct addr_ctx *ctx)
@@ -123,14 +136,14 @@ unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsig
if (denormalize_address(&ctx))
return -EINVAL;

- if (!late_hole_remove(&ctx) && add_base_and_hole(&ctx))
- return -EINVAL;
+ if (!late_hole_remove(&ctx))
+ ctx.ret_addr = add_base_and_hole(&ctx, ctx.ret_addr);

if (dehash_address(&ctx))
return -EINVAL;

- if (late_hole_remove(&ctx) && add_base_and_hole(&ctx))
- return -EINVAL;
+ if (late_hole_remove(&ctx))
+ ctx.ret_addr = add_base_and_hole(&ctx, ctx.ret_addr);

if (addr_over_limit(&ctx))
return -EINVAL;
diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
index 1413c8ddc6c5..05b870fcb24e 100644
--- a/drivers/ras/amd/atl/internal.h
+++ b/drivers/ras/amd/atl/internal.h
@@ -236,6 +236,9 @@ int dehash_address(struct addr_ctx *ctx);
unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr);
unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err);

+u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr);
+u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr);
+
/*
* Make a gap in @data that is @num_bits long starting at @bit_num.
* e.g. data = 11111111'b
--
2.34.1


2024-03-27 18:53:43

by John Allen

[permalink] [raw]
Subject: [PATCH v2 4/4] RAS/AMD/ATL: Implement DF 4.5 NP2 denormalization

Unlike with previous Data Fabric versions, with Data Fabric 4.5, there
are bits of the system physical address that can't be reconstructed from
the normalized address. Using NPS0_24CHAN_1K_HASH as an example, the
normalized address consists of bits [63:13] (divided by 3), bits
[11:10], and bits [7:0] of the system physical address.

In this case, the remainder from the divide by 3 and bits 8, 9, and 12
are missing. To determine the proper combination of missing system
physical address bits, iterate through each possible combination of
these bits, normalize the resulting system physical address, and compare
to the original address that is being translated. If the addresses
match, then the correct permutation of bits has been found.

Signed-off-by: John Allen <[email protected]>
---
v2:
- Move map validation to patch 3/4.
---
drivers/ras/amd/atl/denormalize.c | 530 ++++++++++++++++++++++++++++++
drivers/ras/amd/atl/internal.h | 40 +++
2 files changed, 570 insertions(+)

diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c
index e279224288d6..b03bba851e14 100644
--- a/drivers/ras/amd/atl/denormalize.c
+++ b/drivers/ras/amd/atl/denormalize.c
@@ -448,6 +448,105 @@ static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx)
return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id;
}

+static u64 get_logical_coh_st_fabric_id_for_current_spa(struct addr_ctx *ctx,
+ struct df4p5_denorm_ctx *denorm_ctx)
+{
+ bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T;
+ bool hash_pa8, hash_pa9, hash_pa12, hash_pa13;
+ u64 cs_id = 0;
+
+ hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
+ hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
+ hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
+ hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
+
+ hash_pa8 = FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa);
+ hash_pa8 ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa);
+ hash_pa8 ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k;
+ hash_pa8 ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M;
+ hash_pa8 ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G;
+ hash_pa8 ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T;
+
+ hash_pa9 = FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa);
+ hash_pa9 ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k;
+ hash_pa9 ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M;
+ hash_pa9 ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G;
+ hash_pa9 ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T;
+
+ hash_pa12 = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa);
+ hash_pa12 ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k;
+ hash_pa12 ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M;
+ hash_pa12 ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G;
+ hash_pa12 ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T;
+
+ hash_pa13 = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa);
+ hash_pa13 ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k;
+ hash_pa13 ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M;
+ hash_pa13 ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G;
+ hash_pa13 ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T;
+
+ switch (ctx->map.intlv_mode) {
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3;
+ cs_id %= denorm_ctx->mod_value;
+ cs_id <<= 2;
+ cs_id |= (hash_pa9 | (hash_pa12 << 1));
+ cs_id |= hash_pa8 << df_cfg.socket_id_shift;
+ break;
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ cs_id = FIELD_GET(GENMASK_ULL(63, 14), denorm_ctx->current_spa) << 4;
+ cs_id %= denorm_ctx->mod_value;
+ cs_id <<= 2;
+ cs_id |= (hash_pa12 | (hash_pa13 << 1));
+ cs_id |= hash_pa8 << df_cfg.socket_id_shift;
+ break;
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
+ cs_id %= denorm_ctx->mod_value;
+ cs_id <<= 2;
+ cs_id |= (hash_pa8 | (hash_pa9 << 1));
+ break;
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3;
+ cs_id %= denorm_ctx->mod_value;
+ cs_id <<= 2;
+ cs_id |= (hash_pa8 | (hash_pa12 << 1));
+ break;
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
+ cs_id |= (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) << 1);
+ cs_id %= denorm_ctx->mod_value;
+ cs_id <<= 1;
+ cs_id |= hash_pa8;
+ break;
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
+ cs_id %= denorm_ctx->mod_value;
+ cs_id <<= 1;
+ cs_id |= hash_pa8;
+ break;
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
+ cs_id |= FIELD_GET(GENMASK_ULL(9, 8), denorm_ctx->current_spa);
+ cs_id %= denorm_ctx->mod_value;
+ break;
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
+ cs_id |= FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) << 1;
+ cs_id %= denorm_ctx->mod_value;
+ break;
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return 0;
+ }
+
+ return cs_id;
+}
+
static int denorm_addr_common(struct addr_ctx *ctx)
{
u64 denorm_addr;
@@ -699,6 +798,424 @@ static int denorm_addr_df4_np2(struct addr_ctx *ctx)
return 0;
}

+static u64 normalize_addr_df4p5_np2(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx,
+ u64 addr)
+{
+ u64 temp_addr_a, temp_addr_b;
+
+ temp_addr_a = 0;
+ temp_addr_b = 0;
+
+ switch (ctx->map.intlv_mode) {
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ temp_addr_a = FIELD_GET(GENMASK_ULL(11, 10), addr) << 8;
+ break;
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ temp_addr_a = FIELD_GET(GENMASK_ULL(11, 9), addr) << 8;
+ break;
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return 0;
+ }
+
+ switch (ctx->map.intlv_mode) {
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value;
+ temp_addr_b <<= 10;
+ break;
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ temp_addr_b = FIELD_GET(GENMASK_ULL(63, 14), addr) / denorm_ctx->mod_value;
+ temp_addr_b <<= 11;
+ break;
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value;
+ temp_addr_b <<= 10;
+ break;
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value;
+ temp_addr_b <<= 11;
+ break;
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1;
+ temp_addr_b |= FIELD_GET(BIT_ULL(9), addr);
+ temp_addr_b /= denorm_ctx->mod_value;
+ temp_addr_b <<= 10;
+ break;
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value;
+ temp_addr_b <<= 11;
+ break;
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 2;
+ temp_addr_b |= FIELD_GET(GENMASK_ULL(9, 8), addr);
+ temp_addr_b /= denorm_ctx->mod_value;
+ temp_addr_b <<= 10;
+ break;
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1;
+ temp_addr_b |= FIELD_GET(BIT_ULL(8), addr);
+ temp_addr_b /= denorm_ctx->mod_value;
+ temp_addr_b <<= 11;
+ break;
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return 0;
+ }
+
+ return denorm_ctx->base_denorm_addr | temp_addr_a | temp_addr_b;
+}
+
+static void recalculate_hashed_bits_df4p5_np2(struct addr_ctx *ctx,
+ struct df4p5_denorm_ctx *denorm_ctx)
+{
+ bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T, hashed_bit;
+
+ if (!denorm_ctx->rehash_vector)
+ return;
+
+ hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
+ hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
+ hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
+ hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
+
+ if (denorm_ctx->rehash_vector & BIT_ULL(8)) {
+ hashed_bit = FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa);
+ hashed_bit ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa);
+ hashed_bit ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G;
+ hashed_bit ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T;
+
+ if (FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) != hashed_bit)
+ denorm_ctx->current_spa ^= BIT_ULL(8);
+ }
+
+ if (denorm_ctx->rehash_vector & BIT_ULL(9)) {
+ hashed_bit = FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa);
+ hashed_bit ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G;
+ hashed_bit ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T;
+
+ if (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) != hashed_bit)
+ denorm_ctx->current_spa ^= BIT_ULL(9);
+ }
+
+ if (denorm_ctx->rehash_vector & BIT_ULL(12)) {
+ hashed_bit = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa);
+ hashed_bit ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G;
+ hashed_bit ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T;
+
+ if (FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa) != hashed_bit)
+ denorm_ctx->current_spa ^= BIT_ULL(12);
+ }
+
+ if (denorm_ctx->rehash_vector & BIT_ULL(13)) {
+ hashed_bit = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa);
+ hashed_bit ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G;
+ hashed_bit ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T;
+
+ if (FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa) != hashed_bit)
+ denorm_ctx->current_spa ^= BIT_ULL(13);
+ }
+}
+
+static bool check_logical_coh_st_fabric_id(struct addr_ctx *ctx,
+ struct df4p5_denorm_ctx *denorm_ctx)
+{
+ unsigned int logical_coh_st_fabric_id;
+
+ /*
+ * The logical CS fabric ID of the permutation must be calculated from the
+ * current SPA with the base and with the MMIO hole.
+ */
+ logical_coh_st_fabric_id = get_logical_coh_st_fabric_id_for_current_spa(ctx, denorm_ctx);
+
+ atl_debug(ctx, "Checking calculated logical coherent station fabric id:\n");
+ atl_debug(ctx, " calculated fabric id = 0x%x\n", logical_coh_st_fabric_id);
+ atl_debug(ctx, " expected fabric id = 0x%x\n", denorm_ctx->coh_st_fabric_id);
+
+ if (denorm_ctx->coh_st_fabric_id != logical_coh_st_fabric_id)
+ return false;
+
+ return true;
+}
+
+static bool check_norm_addr(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
+{
+ u64 current_spa_without_base = remove_base_and_hole(ctx, denorm_ctx->current_spa);
+ u64 norm_addr;
+
+ /*
+ * The normalized address must be calculated with the current SPA without
+ * the base and without the MMIO hole.
+ */
+ norm_addr = normalize_addr_df4p5_np2(ctx, denorm_ctx, current_spa_without_base);
+
+ atl_debug(ctx, "Checking calculated normalized address:\n");
+ atl_debug(ctx, " calculated normalized addr = 0x%016llx\n", norm_addr);
+ atl_debug(ctx, " expected normalized addr = 0x%016llx\n", ctx->ret_addr);
+
+ if (norm_addr != ctx->ret_addr)
+ return false;
+
+ return true;
+}
+
+static int check_permutations(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
+{
+ u64 test_perm, temp_addr, denorm_addr, num_perms;
+ unsigned int dropped_remainder;
+
+ denorm_ctx->div_addr *= denorm_ctx->mod_value;
+
+ /*
+ * The high order bits of num_permutations represent the permutations
+ * of the dropped remainder. This will be either 0-3 or 0-5 depending
+ * on the interleave mode. The low order bits represent the
+ * permutations of other "lost" bits which will be any combination of
+ * 1, 2, or 3 bits depending on the interleave mode.
+ */
+ num_perms = denorm_ctx->mod_value << denorm_ctx->perm_shift;
+
+ for (test_perm = 0; test_perm < num_perms; test_perm++) {
+ denorm_addr = denorm_ctx->base_denorm_addr;
+ dropped_remainder = test_perm >> denorm_ctx->perm_shift;
+ temp_addr = denorm_ctx->div_addr + dropped_remainder;
+
+ switch (ctx->map.intlv_mode) {
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ denorm_addr |= temp_addr << 14;
+ break;
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ denorm_addr |= temp_addr << 13;
+ break;
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ denorm_addr |= temp_addr << 12;
+ break;
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 9;
+ denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12;
+ break;
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ denorm_addr |= FIELD_GET(GENMASK_ULL(1, 0), temp_addr) << 8;
+ denorm_addr |= FIELD_GET(GENMASK_ULL(63, 2), temp_addr) << 12;
+ break;
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 8;
+ denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12;
+ break;
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return -EINVAL;
+ }
+
+ switch (ctx->map.intlv_mode) {
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
+ denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9;
+ denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 12;
+ break;
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
+ denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12;
+ denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 13;
+ break;
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
+ denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12;
+ break;
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
+ denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9;
+ break;
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
+ break;
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return -EINVAL;
+ }
+
+ denorm_ctx->current_spa = add_base_and_hole(ctx, denorm_addr);
+ recalculate_hashed_bits_df4p5_np2(ctx, denorm_ctx);
+
+ atl_debug(ctx, "Checking potential system physical address 0x%016llx\n",
+ denorm_ctx->current_spa);
+
+ if (!check_logical_coh_st_fabric_id(ctx, denorm_ctx))
+ continue;
+
+ if (!check_norm_addr(ctx, denorm_ctx))
+ continue;
+
+ if (denorm_ctx->resolved_spa == INVALID_SPA ||
+ denorm_ctx->current_spa > denorm_ctx->resolved_spa)
+ denorm_ctx->resolved_spa = denorm_ctx->current_spa;
+ }
+
+ if (denorm_ctx->resolved_spa == INVALID_SPA) {
+ atl_debug(ctx, "Failed to find valid SPA for normalized address 0x%016llx\n",
+ ctx->ret_addr);
+ return -EINVAL;
+ }
+
+ /* Return the resolved SPA without the base, without the MMIO hole */
+ ctx->ret_addr = remove_base_and_hole(ctx, denorm_ctx->resolved_spa);
+
+ return 0;
+}
+
+static int init_df4p5_denorm_ctx(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
+{
+ denorm_ctx->current_spa = INVALID_SPA;
+ denorm_ctx->resolved_spa = INVALID_SPA;
+
+ switch (ctx->map.intlv_mode) {
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ denorm_ctx->perm_shift = 3;
+ denorm_ctx->rehash_vector = BIT(8) | BIT(9) | BIT(12);
+ break;
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ denorm_ctx->perm_shift = 3;
+ denorm_ctx->rehash_vector = BIT(8) | BIT(12) | BIT(13);
+ break;
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ denorm_ctx->perm_shift = 2;
+ denorm_ctx->rehash_vector = BIT(8);
+ break;
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ denorm_ctx->perm_shift = 2;
+ denorm_ctx->rehash_vector = BIT(8) | BIT(12);
+ break;
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ denorm_ctx->perm_shift = 1;
+ denorm_ctx->rehash_vector = BIT(8);
+ break;
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ denorm_ctx->perm_shift = 2;
+ denorm_ctx->rehash_vector = 0;
+ break;
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ denorm_ctx->perm_shift = 1;
+ denorm_ctx->rehash_vector = 0;
+ break;
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return -EINVAL;
+ }
+
+ denorm_ctx->base_denorm_addr = FIELD_GET(GENMASK_ULL(7, 0), ctx->ret_addr);
+
+ switch (ctx->map.intlv_mode) {
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(9, 8), ctx->ret_addr) << 10;
+ denorm_ctx->div_addr = FIELD_GET(GENMASK_ULL(63, 10), ctx->ret_addr);
+ break;
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(10, 8), ctx->ret_addr) << 9;
+ denorm_ctx->div_addr = FIELD_GET(GENMASK_ULL(63, 11), ctx->ret_addr);
+ break;
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return -EINVAL;
+ }
+
+ if (ctx->map.num_intlv_chan % 3 == 0)
+ denorm_ctx->mod_value = 3;
+ else
+ denorm_ctx->mod_value = 5;
+
+ denorm_ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx) - get_dst_fabric_id(ctx);
+
+ atl_debug(ctx, "Initialized df4p5_denorm_ctx:");
+ atl_debug(ctx, " mod_value = %d", denorm_ctx->mod_value);
+ atl_debug(ctx, " perm_shift = %d", denorm_ctx->perm_shift);
+ atl_debug(ctx, " rehash_vector = 0x%x", denorm_ctx->rehash_vector);
+ atl_debug(ctx, " base_denorm_addr = 0x%016llx", denorm_ctx->base_denorm_addr);
+ atl_debug(ctx, " div_addr = 0x%016llx", denorm_ctx->div_addr);
+ atl_debug(ctx, " coh_st_fabric_id = 0x%x", denorm_ctx->coh_st_fabric_id);
+
+ return 0;
+}
+
+/*
+ * For DF 4.5, parts of the physical address can be directly pulled from the
+ * normalized address. The exact bits will differ between interleave modes, but
+ * using NPS0_24CHAN_1K_HASH as an example, the normalized address consists of
+ * bits [63:13] (divided by 3), bits [11:10], and bits [7:0] of the system
+ * physical address.
+ *
+ * In this case, there is no way to reconstruct the missing bits (bits 8, 9,
+ * and 12) from the normalized address. Additionally, when bits [63:13] are
+ * divided by 3, the remainder is dropped. Determine the proper combination of
+ * "lost" bits and dropped remainder by iterating through each possible
+ * permutation of these bits and then normalizing the generated system physical
+ * addresses. If the normalized address matches the address we are trying to
+ * translate, then we have found the correct permutation of bits.
+ */
+static int denorm_addr_df4p5_np2(struct addr_ctx *ctx)
+{
+ struct df4p5_denorm_ctx denorm_ctx;
+ int ret = 0;
+
+ memset(&denorm_ctx, 0, sizeof(denorm_ctx));
+
+ atl_debug(ctx, "Denormalizing DF 4.5 normalized address 0x%016llx", ctx->ret_addr);
+
+ ret = init_df4p5_denorm_ctx(ctx, &denorm_ctx);
+ if (ret)
+ return ret;
+
+ return check_permutations(ctx, &denorm_ctx);
+}
+
int denormalize_address(struct addr_ctx *ctx)
{
switch (ctx->map.intlv_mode) {
@@ -710,6 +1227,19 @@ int denormalize_address(struct addr_ctx *ctx)
case DF4_NPS2_5CHAN_HASH:
case DF4_NPS1_10CHAN_HASH:
return denorm_addr_df4_np2(ctx);
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ return denorm_addr_df4p5_np2(ctx);
case DF3_6CHAN:
return denorm_addr_df3_6chan(ctx);
default:
diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
index 05b870fcb24e..946e36c053c5 100644
--- a/drivers/ras/amd/atl/internal.h
+++ b/drivers/ras/amd/atl/internal.h
@@ -34,6 +34,8 @@
#define DF_DRAM_BASE_LIMIT_LSB 28
#define MI300_DRAM_LIMIT_LSB 20

+#define INVALID_SPA (~0ULL)
+
enum df_revisions {
UNKNOWN,
DF2,
@@ -90,6 +92,44 @@ enum intlv_modes {
DF4p5_NPS1_10CHAN_2K_HASH = 0x49,
};

+struct df4p5_denorm_ctx {
+ /* perm_shift: Indicates the number of "lost" bits. This will be 1, 2, or 3. */
+ u8 perm_shift;
+
+ /* rehash_vector: A mask indicating the bits that need to be rehashed. */
+ u16 rehash_vector;
+
+ /*
+ * mod_value: Represents the value that the high bits of the normalized
+ * address are divided by during normalization. This value will be 3
+ * for interleave modes with a number of channels divisible by 3 or the
+ * value will be 5 for interleave modes with a number of channels
+ * divisible by 5. Power-of-two interleave modes are handled
+ * separately.
+ */
+ u8 mod_value;
+
+ /*
+ * base_denorm_addr: Represents the bits that can be directly pulled
+ * from the normalized address. In each case, pass through bits [7:0]
+ * of the normalized address. The other bits depend on the interleave
+ * bit position which will be bit 10 for 1K interleave stripe cases and
+ * bit 11 for 2K interleave stripe cases.
+ */
+ u64 base_denorm_addr;
+
+ /*
+ * div_addr: Represents the high bits of the physical address that have
+ * been divided by the mod_value.
+ */
+ u64 div_addr;
+
+ u64 current_spa;
+ u64 resolved_spa;
+
+ u16 coh_st_fabric_id;
+};
+
struct df_flags {
__u8 legacy_ficaa : 1,
socket_id_shift_quirk : 1,
--
2.34.1


2024-03-27 18:54:02

by John Allen

[permalink] [raw]
Subject: [PATCH v2 3/4] RAS/AMD/ATL: Validate address map when information is gathered

The address map will not change during translation so all checks to
validate the map can be moved to a single function that checks the map
at the time the information is gathered.

Signed-off-by: John Allen <[email protected]>
---
v2:
- New in v2.
---
drivers/ras/amd/atl/dehash.c | 43 --------------
drivers/ras/amd/atl/map.c | 105 +++++++++++++++++++++++++++++++++++
2 files changed, 105 insertions(+), 43 deletions(-)

diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c
index 4ea46262c4f5..d4ee7ecabaee 100644
--- a/drivers/ras/amd/atl/dehash.c
+++ b/drivers/ras/amd/atl/dehash.c
@@ -12,41 +12,10 @@

#include "internal.h"

-/*
- * Verify the interleave bits are correct in the different interleaving
- * settings.
- *
- * If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the
- * respective interleaving is disabled.
- */
-static inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
- u8 num_intlv_dies, u8 num_intlv_sockets)
-{
- if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) {
- pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos);
- return false;
- }
-
- if (ctx->map.num_intlv_dies > num_intlv_dies) {
- pr_debug("Invalid number of interleave dies: %u", ctx->map.num_intlv_dies);
- return false;
- }
-
- if (ctx->map.num_intlv_sockets > num_intlv_sockets) {
- pr_debug("Invalid number of interleave sockets: %u", ctx->map.num_intlv_sockets);
- return false;
- }
-
- return true;
-}
-
static int df2_dehash_addr(struct addr_ctx *ctx)
{
u8 hashed_bit, intlv_bit, intlv_bit_pos;

- if (!map_bits_valid(ctx, 8, 9, 1, 1))
- return -EINVAL;
-
intlv_bit_pos = ctx->map.intlv_bit_pos;
intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr);

@@ -67,9 +36,6 @@ static int df3_dehash_addr(struct addr_ctx *ctx)
bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G;
u8 hashed_bit, intlv_bit, intlv_bit_pos;

- if (!map_bits_valid(ctx, 8, 9, 1, 1))
- return -EINVAL;
-
hash_ctl_64k = FIELD_GET(DF3_HASH_CTL_64K, ctx->map.ctl);
hash_ctl_2M = FIELD_GET(DF3_HASH_CTL_2M, ctx->map.ctl);
hash_ctl_1G = FIELD_GET(DF3_HASH_CTL_1G, ctx->map.ctl);
@@ -171,9 +137,6 @@ static int df4_dehash_addr(struct addr_ctx *ctx)
bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G;
u8 hashed_bit, intlv_bit;

- if (!map_bits_valid(ctx, 8, 8, 1, 2))
- return -EINVAL;
-
hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
@@ -247,9 +210,6 @@ static int df4p5_dehash_addr(struct addr_ctx *ctx)
u8 hashed_bit, intlv_bit;
u64 rehash_vector;

- if (!map_bits_valid(ctx, 8, 8, 1, 2))
- return -EINVAL;
-
hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
@@ -360,9 +320,6 @@ static int mi300_dehash_addr(struct addr_ctx *ctx)
bool hashed_bit, intlv_bit, test_bit;
u8 num_intlv_bits, base_bit, i;

- if (!map_bits_valid(ctx, 8, 8, 4, 1))
- return -EINVAL;
-
hash_ctl_4k = FIELD_GET(DF4p5_HASH_CTL_4K, ctx->map.ctl);
hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
diff --git a/drivers/ras/amd/atl/map.c b/drivers/ras/amd/atl/map.c
index 8b908e8d7495..c7772733a363 100644
--- a/drivers/ras/amd/atl/map.c
+++ b/drivers/ras/amd/atl/map.c
@@ -642,6 +642,107 @@ static int get_global_map_data(struct addr_ctx *ctx)
return 0;
}

+/*
+ * Verify the interleave bits are correct in the different interleaving
+ * settings.
+ *
+ * If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the
+ * respective interleaving is disabled.
+ */
+inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
+ u8 num_intlv_dies, u8 num_intlv_sockets)
+{
+ if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) {
+ pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos);
+ return false;
+ }
+
+ if (ctx->map.num_intlv_dies > num_intlv_dies) {
+ pr_debug("Invalid number of interleave dies: %u", ctx->map.num_intlv_dies);
+ return false;
+ }
+
+ if (ctx->map.num_intlv_sockets > num_intlv_sockets) {
+ pr_debug("Invalid number of interleave sockets: %u", ctx->map.num_intlv_sockets);
+ return false;
+ }
+
+ return true;
+}
+
+static int validate_address_map(struct addr_ctx *ctx)
+{
+ switch (ctx->map.intlv_mode) {
+ case DF2_2CHAN_HASH:
+ if (!map_bits_valid(ctx, 8, 9, 1, 1))
+ goto out;
+ break;
+
+ case DF3_COD4_2CHAN_HASH:
+ case DF3_COD2_4CHAN_HASH:
+ case DF3_COD1_8CHAN_HASH:
+ if (!map_bits_valid(ctx, 8, 9, 1, 1))
+ goto out;
+ break;
+
+ case DF4_NPS4_2CHAN_HASH:
+ case DF4_NPS2_4CHAN_HASH:
+ case DF4_NPS1_8CHAN_HASH:
+ if (!map_bits_valid(ctx, 8, 8, 1, 2))
+ goto out;
+ break;
+
+ case DF4p5_NPS4_2CHAN_1K_HASH:
+ case DF4p5_NPS4_2CHAN_2K_HASH:
+ case DF4p5_NPS2_4CHAN_1K_HASH:
+ case DF4p5_NPS2_4CHAN_2K_HASH:
+ case DF4p5_NPS1_8CHAN_1K_HASH:
+ case DF4p5_NPS1_8CHAN_2K_HASH:
+ case DF4p5_NPS1_16CHAN_1K_HASH:
+ case DF4p5_NPS1_16CHAN_2K_HASH:
+ if (!map_bits_valid(ctx, 8, 8, 1, 2))
+ goto out;
+ break;
+
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ if (ctx->map.num_intlv_sockets != 1 || !map_bits_valid(ctx, 8, 0, 1, 1))
+ goto out;
+ break;
+
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ if (ctx->map.num_intlv_sockets < 2 || !map_bits_valid(ctx, 8, 0, 1, 2))
+ goto out;
+ break;
+
+ case MI3_HASH_8CHAN:
+ case MI3_HASH_16CHAN:
+ case MI3_HASH_32CHAN:
+ if (!map_bits_valid(ctx, 8, 8, 4, 1))
+ goto out;
+ break;
+
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return -EINVAL;
+ }
+
+ return 0;
+
+out:
+ atl_debug(ctx, "Inconsistent address map");
+ return -EINVAL;
+}
+
static void dump_address_map(struct dram_addr_map *map)
{
u8 i;
@@ -678,5 +779,9 @@ int get_address_map(struct addr_ctx *ctx)

dump_address_map(&ctx->map);

+ ret = validate_address_map(ctx);
+ if (ret)
+ return ret;
+
return ret;
}
--
2.34.1


2024-04-03 13:30:52

by Yazen Ghannam

[permalink] [raw]
Subject: Re: [PATCH v2 1/4] RAS/AMD/ATL: Read DRAM hole base early



On 3/27/24 14:52, John Allen wrote:
> Read DRAM hole base when constructing the address map as the value will
> not change during run time.
>
> Signed-off-by: John Allen <[email protected]>
> ---
> v2:
> - Fix compilation error. ctx->addr should read ctx->ret_addr.
> - Improve commit description.
> ---
> drivers/ras/amd/atl/core.c | 15 ++-------------
> drivers/ras/amd/atl/internal.h | 2 ++
> drivers/ras/amd/atl/system.c | 21 +++++++++++++++++++++
> 3 files changed, 25 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c
> index 6dc4e06305f7..63513d972c07 100644
> --- a/drivers/ras/amd/atl/core.c
> +++ b/drivers/ras/amd/atl/core.c
> @@ -51,22 +51,11 @@ static bool legacy_hole_en(struct addr_ctx *ctx)
>
> static int add_legacy_hole(struct addr_ctx *ctx)
> {
> - u32 dram_hole_base;
> - u8 func = 0;
> -
> if (!legacy_hole_en(ctx))
> return 0;
>
> - if (df_cfg.rev >= DF4)
> - func = 7;
> -
> - if (df_indirect_read_broadcast(ctx->node_id, func, 0x104, &dram_hole_base))
> - return -EINVAL;
> -
> - dram_hole_base &= DF_DRAM_HOLE_BASE_MASK;
> -
> - if (ctx->ret_addr >= dram_hole_base)
> - ctx->ret_addr += (BIT_ULL(32) - dram_hole_base);
> + if (ctx->ret_addr >= df_cfg.dram_hole_base)
> + ctx->ret_addr += (BIT_ULL(32) - df_cfg.dram_hole_base);
>
> return 0;
> }
> diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
> index 5de69e0bb0f9..1413c8ddc6c5 100644
> --- a/drivers/ras/amd/atl/internal.h
> +++ b/drivers/ras/amd/atl/internal.h
> @@ -132,6 +132,8 @@ struct df_config {
> /* Number of DRAM Address maps visible in a Coherent Station. */
> u8 num_coh_st_maps;
>
> + u32 dram_hole_base;
> +
> /* Global flags to handle special cases. */
> struct df_flags flags;
> };
> diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c
> index 701349e84942..6f6fe24dec81 100644
> --- a/drivers/ras/amd/atl/system.c
> +++ b/drivers/ras/amd/atl/system.c
> @@ -223,6 +223,21 @@ static int determine_df_rev(void)
> return -EINVAL;
> }
>
> +static int get_dram_hole_base(void)
> +{
> + u8 func = 0;
> +
> + if (df_cfg.rev >= DF4)
> + func = 7;
> +
> + if (df_indirect_read_broadcast(0, func, 0x104, &df_cfg.dram_hole_base))
> + return -EINVAL;
> +
> + df_cfg.dram_hole_base &= DF_DRAM_HOLE_BASE_MASK;
> +
> + return 0;
> +}
> +
> static void get_num_maps(void)
> {
> switch (df_cfg.rev) {
> @@ -266,6 +281,7 @@ static void dump_df_cfg(void)
>
> pr_debug("num_coh_st_maps=%u", df_cfg.num_coh_st_maps);
>
> + pr_debug("dram_hole_base=%x", df_cfg.dram_hole_base);

This should include a "0x" prefix to make clear it is a hex value when
printed.

> pr_debug("flags.legacy_ficaa=%u", df_cfg.flags.legacy_ficaa);
> pr_debug("flags.socket_id_shift_quirk=%u", df_cfg.flags.socket_id_shift_quirk);
> }
> @@ -282,6 +298,11 @@ int get_df_system_info(void)
>
> get_num_maps();
>
> + if (get_dram_hole_base()) {
> + pr_warn("amd_atl: Failed to read DRAM hole base");
> + return -EINVAL;
> + }
> +
> dump_df_cfg();
>
> return 0;

With the small change above, this looks good to me.

Reviewed-by: Yazen Ghannam <[email protected]>

Thanks,
Yazen

2024-04-03 14:15:53

by Yazen Ghannam

[permalink] [raw]
Subject: Re: [PATCH v2 2/4] RAS/AMD/ATL: Expand helpers for adding and removing base and hole



On 3/27/24 14:52, John Allen wrote:
> The existing helpers for adding and removing the base and legacy MMIO

The remove helper is newly introduced in this patch, correct?

> hole operate on the ret_addr stored in the addr_ctx struct. This works
> for the existing use case as adding and removing the base and hole is
> only done once for an address that is already stored in ret_addr as part
> of translation.
>

I think it would help to give a bit more context and to describe things
more generally.

"The ret_addr field in struct addr_ctx contains the intermediate value of
the returned address as it passes through multiple steps in the
translation process. Currently, the "add DRAM base and legacy hole"
step is only done once, so it operates directly on the intermediate
value."

> However, in the Data Fabric 4.5 non-power-of-2 cases, we will add and
> remove the base and hole temporarily to check for correctness on
> addresses other than ret_addr multiple times throughout translation.
>

Don't say "we". Keep the voice passive.

"However, for X cases, the "add DRAM base and legacy hole" step, and its
inverse, need to be done for multiple temporary address values. During
this process, the intermediate value should not be lost, so the ret_addr
value cannot be reused."

> Modify the helpers to take any address as a parameter and return the
> resulting address after adding or removing the hole. This will allow for
> more simply adding and removing the base and hole from any address
> without having to store them in the ret_addr field of the addr_ctx
> struct.
>

"Update the existing 'add' helper to operate on an arbitrary address.
And introduce a new 'remove' helper to do the inverse operations."

How do these sound?

> Signed-off-by: John Allen <[email protected]>
> ---
> v2:
> - Make remove_base_and_hole the inverse of add_base_and_hole, removing
> the hole first and then removing the base.
> - Improve commit description.
> ---
> drivers/ras/amd/atl/core.c | 43 ++++++++++++++++++++++------------
> drivers/ras/amd/atl/internal.h | 3 +++
> 2 files changed, 31 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c
> index 63513d972c07..d45f9948c0ab 100644
> --- a/drivers/ras/amd/atl/core.c
> +++ b/drivers/ras/amd/atl/core.c
> @@ -49,15 +49,26 @@ static bool legacy_hole_en(struct addr_ctx *ctx)
> return FIELD_GET(DF_LEGACY_MMIO_HOLE_EN, reg);
> }
>
> -static int add_legacy_hole(struct addr_ctx *ctx)
> +static u64 add_legacy_hole(struct addr_ctx *ctx, u64 addr)
> {
> if (!legacy_hole_en(ctx))
> - return 0;
> + return addr;
>
> - if (ctx->ret_addr >= df_cfg.dram_hole_base)
> - ctx->ret_addr += (BIT_ULL(32) - df_cfg.dram_hole_base);
> + if (addr >= df_cfg.dram_hole_base)
> + addr += (BIT_ULL(32) - df_cfg.dram_hole_base);
>
> - return 0;
> + return addr;
> +}
> +
> +static u64 remove_legacy_hole(struct addr_ctx *ctx, u64 addr)
> +{
> + if (!legacy_hole_en(ctx))
> + return addr;
> +
> + if (addr >= df_cfg.dram_hole_base)
> + addr -= (BIT_ULL(32) - df_cfg.dram_hole_base);
> +
> + return addr;
> }
>
> static u64 get_base_addr(struct addr_ctx *ctx)
> @@ -72,14 +83,16 @@ static u64 get_base_addr(struct addr_ctx *ctx)
> return base_addr << DF_DRAM_BASE_LIMIT_LSB;
> }
>
> -static int add_base_and_hole(struct addr_ctx *ctx)
> +u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr)
> {
> - ctx->ret_addr += get_base_addr(ctx);
> -
> - if (add_legacy_hole(ctx))
> - return -EINVAL;
> + addr += get_base_addr(ctx);
> + return add_legacy_hole(ctx, addr);
> +}
>
> - return 0;
> +u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr)
> +{
> + remove_legacy_hole(ctx, addr);
> + return addr - get_base_addr(ctx);
> }
>
> static bool late_hole_remove(struct addr_ctx *ctx)
> @@ -123,14 +136,14 @@ unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsig
> if (denormalize_address(&ctx))
> return -EINVAL;
>
> - if (!late_hole_remove(&ctx) && add_base_and_hole(&ctx))
> - return -EINVAL;
> + if (!late_hole_remove(&ctx))
> + ctx.ret_addr = add_base_and_hole(&ctx, ctx.ret_addr);
>
> if (dehash_address(&ctx))
> return -EINVAL;
>
> - if (late_hole_remove(&ctx) && add_base_and_hole(&ctx))
> - return -EINVAL;
> + if (late_hole_remove(&ctx))
> + ctx.ret_addr = add_base_and_hole(&ctx, ctx.ret_addr);
>
> if (addr_over_limit(&ctx))
> return -EINVAL;
> diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
> index 1413c8ddc6c5..05b870fcb24e 100644
> --- a/drivers/ras/amd/atl/internal.h
> +++ b/drivers/ras/amd/atl/internal.h
> @@ -236,6 +236,9 @@ int dehash_address(struct addr_ctx *ctx);
> unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr);
> unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
>
> +u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr);
> +u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr);
> +
> /*
> * Make a gap in @data that is @num_bits long starting at @bit_num.
> * e.g. data = 11111111'b

Besides the commit message notes, I think this looks good.

Reviewed-by: Yazen Ghannam <[email protected]>

Thanks,
Yazen

2024-04-03 14:26:58

by Yazen Ghannam

[permalink] [raw]
Subject: Re: [PATCH v2 3/4] RAS/AMD/ATL: Validate address map when information is gathered



On 3/27/24 14:52, John Allen wrote:
> The address map will not change during translation so all checks to
> validate the map can be moved to a single function that checks the map
> at the time the information is gathered.

There needs to be a command in the message (imperative mood).

"Validate maps earlier..." or similar.

>
> Signed-off-by: John Allen <[email protected]>
> ---
> v2:
> - New in v2.
> ---
> drivers/ras/amd/atl/dehash.c | 43 --------------
> drivers/ras/amd/atl/map.c | 105 +++++++++++++++++++++++++++++++++++
> 2 files changed, 105 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c
> index 4ea46262c4f5..d4ee7ecabaee 100644
> --- a/drivers/ras/amd/atl/dehash.c
> +++ b/drivers/ras/amd/atl/dehash.c
> @@ -12,41 +12,10 @@
>
> #include "internal.h"
>
> -/*
> - * Verify the interleave bits are correct in the different interleaving
> - * settings.
> - *
> - * If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the
> - * respective interleaving is disabled.
> - */
> -static inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
> - u8 num_intlv_dies, u8 num_intlv_sockets)
> -{
> - if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) {
> - pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos);
> - return false;
> - }
> -
> - if (ctx->map.num_intlv_dies > num_intlv_dies) {
> - pr_debug("Invalid number of interleave dies: %u", ctx->map.num_intlv_dies);
> - return false;
> - }
> -
> - if (ctx->map.num_intlv_sockets > num_intlv_sockets) {
> - pr_debug("Invalid number of interleave sockets: %u", ctx->map.num_intlv_sockets);
> - return false;
> - }
> -
> - return true;
> -}
> -
> static int df2_dehash_addr(struct addr_ctx *ctx)
> {
> u8 hashed_bit, intlv_bit, intlv_bit_pos;
>
> - if (!map_bits_valid(ctx, 8, 9, 1, 1))
> - return -EINVAL;
> -
> intlv_bit_pos = ctx->map.intlv_bit_pos;
> intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr);
>
> @@ -67,9 +36,6 @@ static int df3_dehash_addr(struct addr_ctx *ctx)
> bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G;
> u8 hashed_bit, intlv_bit, intlv_bit_pos;
>
> - if (!map_bits_valid(ctx, 8, 9, 1, 1))
> - return -EINVAL;
> -
> hash_ctl_64k = FIELD_GET(DF3_HASH_CTL_64K, ctx->map.ctl);
> hash_ctl_2M = FIELD_GET(DF3_HASH_CTL_2M, ctx->map.ctl);
> hash_ctl_1G = FIELD_GET(DF3_HASH_CTL_1G, ctx->map.ctl);
> @@ -171,9 +137,6 @@ static int df4_dehash_addr(struct addr_ctx *ctx)
> bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G;
> u8 hashed_bit, intlv_bit;
>
> - if (!map_bits_valid(ctx, 8, 8, 1, 2))
> - return -EINVAL;
> -
> hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
> hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
> hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
> @@ -247,9 +210,6 @@ static int df4p5_dehash_addr(struct addr_ctx *ctx)
> u8 hashed_bit, intlv_bit;
> u64 rehash_vector;
>
> - if (!map_bits_valid(ctx, 8, 8, 1, 2))
> - return -EINVAL;
> -
> hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
> hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
> hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
> @@ -360,9 +320,6 @@ static int mi300_dehash_addr(struct addr_ctx *ctx)
> bool hashed_bit, intlv_bit, test_bit;
> u8 num_intlv_bits, base_bit, i;
>
> - if (!map_bits_valid(ctx, 8, 8, 4, 1))
> - return -EINVAL;
> -
> hash_ctl_4k = FIELD_GET(DF4p5_HASH_CTL_4K, ctx->map.ctl);
> hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
> hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
> diff --git a/drivers/ras/amd/atl/map.c b/drivers/ras/amd/atl/map.c
> index 8b908e8d7495..c7772733a363 100644
> --- a/drivers/ras/amd/atl/map.c
> +++ b/drivers/ras/amd/atl/map.c
> @@ -642,6 +642,107 @@ static int get_global_map_data(struct addr_ctx *ctx)
> return 0;
> }
>
> +/*
> + * Verify the interleave bits are correct in the different interleaving
> + * settings.
> + *
> + * If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the
> + * respective interleaving is disabled.
> + */
> +inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,

Missing "static" keyword.

> + u8 num_intlv_dies, u8 num_intlv_sockets)
> +{
> + if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) {
> + pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos);
> + return false;
> + }
> +
> + if (ctx->map.num_intlv_dies > num_intlv_dies) {
> + pr_debug("Invalid number of interleave dies: %u", ctx->map.num_intlv_dies);
> + return false;
> + }
> +
> + if (ctx->map.num_intlv_sockets > num_intlv_sockets) {
> + pr_debug("Invalid number of interleave sockets: %u", ctx->map.num_intlv_sockets);
> + return false;
> + }
> +
> + return true;
> +}
> +
> +static int validate_address_map(struct addr_ctx *ctx)
> +{
> + switch (ctx->map.intlv_mode) {
> + case DF2_2CHAN_HASH:
> + if (!map_bits_valid(ctx, 8, 9, 1, 1))
> + goto out;
> + break;
> +
> + case DF3_COD4_2CHAN_HASH:
> + case DF3_COD2_4CHAN_HASH:
> + case DF3_COD1_8CHAN_HASH:
> + if (!map_bits_valid(ctx, 8, 9, 1, 1))
> + goto out;
> + break;
> +
> + case DF4_NPS4_2CHAN_HASH:
> + case DF4_NPS2_4CHAN_HASH:
> + case DF4_NPS1_8CHAN_HASH:
> + if (!map_bits_valid(ctx, 8, 8, 1, 2))
> + goto out;
> + break;
> +
> + case DF4p5_NPS4_2CHAN_1K_HASH:
> + case DF4p5_NPS4_2CHAN_2K_HASH:
> + case DF4p5_NPS2_4CHAN_1K_HASH:
> + case DF4p5_NPS2_4CHAN_2K_HASH:
> + case DF4p5_NPS1_8CHAN_1K_HASH:
> + case DF4p5_NPS1_8CHAN_2K_HASH:
> + case DF4p5_NPS1_16CHAN_1K_HASH:
> + case DF4p5_NPS1_16CHAN_2K_HASH:
> + if (!map_bits_valid(ctx, 8, 8, 1, 2))
> + goto out;
> + break;
> +
> + case DF4p5_NPS4_3CHAN_1K_HASH:
> + case DF4p5_NPS4_3CHAN_2K_HASH:
> + case DF4p5_NPS2_5CHAN_1K_HASH:
> + case DF4p5_NPS2_5CHAN_2K_HASH:
> + case DF4p5_NPS2_6CHAN_1K_HASH:
> + case DF4p5_NPS2_6CHAN_2K_HASH:
> + case DF4p5_NPS1_10CHAN_1K_HASH:
> + case DF4p5_NPS1_10CHAN_2K_HASH:
> + case DF4p5_NPS1_12CHAN_1K_HASH:
> + case DF4p5_NPS1_12CHAN_2K_HASH:
> + if (ctx->map.num_intlv_sockets != 1 || !map_bits_valid(ctx, 8, 0, 1, 1))
> + goto out;
> + break;
> +
> + case DF4p5_NPS0_24CHAN_1K_HASH:
> + case DF4p5_NPS0_24CHAN_2K_HASH:
> + if (ctx->map.num_intlv_sockets < 2 || !map_bits_valid(ctx, 8, 0, 1, 2))
> + goto out;
> + break;

Please move the new DF4p5 np2 cases to the next patch. This patch can
then just be rearranging the existing code. And the next patch can add
new code.

> +
> + case MI3_HASH_8CHAN:
> + case MI3_HASH_16CHAN:
> + case MI3_HASH_32CHAN:
> + if (!map_bits_valid(ctx, 8, 8, 4, 1))
> + goto out;
> + break;
> +
> + default:
> + atl_debug_on_bad_intlv_mode(ctx);
> + return -EINVAL;
> + }
> +
> + return 0;
> +
> +out:
> + atl_debug(ctx, "Inconsistent address map");
> + return -EINVAL;
> +}
> +
> static void dump_address_map(struct dram_addr_map *map)
> {
> u8 i;
> @@ -678,5 +779,9 @@ int get_address_map(struct addr_ctx *ctx)
>
> dump_address_map(&ctx->map);
>
> + ret = validate_address_map(ctx);
> + if (ret)
> + return ret;
> +
> return ret;
> }

Thanks,
Yazen

2024-04-03 15:23:08

by Yazen Ghannam

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] RAS/AMD/ATL: Implement DF 4.5 NP2 denormalization



On 3/27/24 14:52, John Allen wrote:
> Unlike with previous Data Fabric versions, with Data Fabric 4.5, there

Highlight that this is just for non-power-of-2 cases.

> are bits of the system physical address that can't be reconstructed from

"...can't be fully reconstructed..."

> the normalized address. Using NPS0_24CHAN_1K_HASH as an example, the
> normalized address consists of bits [63:13] (divided by 3), bits
> [11:10], and bits [7:0] of the system physical address.
>
> In this case, the remainder from the divide by 3 and bits 8, 9, and 12

I think an example like this is best kept in the code comments. And the
commit message can have a more high-level description.

"Some bits will be indeterminate, i.e., they will have multiple possible
solutions."

> are missing. To determine the proper combination of missing system
> physical address bits, iterate through each possible combination of
> these bits, normalize the resulting system physical address, and compare
> to the original address that is being translated. If the addresses
> match, then the correct permutation of bits has been found.
>
> Signed-off-by: John Allen <[email protected]>
> ---
> v2:
> - Move map validation to patch 3/4.
> ---
> drivers/ras/amd/atl/denormalize.c | 530 ++++++++++++++++++++++++++++++
> drivers/ras/amd/atl/internal.h | 40 +++
> 2 files changed, 570 insertions(+)
>
> diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c
> index e279224288d6..b03bba851e14 100644
> --- a/drivers/ras/amd/atl/denormalize.c
> +++ b/drivers/ras/amd/atl/denormalize.c
> @@ -448,6 +448,105 @@ static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx)
> return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id;
> }
>
> +static u64 get_logical_coh_st_fabric_id_for_current_spa(struct addr_ctx *ctx,
> + struct df4p5_denorm_ctx *denorm_ctx)
> +{
> + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T;
> + bool hash_pa8, hash_pa9, hash_pa12, hash_pa13;
> + u64 cs_id = 0;
> +
> + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
> + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
> + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
> + hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
> +
> + hash_pa8 = FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa);
> + hash_pa8 ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa);
> + hash_pa8 ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k;
> + hash_pa8 ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M;
> + hash_pa8 ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G;
> + hash_pa8 ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T;
> +
> + hash_pa9 = FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa);
> + hash_pa9 ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k;
> + hash_pa9 ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M;
> + hash_pa9 ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G;
> + hash_pa9 ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T;
> +
> + hash_pa12 = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa);
> + hash_pa12 ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k;
> + hash_pa12 ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M;
> + hash_pa12 ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G;
> + hash_pa12 ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T;
> +
> + hash_pa13 = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa);
> + hash_pa13 ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k;
> + hash_pa13 ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M;
> + hash_pa13 ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G;
> + hash_pa13 ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T;
> +
> + switch (ctx->map.intlv_mode) {
> + case DF4p5_NPS0_24CHAN_1K_HASH:
> + cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3;
> + cs_id %= denorm_ctx->mod_value;
> + cs_id <<= 2;
> + cs_id |= (hash_pa9 | (hash_pa12 << 1));
> + cs_id |= hash_pa8 << df_cfg.socket_id_shift;
> + break;
> + case DF4p5_NPS0_24CHAN_2K_HASH:
> + cs_id = FIELD_GET(GENMASK_ULL(63, 14), denorm_ctx->current_spa) << 4;
> + cs_id %= denorm_ctx->mod_value;
> + cs_id <<= 2;
> + cs_id |= (hash_pa12 | (hash_pa13 << 1));
> + cs_id |= hash_pa8 << df_cfg.socket_id_shift;
> + break;
> + case DF4p5_NPS1_12CHAN_1K_HASH:
> + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
> + cs_id %= denorm_ctx->mod_value;
> + cs_id <<= 2;
> + cs_id |= (hash_pa8 | (hash_pa9 << 1));
> + break;
> + case DF4p5_NPS1_12CHAN_2K_HASH:
> + cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3;
> + cs_id %= denorm_ctx->mod_value;
> + cs_id <<= 2;
> + cs_id |= (hash_pa8 | (hash_pa12 << 1));
> + break;
> + case DF4p5_NPS2_6CHAN_1K_HASH:
> + case DF4p5_NPS1_10CHAN_1K_HASH:
> + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
> + cs_id |= (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) << 1);
> + cs_id %= denorm_ctx->mod_value;
> + cs_id <<= 1;
> + cs_id |= hash_pa8;
> + break;
> + case DF4p5_NPS2_6CHAN_2K_HASH:
> + case DF4p5_NPS1_10CHAN_2K_HASH:
> + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
> + cs_id %= denorm_ctx->mod_value;
> + cs_id <<= 1;
> + cs_id |= hash_pa8;
> + break;
> + case DF4p5_NPS4_3CHAN_1K_HASH:
> + case DF4p5_NPS2_5CHAN_1K_HASH:
> + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
> + cs_id |= FIELD_GET(GENMASK_ULL(9, 8), denorm_ctx->current_spa);
> + cs_id %= denorm_ctx->mod_value;
> + break;
> + case DF4p5_NPS4_3CHAN_2K_HASH:
> + case DF4p5_NPS2_5CHAN_2K_HASH:
> + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
> + cs_id |= FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) << 1;
> + cs_id %= denorm_ctx->mod_value;
> + break;
> + default:
> + atl_debug_on_bad_intlv_mode(ctx);
> + return 0;
> + }
> +

Using u64 type for cs_id makes sense to handle all the bit operations
above. But the final value should fit within a u16.

So I think this function should return a u16. And there should be a
final check here to make sure that temporary u64 value fits within a
u16. Otherwise, the math above could have a bug.

> + return cs_id;
> +}
> +
> static int denorm_addr_common(struct addr_ctx *ctx)
> {
> u64 denorm_addr;
> @@ -699,6 +798,424 @@ static int denorm_addr_df4_np2(struct addr_ctx *ctx)
> return 0;
> }
>
> +static u64 normalize_addr_df4p5_np2(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx,
> + u64 addr)
> +{
> + u64 temp_addr_a, temp_addr_b;
> +
> + temp_addr_a = 0;
> + temp_addr_b = 0;

Initialize these when defining.

> +
> + switch (ctx->map.intlv_mode) {
> + case DF4p5_NPS0_24CHAN_1K_HASH:
> + case DF4p5_NPS1_12CHAN_1K_HASH:
> + case DF4p5_NPS2_6CHAN_1K_HASH:
> + case DF4p5_NPS4_3CHAN_1K_HASH:
> + case DF4p5_NPS1_10CHAN_1K_HASH:
> + case DF4p5_NPS2_5CHAN_1K_HASH:
> + temp_addr_a = FIELD_GET(GENMASK_ULL(11, 10), addr) << 8;
> + break;
> + case DF4p5_NPS0_24CHAN_2K_HASH:
> + case DF4p5_NPS1_12CHAN_2K_HASH:
> + case DF4p5_NPS2_6CHAN_2K_HASH:
> + case DF4p5_NPS4_3CHAN_2K_HASH:
> + case DF4p5_NPS1_10CHAN_2K_HASH:
> + case DF4p5_NPS2_5CHAN_2K_HASH:
> + temp_addr_a = FIELD_GET(GENMASK_ULL(11, 9), addr) << 8;
> + break;
> + default:
> + atl_debug_on_bad_intlv_mode(ctx);
> + return 0;
> + }
> +
> + switch (ctx->map.intlv_mode) {
> + case DF4p5_NPS0_24CHAN_1K_HASH:
> + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value;
> + temp_addr_b <<= 10;
> + break;
> + case DF4p5_NPS0_24CHAN_2K_HASH:
> + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 14), addr) / denorm_ctx->mod_value;
> + temp_addr_b <<= 11;
> + break;
> + case DF4p5_NPS1_12CHAN_1K_HASH:
> + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value;
> + temp_addr_b <<= 10;
> + break;
> + case DF4p5_NPS1_12CHAN_2K_HASH:
> + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value;
> + temp_addr_b <<= 11;
> + break;
> + case DF4p5_NPS2_6CHAN_1K_HASH:
> + case DF4p5_NPS1_10CHAN_1K_HASH:
> + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1;
> + temp_addr_b |= FIELD_GET(BIT_ULL(9), addr);
> + temp_addr_b /= denorm_ctx->mod_value;
> + temp_addr_b <<= 10;
> + break;
> + case DF4p5_NPS2_6CHAN_2K_HASH:
> + case DF4p5_NPS1_10CHAN_2K_HASH:
> + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value;
> + temp_addr_b <<= 11;
> + break;
> + case DF4p5_NPS4_3CHAN_1K_HASH:
> + case DF4p5_NPS2_5CHAN_1K_HASH:
> + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 2;
> + temp_addr_b |= FIELD_GET(GENMASK_ULL(9, 8), addr);
> + temp_addr_b /= denorm_ctx->mod_value;
> + temp_addr_b <<= 10;
> + break;
> + case DF4p5_NPS4_3CHAN_2K_HASH:
> + case DF4p5_NPS2_5CHAN_2K_HASH:
> + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1;
> + temp_addr_b |= FIELD_GET(BIT_ULL(8), addr);
> + temp_addr_b /= denorm_ctx->mod_value;
> + temp_addr_b <<= 11;
> + break;
> + default:
> + atl_debug_on_bad_intlv_mode(ctx);
> + return 0;
> + }
> +
> + return denorm_ctx->base_denorm_addr | temp_addr_a | temp_addr_b;
> +}
> +
> +static void recalculate_hashed_bits_df4p5_np2(struct addr_ctx *ctx,
> + struct df4p5_denorm_ctx *denorm_ctx)
> +{
> + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T, hashed_bit;
> +
> + if (!denorm_ctx->rehash_vector)
> + return;
> +
> + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
> + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
> + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
> + hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
> +
> + if (denorm_ctx->rehash_vector & BIT_ULL(8)) {
> + hashed_bit = FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa);
> + hashed_bit ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa);
> + hashed_bit ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k;
> + hashed_bit ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M;
> + hashed_bit ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G;
> + hashed_bit ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T;
> +
> + if (FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) != hashed_bit)
> + denorm_ctx->current_spa ^= BIT_ULL(8);
> + }
> +
> + if (denorm_ctx->rehash_vector & BIT_ULL(9)) {
> + hashed_bit = FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa);
> + hashed_bit ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k;
> + hashed_bit ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M;
> + hashed_bit ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G;
> + hashed_bit ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T;
> +
> + if (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) != hashed_bit)
> + denorm_ctx->current_spa ^= BIT_ULL(9);
> + }
> +
> + if (denorm_ctx->rehash_vector & BIT_ULL(12)) {
> + hashed_bit = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa);
> + hashed_bit ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k;
> + hashed_bit ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M;
> + hashed_bit ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G;
> + hashed_bit ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T;
> +
> + if (FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa) != hashed_bit)
> + denorm_ctx->current_spa ^= BIT_ULL(12);
> + }
> +
> + if (denorm_ctx->rehash_vector & BIT_ULL(13)) {
> + hashed_bit = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa);
> + hashed_bit ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k;
> + hashed_bit ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M;
> + hashed_bit ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G;
> + hashed_bit ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T;
> +
> + if (FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa) != hashed_bit)
> + denorm_ctx->current_spa ^= BIT_ULL(13);
> + }
> +}
> +
> +static bool check_logical_coh_st_fabric_id(struct addr_ctx *ctx,

check->match

...to highlight that the check condition is a match.

> + struct df4p5_denorm_ctx *denorm_ctx)
> +{
> + unsigned int logical_coh_st_fabric_id;

Local variables don't need such descriptive names. Just use "id".

Also, denorm_ctx->coh_st_fabric_id is a u16. So the types should match
unless there's a reason to use something different.

> +
> + /*
> + * The logical CS fabric ID of the permutation must be calculated from the
> + * current SPA with the base and with the MMIO hole.
> + */
> + logical_coh_st_fabric_id = get_logical_coh_st_fabric_id_for_current_spa(ctx, denorm_ctx);
> +
> + atl_debug(ctx, "Checking calculated logical coherent station fabric id:\n");
> + atl_debug(ctx, " calculated fabric id = 0x%x\n", logical_coh_st_fabric_id);
> + atl_debug(ctx, " expected fabric id = 0x%x\n", denorm_ctx->coh_st_fabric_id);
> +
> + if (denorm_ctx->coh_st_fabric_id != logical_coh_st_fabric_id)
> + return false;
> +
> + return true;

return denorm_ctx->coh_st_fabric_id == id;

> +}
> +
> +static bool check_norm_addr(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)

check->match, same as above.

> +{
> + u64 current_spa_without_base = remove_base_and_hole(ctx, denorm_ctx->current_spa);
> + u64 norm_addr;

Just need one variable and reuse it.

u64 addr = remove_base_and_hole(ctx, denorm_ctx->current_spa);

> +
> + /*
> + * The normalized address must be calculated with the current SPA without
> + * the base and without the MMIO hole.
> + */
> + norm_addr = normalize_addr_df4p5_np2(ctx, denorm_ctx, current_spa_without_base);

addr = normalize_addr_df4p5_np2(ctx, denorm_ctx, addr);

> +
> + atl_debug(ctx, "Checking calculated normalized address:\n");
> + atl_debug(ctx, " calculated normalized addr = 0x%016llx\n", norm_addr);
> + atl_debug(ctx, " expected normalized addr = 0x%016llx\n", ctx->ret_addr);
> +
> + if (norm_addr != ctx->ret_addr)
> + return false;
> +
> + return true;

return ctx->ret_addr == addr;

> +}
> +
> +static int check_permutations(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
> +{
> + u64 test_perm, temp_addr, denorm_addr, num_perms;
> + unsigned int dropped_remainder;
> +
> + denorm_ctx->div_addr *= denorm_ctx->mod_value;
> +
> + /*
> + * The high order bits of num_permutations represent the permutations
> + * of the dropped remainder. This will be either 0-3 or 0-5 depending
> + * on the interleave mode. The low order bits represent the
> + * permutations of other "lost" bits which will be any combination of
> + * 1, 2, or 3 bits depending on the interleave mode.
> + */
> + num_perms = denorm_ctx->mod_value << denorm_ctx->perm_shift;
> +
> + for (test_perm = 0; test_perm < num_perms; test_perm++) {
> + denorm_addr = denorm_ctx->base_denorm_addr;
> + dropped_remainder = test_perm >> denorm_ctx->perm_shift;
> + temp_addr = denorm_ctx->div_addr + dropped_remainder;
> +
> + switch (ctx->map.intlv_mode) {
> + case DF4p5_NPS0_24CHAN_2K_HASH:
> + denorm_addr |= temp_addr << 14;
> + break;
> + case DF4p5_NPS0_24CHAN_1K_HASH:
> + case DF4p5_NPS1_12CHAN_2K_HASH:
> + denorm_addr |= temp_addr << 13;
> + break;
> + case DF4p5_NPS1_12CHAN_1K_HASH:
> + case DF4p5_NPS2_6CHAN_2K_HASH:
> + case DF4p5_NPS1_10CHAN_2K_HASH:
> + denorm_addr |= temp_addr << 12;
> + break;
> + case DF4p5_NPS2_6CHAN_1K_HASH:
> + case DF4p5_NPS1_10CHAN_1K_HASH:
> + denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 9;
> + denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12;
> + break;
> + case DF4p5_NPS4_3CHAN_1K_HASH:
> + case DF4p5_NPS2_5CHAN_1K_HASH:
> + denorm_addr |= FIELD_GET(GENMASK_ULL(1, 0), temp_addr) << 8;
> + denorm_addr |= FIELD_GET(GENMASK_ULL(63, 2), (temp_addr)) << 12;
> + break;
> + case DF4p5_NPS4_3CHAN_2K_HASH:
> + case DF4p5_NPS2_5CHAN_2K_HASH:
> + denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 8;
> + denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12;
> + break;
> + default:
> + atl_debug_on_bad_intlv_mode(ctx);
> + return -EINVAL;
> + }
> +
> + switch (ctx->map.intlv_mode) {
> + case DF4p5_NPS0_24CHAN_1K_HASH:
> + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
> + denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9;
> + denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 12;
> + break;
> + case DF4p5_NPS0_24CHAN_2K_HASH:
> + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
> + denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12;
> + denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 13;
> + break;
> + case DF4p5_NPS1_12CHAN_2K_HASH:
> + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
> + denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12;
> + break;
> + case DF4p5_NPS1_12CHAN_1K_HASH:
> + case DF4p5_NPS4_3CHAN_1K_HASH:
> + case DF4p5_NPS2_5CHAN_1K_HASH:
> + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
> + denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9;
> + break;
> + case DF4p5_NPS2_6CHAN_1K_HASH:
> + case DF4p5_NPS2_6CHAN_2K_HASH:
> + case DF4p5_NPS4_3CHAN_2K_HASH:
> + case DF4p5_NPS1_10CHAN_1K_HASH:
> + case DF4p5_NPS1_10CHAN_2K_HASH:
> + case DF4p5_NPS2_5CHAN_2K_HASH:
> + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
> + break;
> + default:
> + atl_debug_on_bad_intlv_mode(ctx);
> + return -EINVAL;
> + }
> +
> + denorm_ctx->current_spa = add_base_and_hole(ctx, denorm_addr);
> + recalculate_hashed_bits_df4p5_np2(ctx, denorm_ctx);
> +
> + atl_debug(ctx, "Checking potential system physical address 0x%016llx\n",
> + denorm_ctx->current_spa);
> +
> + if (!check_logical_coh_st_fabric_id(ctx, denorm_ctx))
> + continue;
> +
> + if (!check_norm_addr(ctx, denorm_ctx))
> + continue;
> +
> + if (denorm_ctx->resolved_spa == INVALID_SPA ||
> + denorm_ctx->current_spa > denorm_ctx->resolved_spa)
> + denorm_ctx->resolved_spa = denorm_ctx->current_spa;
> + }
> +
> + if (denorm_ctx->resolved_spa == INVALID_SPA) {
> + atl_debug(ctx, "Failed to find valid SPA for normalized address 0x%016llx\n",
> + ctx->ret_addr);
> + return -EINVAL;
> + }
> +
> + /* Return the resolved SPA without the base, without the MMIO hole */
> + ctx->ret_addr = remove_base_and_hole(ctx, denorm_ctx->resolved_spa);
> +
> + return 0;
> +}
> +
> +static int init_df4p5_denorm_ctx(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
> +{
> + denorm_ctx->current_spa = INVALID_SPA;
> + denorm_ctx->resolved_spa = INVALID_SPA;
> +
> + switch (ctx->map.intlv_mode) {
> + case DF4p5_NPS0_24CHAN_1K_HASH:
> + denorm_ctx->perm_shift = 3;
> + denorm_ctx->rehash_vector = BIT(8) | BIT(9) | BIT(12);
> + break;
> + case DF4p5_NPS0_24CHAN_2K_HASH:
> + denorm_ctx->perm_shift = 3;
> + denorm_ctx->rehash_vector = BIT(8) | BIT(12) | BIT(13);
> + break;
> + case DF4p5_NPS1_12CHAN_1K_HASH:
> + denorm_ctx->perm_shift = 2;
> + denorm_ctx->rehash_vector = BIT(8);
> + break;
> + case DF4p5_NPS1_12CHAN_2K_HASH:
> + denorm_ctx->perm_shift = 2;
> + denorm_ctx->rehash_vector = BIT(8) | BIT(12);
> + break;
> + case DF4p5_NPS2_6CHAN_1K_HASH:
> + case DF4p5_NPS2_6CHAN_2K_HASH:
> + case DF4p5_NPS1_10CHAN_1K_HASH:
> + case DF4p5_NPS1_10CHAN_2K_HASH:
> + denorm_ctx->perm_shift = 1;
> + denorm_ctx->rehash_vector = BIT(8);
> + break;
> + case DF4p5_NPS4_3CHAN_1K_HASH:
> + case DF4p5_NPS2_5CHAN_1K_HASH:
> + denorm_ctx->perm_shift = 2;
> + denorm_ctx->rehash_vector = 0;
> + break;
> + case DF4p5_NPS4_3CHAN_2K_HASH:
> + case DF4p5_NPS2_5CHAN_2K_HASH:
> + denorm_ctx->perm_shift = 1;
> + denorm_ctx->rehash_vector = 0;
> + break;
> + default:
> + atl_debug_on_bad_intlv_mode(ctx);
> + return -EINVAL;
> + }
> +
> + denorm_ctx->base_denorm_addr = FIELD_GET(GENMASK_ULL(7, 0), ctx->ret_addr);
> +
> + switch (ctx->map.intlv_mode) {
> + case DF4p5_NPS0_24CHAN_1K_HASH:
> + case DF4p5_NPS1_12CHAN_1K_HASH:
> + case DF4p5_NPS2_6CHAN_1K_HASH:
> + case DF4p5_NPS4_3CHAN_1K_HASH:
> + case DF4p5_NPS1_10CHAN_1K_HASH:
> + case DF4p5_NPS2_5CHAN_1K_HASH:
> + denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(9, 8), ctx->ret_addr) << 10;
> + denorm_ctx->div_addr = FIELD_GET(GENMASK_ULL(63, 10), ctx->ret_addr);
> + break;
> + case DF4p5_NPS0_24CHAN_2K_HASH:
> + case DF4p5_NPS1_12CHAN_2K_HASH:
> + case DF4p5_NPS2_6CHAN_2K_HASH:
> + case DF4p5_NPS4_3CHAN_2K_HASH:
> + case DF4p5_NPS1_10CHAN_2K_HASH:
> + case DF4p5_NPS2_5CHAN_2K_HASH:
> + denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(10, 8), ctx->ret_addr) << 9;
> + denorm_ctx->div_addr = FIELD_GET(GENMASK_ULL(63, 11), ctx->ret_addr);
> + break;
> + default:
> + atl_debug_on_bad_intlv_mode(ctx);
> + return -EINVAL;
> + }
> +
> + if (ctx->map.num_intlv_chan % 3 == 0)
> + denorm_ctx->mod_value = 3;
> + else
> + denorm_ctx->mod_value = 5;
> +
> + denorm_ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx) - get_dst_fabric_id(ctx);
> +
> + atl_debug(ctx, "Initialized df4p5_denorm_ctx:");
> + atl_debug(ctx, " mod_value = %d", denorm_ctx->mod_value);
> + atl_debug(ctx, " perm_shift = %d", denorm_ctx->perm_shift);
> + atl_debug(ctx, " rehash_vector = 0x%x", denorm_ctx->rehash_vector);
> + atl_debug(ctx, " base_denorm_addr = 0x%016llx", denorm_ctx->base_denorm_addr);
> + atl_debug(ctx, " div_addr = 0x%016llx", denorm_ctx->div_addr);
> + atl_debug(ctx, " coh_st_fabric_id = 0x%x", denorm_ctx->coh_st_fabric_id);
> +
> + return 0;
> +}
> +
> +/*
> + * For DF 4.5, parts of the physical address can be directly pulled from the
> + * normalized address. The exact bits will differ between interleave modes, but
> + * using NPS0_24CHAN_1K_HASH as an example, the normalized address consists of
> + * bits [63:13] (divided by 3), bits [11:10], and bits [7:0] of the system
> + * physical address.
> + *
> + * In this case, there is no way to reconstruct the missing bits (bits 8, 9,
> + * and 12) from the normalized address. Additionally, when bits [63:13] are
> + * divided by 3, the remainder is dropped. Determine the proper combination of
> + * "lost" bits and dropped remainder by iterating through each possible
> + * permutation of these bits and then normalizing the generated system physical
> + * addresses. If the normalized address matches the address we are trying to
> + * translate, then we have found the correct permutation of bits.
> + */
> +static int denorm_addr_df4p5_np2(struct addr_ctx *ctx)
> +{
> + struct df4p5_denorm_ctx denorm_ctx;
> + int ret = 0;
> +
> + memset(&denorm_ctx, 0, sizeof(denorm_ctx));
> +
> + atl_debug(ctx, "Denormalizing DF 4.5 normalized address 0x%016llx", ctx->ret_addr);
> +
> + ret = init_df4p5_denorm_ctx(ctx, &denorm_ctx);
> + if (ret)
> + return ret;
> +
> + return check_permutations(ctx, &denorm_ctx);
> +}
> +
> int denormalize_address(struct addr_ctx *ctx)
> {
> switch (ctx->map.intlv_mode) {
> @@ -710,6 +1227,19 @@ int denormalize_address(struct addr_ctx *ctx)
> case DF4_NPS2_5CHAN_HASH:
> case DF4_NPS1_10CHAN_HASH:
> return denorm_addr_df4_np2(ctx);
> + case DF4p5_NPS0_24CHAN_1K_HASH:
> + case DF4p5_NPS4_3CHAN_1K_HASH:
> + case DF4p5_NPS2_6CHAN_1K_HASH:
> + case DF4p5_NPS1_12CHAN_1K_HASH:
> + case DF4p5_NPS2_5CHAN_1K_HASH:
> + case DF4p5_NPS1_10CHAN_1K_HASH:
> + case DF4p5_NPS4_3CHAN_2K_HASH:
> + case DF4p5_NPS2_6CHAN_2K_HASH:
> + case DF4p5_NPS1_12CHAN_2K_HASH:
> + case DF4p5_NPS0_24CHAN_2K_HASH:
> + case DF4p5_NPS2_5CHAN_2K_HASH:
> + case DF4p5_NPS1_10CHAN_2K_HASH:
> + return denorm_addr_df4p5_np2(ctx);
> case DF3_6CHAN:
> return denorm_addr_df3_6chan(ctx);
> default:
> diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
> index 05b870fcb24e..946e36c053c5 100644
> --- a/drivers/ras/amd/atl/internal.h
> +++ b/drivers/ras/amd/atl/internal.h
> @@ -34,6 +34,8 @@
> #define DF_DRAM_BASE_LIMIT_LSB 28
> #define MI300_DRAM_LIMIT_LSB 20
>
> +#define INVALID_SPA ~0ULL
> +
> enum df_revisions {
> UNKNOWN,
> DF2,
> @@ -90,6 +92,44 @@ enum intlv_modes {
> DF4p5_NPS1_10CHAN_2K_HASH = 0x49,
> };
>
> +struct df4p5_denorm_ctx {
> + /* perm_shift: Indicates the number of "lost" bits. This will be 1, 2, or 3. */
> + u8 perm_shift;
> +
> + /* rehash_vector: A mask indicating the bits that need to be rehashed. */
> + u16 rehash_vector;
> +
> + /*
> + * mod_value: Represents the value that the high bits of the normalized
> + * address are divided by during normalization. This value will be 3
> + * for interleave modes with a number of channels divisible by 3 or the
> + * value will be 5 for interleave modes with a number of channels
> + * divisible by 5. Power-of-two interleave modes are handled
> + * separately.
> + */
> + u8 mod_value;
> +
> + /*
> + * base_denorm_addr: Represents the bits that can be directly pulled
> + * from the normalized address. In each case, pass through bits [7:0]
> + * of the normalized address. The other bits depend on the interleave
> + * bit position which will be bit 10 for 1K interleave stripe cases and
> + * bit 11 for 2K interleave stripe cases.
> + */
> + u64 base_denorm_addr;
> +
> + /*
> + * div_addr: Represents the high bits of the physical address that have
> + * been divided by the mod_value.
> + */
> + u64 div_addr;
> +
> + u64 current_spa;
> + u64 resolved_spa;
> +
> + u16 coh_st_fabric_id;
> +};
> +
> struct df_flags {
> __u8 legacy_ficaa : 1,
> socket_id_shift_quirk : 1,

Thanks,
Yazen