2024-03-14 16:36:12

by John Allen

[permalink] [raw]
Subject: [PATCH 0/4] RAS: ATL: DF 4.5 NP2 Denormalization

Implement non-power-of-two denormalization for Data Fabric 4.5 in the
AMD address translation library.

Tree: git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git
Base commit: bd17b7c34fadef645becde1245b9394f69f31702 (origin/edac-amd-atl)

John Allen (4):
RAS: ATL: Read DRAM hole base early
RAS: ATL: Expand helpers for adding and removing base and hole
RAS: ATL: Add map_bits_valid to header
RAS: ATL: Implement DF 4.5 NP2 denormalization

drivers/ras/amd/atl/core.c | 48 +--
drivers/ras/amd/atl/dehash.c | 2 +-
drivers/ras/amd/atl/denormalize.c | 530 ++++++++++++++++++++++++++++++
drivers/ras/amd/atl/internal.h | 48 +++
drivers/ras/amd/atl/map.c | 37 +++
drivers/ras/amd/atl/system.c | 21 ++
6 files changed, 662 insertions(+), 24 deletions(-)

--
2.34.1



2024-03-14 16:36:25

by John Allen

[permalink] [raw]
Subject: [PATCH 1/4] RAS: ATL: Read DRAM hole base early

Read DRAM hole base when constructing the address map as the value will
not change during translation.

Signed-off-by: John Allen <[email protected]>
---
drivers/ras/amd/atl/core.c | 15 ++-------------
drivers/ras/amd/atl/internal.h | 2 ++
drivers/ras/amd/atl/system.c | 21 +++++++++++++++++++++
3 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c
index 6dc4e06305f7..c1710d233adb 100644
--- a/drivers/ras/amd/atl/core.c
+++ b/drivers/ras/amd/atl/core.c
@@ -51,22 +51,11 @@ static bool legacy_hole_en(struct addr_ctx *ctx)

static int add_legacy_hole(struct addr_ctx *ctx)
{
- u32 dram_hole_base;
- u8 func = 0;
-
if (!legacy_hole_en(ctx))
return 0;

- if (df_cfg.rev >= DF4)
- func = 7;
-
- if (df_indirect_read_broadcast(ctx->node_id, func, 0x104, &dram_hole_base))
- return -EINVAL;
-
- dram_hole_base &= DF_DRAM_HOLE_BASE_MASK;
-
- if (ctx->ret_addr >= dram_hole_base)
- ctx->ret_addr += (BIT_ULL(32) - dram_hole_base);
+ if (ctx->addr >= df_cfg.dram_hole_base)
+ ctx->addr += (BIT_ULL(32) - df_cfg.dram_hole_base);

return 0;
}
diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
index 5de69e0bb0f9..1413c8ddc6c5 100644
--- a/drivers/ras/amd/atl/internal.h
+++ b/drivers/ras/amd/atl/internal.h
@@ -132,6 +132,8 @@ struct df_config {
/* Number of DRAM Address maps visible in a Coherent Station. */
u8 num_coh_st_maps;

+ u32 dram_hole_base;
+
/* Global flags to handle special cases. */
struct df_flags flags;
};
diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c
index 701349e84942..6f6fe24dec81 100644
--- a/drivers/ras/amd/atl/system.c
+++ b/drivers/ras/amd/atl/system.c
@@ -223,6 +223,21 @@ static int determine_df_rev(void)
return -EINVAL;
}

+static int get_dram_hole_base(void)
+{
+ u8 func = 0;
+
+ if (df_cfg.rev >= DF4)
+ func = 7;
+
+ if (df_indirect_read_broadcast(0, func, 0x104, &df_cfg.dram_hole_base))
+ return -EINVAL;
+
+ df_cfg.dram_hole_base &= DF_DRAM_HOLE_BASE_MASK;
+
+ return 0;
+}
+
static void get_num_maps(void)
{
switch (df_cfg.rev) {
@@ -266,6 +281,7 @@ static void dump_df_cfg(void)

pr_debug("num_coh_st_maps=%u", df_cfg.num_coh_st_maps);

+ pr_debug("dram_hole_base=%x", df_cfg.dram_hole_base);
pr_debug("flags.legacy_ficaa=%u", df_cfg.flags.legacy_ficaa);
pr_debug("flags.socket_id_shift_quirk=%u", df_cfg.flags.socket_id_shift_quirk);
}
@@ -282,6 +298,11 @@ int get_df_system_info(void)

get_num_maps();

+ if (get_dram_hole_base()) {
+ pr_warn("amd_atl: Failed to read DRAM hole base");
+ return -EINVAL;
+ }
+
dump_df_cfg();

return 0;
--
2.34.1


2024-03-14 16:36:43

by John Allen

[permalink] [raw]
Subject: [PATCH 2/4] RAS: ATL: Expand helpers for adding and removing base and hole

Data fabric 4.5 denormalization will need to frequently add and remove
the base and the legacy MMIO hole. Modify existing helpers to improve DF
4.5 denormalization flow and add helper to remove the base and hole.

Signed-off-by: John Allen <[email protected]>
---
drivers/ras/amd/atl/core.c | 43 ++++++++++++++++++++++------------
drivers/ras/amd/atl/internal.h | 3 +++
2 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c
index c1710d233adb..cafdfc57d929 100644
--- a/drivers/ras/amd/atl/core.c
+++ b/drivers/ras/amd/atl/core.c
@@ -49,15 +49,26 @@ static bool legacy_hole_en(struct addr_ctx *ctx)
return FIELD_GET(DF_LEGACY_MMIO_HOLE_EN, reg);
}

-static int add_legacy_hole(struct addr_ctx *ctx)
+static u64 add_legacy_hole(struct addr_ctx *ctx, u64 addr)
{
if (!legacy_hole_en(ctx))
- return 0;
+ return addr;

- if (ctx->addr >= df_cfg.dram_hole_base)
- ctx->addr += (BIT_ULL(32) - df_cfg.dram_hole_base);
+ if (addr >= df_cfg.dram_hole_base)
+ addr += (BIT_ULL(32) - df_cfg.dram_hole_base);

- return 0;
+ return addr;
+}
+
+static u64 remove_legacy_hole(struct addr_ctx *ctx, u64 addr)
+{
+ if (!legacy_hole_en(ctx))
+ return addr;
+
+ if (addr >= df_cfg.dram_hole_base)
+ addr -= (BIT_ULL(32) - df_cfg.dram_hole_base);
+
+ return addr;
}

static u64 get_base_addr(struct addr_ctx *ctx)
@@ -72,14 +83,16 @@ static u64 get_base_addr(struct addr_ctx *ctx)
return base_addr << DF_DRAM_BASE_LIMIT_LSB;
}

-static int add_base_and_hole(struct addr_ctx *ctx)
+u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr)
{
- ctx->ret_addr += get_base_addr(ctx);
-
- if (add_legacy_hole(ctx))
- return -EINVAL;
+ addr += get_base_addr(ctx);
+ return add_legacy_hole(ctx, addr);
+}

- return 0;
+u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr)
+{
+ addr -= get_base_addr(ctx);
+ return remove_legacy_hole(ctx, addr);
}

static bool late_hole_remove(struct addr_ctx *ctx)
@@ -123,14 +136,14 @@ unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsig
if (denormalize_address(&ctx))
return -EINVAL;

- if (!late_hole_remove(&ctx) && add_base_and_hole(&ctx))
- return -EINVAL;
+ if (!late_hole_remove(&ctx))
+ ctx.ret_addr = add_base_and_hole(&ctx, ctx.ret_addr);

if (dehash_address(&ctx))
return -EINVAL;

- if (late_hole_remove(&ctx) && add_base_and_hole(&ctx))
- return -EINVAL;
+ if (late_hole_remove(&ctx))
+ ctx.ret_addr = add_base_and_hole(&ctx, ctx.ret_addr);

if (addr_over_limit(&ctx))
return -EINVAL;
diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
index 1413c8ddc6c5..05b870fcb24e 100644
--- a/drivers/ras/amd/atl/internal.h
+++ b/drivers/ras/amd/atl/internal.h
@@ -236,6 +236,9 @@ int dehash_address(struct addr_ctx *ctx);
unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr);
unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err);

+u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr);
+u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr);
+
/*
* Make a gap in @data that is @num_bits long starting at @bit_num.
* e.g. data = 11111111'b
--
2.34.1


2024-03-14 16:36:57

by John Allen

[permalink] [raw]
Subject: [PATCH 3/4] RAS: ATL: Add map_bits_valid to header

Make map_bits_valid available in the AMD ATL internal header as the
function can be used in other parts of the library.

Signed-off-by: John Allen <[email protected]>
---
drivers/ras/amd/atl/dehash.c | 2 +-
drivers/ras/amd/atl/internal.h | 3 +++
2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c
index 4ea46262c4f5..a20cf615b83a 100644
--- a/drivers/ras/amd/atl/dehash.c
+++ b/drivers/ras/amd/atl/dehash.c
@@ -19,7 +19,7 @@
* If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the
* respective interleaving is disabled.
*/
-static inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
+inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
u8 num_intlv_dies, u8 num_intlv_sockets)
{
if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) {
diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
index 05b870fcb24e..4681449321de 100644
--- a/drivers/ras/amd/atl/internal.h
+++ b/drivers/ras/amd/atl/internal.h
@@ -239,6 +239,9 @@ unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr);
u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr);

+inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
+ u8 num_intlv_dies, u8 num_intlv_sockets);
+
/*
* Make a gap in @data that is @num_bits long starting at @bit_num.
* e.g. data = 11111111'b
--
2.34.1


2024-03-14 16:37:09

by John Allen

[permalink] [raw]
Subject: [PATCH 4/4] RAS: ATL: Implement DF 4.5 NP2 denormalization

Unlike with previous Data Fabric versions, with Data Fabric 4.5, there
are bits of the system physical address that can't be reconstructed from
the normalized address. Using NPS0_24CHAN_1K_HASH as an example, the
normalized address consists of bits [63:13] (divided by 3), bits
[11:10], and bits [7:0] of the system physical address.

In this case, the remainder from the divide by 3 and bits 8, 9, and 12
are missing. To determine the proper combination of missing system
physical address bits, iterate through each possible combination of
these bits, normalize the resulting system physical address, and compare
to the original address that is being translated. If the addresses
match, then the correct permutation of bits has been found.

Signed-off-by: John Allen <[email protected]>
---
drivers/ras/amd/atl/denormalize.c | 530 ++++++++++++++++++++++++++++++
drivers/ras/amd/atl/internal.h | 40 +++
drivers/ras/amd/atl/map.c | 37 +++
3 files changed, 607 insertions(+)

diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c
index e279224288d6..b03bba851e14 100644
--- a/drivers/ras/amd/atl/denormalize.c
+++ b/drivers/ras/amd/atl/denormalize.c
@@ -448,6 +448,105 @@ static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx)
return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id;
}

+static u64 get_logical_coh_st_fabric_id_for_current_spa(struct addr_ctx *ctx,
+ struct df4p5_denorm_ctx *denorm_ctx)
+{
+ bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T;
+ bool hash_pa8, hash_pa9, hash_pa12, hash_pa13;
+ u64 cs_id = 0;
+
+ hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
+ hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
+ hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
+ hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
+
+ hash_pa8 = FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa);
+ hash_pa8 ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa);
+ hash_pa8 ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k;
+ hash_pa8 ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M;
+ hash_pa8 ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G;
+ hash_pa8 ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T;
+
+ hash_pa9 = FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa);
+ hash_pa9 ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k;
+ hash_pa9 ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M;
+ hash_pa9 ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G;
+ hash_pa9 ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T;
+
+ hash_pa12 = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa);
+ hash_pa12 ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k;
+ hash_pa12 ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M;
+ hash_pa12 ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G;
+ hash_pa12 ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T;
+
+ hash_pa13 = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa);
+ hash_pa13 ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k;
+ hash_pa13 ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M;
+ hash_pa13 ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G;
+ hash_pa13 ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T;
+
+ switch (ctx->map.intlv_mode) {
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3;
+ cs_id %= denorm_ctx->mod_value;
+ cs_id <<= 2;
+ cs_id |= (hash_pa9 | (hash_pa12 << 1));
+ cs_id |= hash_pa8 << df_cfg.socket_id_shift;
+ break;
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ cs_id = FIELD_GET(GENMASK_ULL(63, 14), denorm_ctx->current_spa) << 4;
+ cs_id %= denorm_ctx->mod_value;
+ cs_id <<= 2;
+ cs_id |= (hash_pa12 | (hash_pa13 << 1));
+ cs_id |= hash_pa8 << df_cfg.socket_id_shift;
+ break;
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
+ cs_id %= denorm_ctx->mod_value;
+ cs_id <<= 2;
+ cs_id |= (hash_pa8 | (hash_pa9 << 1));
+ break;
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3;
+ cs_id %= denorm_ctx->mod_value;
+ cs_id <<= 2;
+ cs_id |= (hash_pa8 | (hash_pa12 << 1));
+ break;
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
+ cs_id |= (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) << 1);
+ cs_id %= denorm_ctx->mod_value;
+ cs_id <<= 1;
+ cs_id |= hash_pa8;
+ break;
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
+ cs_id %= denorm_ctx->mod_value;
+ cs_id <<= 1;
+ cs_id |= hash_pa8;
+ break;
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
+ cs_id |= FIELD_GET(GENMASK_ULL(9, 8), denorm_ctx->current_spa);
+ cs_id %= denorm_ctx->mod_value;
+ break;
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2;
+ cs_id |= FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) << 1;
+ cs_id %= denorm_ctx->mod_value;
+ break;
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return 0;
+ }
+
+ return cs_id;
+}
+
static int denorm_addr_common(struct addr_ctx *ctx)
{
u64 denorm_addr;
@@ -699,6 +798,424 @@ static int denorm_addr_df4_np2(struct addr_ctx *ctx)
return 0;
}

+static u64 normalize_addr_df4p5_np2(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx,
+ u64 addr)
+{
+ u64 temp_addr_a, temp_addr_b;
+
+ temp_addr_a = 0;
+ temp_addr_b = 0;
+
+ switch (ctx->map.intlv_mode) {
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ temp_addr_a = FIELD_GET(GENMASK_ULL(11, 10), addr) << 8;
+ break;
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ temp_addr_a = FIELD_GET(GENMASK_ULL(11, 9), addr) << 8;
+ break;
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return 0;
+ }
+
+ switch (ctx->map.intlv_mode) {
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value;
+ temp_addr_b <<= 10;
+ break;
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ temp_addr_b = FIELD_GET(GENMASK_ULL(63, 14), addr) / denorm_ctx->mod_value;
+ temp_addr_b <<= 11;
+ break;
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value;
+ temp_addr_b <<= 10;
+ break;
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value;
+ temp_addr_b <<= 11;
+ break;
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1;
+ temp_addr_b |= FIELD_GET(BIT_ULL(9), addr);
+ temp_addr_b /= denorm_ctx->mod_value;
+ temp_addr_b <<= 10;
+ break;
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value;
+ temp_addr_b <<= 11;
+ break;
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 2;
+ temp_addr_b |= FIELD_GET(GENMASK_ULL(9, 8), addr);
+ temp_addr_b /= denorm_ctx->mod_value;
+ temp_addr_b <<= 10;
+ break;
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1;
+ temp_addr_b |= FIELD_GET(BIT_ULL(8), addr);
+ temp_addr_b /= denorm_ctx->mod_value;
+ temp_addr_b <<= 11;
+ break;
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return 0;
+ }
+
+ return denorm_ctx->base_denorm_addr | temp_addr_a | temp_addr_b;
+}
+
+static void recalculate_hashed_bits_df4p5_np2(struct addr_ctx *ctx,
+ struct df4p5_denorm_ctx *denorm_ctx)
+{
+ bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T, hashed_bit;
+
+ if (!denorm_ctx->rehash_vector)
+ return;
+
+ hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
+ hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
+ hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
+ hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
+
+ if (denorm_ctx->rehash_vector & BIT_ULL(8)) {
+ hashed_bit = FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa);
+ hashed_bit ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa);
+ hashed_bit ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G;
+ hashed_bit ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T;
+
+ if (FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) != hashed_bit)
+ denorm_ctx->current_spa ^= BIT_ULL(8);
+ }
+
+ if (denorm_ctx->rehash_vector & BIT_ULL(9)) {
+ hashed_bit = FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa);
+ hashed_bit ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G;
+ hashed_bit ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T;
+
+ if (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) != hashed_bit)
+ denorm_ctx->current_spa ^= BIT_ULL(9);
+ }
+
+ if (denorm_ctx->rehash_vector & BIT_ULL(12)) {
+ hashed_bit = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa);
+ hashed_bit ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G;
+ hashed_bit ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T;
+
+ if (FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa) != hashed_bit)
+ denorm_ctx->current_spa ^= BIT_ULL(12);
+ }
+
+ if (denorm_ctx->rehash_vector & BIT_ULL(13)) {
+ hashed_bit = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa);
+ hashed_bit ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k;
+ hashed_bit ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M;
+ hashed_bit ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G;
+ hashed_bit ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T;
+
+ if (FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa) != hashed_bit)
+ denorm_ctx->current_spa ^= BIT_ULL(13);
+ }
+}
+
+static bool check_logical_coh_st_fabric_id(struct addr_ctx *ctx,
+ struct df4p5_denorm_ctx *denorm_ctx)
+{
+ unsigned int logical_coh_st_fabric_id;
+
+ /*
+ * The logical CS fabric ID of the permutation must be calculated from the
+ * current SPA with the base and with the MMIO hole.
+ */
+ logical_coh_st_fabric_id = get_logical_coh_st_fabric_id_for_current_spa(ctx, denorm_ctx);
+
+ atl_debug(ctx, "Checking calculated logical coherent station fabric id:\n");
+ atl_debug(ctx, " calculated fabric id = 0x%x\n", logical_coh_st_fabric_id);
+ atl_debug(ctx, " expected fabric id = 0x%x\n", denorm_ctx->coh_st_fabric_id);
+
+ if (denorm_ctx->coh_st_fabric_id != logical_coh_st_fabric_id)
+ return false;
+
+ return true;
+}
+
+static bool check_norm_addr(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
+{
+ u64 current_spa_without_base = remove_base_and_hole(ctx, denorm_ctx->current_spa);
+ u64 norm_addr;
+
+ /*
+ * The normalized address must be calculated with the current SPA without
+ * the base and without the MMIO hole.
+ */
+ norm_addr = normalize_addr_df4p5_np2(ctx, denorm_ctx, current_spa_without_base);
+
+ atl_debug(ctx, "Checking calculated normalized address:\n");
+ atl_debug(ctx, " calculated normalized addr = 0x%016llx\n", norm_addr);
+ atl_debug(ctx, " expected normalized addr = 0x%016llx\n", ctx->ret_addr);
+
+ if (norm_addr != ctx->ret_addr)
+ return false;
+
+ return true;
+}
+
+static int check_permutations(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
+{
+ u64 test_perm, temp_addr, denorm_addr, num_perms;
+ unsigned int dropped_remainder;
+
+ denorm_ctx->div_addr *= denorm_ctx->mod_value;
+
+ /*
+ * The high order bits of num_permutations represent the permutations
+ * of the dropped remainder. This will be either 0-3 or 0-5 depending
+ * on the interleave mode. The low order bits represent the
+ * permutations of other "lost" bits which will be any combination of
+ * 1, 2, or 3 bits depending on the interleave mode.
+ */
+ num_perms = denorm_ctx->mod_value << denorm_ctx->perm_shift;
+
+ for (test_perm = 0; test_perm < num_perms; test_perm++) {
+ denorm_addr = denorm_ctx->base_denorm_addr;
+ dropped_remainder = test_perm >> denorm_ctx->perm_shift;
+ temp_addr = denorm_ctx->div_addr + dropped_remainder;
+
+ switch (ctx->map.intlv_mode) {
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ denorm_addr |= temp_addr << 14;
+ break;
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ denorm_addr |= temp_addr << 13;
+ break;
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ denorm_addr |= temp_addr << 12;
+ break;
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 9;
+ denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12;
+ break;
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ denorm_addr |= FIELD_GET(GENMASK_ULL(1, 0), temp_addr) << 8;
+ denorm_addr |= FIELD_GET(GENMASK_ULL(63, 2), (temp_addr)) << 12;
+ break;
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 8;
+ denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12;
+ break;
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return -EINVAL;
+ }
+
+ switch (ctx->map.intlv_mode) {
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
+ denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9;
+ denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 12;
+ break;
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
+ denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12;
+ denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 13;
+ break;
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
+ denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12;
+ break;
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
+ denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9;
+ break;
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8;
+ break;
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return -EINVAL;
+ }
+
+ denorm_ctx->current_spa = add_base_and_hole(ctx, denorm_addr);
+ recalculate_hashed_bits_df4p5_np2(ctx, denorm_ctx);
+
+ atl_debug(ctx, "Checking potential system physical address 0x%016llx\n",
+ denorm_ctx->current_spa);
+
+ if (!check_logical_coh_st_fabric_id(ctx, denorm_ctx))
+ continue;
+
+ if (!check_norm_addr(ctx, denorm_ctx))
+ continue;
+
+ if (denorm_ctx->resolved_spa == INVALID_SPA ||
+ denorm_ctx->current_spa > denorm_ctx->resolved_spa)
+ denorm_ctx->resolved_spa = denorm_ctx->current_spa;
+ }
+
+ if (denorm_ctx->resolved_spa == INVALID_SPA) {
+ atl_debug(ctx, "Failed to find valid SPA for normalized address 0x%016llx\n",
+ ctx->ret_addr);
+ return -EINVAL;
+ }
+
+ /* Return the resolved SPA without the base, without the MMIO hole */
+ ctx->ret_addr = remove_base_and_hole(ctx, denorm_ctx->resolved_spa);
+
+ return 0;
+}
+
+static int init_df4p5_denorm_ctx(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx)
+{
+ denorm_ctx->current_spa = INVALID_SPA;
+ denorm_ctx->resolved_spa = INVALID_SPA;
+
+ switch (ctx->map.intlv_mode) {
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ denorm_ctx->perm_shift = 3;
+ denorm_ctx->rehash_vector = BIT(8) | BIT(9) | BIT(12);
+ break;
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ denorm_ctx->perm_shift = 3;
+ denorm_ctx->rehash_vector = BIT(8) | BIT(12) | BIT(13);
+ break;
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ denorm_ctx->perm_shift = 2;
+ denorm_ctx->rehash_vector = BIT(8);
+ break;
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ denorm_ctx->perm_shift = 2;
+ denorm_ctx->rehash_vector = BIT(8) | BIT(12);
+ break;
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ denorm_ctx->perm_shift = 1;
+ denorm_ctx->rehash_vector = BIT(8);
+ break;
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ denorm_ctx->perm_shift = 2;
+ denorm_ctx->rehash_vector = 0;
+ break;
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ denorm_ctx->perm_shift = 1;
+ denorm_ctx->rehash_vector = 0;
+ break;
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return -EINVAL;
+ }
+
+ denorm_ctx->base_denorm_addr = FIELD_GET(GENMASK_ULL(7, 0), ctx->ret_addr);
+
+ switch (ctx->map.intlv_mode) {
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(9, 8), ctx->ret_addr) << 10;
+ denorm_ctx->div_addr = FIELD_GET(GENMASK_ULL(63, 10), ctx->ret_addr);
+ break;
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(10, 8), ctx->ret_addr) << 9;
+ denorm_ctx->div_addr = FIELD_GET(GENMASK_ULL(63, 11), ctx->ret_addr);
+ break;
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return -EINVAL;
+ }
+
+ if (ctx->map.num_intlv_chan % 3 == 0)
+ denorm_ctx->mod_value = 3;
+ else
+ denorm_ctx->mod_value = 5;
+
+ denorm_ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx) - get_dst_fabric_id(ctx);
+
+ atl_debug(ctx, "Initialized df4p5_denorm_ctx:");
+ atl_debug(ctx, " mod_value = %d", denorm_ctx->mod_value);
+ atl_debug(ctx, " perm_shift = %d", denorm_ctx->perm_shift);
+ atl_debug(ctx, " rehash_vector = 0x%x", denorm_ctx->rehash_vector);
+ atl_debug(ctx, " base_denorm_addr = 0x%016llx", denorm_ctx->base_denorm_addr);
+ atl_debug(ctx, " div_addr = 0x%016llx", denorm_ctx->div_addr);
+ atl_debug(ctx, " coh_st_fabric_id = 0x%x", denorm_ctx->coh_st_fabric_id);
+
+ return 0;
+}
+
+/*
+ * For DF 4.5, parts of the physical address can be directly pulled from the
+ * normalized address. The exact bits will differ between interleave modes, but
+ * using NPS0_24CHAN_1K_HASH as an example, the normalized address consists of
+ * bits [63:13] (divided by 3), bits [11:10], and bits [7:0] of the system
+ * physical address.
+ *
+ * In this case, there is no way to reconstruct the missing bits (bits 8, 9,
+ * and 12) from the normalized address. Additionally, when bits [63:13] are
+ * divided by 3, the remainder is dropped. Determine the proper combination of
+ * "lost" bits and dropped remainder by iterating through each possible
+ * permutation of these bits and then normalizing the generated system physical
+ * addresses. If the normalized address matches the address we are trying to
+ * translate, then we have found the correct permutation of bits.
+ */
+static int denorm_addr_df4p5_np2(struct addr_ctx *ctx)
+{
+ struct df4p5_denorm_ctx denorm_ctx;
+ int ret = 0;
+
+ memset(&denorm_ctx, 0, sizeof(denorm_ctx));
+
+ atl_debug(ctx, "Denormalizing DF 4.5 normalized address 0x%016llx", ctx->ret_addr);
+
+ ret = init_df4p5_denorm_ctx(ctx, &denorm_ctx);
+ if (ret)
+ return ret;
+
+ return check_permutations(ctx, &denorm_ctx);
+}
+
int denormalize_address(struct addr_ctx *ctx)
{
switch (ctx->map.intlv_mode) {
@@ -710,6 +1227,19 @@ int denormalize_address(struct addr_ctx *ctx)
case DF4_NPS2_5CHAN_HASH:
case DF4_NPS1_10CHAN_HASH:
return denorm_addr_df4_np2(ctx);
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ return denorm_addr_df4p5_np2(ctx);
case DF3_6CHAN:
return denorm_addr_df3_6chan(ctx);
default:
diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
index 4681449321de..0c67ceedfc60 100644
--- a/drivers/ras/amd/atl/internal.h
+++ b/drivers/ras/amd/atl/internal.h
@@ -34,6 +34,8 @@
#define DF_DRAM_BASE_LIMIT_LSB 28
#define MI300_DRAM_LIMIT_LSB 20

+#define INVALID_SPA ~0ULL
+
enum df_revisions {
UNKNOWN,
DF2,
@@ -90,6 +92,44 @@ enum intlv_modes {
DF4p5_NPS1_10CHAN_2K_HASH = 0x49,
};

+struct df4p5_denorm_ctx {
+ /* perm_shift: Indicates the number of "lost" bits. This will be 1, 2, or 3. */
+ u8 perm_shift;
+
+ /* rehash_vector: A mask indicating the bits that need to be rehashed. */
+ u16 rehash_vector;
+
+ /*
+ * mod_value: Represents the value that the high bits of the normalized
+ * address are divided by during normalization. This value will be 3
+ * for interleave modes with a number of channels divisible by 3 or the
+ * value will be 5 for interleave modes with a number of channels
+ * divisible by 5. Power-of-two interleave modes are handled
+ * separately.
+ */
+ u8 mod_value;
+
+ /*
+ * base_denorm_addr: Represents the bits that can be directly pulled
+ * from the normalized address. In each case, pass through bits [7:0]
+ * of the normalized address. The other bits depend on the interleave
+ * bit position which will be bit 10 for 1K interleave stripe cases and
+ * bit 11 for 2K interleave stripe cases.
+ */
+ u64 base_denorm_addr;
+
+ /*
+ * div_addr: Represents the high bits of the physical address that have
+ * been divided by the mod_value.
+ */
+ u64 div_addr;
+
+ u64 current_spa;
+ u64 resolved_spa;
+
+ u16 coh_st_fabric_id;
+};
+
struct df_flags {
__u8 legacy_ficaa : 1,
socket_id_shift_quirk : 1,
diff --git a/drivers/ras/amd/atl/map.c b/drivers/ras/amd/atl/map.c
index 8b908e8d7495..1a32a52be942 100644
--- a/drivers/ras/amd/atl/map.c
+++ b/drivers/ras/amd/atl/map.c
@@ -642,6 +642,39 @@ static int get_global_map_data(struct addr_ctx *ctx)
return 0;
}

+static int validate_address_map(struct addr_ctx *ctx)
+{
+ switch (ctx->map.intlv_mode) {
+ case DF4p5_NPS0_24CHAN_1K_HASH:
+ case DF4p5_NPS0_24CHAN_2K_HASH:
+ if (ctx->map.num_intlv_sockets < 2 || !map_bits_valid(ctx, 8, 0, 1, 2))
+ goto out;
+ break;
+ case DF4p5_NPS1_12CHAN_1K_HASH:
+ case DF4p5_NPS1_12CHAN_2K_HASH:
+ case DF4p5_NPS2_6CHAN_1K_HASH:
+ case DF4p5_NPS2_6CHAN_2K_HASH:
+ case DF4p5_NPS4_3CHAN_1K_HASH:
+ case DF4p5_NPS4_3CHAN_2K_HASH:
+ case DF4p5_NPS1_10CHAN_1K_HASH:
+ case DF4p5_NPS1_10CHAN_2K_HASH:
+ case DF4p5_NPS2_5CHAN_1K_HASH:
+ case DF4p5_NPS2_5CHAN_2K_HASH:
+ if (ctx->map.num_intlv_sockets != 1 || !map_bits_valid(ctx, 8, 0, 1, 1))
+ goto out;
+ break;
+ default:
+ atl_debug_on_bad_intlv_mode(ctx);
+ return -EINVAL;
+ }
+
+ return 0;
+
+out:
+ atl_debug(ctx, "Inconsistent address map");
+ return -EINVAL;
+}
+
static void dump_address_map(struct dram_addr_map *map)
{
u8 i;
@@ -678,5 +711,9 @@ int get_address_map(struct addr_ctx *ctx)

dump_address_map(&ctx->map);

+ ret = validate_address_map(ctx);
+ if (ret)
+ return ret;
+
return ret;
}
--
2.34.1


2024-03-18 15:06:22

by Yazen Ghannam

[permalink] [raw]
Subject: Re: [PATCH 1/4] RAS: ATL: Read DRAM hole base early

On 3/14/24 12:35, John Allen wrote:

Please use "RAS/AMD/ATL:" for the $SUBJECT prefix.

> Read DRAM hole base when constructing the address map as the value will
> not change during translation.

More specifically, the value will not change during run time.

>
> Signed-off-by: John Allen <[email protected]>
> ---
> drivers/ras/amd/atl/core.c | 15 ++-------------
> drivers/ras/amd/atl/internal.h | 2 ++
> drivers/ras/amd/atl/system.c | 21 +++++++++++++++++++++
> 3 files changed, 25 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c
> index 6dc4e06305f7..c1710d233adb 100644
> --- a/drivers/ras/amd/atl/core.c
> +++ b/drivers/ras/amd/atl/core.c
> @@ -51,22 +51,11 @@ static bool legacy_hole_en(struct addr_ctx *ctx)
>
> static int add_legacy_hole(struct addr_ctx *ctx)
> {
> - u32 dram_hole_base;
> - u8 func = 0;
> -
> if (!legacy_hole_en(ctx))
> return 0;
>
> - if (df_cfg.rev >= DF4)
> - func = 7;
> -
> - if (df_indirect_read_broadcast(ctx->node_id, func, 0x104, &dram_hole_base))
> - return -EINVAL;
> -
> - dram_hole_base &= DF_DRAM_HOLE_BASE_MASK;
> -
> - if (ctx->ret_addr >= dram_hole_base)
> - ctx->ret_addr += (BIT_ULL(32) - dram_hole_base);
> + if (ctx->addr >= df_cfg.dram_hole_base)
> + ctx->addr += (BIT_ULL(32) - df_cfg.dram_hole_base);

There is a compilation error here. Please make sure to build each patch
individually.

Thanks,
Yazen

2024-03-18 15:29:08

by Yazen Ghannam

[permalink] [raw]
Subject: Re: [PATCH 2/4] RAS: ATL: Expand helpers for adding and removing base and hole

On 3/14/24 12:35, John Allen wrote:
> Data fabric 4.5 denormalization will need to frequently add and remove

More specifically, the non-power-of-2 cases will need this.

> the base and the legacy MMIO hole. Modify existing helpers to improve DF
> 4.5 denormalization flow and add helper to remove the base and hole.

Please write the what/context, why/issue, and how/fix information as
separate paragraphs even if they're just a single sentence each. I think
this helps to find the details more easily.

>
> Signed-off-by: John Allen <[email protected]>
> ---
> drivers/ras/amd/atl/core.c | 43 ++++++++++++++++++++++------------
> drivers/ras/amd/atl/internal.h | 3 +++
> 2 files changed, 31 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c
> index c1710d233adb..cafdfc57d929 100644
> --- a/drivers/ras/amd/atl/core.c
> +++ b/drivers/ras/amd/atl/core.c
> @@ -49,15 +49,26 @@ static bool legacy_hole_en(struct addr_ctx *ctx)
> return FIELD_GET(DF_LEGACY_MMIO_HOLE_EN, reg);
> }
>
> -static int add_legacy_hole(struct addr_ctx *ctx)
> +static u64 add_legacy_hole(struct addr_ctx *ctx, u64 addr)
> {
> if (!legacy_hole_en(ctx))
> - return 0;
> + return addr;
>
> - if (ctx->addr >= df_cfg.dram_hole_base)
> - ctx->addr += (BIT_ULL(32) - df_cfg.dram_hole_base);
> + if (addr >= df_cfg.dram_hole_base)
> + addr += (BIT_ULL(32) - df_cfg.dram_hole_base);
>
> - return 0;
> + return addr;
> +}
> +
> +static u64 remove_legacy_hole(struct addr_ctx *ctx, u64 addr)
> +{
> + if (!legacy_hole_en(ctx))
> + return addr;
> +
> + if (addr >= df_cfg.dram_hole_base)
> + addr -= (BIT_ULL(32) - df_cfg.dram_hole_base);
> +
> + return addr;
> }
>
> static u64 get_base_addr(struct addr_ctx *ctx)
> @@ -72,14 +83,16 @@ static u64 get_base_addr(struct addr_ctx *ctx)
> return base_addr << DF_DRAM_BASE_LIMIT_LSB;
> }
>
> -static int add_base_and_hole(struct addr_ctx *ctx)
> +u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr)
> {
> - ctx->ret_addr += get_base_addr(ctx);
> -
> - if (add_legacy_hole(ctx))
> - return -EINVAL;
> + addr += get_base_addr(ctx);
> + return add_legacy_hole(ctx, addr);
> +}
>
> - return 0;
> +u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr)
> +{
> + addr -= get_base_addr(ctx);
> + return remove_legacy_hole(ctx, addr);

This should be the inverse of the "add" operation, I think. So remove
the legacy hole first, then remove the base address.

> }
>
> static bool late_hole_remove(struct addr_ctx *ctx)
> @@ -123,14 +136,14 @@ unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsig
> if (denormalize_address(&ctx))
> return -EINVAL;
>
> - if (!late_hole_remove(&ctx) && add_base_and_hole(&ctx))
> - return -EINVAL;
> + if (!late_hole_remove(&ctx))
> + ctx.ret_addr = add_base_and_hole(&ctx, ctx.ret_addr);
>
> if (dehash_address(&ctx))
> return -EINVAL;
>
> - if (late_hole_remove(&ctx) && add_base_and_hole(&ctx))
> - return -EINVAL;
> + if (late_hole_remove(&ctx))
> + ctx.ret_addr = add_base_and_hole(&ctx, ctx.ret_addr);
>
> if (addr_over_limit(&ctx))
> return -EINVAL;
> diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
> index 1413c8ddc6c5..05b870fcb24e 100644
> --- a/drivers/ras/amd/atl/internal.h
> +++ b/drivers/ras/amd/atl/internal.h
> @@ -236,6 +236,9 @@ int dehash_address(struct addr_ctx *ctx);
> unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr);
> unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
>
> +u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr);
> +u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr);

remove_base_and_hole() is only used in denormalize.c, correct? So why
not define it there as static? Other than trying to keep the code
together and symmetrical, I mean.

Thanks,
Yazen

2024-03-18 15:47:33

by Yazen Ghannam

[permalink] [raw]
Subject: Re: [PATCH 3/4] RAS: ATL: Add map_bits_valid to header

On 3/14/24 12:35, John Allen wrote:
> Make map_bits_valid available in the AMD ATL internal header as the
> function can be used in other parts of the library.
>
> Signed-off-by: John Allen <[email protected]>
> ---
> drivers/ras/amd/atl/dehash.c | 2 +-
> drivers/ras/amd/atl/internal.h | 3 +++
> 2 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c
> index 4ea46262c4f5..a20cf615b83a 100644
> --- a/drivers/ras/amd/atl/dehash.c
> +++ b/drivers/ras/amd/atl/dehash.c
> @@ -19,7 +19,7 @@
> * If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the
> * respective interleaving is disabled.
> */
> -static inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
> +inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
> u8 num_intlv_dies, u8 num_intlv_sockets)
> {
> if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) {
> diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
> index 05b870fcb24e..4681449321de 100644
> --- a/drivers/ras/amd/atl/internal.h
> +++ b/drivers/ras/amd/atl/internal.h
> @@ -239,6 +239,9 @@ unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
> u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr);
> u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr);
>
> +inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
> + u8 num_intlv_dies, u8 num_intlv_sockets);
> +
> /*
> * Make a gap in @data that is @num_bits long starting at @bit_num.
> * e.g. data = 11111111'b

Ultimately, the maps should be validated as soon as they are gathered. I
figured we would do that later. But that would wipe out this change.
And, after looking at dehash.c again, map_bits_valid() isn't used in too
many places right now.

So I think validate_address_map() from the following patch should be
done for all modes first. That way we don't need to add and then remove
this function from the header.

Thanks,
Yazen

2024-03-25 19:35:20

by John Allen

[permalink] [raw]
Subject: Re: [PATCH 2/4] RAS: ATL: Expand helpers for adding and removing base and hole

On Mon, Mar 18, 2024 at 11:28:48AM -0400, Yazen Ghannam wrote:
> On 3/14/24 12:35, John Allen wrote:
> > Data fabric 4.5 denormalization will need to frequently add and remove
>
> More specifically, the non-power-of-2 cases will need this.
>
> > the base and the legacy MMIO hole. Modify existing helpers to improve DF
> > 4.5 denormalization flow and add helper to remove the base and hole.
>
> Please write the what/context, why/issue, and how/fix information as
> separate paragraphs even if they're just a single sentence each. I think
> this helps to find the details more easily.
>
> >
> > Signed-off-by: John Allen <[email protected]>
> > ---
> > drivers/ras/amd/atl/core.c | 43 ++++++++++++++++++++++------------
> > drivers/ras/amd/atl/internal.h | 3 +++
> > 2 files changed, 31 insertions(+), 15 deletions(-)
> >
> > diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c
> > index c1710d233adb..cafdfc57d929 100644
> > --- a/drivers/ras/amd/atl/core.c
> > +++ b/drivers/ras/amd/atl/core.c
> > @@ -49,15 +49,26 @@ static bool legacy_hole_en(struct addr_ctx *ctx)
> > return FIELD_GET(DF_LEGACY_MMIO_HOLE_EN, reg);
> > }
> > -static int add_legacy_hole(struct addr_ctx *ctx)
> > +static u64 add_legacy_hole(struct addr_ctx *ctx, u64 addr)
> > {
> > if (!legacy_hole_en(ctx))
> > - return 0;
> > + return addr;
> > - if (ctx->addr >= df_cfg.dram_hole_base)
> > - ctx->addr += (BIT_ULL(32) - df_cfg.dram_hole_base);
> > + if (addr >= df_cfg.dram_hole_base)
> > + addr += (BIT_ULL(32) - df_cfg.dram_hole_base);
> > - return 0;
> > + return addr;
> > +}
> > +
> > +static u64 remove_legacy_hole(struct addr_ctx *ctx, u64 addr)
> > +{
> > + if (!legacy_hole_en(ctx))
> > + return addr;
> > +
> > + if (addr >= df_cfg.dram_hole_base)
> > + addr -= (BIT_ULL(32) - df_cfg.dram_hole_base);
> > +
> > + return addr;
> > }
> > static u64 get_base_addr(struct addr_ctx *ctx)
> > @@ -72,14 +83,16 @@ static u64 get_base_addr(struct addr_ctx *ctx)
> > return base_addr << DF_DRAM_BASE_LIMIT_LSB;
> > }
> > -static int add_base_and_hole(struct addr_ctx *ctx)
> > +u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr)
> > {
> > - ctx->ret_addr += get_base_addr(ctx);
> > -
> > - if (add_legacy_hole(ctx))
> > - return -EINVAL;
> > + addr += get_base_addr(ctx);
> > + return add_legacy_hole(ctx, addr);
> > +}
> > - return 0;
> > +u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr)
> > +{
> > + addr -= get_base_addr(ctx);
> > + return remove_legacy_hole(ctx, addr);
>
> This should be the inverse of the "add" operation, I think. So remove
> the legacy hole first, then remove the base address.
>
> > }
> > static bool late_hole_remove(struct addr_ctx *ctx)
> > @@ -123,14 +136,14 @@ unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsig
> > if (denormalize_address(&ctx))
> > return -EINVAL;
> > - if (!late_hole_remove(&ctx) && add_base_and_hole(&ctx))
> > - return -EINVAL;
> > + if (!late_hole_remove(&ctx))
> > + ctx.ret_addr = add_base_and_hole(&ctx, ctx.ret_addr);
> > if (dehash_address(&ctx))
> > return -EINVAL;
> > - if (late_hole_remove(&ctx) && add_base_and_hole(&ctx))
> > - return -EINVAL;
> > + if (late_hole_remove(&ctx))
> > + ctx.ret_addr = add_base_and_hole(&ctx, ctx.ret_addr);
> > if (addr_over_limit(&ctx))
> > return -EINVAL;
> > diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
> > index 1413c8ddc6c5..05b870fcb24e 100644
> > --- a/drivers/ras/amd/atl/internal.h
> > +++ b/drivers/ras/amd/atl/internal.h
> > @@ -236,6 +236,9 @@ int dehash_address(struct addr_ctx *ctx);
> > unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr);
> > unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
> > +u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr);
> > +u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr);
>
> remove_base_and_hole() is only used in denormalize.c, correct? So why
> not define it there as static? Other than trying to keep the code
> together and symmetrical, I mean.

In addition to keeping the two inverse functions together,
remove_base_and_hole depends on other functions in core.c. So if we
don't expose remove_base_and_hole in the header, then we would need to
expose get_base_addr and remove_legacy_hole in the header.
Alternatively, we could move remove_legacy_hole to denormalize.c and
expose get_base_addr and legacy_hole_en in the header instead. So
exposing one function that's the inverse of the other just looks better
to me than exposing two.

Thanks,
John

2024-03-25 20:00:36

by John Allen

[permalink] [raw]
Subject: Re: [PATCH 3/4] RAS: ATL: Add map_bits_valid to header

On Mon, Mar 18, 2024 at 11:46:39AM -0400, Yazen Ghannam wrote:
> On 3/14/24 12:35, John Allen wrote:
> > Make map_bits_valid available in the AMD ATL internal header as the
> > function can be used in other parts of the library.
> >
> > Signed-off-by: John Allen <[email protected]>
> > ---
> > drivers/ras/amd/atl/dehash.c | 2 +-
> > drivers/ras/amd/atl/internal.h | 3 +++
> > 2 files changed, 4 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c
> > index 4ea46262c4f5..a20cf615b83a 100644
> > --- a/drivers/ras/amd/atl/dehash.c
> > +++ b/drivers/ras/amd/atl/dehash.c
> > @@ -19,7 +19,7 @@
> > * If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the
> > * respective interleaving is disabled.
> > */
> > -static inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
> > +inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
> > u8 num_intlv_dies, u8 num_intlv_sockets)
> > {
> > if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) {
> > diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
> > index 05b870fcb24e..4681449321de 100644
> > --- a/drivers/ras/amd/atl/internal.h
> > +++ b/drivers/ras/amd/atl/internal.h
> > @@ -239,6 +239,9 @@ unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
> > u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr);
> > u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr);
> > +inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
> > + u8 num_intlv_dies, u8 num_intlv_sockets);
> > +
> > /*
> > * Make a gap in @data that is @num_bits long starting at @bit_num.
> > * e.g. data = 11111111'b
>
> Ultimately, the maps should be validated as soon as they are gathered. I
> figured we would do that later. But that would wipe out this change.
> And, after looking at dehash.c again, map_bits_valid() isn't used in too
> many places right now.
>
> So I think validate_address_map() from the following patch should be
> done for all modes first. That way we don't need to add and then remove
> this function from the header.

I'm not sure I understand. Are you saying that we should just move the
map_bits_valid function to map.c and then make the map_bits_valid calls
that are currently in dehash.c to validate_address_map?

Thanks,
John

2024-03-26 13:48:32

by Yazen Ghannam

[permalink] [raw]
Subject: Re: [PATCH 3/4] RAS: ATL: Add map_bits_valid to header



On 3/25/24 15:57, John Allen wrote:
> On Mon, Mar 18, 2024 at 11:46:39AM -0400, Yazen Ghannam wrote:
>> On 3/14/24 12:35, John Allen wrote:
>>> Make map_bits_valid available in the AMD ATL internal header as the
>>> function can be used in other parts of the library.
>>>
>>> Signed-off-by: John Allen <[email protected]>
>>> ---
>>> drivers/ras/amd/atl/dehash.c | 2 +-
>>> drivers/ras/amd/atl/internal.h | 3 +++
>>> 2 files changed, 4 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c
>>> index 4ea46262c4f5..a20cf615b83a 100644
>>> --- a/drivers/ras/amd/atl/dehash.c
>>> +++ b/drivers/ras/amd/atl/dehash.c
>>> @@ -19,7 +19,7 @@
>>> * If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the
>>> * respective interleaving is disabled.
>>> */
>>> -static inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
>>> +inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
>>> u8 num_intlv_dies, u8 num_intlv_sockets)
>>> {
>>> if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) {
>>> diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
>>> index 05b870fcb24e..4681449321de 100644
>>> --- a/drivers/ras/amd/atl/internal.h
>>> +++ b/drivers/ras/amd/atl/internal.h
>>> @@ -239,6 +239,9 @@ unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
>>> u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr);
>>> u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr);
>>> +inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
>>> + u8 num_intlv_dies, u8 num_intlv_sockets);
>>> +
>>> /*
>>> * Make a gap in @data that is @num_bits long starting at @bit_num.
>>> * e.g. data = 11111111'b
>>
>> Ultimately, the maps should be validated as soon as they are gathered. I
>> figured we would do that later. But that would wipe out this change.
>> And, after looking at dehash.c again, map_bits_valid() isn't used in too
>> many places right now.
>>
>> So I think validate_address_map() from the following patch should be
>> done for all modes first. That way we don't need to add and then remove
>> this function from the header.
>
> I'm not sure I understand. Are you saying that we should just move the
> map_bits_valid function to map.c and then make the map_bits_valid calls
> that are currently in dehash.c to validate_address_map?
>

I mean that the maps should be validated once as soon as they are
gathered (in map.c). This would happen before we get to dehash
functions. So we don't need any map valid checks in dehash.c.

Thanks,
Yazen

2024-03-26 14:17:48

by Yazen Ghannam

[permalink] [raw]
Subject: Re: [PATCH 2/4] RAS: ATL: Expand helpers for adding and removing base and hole

On 3/25/24 15:27, John Allen wrote:
> On Mon, Mar 18, 2024 at 11:28:48AM -0400, Yazen Ghannam wrote:
>> On 3/14/24 12:35, John Allen wrote:
>>> Data fabric 4.5 denormalization will need to frequently add and remove
>>
>> More specifically, the non-power-of-2 cases will need this.
>>
>>> the base and the legacy MMIO hole. Modify existing helpers to improve DF
>>> 4.5 denormalization flow and add helper to remove the base and hole.
>>
>> Please write the what/context, why/issue, and how/fix information as
>> separate paragraphs even if they're just a single sentence each. I think
>> this helps to find the details more easily.
>>
>>>
>>> Signed-off-by: John Allen <[email protected]>
>>> ---
>>> drivers/ras/amd/atl/core.c | 43 ++++++++++++++++++++++------------
>>> drivers/ras/amd/atl/internal.h | 3 +++
>>> 2 files changed, 31 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c
>>> index c1710d233adb..cafdfc57d929 100644
>>> --- a/drivers/ras/amd/atl/core.c
>>> +++ b/drivers/ras/amd/atl/core.c
>>> @@ -49,15 +49,26 @@ static bool legacy_hole_en(struct addr_ctx *ctx)
>>> return FIELD_GET(DF_LEGACY_MMIO_HOLE_EN, reg);
>>> }
>>> -static int add_legacy_hole(struct addr_ctx *ctx)
>>> +static u64 add_legacy_hole(struct addr_ctx *ctx, u64 addr)
>>> {
>>> if (!legacy_hole_en(ctx))
>>> - return 0;
>>> + return addr;
>>> - if (ctx->addr >= df_cfg.dram_hole_base)
>>> - ctx->addr += (BIT_ULL(32) - df_cfg.dram_hole_base);
>>> + if (addr >= df_cfg.dram_hole_base)
>>> + addr += (BIT_ULL(32) - df_cfg.dram_hole_base);
>>> - return 0;
>>> + return addr;
>>> +}
>>> +
>>> +static u64 remove_legacy_hole(struct addr_ctx *ctx, u64 addr)
>>> +{
>>> + if (!legacy_hole_en(ctx))
>>> + return addr;
>>> +
>>> + if (addr >= df_cfg.dram_hole_base)
>>> + addr -= (BIT_ULL(32) - df_cfg.dram_hole_base);
>>> +
>>> + return addr;
>>> }
>>> static u64 get_base_addr(struct addr_ctx *ctx)
>>> @@ -72,14 +83,16 @@ static u64 get_base_addr(struct addr_ctx *ctx)
>>> return base_addr << DF_DRAM_BASE_LIMIT_LSB;
>>> }
>>> -static int add_base_and_hole(struct addr_ctx *ctx)
>>> +u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr)
>>> {
>>> - ctx->ret_addr += get_base_addr(ctx);
>>> -
>>> - if (add_legacy_hole(ctx))
>>> - return -EINVAL;
>>> + addr += get_base_addr(ctx);
>>> + return add_legacy_hole(ctx, addr);
>>> +}
>>> - return 0;
>>> +u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr)
>>> +{
>>> + addr -= get_base_addr(ctx);
>>> + return remove_legacy_hole(ctx, addr);
>>
>> This should be the inverse of the "add" operation, I think. So remove
>> the legacy hole first, then remove the base address.
>>
>>> }
>>> static bool late_hole_remove(struct addr_ctx *ctx)
>>> @@ -123,14 +136,14 @@ unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsig
>>> if (denormalize_address(&ctx))
>>> return -EINVAL;
>>> - if (!late_hole_remove(&ctx) && add_base_and_hole(&ctx))
>>> - return -EINVAL;
>>> + if (!late_hole_remove(&ctx))
>>> + ctx.ret_addr = add_base_and_hole(&ctx, ctx.ret_addr);
>>> if (dehash_address(&ctx))
>>> return -EINVAL;
>>> - if (late_hole_remove(&ctx) && add_base_and_hole(&ctx))
>>> - return -EINVAL;
>>> + if (late_hole_remove(&ctx))
>>> + ctx.ret_addr = add_base_and_hole(&ctx, ctx.ret_addr);
>>> if (addr_over_limit(&ctx))
>>> return -EINVAL;
>>> diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
>>> index 1413c8ddc6c5..05b870fcb24e 100644
>>> --- a/drivers/ras/amd/atl/internal.h
>>> +++ b/drivers/ras/amd/atl/internal.h
>>> @@ -236,6 +236,9 @@ int dehash_address(struct addr_ctx *ctx);
>>> unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr);
>>> unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
>>> +u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr);
>>> +u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr);
>>
>> remove_base_and_hole() is only used in denormalize.c, correct? So why
>> not define it there as static? Other than trying to keep the code
>> together and symmetrical, I mean.
>
> In addition to keeping the two inverse functions together,
> remove_base_and_hole depends on other functions in core.c. So if we
> don't expose remove_base_and_hole in the header, then we would need to
> expose get_base_addr and remove_legacy_hole in the header.
> Alternatively, we could move remove_legacy_hole to denormalize.c and
> expose get_base_addr and legacy_hole_en in the header instead. So
> exposing one function that's the inverse of the other just looks better
> to me than exposing two.
>

Right, fair point. So what you have is good then.

Thanks,
Yazen