2024-04-07 12:39:53

by yunhui cui

[permalink] [raw]
Subject: [PATCH 1/2] ACPI: PPTT: Populate cacheinfo entirely with PPTT

When the type and level information of this_leaf cannot be obtained
from arch, cacheinfo is completely filled in with the content of PPTT.

Signed-off-by: Yunhui Cui <[email protected]>
---
drivers/acpi/pptt.c | 135 ++++++++++++++++++++++++++++++++++++++++----
1 file changed, 124 insertions(+), 11 deletions(-)

diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
index a35dd0e41c27..6c54fc8e3039 100644
--- a/drivers/acpi/pptt.c
+++ b/drivers/acpi/pptt.c
@@ -21,6 +21,9 @@
#include <linux/cacheinfo.h>
#include <acpi/processor.h>

+void acpi_fill_cacheinfo(struct acpi_pptt_cache *cache, struct acpi_table_header *table,
+ int cpu, int level, int *index);
+
static struct acpi_subtable_header *fetch_pptt_subtable(struct acpi_table_header *table_hdr,
u32 pptt_ref)
{
@@ -77,6 +80,18 @@ static inline bool acpi_pptt_match_type(int table_type, int type)
table_type & ACPI_PPTT_CACHE_TYPE_UNIFIED & type);
}

+static inline u32 get_cache_id(struct acpi_pptt_cache *cache)
+{
+ struct acpi_pptt_cache_v1 *cache_v1;
+
+ if (cache->flags & ACPI_PPTT_CACHE_ID_VALID) {
+ cache_v1 = ACPI_ADD_PTR(struct acpi_pptt_cache_v1,
+ cache, sizeof(struct acpi_pptt_cache));
+ return cache_v1->cache_id;
+ }
+ return 0;
+}
+
/**
* acpi_pptt_walk_cache() - Attempt to find the requested acpi_pptt_cache
* @table_hdr: Pointer to the head of the PPTT table
@@ -104,7 +119,7 @@ static unsigned int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
unsigned int *split_levels,
struct acpi_subtable_header *res,
struct acpi_pptt_cache **found,
- unsigned int level, int type)
+ unsigned int level, int type, int cpu, int *index)
{
struct acpi_pptt_cache *cache;

@@ -125,7 +140,7 @@ static unsigned int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
acpi_pptt_match_type(cache->attributes, ACPI_PPTT_CACHE_TYPE_INSTR)))
*split_levels = local_level;

- if (local_level == level &&
+ if (level && local_level == level &&
acpi_pptt_match_type(cache->attributes, type)) {
if (*found != NULL && cache != *found)
pr_warn("Found duplicate cache level/type unable to determine uniqueness\n");
@@ -137,7 +152,9 @@ static unsigned int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
* to verify that we don't find a duplicate
* cache node.
*/
- }
+ } else
+ acpi_fill_cacheinfo(cache, table_hdr, cpu, local_level, index);
+
cache = fetch_pptt_cache(table_hdr, cache->next_level_of_cache);
}
return local_level;
@@ -147,7 +164,7 @@ static struct acpi_pptt_cache *
acpi_find_cache_level(struct acpi_table_header *table_hdr,
struct acpi_pptt_processor *cpu_node,
unsigned int *starting_level, unsigned int *split_levels,
- unsigned int level, int type)
+ unsigned int level, int type, int cpu, int *index)
{
struct acpi_subtable_header *res;
unsigned int number_of_levels = *starting_level;
@@ -161,7 +178,8 @@ acpi_find_cache_level(struct acpi_table_header *table_hdr,

local_level = acpi_pptt_walk_cache(table_hdr, *starting_level,
split_levels, res, &ret,
- level, type);
+ level, type, cpu, index);
+
/*
* we are looking for the max depth. Since its potentially
* possible for a given node to have resources with differing
@@ -197,7 +215,7 @@ static void acpi_count_levels(struct acpi_table_header *table_hdr,
unsigned int *levels, unsigned int *split_levels)
{
do {
- acpi_find_cache_level(table_hdr, cpu_node, levels, split_levels, 0, 0);
+ acpi_find_cache_level(table_hdr, cpu_node, levels, split_levels, 0, 0, 0, NULL);
cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
} while (cpu_node);
}
@@ -316,6 +334,7 @@ static u8 acpi_cache_type(enum cache_type type)
}

static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *table_hdr,
+ int cpu,
u32 acpi_cpu_id,
enum cache_type type,
unsigned int level,
@@ -325,6 +344,7 @@ static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *ta
struct acpi_pptt_cache *found = NULL;
struct acpi_pptt_processor *cpu_node;
u8 acpi_type = acpi_cache_type(type);
+ int index = 0;

pr_debug("Looking for CPU %d's level %u cache type %d\n",
acpi_cpu_id, level, acpi_type);
@@ -333,7 +353,7 @@ static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *ta

while (cpu_node && !found) {
found = acpi_find_cache_level(table_hdr, cpu_node,
- &total_levels, NULL, level, acpi_type);
+ &total_levels, NULL, level, acpi_type, cpu, &index);
*node = cpu_node;
cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
}
@@ -406,8 +426,14 @@ static void update_cache_properties(struct cacheinfo *this_leaf,
* specified in PPTT.
*/
if (this_leaf->type == CACHE_TYPE_NOCACHE &&
- found_cache->flags & ACPI_PPTT_CACHE_TYPE_VALID)
- this_leaf->type = CACHE_TYPE_UNIFIED;
+ found_cache->flags & ACPI_PPTT_CACHE_TYPE_VALID) {
+ if (acpi_pptt_match_type(found_cache->attributes, ACPI_PPTT_CACHE_TYPE_DATA))
+ this_leaf->type = CACHE_TYPE_DATA;
+ if (acpi_pptt_match_type(found_cache->attributes, ACPI_PPTT_CACHE_TYPE_INSTR))
+ this_leaf->type = CACHE_TYPE_INST;
+ if (acpi_pptt_match_type(found_cache->attributes, ACPI_PPTT_CACHE_TYPE_UNIFIED))
+ this_leaf->type = CACHE_TYPE_UNIFIED;
+ }

if (revision >= 3 && (found_cache->flags & ACPI_PPTT_CACHE_ID_VALID)) {
found_cache_v1 = ACPI_ADD_PTR(struct acpi_pptt_cache_v1,
@@ -417,19 +443,106 @@ static void update_cache_properties(struct cacheinfo *this_leaf,
}
}

+static bool cache_is_filled_id(struct acpi_pptt_cache *cache, int cpu)
+{
+ u32 id = get_cache_id(cache);
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+ struct cacheinfo *this_leaf;
+ int index = 0;
+
+ while (index < get_cpu_cacheinfo(cpu)->num_leaves) {
+ this_leaf = this_cpu_ci->info_list + index;
+ if (this_leaf->id == id)
+ return true;
+ index++;
+ }
+ return false;
+}
+
+static bool cache_is_filled_content(struct acpi_pptt_cache *cache,
+ struct acpi_table_header *table,
+ int cpu, int level, u8 revision)
+{
+ struct acpi_pptt_processor *cpu_node;
+ struct cacheinfo *this_leaf, *tleaf;
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+ struct cacheinfo tmp_leaf = {0};
+ int index = 0;
+
+ cpu_node = acpi_find_processor_node(table, get_acpi_id_for_cpu(cpu));
+ tleaf = &tmp_leaf;
+ tleaf->level = level;
+
+ while (index < get_cpu_cacheinfo(cpu)->num_leaves) {
+ this_leaf = this_cpu_ci->info_list + index;
+ update_cache_properties(tleaf, cache,
+ ACPI_TO_POINTER(ACPI_PTR_DIFF(cpu_node, table)),
+ revision);
+ if (!memcmp(this_leaf, tleaf, sizeof(struct cacheinfo)))
+ return true;
+ index++;
+ }
+ return false;
+}
+
+static bool cache_is_filled(struct acpi_pptt_cache *cache, struct acpi_table_header *table,
+ int cpu, int level)
+{
+ u8 revision = table->revision;
+
+ /*
+ * If revision >= 3, compare the cacheid directly,
+ * otherwise compare the entire contents of the cache.
+ */
+ if (revision >= 3)
+ return cache_is_filled_id(cache, cpu);
+ else
+ return cache_is_filled_content(cache, table, cpu, level, revision);
+}
+
+void acpi_fill_cacheinfo(struct acpi_pptt_cache *cache,
+ struct acpi_table_header *table,
+ int cpu, int level, int *index)
+{
+ struct cacheinfo *this_leaf;
+ struct acpi_pptt_processor *cpu_node;
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+
+ if (!index)
+ return;
+
+ cpu_node = acpi_find_processor_node(table, get_acpi_id_for_cpu(cpu));
+ this_leaf = this_cpu_ci->info_list + *index;
+ if (this_leaf) {
+ this_leaf->level = level;
+ if (cache_is_filled(cache, table, cpu, level))
+ return;
+ update_cache_properties(this_leaf, cache,
+ ACPI_TO_POINTER(ACPI_PTR_DIFF(cpu_node,
+ table)),
+ table->revision);
+ *index += 1;
+ }
+}
+
static void cache_setup_acpi_cpu(struct acpi_table_header *table,
unsigned int cpu)
{
struct acpi_pptt_cache *found_cache;
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
- struct cacheinfo *this_leaf;
+ struct cacheinfo *this_leaf = this_cpu_ci->info_list;
unsigned int index = 0;
struct acpi_pptt_processor *cpu_node = NULL;

+ if (!this_leaf->type && !this_leaf->level) {
+ acpi_find_cache_node(table, acpi_cpu_id, cpu, 0, 0, &cpu_node);
+ return;
+ }
+
while (index < get_cpu_cacheinfo(cpu)->num_leaves) {
this_leaf = this_cpu_ci->info_list + index;
- found_cache = acpi_find_cache_node(table, acpi_cpu_id,
+ found_cache = acpi_find_cache_node(table, acpi_cpu_id, cpu,
this_leaf->type,
this_leaf->level,
&cpu_node);
--
2.20.1



2024-04-07 12:40:07

by yunhui cui

[permalink] [raw]
Subject: [PATCH 2/2] RISC-V: Select ACPI PPTT drivers

RSIC-V currently does not have a set of registers similar to ARM64
that describe cache-related attributes. In order to make RISC-V
cacheinfo normally supported by ACPI, through the optimization of
pptt.c, RISC-V can build cacheinfo through the ACPI PPTT table.

Signed-off-by: Yunhui Cui <[email protected]>
---
arch/riscv/Kconfig | 1 +
1 file changed, 1 insertion(+)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 8f10a2fb5f86..cc516c12cb92 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -14,6 +14,7 @@ config RISCV
def_bool y
select ACPI_GENERIC_GSI if ACPI
select ACPI_REDUCED_HARDWARE_ONLY if ACPI
+ select ACPI_PPTT if ACPI
select ARCH_DMA_DEFAULT_COHERENT
select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
--
2.20.1


2024-04-09 06:11:31

by yunhui cui

[permalink] [raw]
Subject: Re: [PATCH 1/2] ACPI: PPTT: Populate cacheinfo entirely with PPTT

Added committers of pptt.c. Could you give some comments? thanks.

On Sun, Apr 7, 2024 at 8:39 PM Yunhui Cui <[email protected]> wrote:
>
> When the type and level information of this_leaf cannot be obtained
> from arch, cacheinfo is completely filled in with the content of PPTT.
>
> Signed-off-by: Yunhui Cui <[email protected]>
> ---
> drivers/acpi/pptt.c | 135 ++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 124 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
> index a35dd0e41c27..6c54fc8e3039 100644
> --- a/drivers/acpi/pptt.c
> +++ b/drivers/acpi/pptt.c
> @@ -21,6 +21,9 @@
> #include <linux/cacheinfo.h>
> #include <acpi/processor.h>
>
> +void acpi_fill_cacheinfo(struct acpi_pptt_cache *cache, struct acpi_table_header *table,
> + int cpu, int level, int *index);
> +
> static struct acpi_subtable_header *fetch_pptt_subtable(struct acpi_table_header *table_hdr,
> u32 pptt_ref)
> {
> @@ -77,6 +80,18 @@ static inline bool acpi_pptt_match_type(int table_type, int type)
> table_type & ACPI_PPTT_CACHE_TYPE_UNIFIED & type);
> }
>
> +static inline u32 get_cache_id(struct acpi_pptt_cache *cache)
> +{
> + struct acpi_pptt_cache_v1 *cache_v1;
> +
> + if (cache->flags & ACPI_PPTT_CACHE_ID_VALID) {
> + cache_v1 = ACPI_ADD_PTR(struct acpi_pptt_cache_v1,
> + cache, sizeof(struct acpi_pptt_cache));
> + return cache_v1->cache_id;
> + }
> + return 0;
> +}
> +
> /**
> * acpi_pptt_walk_cache() - Attempt to find the requested acpi_pptt_cache
> * @table_hdr: Pointer to the head of the PPTT table
> @@ -104,7 +119,7 @@ static unsigned int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
> unsigned int *split_levels,
> struct acpi_subtable_header *res,
> struct acpi_pptt_cache **found,
> - unsigned int level, int type)
> + unsigned int level, int type, int cpu, int *index)
> {
> struct acpi_pptt_cache *cache;
>
> @@ -125,7 +140,7 @@ static unsigned int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
> acpi_pptt_match_type(cache->attributes, ACPI_PPTT_CACHE_TYPE_INSTR)))
> *split_levels = local_level;
>
> - if (local_level == level &&
> + if (level && local_level == level &&
> acpi_pptt_match_type(cache->attributes, type)) {
> if (*found != NULL && cache != *found)
> pr_warn("Found duplicate cache level/type unable to determine uniqueness\n");
> @@ -137,7 +152,9 @@ static unsigned int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
> * to verify that we don't find a duplicate
> * cache node.
> */
> - }
> + } else
> + acpi_fill_cacheinfo(cache, table_hdr, cpu, local_level, index);
> +
> cache = fetch_pptt_cache(table_hdr, cache->next_level_of_cache);
> }
> return local_level;
> @@ -147,7 +164,7 @@ static struct acpi_pptt_cache *
> acpi_find_cache_level(struct acpi_table_header *table_hdr,
> struct acpi_pptt_processor *cpu_node,
> unsigned int *starting_level, unsigned int *split_levels,
> - unsigned int level, int type)
> + unsigned int level, int type, int cpu, int *index)
> {
> struct acpi_subtable_header *res;
> unsigned int number_of_levels = *starting_level;
> @@ -161,7 +178,8 @@ acpi_find_cache_level(struct acpi_table_header *table_hdr,
>
> local_level = acpi_pptt_walk_cache(table_hdr, *starting_level,
> split_levels, res, &ret,
> - level, type);
> + level, type, cpu, index);
> +
> /*
> * we are looking for the max depth. Since its potentially
> * possible for a given node to have resources with differing
> @@ -197,7 +215,7 @@ static void acpi_count_levels(struct acpi_table_header *table_hdr,
> unsigned int *levels, unsigned int *split_levels)
> {
> do {
> - acpi_find_cache_level(table_hdr, cpu_node, levels, split_levels, 0, 0);
> + acpi_find_cache_level(table_hdr, cpu_node, levels, split_levels, 0, 0, 0, NULL);
> cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
> } while (cpu_node);
> }
> @@ -316,6 +334,7 @@ static u8 acpi_cache_type(enum cache_type type)
> }
>
> static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *table_hdr,
> + int cpu,
> u32 acpi_cpu_id,
> enum cache_type type,
> unsigned int level,
> @@ -325,6 +344,7 @@ static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *ta
> struct acpi_pptt_cache *found = NULL;
> struct acpi_pptt_processor *cpu_node;
> u8 acpi_type = acpi_cache_type(type);
> + int index = 0;
>
> pr_debug("Looking for CPU %d's level %u cache type %d\n",
> acpi_cpu_id, level, acpi_type);
> @@ -333,7 +353,7 @@ static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *ta
>
> while (cpu_node && !found) {
> found = acpi_find_cache_level(table_hdr, cpu_node,
> - &total_levels, NULL, level, acpi_type);
> + &total_levels, NULL, level, acpi_type, cpu, &index);
> *node = cpu_node;
> cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
> }
> @@ -406,8 +426,14 @@ static void update_cache_properties(struct cacheinfo *this_leaf,
> * specified in PPTT.
> */
> if (this_leaf->type == CACHE_TYPE_NOCACHE &&
> - found_cache->flags & ACPI_PPTT_CACHE_TYPE_VALID)
> - this_leaf->type = CACHE_TYPE_UNIFIED;
> + found_cache->flags & ACPI_PPTT_CACHE_TYPE_VALID) {
> + if (acpi_pptt_match_type(found_cache->attributes, ACPI_PPTT_CACHE_TYPE_DATA))
> + this_leaf->type = CACHE_TYPE_DATA;
> + if (acpi_pptt_match_type(found_cache->attributes, ACPI_PPTT_CACHE_TYPE_INSTR))
> + this_leaf->type = CACHE_TYPE_INST;
> + if (acpi_pptt_match_type(found_cache->attributes, ACPI_PPTT_CACHE_TYPE_UNIFIED))
> + this_leaf->type = CACHE_TYPE_UNIFIED;
> + }
>
> if (revision >= 3 && (found_cache->flags & ACPI_PPTT_CACHE_ID_VALID)) {
> found_cache_v1 = ACPI_ADD_PTR(struct acpi_pptt_cache_v1,
> @@ -417,19 +443,106 @@ static void update_cache_properties(struct cacheinfo *this_leaf,
> }
> }
>
> +static bool cache_is_filled_id(struct acpi_pptt_cache *cache, int cpu)
> +{
> + u32 id = get_cache_id(cache);
> + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> + struct cacheinfo *this_leaf;
> + int index = 0;
> +
> + while (index < get_cpu_cacheinfo(cpu)->num_leaves) {
> + this_leaf = this_cpu_ci->info_list + index;
> + if (this_leaf->id == id)
> + return true;
> + index++;
> + }
> + return false;
> +}
> +
> +static bool cache_is_filled_content(struct acpi_pptt_cache *cache,
> + struct acpi_table_header *table,
> + int cpu, int level, u8 revision)
> +{
> + struct acpi_pptt_processor *cpu_node;
> + struct cacheinfo *this_leaf, *tleaf;
> + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> + struct cacheinfo tmp_leaf = {0};
> + int index = 0;
> +
> + cpu_node = acpi_find_processor_node(table, get_acpi_id_for_cpu(cpu));
> + tleaf = &tmp_leaf;
> + tleaf->level = level;
> +
> + while (index < get_cpu_cacheinfo(cpu)->num_leaves) {
> + this_leaf = this_cpu_ci->info_list + index;
> + update_cache_properties(tleaf, cache,
> + ACPI_TO_POINTER(ACPI_PTR_DIFF(cpu_node, table)),
> + revision);
> + if (!memcmp(this_leaf, tleaf, sizeof(struct cacheinfo)))
> + return true;
> + index++;
> + }
> + return false;
> +}
> +
> +static bool cache_is_filled(struct acpi_pptt_cache *cache, struct acpi_table_header *table,
> + int cpu, int level)
> +{
> + u8 revision = table->revision;
> +
> + /*
> + * If revision >= 3, compare the cacheid directly,
> + * otherwise compare the entire contents of the cache.
> + */
> + if (revision >= 3)
> + return cache_is_filled_id(cache, cpu);
> + else
> + return cache_is_filled_content(cache, table, cpu, level, revision);
> +}
> +
> +void acpi_fill_cacheinfo(struct acpi_pptt_cache *cache,
> + struct acpi_table_header *table,
> + int cpu, int level, int *index)
> +{
> + struct cacheinfo *this_leaf;
> + struct acpi_pptt_processor *cpu_node;
> + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> +
> + if (!index)
> + return;
> +
> + cpu_node = acpi_find_processor_node(table, get_acpi_id_for_cpu(cpu));
> + this_leaf = this_cpu_ci->info_list + *index;
> + if (this_leaf) {
> + this_leaf->level = level;
> + if (cache_is_filled(cache, table, cpu, level))
> + return;
> + update_cache_properties(this_leaf, cache,
> + ACPI_TO_POINTER(ACPI_PTR_DIFF(cpu_node,
> + table)),
> + table->revision);
> + *index += 1;
> + }
> +}
> +
> static void cache_setup_acpi_cpu(struct acpi_table_header *table,
> unsigned int cpu)
> {
> struct acpi_pptt_cache *found_cache;
> struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
> - struct cacheinfo *this_leaf;
> + struct cacheinfo *this_leaf = this_cpu_ci->info_list;
> unsigned int index = 0;
> struct acpi_pptt_processor *cpu_node = NULL;
>
> + if (!this_leaf->type && !this_leaf->level) {
> + acpi_find_cache_node(table, acpi_cpu_id, cpu, 0, 0, &cpu_node);
> + return;
> + }
> +
> while (index < get_cpu_cacheinfo(cpu)->num_leaves) {
> this_leaf = this_cpu_ci->info_list + index;
> - found_cache = acpi_find_cache_node(table, acpi_cpu_id,
> + found_cache = acpi_find_cache_node(table, acpi_cpu_id, cpu,
> this_leaf->type,
> this_leaf->level,
> &cpu_node);
> --
> 2.20.1
>

Thanks,
Yunhui

2024-04-10 01:30:18

by Jeremy Linton

[permalink] [raw]
Subject: Re: [PATCH 1/2] ACPI: PPTT: Populate cacheinfo entirely with PPTT

Hi,


First thanks for working on this.

On 4/7/24 07:38, Yunhui Cui wrote:
> When the type and level information of this_leaf cannot be obtained
> from arch, cacheinfo is completely filled in with the content of PPTT.

I started reviewing this, based on what I understood to be the need to
generate the topology entirely from the PPTT. But, it was raising more
questions than answers because the PPTT is far too flexable in its
ability to represent cache hierachies that arn't logically useful. For
example multiple I or D caches at the same level, or I or D caches
higher in the topology than unified ones.

At least for arm64 (and I think others) there is an understood
simplification that there will be N levels of split I/D caches and M
unified levels. And from that, the number of cache leaves are computed
and allocated, and then we go in and largly skip PPTT cache nodes which
don't make sense in view of a generic topology like that. (see the
comment in cacheinfo.c:506)

Both of those pieces of information are available in
acpi_get_cache_info(). The missing part is marking those N levels of I/D
cache as such.

Looking at this code I don't really see all the error/allocation
logic/etc that assures the cache leaf indexing is allocated correctly
which worries me, although admidditly I could be missing something
important.

In summary, did you consider just allocating matching I/D caches from
the number of split levels in acpi_get_cache_info() then removing or
invalidating the ones that don't have matching PPTT entries after
running cache_setup_acpi()? Thats a fairly trivial change AFAIK if the
decision is based on the lack of a cache_id or just changing the
this_leaf->type = CACHE_TYPE_UNIFIED assignment to the correct type and
assuring left over CACHE_TYPE_NOCACHE entries are removed. I think much
of the "significant work" is likely fixed for that to work. Just
tweaking detect_cache_level()/get_cache_type() to set
CACHE_TYPE_SEPERATE if the level is less than the acpi_get_cache_info()
split_level value probably also does the majority of what you need
outside of having unequal counts of I and D caches.

There are probably other choices as well, thoughts?


Thanks,







>
> Signed-off-by: Yunhui Cui <[email protected]>
> ---
> drivers/acpi/pptt.c | 135 ++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 124 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
> index a35dd0e41c27..6c54fc8e3039 100644
> --- a/drivers/acpi/pptt.c
> +++ b/drivers/acpi/pptt.c
> @@ -21,6 +21,9 @@
> #include <linux/cacheinfo.h>
> #include <acpi/processor.h>
>
> +void acpi_fill_cacheinfo(struct acpi_pptt_cache *cache, struct acpi_table_header *table,
> + int cpu, int level, int *index);
> +
> static struct acpi_subtable_header *fetch_pptt_subtable(struct acpi_table_header *table_hdr,
> u32 pptt_ref)
> {
> @@ -77,6 +80,18 @@ static inline bool acpi_pptt_match_type(int table_type, int type)
> table_type & ACPI_PPTT_CACHE_TYPE_UNIFIED & type);
> }
>
> +static inline u32 get_cache_id(struct acpi_pptt_cache *cache)
> +{
> + struct acpi_pptt_cache_v1 *cache_v1;
> +
> + if (cache->flags & ACPI_PPTT_CACHE_ID_VALID) {
> + cache_v1 = ACPI_ADD_PTR(struct acpi_pptt_cache_v1,
> + cache, sizeof(struct acpi_pptt_cache));
> + return cache_v1->cache_id;
> + }
> + return 0;
> +}
> +
> /**
> * acpi_pptt_walk_cache() - Attempt to find the requested acpi_pptt_cache
> * @table_hdr: Pointer to the head of the PPTT table
> @@ -104,7 +119,7 @@ static unsigned int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
> unsigned int *split_levels,
> struct acpi_subtable_header *res,
> struct acpi_pptt_cache **found,
> - unsigned int level, int type)
> + unsigned int level, int type, int cpu, int *index)
> {
> struct acpi_pptt_cache *cache;
>
> @@ -125,7 +140,7 @@ static unsigned int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
> acpi_pptt_match_type(cache->attributes, ACPI_PPTT_CACHE_TYPE_INSTR)))
> *split_levels = local_level;
>
> - if (local_level == level &&
> + if (level && local_level == level &&
> acpi_pptt_match_type(cache->attributes, type)) {
> if (*found != NULL && cache != *found)
> pr_warn("Found duplicate cache level/type unable to determine uniqueness\n");
> @@ -137,7 +152,9 @@ static unsigned int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
> * to verify that we don't find a duplicate
> * cache node.
> */
> - }
> + } else
> + acpi_fill_cacheinfo(cache, table_hdr, cpu, local_level, index);
> +
> cache = fetch_pptt_cache(table_hdr, cache->next_level_of_cache);
> }
> return local_level;
> @@ -147,7 +164,7 @@ static struct acpi_pptt_cache *
> acpi_find_cache_level(struct acpi_table_header *table_hdr,
> struct acpi_pptt_processor *cpu_node,
> unsigned int *starting_level, unsigned int *split_levels,
> - unsigned int level, int type)
> + unsigned int level, int type, int cpu, int *index)
> {
> struct acpi_subtable_header *res;
> unsigned int number_of_levels = *starting_level;
> @@ -161,7 +178,8 @@ acpi_find_cache_level(struct acpi_table_header *table_hdr,
>
> local_level = acpi_pptt_walk_cache(table_hdr, *starting_level,
> split_levels, res, &ret,
> - level, type);
> + level, type, cpu, index);
> +
> /*
> * we are looking for the max depth. Since its potentially
> * possible for a given node to have resources with differing
> @@ -197,7 +215,7 @@ static void acpi_count_levels(struct acpi_table_header *table_hdr,
> unsigned int *levels, unsigned int *split_levels)
> {
> do {
> - acpi_find_cache_level(table_hdr, cpu_node, levels, split_levels, 0, 0);
> + acpi_find_cache_level(table_hdr, cpu_node, levels, split_levels, 0, 0, 0, NULL);
> cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
> } while (cpu_node);
> }
> @@ -316,6 +334,7 @@ static u8 acpi_cache_type(enum cache_type type)
> }
>
> static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *table_hdr,
> + int cpu,
> u32 acpi_cpu_id,
> enum cache_type type,
> unsigned int level,
> @@ -325,6 +344,7 @@ static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *ta
> struct acpi_pptt_cache *found = NULL;
> struct acpi_pptt_processor *cpu_node;
> u8 acpi_type = acpi_cache_type(type);
> + int index = 0;
>
> pr_debug("Looking for CPU %d's level %u cache type %d\n",
> acpi_cpu_id, level, acpi_type);
> @@ -333,7 +353,7 @@ static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *ta
>
> while (cpu_node && !found) {
> found = acpi_find_cache_level(table_hdr, cpu_node,
> - &total_levels, NULL, level, acpi_type);
> + &total_levels, NULL, level, acpi_type, cpu, &index);
> *node = cpu_node;
> cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
> }
> @@ -406,8 +426,14 @@ static void update_cache_properties(struct cacheinfo *this_leaf,
> * specified in PPTT.
> */
> if (this_leaf->type == CACHE_TYPE_NOCACHE &&
> - found_cache->flags & ACPI_PPTT_CACHE_TYPE_VALID)
> - this_leaf->type = CACHE_TYPE_UNIFIED;
> + found_cache->flags & ACPI_PPTT_CACHE_TYPE_VALID) {
> + if (acpi_pptt_match_type(found_cache->attributes, ACPI_PPTT_CACHE_TYPE_DATA))
> + this_leaf->type = CACHE_TYPE_DATA;
> + if (acpi_pptt_match_type(found_cache->attributes, ACPI_PPTT_CACHE_TYPE_INSTR))
> + this_leaf->type = CACHE_TYPE_INST;
> + if (acpi_pptt_match_type(found_cache->attributes, ACPI_PPTT_CACHE_TYPE_UNIFIED))
> + this_leaf->type = CACHE_TYPE_UNIFIED;
> + }
>
> if (revision >= 3 && (found_cache->flags & ACPI_PPTT_CACHE_ID_VALID)) {
> found_cache_v1 = ACPI_ADD_PTR(struct acpi_pptt_cache_v1,
> @@ -417,19 +443,106 @@ static void update_cache_properties(struct cacheinfo *this_leaf,
> }
> }
>
> +static bool cache_is_filled_id(struct acpi_pptt_cache *cache, int cpu)
> +{
> + u32 id = get_cache_id(cache);
> + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> + struct cacheinfo *this_leaf;
> + int index = 0;
> +
> + while (index < get_cpu_cacheinfo(cpu)->num_leaves) {
> + this_leaf = this_cpu_ci->info_list + index;
> + if (this_leaf->id == id)
> + return true;
> + index++;
> + }
> + return false;
> +}
> +
> +static bool cache_is_filled_content(struct acpi_pptt_cache *cache,
> + struct acpi_table_header *table,
> + int cpu, int level, u8 revision)
> +{
> + struct acpi_pptt_processor *cpu_node;
> + struct cacheinfo *this_leaf, *tleaf;
> + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> + struct cacheinfo tmp_leaf = {0};
> + int index = 0;
> +
> + cpu_node = acpi_find_processor_node(table, get_acpi_id_for_cpu(cpu));
> + tleaf = &tmp_leaf;
> + tleaf->level = level;
> +
> + while (index < get_cpu_cacheinfo(cpu)->num_leaves) {
> + this_leaf = this_cpu_ci->info_list + index;
> + update_cache_properties(tleaf, cache,
> + ACPI_TO_POINTER(ACPI_PTR_DIFF(cpu_node, table)),
> + revision);
> + if (!memcmp(this_leaf, tleaf, sizeof(struct cacheinfo)))
> + return true;
> + index++;
> + }
> + return false;
> +}
> +
> +static bool cache_is_filled(struct acpi_pptt_cache *cache, struct acpi_table_header *table,
> + int cpu, int level)
> +{
> + u8 revision = table->revision;
> +
> + /*
> + * If revision >= 3, compare the cacheid directly,
> + * otherwise compare the entire contents of the cache.
> + */
> + if (revision >= 3)
> + return cache_is_filled_id(cache, cpu);
> + else
> + return cache_is_filled_content(cache, table, cpu, level, revision);
> +}
> +
> +void acpi_fill_cacheinfo(struct acpi_pptt_cache *cache,
> + struct acpi_table_header *table,
> + int cpu, int level, int *index)
> +{
> + struct cacheinfo *this_leaf;
> + struct acpi_pptt_processor *cpu_node;
> + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> +
> + if (!index)
> + return;
> +
> + cpu_node = acpi_find_processor_node(table, get_acpi_id_for_cpu(cpu));
> + this_leaf = this_cpu_ci->info_list + *index;
> + if (this_leaf) {
> + this_leaf->level = level;
> + if (cache_is_filled(cache, table, cpu, level))
> + return;
> + update_cache_properties(this_leaf, cache,
> + ACPI_TO_POINTER(ACPI_PTR_DIFF(cpu_node,
> + table)),
> + table->revision);
> + *index += 1;
> + }
> +}
> +
> static void cache_setup_acpi_cpu(struct acpi_table_header *table,
> unsigned int cpu)
> {
> struct acpi_pptt_cache *found_cache;
> struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
> - struct cacheinfo *this_leaf;
> + struct cacheinfo *this_leaf = this_cpu_ci->info_list;
> unsigned int index = 0;
> struct acpi_pptt_processor *cpu_node = NULL;
>
> + if (!this_leaf->type && !this_leaf->level) {
> + acpi_find_cache_node(table, acpi_cpu_id, cpu, 0, 0, &cpu_node);
> + return;
> + }
> +
> while (index < get_cpu_cacheinfo(cpu)->num_leaves) {
> this_leaf = this_cpu_ci->info_list + index;
> - found_cache = acpi_find_cache_node(table, acpi_cpu_id,
> + found_cache = acpi_find_cache_node(table, acpi_cpu_id, cpu,
> this_leaf->type,
> this_leaf->level,
> &cpu_node);


2024-04-11 02:28:56

by yunhui cui

[permalink] [raw]
Subject: Re: [External] Re: [PATCH 1/2] ACPI: PPTT: Populate cacheinfo entirely with PPTT

Hi Jeremy,

On Wed, Apr 10, 2024 at 9:30 AM Jeremy Linton <[email protected]> wrote:
>
> Hi,
>
>
> First thanks for working on this.
>
> On 4/7/24 07:38, Yunhui Cui wrote:
> > When the type and level information of this_leaf cannot be obtained
> > from arch, cacheinfo is completely filled in with the content of PPTT.
>
> I started reviewing this, based on what I understood to be the need to
> generate the topology entirely from the PPTT. But, it was raising more
> questions than answers because the PPTT is far too flexable in its
> ability to represent cache hierachies that arn't logically useful. For
> example multiple I or D caches at the same level, or I or D caches
> higher in the topology than unified ones.
>
> At least for arm64 (and I think others) there is an understood
> simplification that there will be N levels of split I/D caches and M
> unified levels. And from that, the number of cache leaves are computed
> and allocated, and then we go in and largly skip PPTT cache nodes which
> don't make sense in view of a generic topology like that. (see the
> comment in cacheinfo.c:506)
>
> Both of those pieces of information are available in
> acpi_get_cache_info(). The missing part is marking those N levels of I/D
> cache as such.
>
> Looking at this code I don't really see all the error/allocation
> logic/etc that assures the cache leaf indexing is allocated correctly
> which worries me, although admidditly I could be missing something
> important.
>
> In summary, did you consider just allocating matching I/D caches from
> the number of split levels in acpi_get_cache_info() then removing or
> invalidating the ones that don't have matching PPTT entries after
> running cache_setup_acpi()? Thats a fairly trivial change AFAIK if the
> decision is based on the lack of a cache_id or just changing the
> this_leaf->type = CACHE_TYPE_UNIFIED assignment to the correct type and
> assuring left over CACHE_TYPE_NOCACHE entries are removed. I think much
> of the "significant work" is likely fixed for that to work. Just
> tweaking detect_cache_level()/get_cache_type() to set
> CACHE_TYPE_SEPERATE if the level is less than the acpi_get_cache_info()
> split_level value probably also does the majority of what you need
> outside of having unequal counts of I and D caches.
>
> There are probably other choices as well, thoughts?
>

First, I think the current state of the ACPI PPTT specification meets
the requirements and is logically complete. Otherwise, the PPTT
specification needs to be updated, right?
Our discussion is best focused on the existing and usual case, even on
ARM64, which is as you say "N-level separated I/D cache, M-level
unified".

And then, the problem we have now is that the RISC-V architecture does
not have a set of registers to describe the cache level and type like
ARM64 does, so we need to fully trust the contents of the PPTT table.
Please check the patch:
https://patchwork.kernel.org/project/linux-riscv/patch/[email protected]/
.

Judging from this requirement, can you help review this patch? Thanks.


Thanks,
Yunhui

2024-04-11 09:30:22

by Sudeep Holla

[permalink] [raw]
Subject: Re: [PATCH 1/2] ACPI: PPTT: Populate cacheinfo entirely with PPTT

On Sun, Apr 07, 2024 at 08:38:28PM +0800, Yunhui Cui wrote:
> When the type and level information of this_leaf cannot be obtained
> from arch, cacheinfo is completely filled in with the content of PPTT.
>

Which platform is this ? Is this arm64 based or something else like RISC-V
based ? That would help to understand some context here.

--
Regards,
Sudeep

2024-04-12 02:10:27

by yunhui cui

[permalink] [raw]
Subject: Re: [External] Re: [PATCH 1/2] ACPI: PPTT: Populate cacheinfo entirely with PPTT

Hi Sudeep,

On Thu, Apr 11, 2024 at 5:30 PM Sudeep Holla <[email protected]> wrote:
>
> On Sun, Apr 07, 2024 at 08:38:28PM +0800, Yunhui Cui wrote:
> > When the type and level information of this_leaf cannot be obtained
> > from arch, cacheinfo is completely filled in with the content of PPTT.
> >
>
> Which platform is this ? Is this arm64 based or something else like RISC-V
> based ? That would help to understand some context here.
>

Thanks for your response, RISC-V currently has this requirement.
Please see: https://patchwork.kernel.org/project/linux-riscv/patch/[email protected]/

Thanks,
Yunhui

2024-04-12 09:29:15

by Sudeep Holla

[permalink] [raw]
Subject: Re: [External] Re: [PATCH 1/2] ACPI: PPTT: Populate cacheinfo entirely with PPTT

On Fri, Apr 12, 2024 at 10:10:05AM +0800, yunhui cui wrote:
> Hi Sudeep,
>
> On Thu, Apr 11, 2024 at 5:30 PM Sudeep Holla <[email protected]> wrote:
> >
> > On Sun, Apr 07, 2024 at 08:38:28PM +0800, Yunhui Cui wrote:
> > > When the type and level information of this_leaf cannot be obtained
> > > from arch, cacheinfo is completely filled in with the content of PPTT.
> > >
> >
> > Which platform is this ? Is this arm64 based or something else like RISC-V
> > based ? That would help to understand some context here.
> >
>
> Thanks for your response, RISC-V currently has this requirement.
> Please see: https://patchwork.kernel.org/project/linux-riscv/patch/[email protected]/
>

It would be helpful for the review if you have such information in the
commit message.

--
Regards,
Sudeep

2024-04-12 09:42:44

by yunhui cui

[permalink] [raw]
Subject: Re: [External] Re: [PATCH 1/2] ACPI: PPTT: Populate cacheinfo entirely with PPTT

Hi Sudeep,

On Fri, Apr 12, 2024 at 5:29 PM Sudeep Holla <[email protected]> wrote:
>
> On Fri, Apr 12, 2024 at 10:10:05AM +0800, yunhui cui wrote:
> > Hi Sudeep,
> >
> > On Thu, Apr 11, 2024 at 5:30 PM Sudeep Holla <[email protected]> wrote:
> > >
> > > On Sun, Apr 07, 2024 at 08:38:28PM +0800, Yunhui Cui wrote:
> > > > When the type and level information of this_leaf cannot be obtained
> > > > from arch, cacheinfo is completely filled in with the content of PPTT.
> > > >
> > >
> > > Which platform is this ? Is this arm64 based or something else like RISC-V
> > > based ? That would help to understand some context here.
> > >
> >
> > Thanks for your response, RISC-V currently has this requirement.
> > Please see: https://patchwork.kernel.org/project/linux-riscv/patch/[email protected]/
> >
>
> It would be helpful for the review if you have such information in the
> commit message.

Okay, I will update it in the commit message in v2. Do you have some
comments on the content of the patch?

Thanks,
Yunhui

2024-04-12 09:49:04

by Sudeep Holla

[permalink] [raw]
Subject: Re: [External] Re: [PATCH 1/2] ACPI: PPTT: Populate cacheinfo entirely with PPTT

On Fri, Apr 12, 2024 at 05:42:22PM +0800, yunhui cui wrote:
> Hi Sudeep,
>
> On Fri, Apr 12, 2024 at 5:29 PM Sudeep Holla <[email protected]> wrote:
> >
> > On Fri, Apr 12, 2024 at 10:10:05AM +0800, yunhui cui wrote:
> > > Hi Sudeep,
> > >
> > > On Thu, Apr 11, 2024 at 5:30 PM Sudeep Holla <[email protected]> wrote:
> > > >
> > > > On Sun, Apr 07, 2024 at 08:38:28PM +0800, Yunhui Cui wrote:
> > > > > When the type and level information of this_leaf cannot be obtained
> > > > > from arch, cacheinfo is completely filled in with the content of PPTT.
> > > > >
> > > >
> > > > Which platform is this ? Is this arm64 based or something else like RISC-V
> > > > based ? That would help to understand some context here.
> > > >
> > >
> > > Thanks for your response, RISC-V currently has this requirement.
> > > Please see: https://patchwork.kernel.org/project/linux-riscv/patch/[email protected]/
> > >
> >
> > It would be helpful for the review if you have such information in the
> > commit message.
>
> Okay, I will update it in the commit message in v2. Do you have some
> comments on the content of the patch?
>

I will take a look at the patch later today or early next week now that
I understand the requirement better. Sorry for the delay.

--
Regards,
Sudeep

2024-04-12 15:04:01

by Jeremy Linton

[permalink] [raw]
Subject: Re: [External] Re: [PATCH 1/2] ACPI: PPTT: Populate cacheinfo entirely with PPTT

Hi,


On 4/10/24 21:28, yunhui cui wrote:
> Hi Jeremy,
>
> On Wed, Apr 10, 2024 at 9:30 AM Jeremy Linton <[email protected]> wrote:
>>
>> Hi,
>>
>>
>> First thanks for working on this.
>>
>> On 4/7/24 07:38, Yunhui Cui wrote:
>>> When the type and level information of this_leaf cannot be obtained
>>> from arch, cacheinfo is completely filled in with the content of PPTT.
>>
>> I started reviewing this, based on what I understood to be the need to
>> generate the topology entirely from the PPTT. But, it was raising more
>> questions than answers because the PPTT is far too flexable in its
>> ability to represent cache hierachies that arn't logically useful. For
>> example multiple I or D caches at the same level, or I or D caches
>> higher in the topology than unified ones.
>>
>> At least for arm64 (and I think others) there is an understood
>> simplification that there will be N levels of split I/D caches and M
>> unified levels. And from that, the number of cache leaves are computed
>> and allocated, and then we go in and largly skip PPTT cache nodes which
>> don't make sense in view of a generic topology like that. (see the
>> comment in cacheinfo.c:506)
>>
>> Both of those pieces of information are available in
>> acpi_get_cache_info(). The missing part is marking those N levels of I/D
>> cache as such.
>>
>> Looking at this code I don't really see all the error/allocation
>> logic/etc that assures the cache leaf indexing is allocated correctly
>> which worries me, although admidditly I could be missing something
>> important.
>>
>> In summary, did you consider just allocating matching I/D caches from
>> the number of split levels in acpi_get_cache_info() then removing or
>> invalidating the ones that don't have matching PPTT entries after
>> running cache_setup_acpi()? Thats a fairly trivial change AFAIK if the
>> decision is based on the lack of a cache_id or just changing the
>> this_leaf->type = CACHE_TYPE_UNIFIED assignment to the correct type and
>> assuring left over CACHE_TYPE_NOCACHE entries are removed. I think much
>> of the "significant work" is likely fixed for that to work. Just
>> tweaking detect_cache_level()/get_cache_type() to set
>> CACHE_TYPE_SEPERATE if the level is less than the acpi_get_cache_info()
>> split_level value probably also does the majority of what you need
>> outside of having unequal counts of I and D caches.
>>
>> There are probably other choices as well, thoughts?
>>
>
> First, I think the current state of the ACPI PPTT specification meets
> the requirements and is logically complete. Otherwise, the PPTT
> specification needs to be updated, right?

The specification is capable of representing all the actual physical
topologies I'm aware of, its also capable of representing quite a number
of topologies that don't make sense to (cant be) build and are largely
nonsense. There are some further details around replacement and
allocation which could be represented but were AFAIK not considered
worth the effort because it makes the representation far more complex.

So, except for minor clarifications I'm not aware of a need to update it.

> Our discussion is best focused on the existing and usual case, even on
> ARM64, which is as you say "N-level separated I/D cache, M-level
> unified".

I understood this was for RISC-V when I reviewed it, but i'm fairly
certain the general cache topologies of RISC-V machines don't vary in
their core concepts and layout much from the wide variety of
possibilities available elsewhere. So its roughly the same problem, you
will likely have one or two layers of split I/D caches and then a number
of unified private or shared caches above that.


>
> And then, the problem we have now is that the RISC-V architecture does
> not have a set of registers to describe the cache level and type like
> ARM64 does, so we need to fully trust the contents of the PPTT table.
> Please check the patch:
> https://patchwork.kernel.org/project/linux-riscv/patch/[email protected]/

I don't think "fully trust" is something you really want, or is helpful
as I suggested above.

I suspect all that is needed is the construction of the missing I/D
layers, as the existing code will create the Unified layers above as it
does today. On Arm64 the unified system level caches aren't usually
described by the core cache registers either, which is why there is that
existing bit of code to change unknown caches to unified. But it also
turns out the pptt walking code is capable of providing the information
to create the I/D layers as well, if they are matched (ex there exists
an I cache for every D cache at a certain level).

So, more clarity about the kinds of topologies you expect is helpful to
understand where the current code is failing.

AKA, As I mentioned I'm fairly certain that its possible to construct
the entire cache topology from the PPTT by changing get_cache_type() to
return "CACHE_TYPE_SEPERATE" when the level is less than the value
returned as "split_level" from acpi_get_cache_info(). That tends to
ignore some edge cases, but I might put them in the "don't to that" bucket.


> .
>
> Judging from this requirement, can you help review this patch? Thanks.

If so, can you describe the edge case you care about that isn't solved
with a change like that?

>
>
> Thanks,
> Yunhui


2024-04-12 16:08:15

by Sudeep Holla

[permalink] [raw]
Subject: Re: [External] Re: [PATCH 1/2] ACPI: PPTT: Populate cacheinfo entirely with PPTT

On Fri, Apr 12, 2024 at 10:03:14AM -0500, Jeremy Linton wrote:
> Hi,
> On 4/10/24 21:28, yunhui cui wrote:
> > Hi Jeremy,
> >
> > On Wed, Apr 10, 2024 at 9:30 AM Jeremy Linton <[email protected]> wrote:
> > >
> > > Hi,
> > > First thanks for working on this.
> > >
> > > On 4/7/24 07:38, Yunhui Cui wrote:
> > > > When the type and level information of this_leaf cannot be obtained
> > > > from arch, cacheinfo is completely filled in with the content of PPTT.
> > >
> > > I started reviewing this, based on what I understood to be the need to
> > > generate the topology entirely from the PPTT. But, it was raising more
> > > questions than answers because the PPTT is far too flexable in its
> > > ability to represent cache hierachies that arn't logically useful. For
> > > example multiple I or D caches at the same level, or I or D caches
> > > higher in the topology than unified ones.
> > >
> > > At least for arm64 (and I think others) there is an understood
> > > simplification that there will be N levels of split I/D caches and M
> > > unified levels. And from that, the number of cache leaves are computed
> > > and allocated, and then we go in and largly skip PPTT cache nodes which
> > > don't make sense in view of a generic topology like that. (see the
> > > comment in cacheinfo.c:506)
> > >
> > > Both of those pieces of information are available in
> > > acpi_get_cache_info(). The missing part is marking those N levels of I/D
> > > cache as such.
> > >
> > > Looking at this code I don't really see all the error/allocation
> > > logic/etc that assures the cache leaf indexing is allocated correctly
> > > which worries me, although admidditly I could be missing something
> > > important.
> > >
> > > In summary, did you consider just allocating matching I/D caches from
> > > the number of split levels in acpi_get_cache_info() then removing or
> > > invalidating the ones that don't have matching PPTT entries after
> > > running cache_setup_acpi()? Thats a fairly trivial change AFAIK if the
> > > decision is based on the lack of a cache_id or just changing the
> > > this_leaf->type = CACHE_TYPE_UNIFIED assignment to the correct type and
> > > assuring left over CACHE_TYPE_NOCACHE entries are removed. I think much
> > > of the "significant work" is likely fixed for that to work. Just
> > > tweaking detect_cache_level()/get_cache_type() to set
> > > CACHE_TYPE_SEPERATE if the level is less than the acpi_get_cache_info()
> > > split_level value probably also does the majority of what you need
> > > outside of having unequal counts of I and D caches.
> > >
> > > There are probably other choices as well, thoughts?
> > >
> >
> > First, I think the current state of the ACPI PPTT specification meets
> > the requirements and is logically complete. Otherwise, the PPTT
> > specification needs to be updated, right?
>
> The specification is capable of representing all the actual physical
> topologies I'm aware of, its also capable of representing quite a number of
> topologies that don't make sense to (cant be) build and are largely
> nonsense. There are some further details around replacement and allocation
> which could be represented but were AFAIK not considered worth the effort
> because it makes the representation far more complex.
>
> So, except for minor clarifications I'm not aware of a need to update it.
>
> > Our discussion is best focused on the existing and usual case, even on
> > ARM64, which is as you say "N-level separated I/D cache, M-level
> > unified".
>
> I understood this was for RISC-V when I reviewed it, but i'm fairly certain
> the general cache topologies of RISC-V machines don't vary in their core
> concepts and layout much from the wide variety of possibilities available
> elsewhere. So its roughly the same problem, you will likely have one or two
> layers of split I/D caches and then a number of unified private or shared
> caches above that.
>
>
> >
> > And then, the problem we have now is that the RISC-V architecture does
> > not have a set of registers to describe the cache level and type like
> > ARM64 does, so we need to fully trust the contents of the PPTT table.
> > Please check the patch:
> > https://patchwork.kernel.org/project/linux-riscv/patch/[email protected]/
>
> I don't think "fully trust" is something you really want, or is helpful as I
> suggested above.
>
> I suspect all that is needed is the construction of the missing I/D layers,
> as the existing code will create the Unified layers above as it does today.
> On Arm64 the unified system level caches aren't usually described by the
> core cache registers either, which is why there is that existing bit of code
> to change unknown caches to unified. But it also turns out the pptt walking
> code is capable of providing the information to create the I/D layers as
> well, if they are matched (ex there exists an I cache for every D cache at a
> certain level).
>
> So, more clarity about the kinds of topologies you expect is helpful to
> understand where the current code is failing.
>
> AKA, As I mentioned I'm fairly certain that its possible to construct the
> entire cache topology from the PPTT by changing get_cache_type() to return
> "CACHE_TYPE_SEPERATE" when the level is less than the value returned as
> "split_level" from acpi_get_cache_info(). That tends to ignore some edge
> cases, but I might put them in the "don't to that" bucket.
>
>

I agree with Jeremy's analysis. Can you check if something like below
will address your issue. Not compile tested but you get an idea of what
we are trying to do here.

Regards,
Sudeep

-->8
diff --git i/arch/riscv/kernel/cacheinfo.c w/arch/riscv/kernel/cacheinfo.c
index 09e9b88110d1..92ab73ed5234 100644
--- i/arch/riscv/kernel/cacheinfo.c
+++ w/arch/riscv/kernel/cacheinfo.c
@@ -79,6 +79,27 @@ int populate_cache_leaves(unsigned int cpu)
struct device_node *prev = NULL;
int levels = 1, level = 1;

+ if (!acpi_disabled) {
+ int ret, fw_levels, split_levels;
+
+ ret = acpi_get_cache_info(cpu, &fw_levels, &split_levels);
+ if (ret)
+ return ret;
+
+ /* must be set, so we can drop num_leaves assignment below */
+ this_cpu_ci->num_leaves = fw_levels + split_levels;
+
+ for (idx = 0; level <= this_cpu_ci->num_levels &&
+ idx < this_cpu_ci->num_leaves; idx++, level++) {
+ if (level <= split_levels) {
+ ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level);
+ ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level);
+ } else {
+ ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level);
+ }
+ }
+ }
+
if (of_property_read_bool(np, "cache-size"))
ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level);
if (of_property_read_bool(np, "i-cache-size"))


2024-04-12 16:28:33

by Sudeep Holla

[permalink] [raw]
Subject: Re: [External] Re: [PATCH 1/2] ACPI: PPTT: Populate cacheinfo entirely with PPTT

On Fri, Apr 12, 2024 at 05:08:01PM +0100, Sudeep Holla wrote:
> diff --git i/arch/riscv/kernel/cacheinfo.c w/arch/riscv/kernel/cacheinfo.c
> index 09e9b88110d1..92ab73ed5234 100644
> --- i/arch/riscv/kernel/cacheinfo.c
> +++ w/arch/riscv/kernel/cacheinfo.c
> @@ -79,6 +79,27 @@ int populate_cache_leaves(unsigned int cpu)
> struct device_node *prev = NULL;
> int levels = 1, level = 1;
>
> + if (!acpi_disabled) {
> + int ret, fw_levels, split_levels;
> +
> + ret = acpi_get_cache_info(cpu, &fw_levels, &split_levels);
> + if (ret)
> + return ret;
> +
> + /* must be set, so we can drop num_leaves assignment below */
> + this_cpu_ci->num_leaves = fw_levels + split_levels;
> +
> + for (idx = 0; level <= this_cpu_ci->num_levels &&
> + idx < this_cpu_ci->num_leaves; idx++, level++) {
> + if (level <= split_levels) {
> + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level);
> + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level);
> + } else {
> + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level);
> + }
> + }

Ofcourse we need to add here,
return 0;

> + }
> +
> if (of_property_read_bool(np, "cache-size"))
> ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level);
> if (of_property_read_bool(np, "i-cache-size"))
>

--
Regards,
Sudeep

2024-04-14 02:40:56

by yunhui cui

[permalink] [raw]
Subject: Re: [External] Re: [PATCH 1/2] ACPI: PPTT: Populate cacheinfo entirely with PPTT

Hi Sudeep,Jeremy,

On Sat, Apr 13, 2024 at 12:27 AM Sudeep Holla <[email protected]> wrote:
>
> On Fri, Apr 12, 2024 at 05:08:01PM +0100, Sudeep Holla wrote:
> > diff --git i/arch/riscv/kernel/cacheinfo.c w/arch/riscv/kernel/cacheinfo.c
> > index 09e9b88110d1..92ab73ed5234 100644
> > --- i/arch/riscv/kernel/cacheinfo.c
> > +++ w/arch/riscv/kernel/cacheinfo.c
> > @@ -79,6 +79,27 @@ int populate_cache_leaves(unsigned int cpu)
> > struct device_node *prev = NULL;
> > int levels = 1, level = 1;
> >
> > + if (!acpi_disabled) {
> > + int ret, fw_levels, split_levels;
> > +
> > + ret = acpi_get_cache_info(cpu, &fw_levels, &split_levels);
> > + if (ret)
> > + return ret;
> > +
> > + /* must be set, so we can drop num_leaves assignment below */
> > + this_cpu_ci->num_leaves = fw_levels + split_levels;
> > +
> > + for (idx = 0; level <= this_cpu_ci->num_levels &&
> > + idx < this_cpu_ci->num_leaves; idx++, level++) {
> > + if (level <= split_levels) {
> > + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level);
> > + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level);
> > + } else {
> > + ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level);
> > + }
> > + }
>
> Ofcourse we need to add here,
> return 0;
>
> > + }
> > +
> > if (of_property_read_bool(np, "cache-size"))
> > ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level);
> > if (of_property_read_bool(np, "i-cache-size"))
> >
>
> --
> Regards,
> Sudeep

With this modification, I tested that I can obtain cacheinfo
correctly. If RISC-V later adds a new register describing the cache,
then we can continue to improve it in populate_cache_leaves().
Thank you for your comments and I will add you as Suggested-by in v2.

Thanks,
Yunhui