2023-07-21 02:03:54

by Huang, Ying

[permalink] [raw]
Subject: [PATCH RESEND 2/4] acpi, hmat: refactor hmat_register_target_initiators()

Previously, hmat_register_target_initiators(), which is called during
memory onlining, both calculated the performance attributes and created
the corresponding sysfs links and files.

But now, to calculate the abstract distance of a memory target before
memory onlining, we need to calculate the performance attributes for
a memory target without creating sysfs links and files.

To do that, hmat_register_target_initiators() is refactored to make it
possible to calculate performance attributes separately.

Signed-off-by: "Huang, Ying" <[email protected]>
Cc: Aneesh Kumar K.V <[email protected]>
Cc: Wei Xu <[email protected]>
Cc: Alistair Popple <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Davidlohr Bueso <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Jonathan Cameron <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Yang Shi <[email protected]>
Cc: Rafael J Wysocki <[email protected]>
---
drivers/acpi/numa/hmat.c | 81 +++++++++++++++-------------------------
1 file changed, 30 insertions(+), 51 deletions(-)

diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
index bba268ecd802..2dee0098f1a9 100644
--- a/drivers/acpi/numa/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -582,28 +582,25 @@ static int initiators_to_nodemask(unsigned long *p_nodes)
return 0;
}

-static void hmat_register_target_initiators(struct memory_target *target)
+static void hmat_update_target_attrs(struct memory_target *target,
+ unsigned long *p_nodes, int access)
{
- static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
struct memory_initiator *initiator;
- unsigned int mem_nid, cpu_nid;
+ unsigned int cpu_nid;
struct memory_locality *loc = NULL;
u32 best = 0;
- bool access0done = false;
int i;

- mem_nid = pxm_to_node(target->memory_pxm);
+ bitmap_zero(p_nodes, MAX_NUMNODES);
/*
- * If the Address Range Structure provides a local processor pxm, link
+ * If the Address Range Structure provides a local processor pxm, set
* only that one. Otherwise, find the best performance attributes and
- * register all initiators that match.
+ * collect all initiators that match.
*/
if (target->processor_pxm != PXM_INVAL) {
cpu_nid = pxm_to_node(target->processor_pxm);
- register_memory_node_under_compute_node(mem_nid, cpu_nid, 0);
- access0done = true;
- if (node_state(cpu_nid, N_CPU)) {
- register_memory_node_under_compute_node(mem_nid, cpu_nid, 1);
+ if (access == 0 || node_state(cpu_nid, N_CPU)) {
+ set_bit(target->processor_pxm, p_nodes);
return;
}
}
@@ -617,47 +614,10 @@ static void hmat_register_target_initiators(struct memory_target *target)
* We'll also use the sorting to prime the candidate nodes with known
* initiators.
*/
- bitmap_zero(p_nodes, MAX_NUMNODES);
list_sort(NULL, &initiators, initiator_cmp);
if (initiators_to_nodemask(p_nodes) < 0)
return;

- if (!access0done) {
- for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
- loc = localities_types[i];
- if (!loc)
- continue;
-
- best = 0;
- list_for_each_entry(initiator, &initiators, node) {
- u32 value;
-
- if (!test_bit(initiator->processor_pxm, p_nodes))
- continue;
-
- value = hmat_initiator_perf(target, initiator,
- loc->hmat_loc);
- if (hmat_update_best(loc->hmat_loc->data_type, value, &best))
- bitmap_clear(p_nodes, 0, initiator->processor_pxm);
- if (value != best)
- clear_bit(initiator->processor_pxm, p_nodes);
- }
- if (best)
- hmat_update_target_access(target, loc->hmat_loc->data_type,
- best, 0);
- }
-
- for_each_set_bit(i, p_nodes, MAX_NUMNODES) {
- cpu_nid = pxm_to_node(i);
- register_memory_node_under_compute_node(mem_nid, cpu_nid, 0);
- }
- }
-
- /* Access 1 ignores Generic Initiators */
- bitmap_zero(p_nodes, MAX_NUMNODES);
- if (initiators_to_nodemask(p_nodes) < 0)
- return;
-
for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
loc = localities_types[i];
if (!loc)
@@ -667,7 +627,7 @@ static void hmat_register_target_initiators(struct memory_target *target)
list_for_each_entry(initiator, &initiators, node) {
u32 value;

- if (!initiator->has_cpu) {
+ if (access == 1 && !initiator->has_cpu) {
clear_bit(initiator->processor_pxm, p_nodes);
continue;
}
@@ -681,14 +641,33 @@ static void hmat_register_target_initiators(struct memory_target *target)
clear_bit(initiator->processor_pxm, p_nodes);
}
if (best)
- hmat_update_target_access(target, loc->hmat_loc->data_type, best, 1);
+ hmat_update_target_access(target, loc->hmat_loc->data_type, best, access);
}
+}
+
+static void __hmat_register_target_initiators(struct memory_target *target,
+ unsigned long *p_nodes,
+ int access)
+{
+ unsigned int mem_nid, cpu_nid;
+ int i;
+
+ mem_nid = pxm_to_node(target->memory_pxm);
+ hmat_update_target_attrs(target, p_nodes, access);
for_each_set_bit(i, p_nodes, MAX_NUMNODES) {
cpu_nid = pxm_to_node(i);
- register_memory_node_under_compute_node(mem_nid, cpu_nid, 1);
+ register_memory_node_under_compute_node(mem_nid, cpu_nid, access);
}
}

+static void hmat_register_target_initiators(struct memory_target *target)
+{
+ static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
+
+ __hmat_register_target_initiators(target, p_nodes, 0);
+ __hmat_register_target_initiators(target, p_nodes, 1);
+}
+
static void hmat_register_target_cache(struct memory_target *target)
{
unsigned mem_nid = pxm_to_node(target->memory_pxm);
--
2.39.2



2023-07-25 03:27:41

by Alistair Popple

[permalink] [raw]
Subject: Re: [PATCH RESEND 2/4] acpi, hmat: refactor hmat_register_target_initiators()


Huang Ying <[email protected]> writes:

> Previously, in hmat_register_target_initiators(), the performance
> attributes are calculated and the corresponding sysfs links and files
> are created too. Which is called during memory onlining.
>
> But now, to calculate the abstract distance of a memory target before
> memory onlining, we need to calculate the performance attributes for
> a memory target without creating sysfs links and files.
>
> To do that, hmat_register_target_initiators() is refactored to make it
> possible to calculate performance attributes separately.

The refactor looks good and I have run the whole series on a system with
some hmat data so:

Reviewed-by: Alistair Popple <[email protected]>
Tested-by: Alistair Popple <[email protected]>

> Signed-off-by: "Huang, Ying" <[email protected]>
> Cc: Aneesh Kumar K.V <[email protected]>
> Cc: Wei Xu <[email protected]>
> Cc: Alistair Popple <[email protected]>
> Cc: Dan Williams <[email protected]>
> Cc: Dave Hansen <[email protected]>
> Cc: Davidlohr Bueso <[email protected]>
> Cc: Johannes Weiner <[email protected]>
> Cc: Jonathan Cameron <[email protected]>
> Cc: Michal Hocko <[email protected]>
> Cc: Yang Shi <[email protected]>
> Cc: Rafael J Wysocki <[email protected]>
> ---
> drivers/acpi/numa/hmat.c | 81 +++++++++++++++-------------------------
> 1 file changed, 30 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
> index bba268ecd802..2dee0098f1a9 100644
> --- a/drivers/acpi/numa/hmat.c
> +++ b/drivers/acpi/numa/hmat.c
> @@ -582,28 +582,25 @@ static int initiators_to_nodemask(unsigned long *p_nodes)
> return 0;
> }
>
> -static void hmat_register_target_initiators(struct memory_target *target)
> +static void hmat_update_target_attrs(struct memory_target *target,
> + unsigned long *p_nodes, int access)
> {
> - static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
> struct memory_initiator *initiator;
> - unsigned int mem_nid, cpu_nid;
> + unsigned int cpu_nid;
> struct memory_locality *loc = NULL;
> u32 best = 0;
> - bool access0done = false;
> int i;
>
> - mem_nid = pxm_to_node(target->memory_pxm);
> + bitmap_zero(p_nodes, MAX_NUMNODES);
> /*
> - * If the Address Range Structure provides a local processor pxm, link
> + * If the Address Range Structure provides a local processor pxm, set
> * only that one. Otherwise, find the best performance attributes and
> - * register all initiators that match.
> + * collect all initiators that match.
> */
> if (target->processor_pxm != PXM_INVAL) {
> cpu_nid = pxm_to_node(target->processor_pxm);
> - register_memory_node_under_compute_node(mem_nid, cpu_nid, 0);
> - access0done = true;
> - if (node_state(cpu_nid, N_CPU)) {
> - register_memory_node_under_compute_node(mem_nid, cpu_nid, 1);
> + if (access == 0 || node_state(cpu_nid, N_CPU)) {
> + set_bit(target->processor_pxm, p_nodes);
> return;
> }
> }
> @@ -617,47 +614,10 @@ static void hmat_register_target_initiators(struct memory_target *target)
> * We'll also use the sorting to prime the candidate nodes with known
> * initiators.
> */
> - bitmap_zero(p_nodes, MAX_NUMNODES);
> list_sort(NULL, &initiators, initiator_cmp);
> if (initiators_to_nodemask(p_nodes) < 0)
> return;
>
> - if (!access0done) {
> - for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
> - loc = localities_types[i];
> - if (!loc)
> - continue;
> -
> - best = 0;
> - list_for_each_entry(initiator, &initiators, node) {
> - u32 value;
> -
> - if (!test_bit(initiator->processor_pxm, p_nodes))
> - continue;
> -
> - value = hmat_initiator_perf(target, initiator,
> - loc->hmat_loc);
> - if (hmat_update_best(loc->hmat_loc->data_type, value, &best))
> - bitmap_clear(p_nodes, 0, initiator->processor_pxm);
> - if (value != best)
> - clear_bit(initiator->processor_pxm, p_nodes);
> - }
> - if (best)
> - hmat_update_target_access(target, loc->hmat_loc->data_type,
> - best, 0);
> - }
> -
> - for_each_set_bit(i, p_nodes, MAX_NUMNODES) {
> - cpu_nid = pxm_to_node(i);
> - register_memory_node_under_compute_node(mem_nid, cpu_nid, 0);
> - }
> - }
> -
> - /* Access 1 ignores Generic Initiators */
> - bitmap_zero(p_nodes, MAX_NUMNODES);
> - if (initiators_to_nodemask(p_nodes) < 0)
> - return;
> -
> for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
> loc = localities_types[i];
> if (!loc)
> @@ -667,7 +627,7 @@ static void hmat_register_target_initiators(struct memory_target *target)
> list_for_each_entry(initiator, &initiators, node) {
> u32 value;
>
> - if (!initiator->has_cpu) {
> + if (access == 1 && !initiator->has_cpu) {
> clear_bit(initiator->processor_pxm, p_nodes);
> continue;
> }
> @@ -681,14 +641,33 @@ static void hmat_register_target_initiators(struct memory_target *target)
> clear_bit(initiator->processor_pxm, p_nodes);
> }
> if (best)
> - hmat_update_target_access(target, loc->hmat_loc->data_type, best, 1);
> + hmat_update_target_access(target, loc->hmat_loc->data_type, best, access);
> }
> +}
> +
> +static void __hmat_register_target_initiators(struct memory_target *target,
> + unsigned long *p_nodes,
> + int access)
> +{
> + unsigned int mem_nid, cpu_nid;
> + int i;
> +
> + mem_nid = pxm_to_node(target->memory_pxm);
> + hmat_update_target_attrs(target, p_nodes, access);
> for_each_set_bit(i, p_nodes, MAX_NUMNODES) {
> cpu_nid = pxm_to_node(i);
> - register_memory_node_under_compute_node(mem_nid, cpu_nid, 1);
> + register_memory_node_under_compute_node(mem_nid, cpu_nid, access);
> }
> }
>
> +static void hmat_register_target_initiators(struct memory_target *target)
> +{
> + static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
> +
> + __hmat_register_target_initiators(target, p_nodes, 0);
> + __hmat_register_target_initiators(target, p_nodes, 1);
> +}
> +
> static void hmat_register_target_cache(struct memory_target *target)
> {
> unsigned mem_nid = pxm_to_node(target->memory_pxm);


2023-08-11 02:38:17

by Huang, Ying

[permalink] [raw]
Subject: Re: [PATCH RESEND 2/4] acpi, hmat: refactor hmat_register_target_initiators()

Hi, Jonathan,

Thanks for the review!

Jonathan Cameron <[email protected]> writes:

> On Fri, 21 Jul 2023 09:29:30 +0800
> Huang Ying <[email protected]> wrote:
>
>> Previously, in hmat_register_target_initiators(), the performance
>> attributes are calculated and the corresponding sysfs links and files
>> are created too. Which is called during memory onlining.
>>
>> But now, to calculate the abstract distance of a memory target before
>> memory onlining, we need to calculate the performance attributes for
>> a memory target without creating sysfs links and files.
>>
>> To do that, hmat_register_target_initiators() is refactored to make it
>> possible to calculate performance attributes separately.
>>
>> Signed-off-by: "Huang, Ying" <[email protected]>
>> Cc: Aneesh Kumar K.V <[email protected]>
>> Cc: Wei Xu <[email protected]>
>> Cc: Alistair Popple <[email protected]>
>> Cc: Dan Williams <[email protected]>
>> Cc: Dave Hansen <[email protected]>
>> Cc: Davidlohr Bueso <[email protected]>
>> Cc: Johannes Weiner <[email protected]>
>> Cc: Jonathan Cameron <[email protected]>
>> Cc: Michal Hocko <[email protected]>
>> Cc: Yang Shi <[email protected]>
>> Cc: Rafael J Wysocki <[email protected]>
>
> Unfortunately I don't think I still have the tables I used to test the
> generic initiator and won't get time to generate them all again in
> next few weeks. So just a superficial review for now.
> I 'think' the cleanup looks good but the original code was rather fiddly
> so I'm not 100% sure nothing is missed.
>
> One comment inline on the fact the list is now sorted twice.
>
>
>> ---
>> drivers/acpi/numa/hmat.c | 81 +++++++++++++++-------------------------
>> 1 file changed, 30 insertions(+), 51 deletions(-)
>>
>> diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
>> index bba268ecd802..2dee0098f1a9 100644
>> --- a/drivers/acpi/numa/hmat.c
>> +++ b/drivers/acpi/numa/hmat.c
>> @@ -582,28 +582,25 @@ static int initiators_to_nodemask(unsigned long *p_nodes)
>> return 0;
>> }
>>
>> -static void hmat_register_target_initiators(struct memory_target *target)
>> +static void hmat_update_target_attrs(struct memory_target *target,
>> + unsigned long *p_nodes, int access)
>> {
>> - static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
>> struct memory_initiator *initiator;
>> - unsigned int mem_nid, cpu_nid;
>> + unsigned int cpu_nid;
>> struct memory_locality *loc = NULL;
>> u32 best = 0;
>> - bool access0done = false;
>> int i;
>>
>> - mem_nid = pxm_to_node(target->memory_pxm);
>> + bitmap_zero(p_nodes, MAX_NUMNODES);
>> /*
>> - * If the Address Range Structure provides a local processor pxm, link
>> + * If the Address Range Structure provides a local processor pxm, set
>> * only that one. Otherwise, find the best performance attributes and
>> - * register all initiators that match.
>> + * collect all initiators that match.
>> */
>> if (target->processor_pxm != PXM_INVAL) {
>> cpu_nid = pxm_to_node(target->processor_pxm);
>> - register_memory_node_under_compute_node(mem_nid, cpu_nid, 0);
>> - access0done = true;
>> - if (node_state(cpu_nid, N_CPU)) {
>> - register_memory_node_under_compute_node(mem_nid, cpu_nid, 1);
>> + if (access == 0 || node_state(cpu_nid, N_CPU)) {
>> + set_bit(target->processor_pxm, p_nodes);
>> return;
>> }
>> }
>> @@ -617,47 +614,10 @@ static void hmat_register_target_initiators(struct memory_target *target)
>> * We'll also use the sorting to prime the candidate nodes with known
>> * initiators.
>> */
>> - bitmap_zero(p_nodes, MAX_NUMNODES);
>> list_sort(NULL, &initiators, initiator_cmp);
>> if (initiators_to_nodemask(p_nodes) < 0)
>> return;
>
> One result of this refactor is that a few things run twice, that previously only ran once
> like this list_sort()
> Not necessarily a problem though as probably fairly cheap.

Yes. The original code sorts the list once for each target, but even
that appears to be unnecessary. We can sort the initiators list when
adding a new item to it in alloc_memory_initiator(). If necessary, I can
add an additional patch to do that. But as you said, that may be
unnecessary because the sort should be fairly cheap.

--
Best Regards,
Huang, Ying

>>
>> - if (!access0done) {
>> - for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
>> - loc = localities_types[i];
>> - if (!loc)
>> - continue;
>> -
>> - best = 0;
>> - list_for_each_entry(initiator, &initiators, node) {
>> - u32 value;
>> -
>> - if (!test_bit(initiator->processor_pxm, p_nodes))
>> - continue;
>> -
>> - value = hmat_initiator_perf(target, initiator,
>> - loc->hmat_loc);
>> - if (hmat_update_best(loc->hmat_loc->data_type, value, &best))
>> - bitmap_clear(p_nodes, 0, initiator->processor_pxm);
>> - if (value != best)
>> - clear_bit(initiator->processor_pxm, p_nodes);
>> - }
>> - if (best)
>> - hmat_update_target_access(target, loc->hmat_loc->data_type,
>> - best, 0);
>> - }
>> -
>> - for_each_set_bit(i, p_nodes, MAX_NUMNODES) {
>> - cpu_nid = pxm_to_node(i);
>> - register_memory_node_under_compute_node(mem_nid, cpu_nid, 0);
>> - }
>> - }
>> -
>> - /* Access 1 ignores Generic Initiators */
>> - bitmap_zero(p_nodes, MAX_NUMNODES);
>> - if (initiators_to_nodemask(p_nodes) < 0)
>> - return;
>> -
>> for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
>> loc = localities_types[i];
>> if (!loc)
>> @@ -667,7 +627,7 @@ static void hmat_register_target_initiators(struct memory_target *target)
>> list_for_each_entry(initiator, &initiators, node) {
>> u32 value;
>>
>> - if (!initiator->has_cpu) {
>> + if (access == 1 && !initiator->has_cpu) {
>> clear_bit(initiator->processor_pxm, p_nodes);
>> continue;
>> }
>> @@ -681,14 +641,33 @@ static void hmat_register_target_initiators(struct memory_target *target)
>> clear_bit(initiator->processor_pxm, p_nodes);
>> }
>> if (best)
>> - hmat_update_target_access(target, loc->hmat_loc->data_type, best, 1);
>> + hmat_update_target_access(target, loc->hmat_loc->data_type, best, access);
>> }
>> +}
>> +
>> +static void __hmat_register_target_initiators(struct memory_target *target,
>> + unsigned long *p_nodes,
>> + int access)
>> +{
>> + unsigned int mem_nid, cpu_nid;
>> + int i;
>> +
>> + mem_nid = pxm_to_node(target->memory_pxm);
>> + hmat_update_target_attrs(target, p_nodes, access);
>> for_each_set_bit(i, p_nodes, MAX_NUMNODES) {
>> cpu_nid = pxm_to_node(i);
>> - register_memory_node_under_compute_node(mem_nid, cpu_nid, 1);
>> + register_memory_node_under_compute_node(mem_nid, cpu_nid, access);
>> }
>> }
>>
>> +static void hmat_register_target_initiators(struct memory_target *target)
>> +{
>> + static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
>> +
>> + __hmat_register_target_initiators(target, p_nodes, 0);
>> + __hmat_register_target_initiators(target, p_nodes, 1);
>> +}
>> +
>> static void hmat_register_target_cache(struct memory_target *target)
>> {
>> unsigned mem_nid = pxm_to_node(target->memory_pxm);