2020-02-21 05:19:44

by Gautham R Shenoy

[permalink] [raw]
Subject: [PATCH v2 4/5] powerpc/sysfs: Show idle_purr and idle_spurr for every CPU

From: "Gautham R. Shenoy" <[email protected]>

On Pseries LPARs, to calculate utilization, we need to know the
[S]PURR ticks when the CPUs were busy or idle.

The total PURR and SPURR ticks are already exposed via the per-cpu
sysfs files "purr" and "spurr". This patch adds support for exposing
the idle PURR and SPURR ticks via new per-cpu sysfs files named
"idle_purr" and "idle_spurr".

Signed-off-by: Gautham R. Shenoy <[email protected]>
---
arch/powerpc/kernel/sysfs.c | 54 ++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 51 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 80a676d..5b4b450 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -19,6 +19,7 @@
#include <asm/smp.h>
#include <asm/pmc.h>
#include <asm/firmware.h>
+#include <asm/idle.h>
#include <asm/svm.h>

#include "cacheinfo.h"
@@ -733,6 +734,42 @@ static void create_svm_file(void)
}
#endif /* CONFIG_PPC_SVM */

+static void read_idle_purr(void *val)
+{
+ u64 *ret = (u64 *)val;
+
+ *ret = read_this_idle_purr();
+}
+
+static ssize_t idle_purr_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+ u64 val;
+
+ smp_call_function_single(cpu->dev.id, read_idle_purr, &val, 1);
+ return sprintf(buf, "%llx\n", val);
+}
+static DEVICE_ATTR(idle_purr, 0400, idle_purr_show, NULL);
+
+static void read_idle_spurr(void *val)
+{
+ u64 *ret = (u64 *)val;
+
+ *ret = read_this_idle_spurr();
+}
+
+static ssize_t idle_spurr_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+ u64 val;
+
+ smp_call_function_single(cpu->dev.id, read_idle_spurr, &val, 1);
+ return sprintf(buf, "%llx\n", val);
+}
+static DEVICE_ATTR(idle_spurr, 0400, idle_spurr_show, NULL);
+
static int register_cpu_online(unsigned int cpu)
{
struct cpu *c = &per_cpu(cpu_devices, cpu);
@@ -794,10 +831,15 @@ static int register_cpu_online(unsigned int cpu)
if (!firmware_has_feature(FW_FEATURE_LPAR))
add_write_permission_dev_attr(&dev_attr_purr);
device_create_file(s, &dev_attr_purr);
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ device_create_file(s, &dev_attr_idle_purr);
}

- if (cpu_has_feature(CPU_FTR_SPURR))
+ if (cpu_has_feature(CPU_FTR_SPURR)) {
device_create_file(s, &dev_attr_spurr);
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ device_create_file(s, &dev_attr_idle_spurr);
+ }

if (cpu_has_feature(CPU_FTR_DSCR))
device_create_file(s, &dev_attr_dscr);
@@ -879,11 +921,17 @@ static int unregister_cpu_online(unsigned int cpu)
if (cpu_has_feature(CPU_FTR_MMCRA))
device_remove_file(s, &dev_attr_mmcra);

- if (cpu_has_feature(CPU_FTR_PURR))
+ if (cpu_has_feature(CPU_FTR_PURR)) {
device_remove_file(s, &dev_attr_purr);
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ device_remove_file(s, &dev_attr_idle_purr);
+ }

- if (cpu_has_feature(CPU_FTR_SPURR))
+ if (cpu_has_feature(CPU_FTR_SPURR)) {
device_remove_file(s, &dev_attr_spurr);
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ device_remove_file(s, &dev_attr_idle_spurr);
+ }

if (cpu_has_feature(CPU_FTR_DSCR))
device_remove_file(s, &dev_attr_dscr);
--
1.9.4


2020-02-21 16:50:47

by Nathan Lynch

[permalink] [raw]
Subject: Re: [PATCH v2 4/5] powerpc/sysfs: Show idle_purr and idle_spurr for every CPU

"Gautham R. Shenoy" <[email protected]> writes:
> diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> index 80a676d..5b4b450 100644
> --- a/arch/powerpc/kernel/sysfs.c
> +++ b/arch/powerpc/kernel/sysfs.c
> @@ -19,6 +19,7 @@
> #include <asm/smp.h>
> #include <asm/pmc.h>
> #include <asm/firmware.h>
> +#include <asm/idle.h>
> #include <asm/svm.h>
>
> #include "cacheinfo.h"
> @@ -733,6 +734,42 @@ static void create_svm_file(void)
> }
> #endif /* CONFIG_PPC_SVM */
>
> +static void read_idle_purr(void *val)
> +{
> + u64 *ret = (u64 *)val;

No cast from void* needed.


> +
> + *ret = read_this_idle_purr();
> +}
> +
> +static ssize_t idle_purr_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + struct cpu *cpu = container_of(dev, struct cpu, dev);
> + u64 val;
> +
> + smp_call_function_single(cpu->dev.id, read_idle_purr, &val, 1);
> + return sprintf(buf, "%llx\n", val);
> +}
> +static DEVICE_ATTR(idle_purr, 0400, idle_purr_show, NULL);
> +
> +static void read_idle_spurr(void *val)
> +{
> + u64 *ret = (u64 *)val;
> +
> + *ret = read_this_idle_spurr();
> +}
> +
> +static ssize_t idle_spurr_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + struct cpu *cpu = container_of(dev, struct cpu, dev);
> + u64 val;
> +
> + smp_call_function_single(cpu->dev.id, read_idle_spurr, &val, 1);
> + return sprintf(buf, "%llx\n", val);
> +}
> +static DEVICE_ATTR(idle_spurr, 0400, idle_spurr_show, NULL);

It's regrettable that we have to wake up potentially idle CPUs in order
to derive correct idle statistics for them, but I suppose the main user
(lparstat) of these interfaces already is causing this to happen by
polling the existing per-cpu purr and spurr attributes.

So now lparstat will incur at minimum four syscalls and four IPIs per
CPU per polling interval -- one for each of purr, spurr, idle_purr and
idle_spurr. Correct?

At some point it's going to make sense to batch sampling of remote CPUs'
SPRs.


> static int register_cpu_online(unsigned int cpu)
> {
> struct cpu *c = &per_cpu(cpu_devices, cpu);
> @@ -794,10 +831,15 @@ static int register_cpu_online(unsigned int cpu)
> if (!firmware_has_feature(FW_FEATURE_LPAR))
> add_write_permission_dev_attr(&dev_attr_purr);
> device_create_file(s, &dev_attr_purr);
> + if (firmware_has_feature(FW_FEATURE_LPAR))
> + device_create_file(s, &dev_attr_idle_purr);
> }
>
> - if (cpu_has_feature(CPU_FTR_SPURR))
> + if (cpu_has_feature(CPU_FTR_SPURR)) {
> device_create_file(s, &dev_attr_spurr);
> + if (firmware_has_feature(FW_FEATURE_LPAR))
> + device_create_file(s, &dev_attr_idle_spurr);
> + }
>
> if (cpu_has_feature(CPU_FTR_DSCR))
> device_create_file(s, &dev_attr_dscr);
> @@ -879,11 +921,17 @@ static int unregister_cpu_online(unsigned int cpu)
> if (cpu_has_feature(CPU_FTR_MMCRA))
> device_remove_file(s, &dev_attr_mmcra);
>
> - if (cpu_has_feature(CPU_FTR_PURR))
> + if (cpu_has_feature(CPU_FTR_PURR)) {
> device_remove_file(s, &dev_attr_purr);
> + if (firmware_has_feature(FW_FEATURE_LPAR))
> + device_remove_file(s, &dev_attr_idle_purr);
> + }
>
> - if (cpu_has_feature(CPU_FTR_SPURR))
> + if (cpu_has_feature(CPU_FTR_SPURR)) {
> device_remove_file(s, &dev_attr_spurr);
> + if (firmware_has_feature(FW_FEATURE_LPAR))
> + device_remove_file(s, &dev_attr_idle_spurr);
> + }
>
> if (cpu_has_feature(CPU_FTR_DSCR))
> device_remove_file(s, &dev_attr_dscr);

The cpu register/unregister stuff here looks correct.

2020-02-24 05:16:47

by Gautham R Shenoy

[permalink] [raw]
Subject: Re: [PATCH v2 4/5] powerpc/sysfs: Show idle_purr and idle_spurr for every CPU

On Fri, Feb 21, 2020 at 10:50:12AM -0600, Nathan Lynch wrote:
> "Gautham R. Shenoy" <[email protected]> writes:
> > diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> > index 80a676d..5b4b450 100644
> > --- a/arch/powerpc/kernel/sysfs.c
> > +++ b/arch/powerpc/kernel/sysfs.c
> > @@ -19,6 +19,7 @@
> > #include <asm/smp.h>
> > #include <asm/pmc.h>
> > #include <asm/firmware.h>
> > +#include <asm/idle.h>
> > #include <asm/svm.h>
> >
> > #include "cacheinfo.h"
> > @@ -733,6 +734,42 @@ static void create_svm_file(void)
> > }
> > #endif /* CONFIG_PPC_SVM */
> >
> > +static void read_idle_purr(void *val)
> > +{
> > + u64 *ret = (u64 *)val;
>
> No cast from void* needed.

Will fix this. Thanks.

>
>
> > +
> > + *ret = read_this_idle_purr();
> > +}
> > +
> > +static ssize_t idle_purr_show(struct device *dev,
> > + struct device_attribute *attr, char *buf)
> > +{
> > + struct cpu *cpu = container_of(dev, struct cpu, dev);
> > + u64 val;
> > +
> > + smp_call_function_single(cpu->dev.id, read_idle_purr, &val, 1);
> > + return sprintf(buf, "%llx\n", val);
> > +}
> > +static DEVICE_ATTR(idle_purr, 0400, idle_purr_show, NULL);
> > +
> > +static void read_idle_spurr(void *val)
> > +{
> > + u64 *ret = (u64 *)val;
> > +
> > + *ret = read_this_idle_spurr();
> > +}
> > +
> > +static ssize_t idle_spurr_show(struct device *dev,
> > + struct device_attribute *attr, char *buf)
> > +{
> > + struct cpu *cpu = container_of(dev, struct cpu, dev);
> > + u64 val;
> > +
> > + smp_call_function_single(cpu->dev.id, read_idle_spurr, &val, 1);
> > + return sprintf(buf, "%llx\n", val);
> > +}
> > +static DEVICE_ATTR(idle_spurr, 0400, idle_spurr_show, NULL);
>
> It's regrettable that we have to wake up potentially idle CPUs in order
> to derive correct idle statistics for them, but I suppose the main user
> (lparstat) of these interfaces already is causing this to happen by
> polling the existing per-cpu purr and spurr attributes.
>
> So now lparstat will incur at minimum four syscalls and four IPIs per
> CPU per polling interval -- one for each of purr, spurr, idle_purr and
> idle_spurr. Correct?

Yes, it is unforunate that we will end up making four syscalls and
generating IPI noise, and this is something that I discussed with
Naveen and Kamalesh. We have the following two constraints:

1) These values of PURR and SPURR required are per-cpu. Hence putting
them in lparcfg is not an option.

2) sysfs semantics encourages a single value per key, the key being
the sysfs-file. Something like the following would have made far more
sense.

cat /sys/devices/system/cpu/cpuX/purr_spurr_accounting
purr:A
idle_purr:B
spurr:C
idle_spurr:D

There are some sysfs files which allow something like this. Eg:
/sys/devices/system/cpu/cpu0/cpufreq/stats/time_in_state

Thoughts on any other alternatives?


>
> At some point it's going to make sense to batch sampling of remote CPUs'
> SPRs.
>
>
> > static int register_cpu_online(unsigned int cpu)
> > {
> > struct cpu *c = &per_cpu(cpu_devices, cpu);
> > @@ -794,10 +831,15 @@ static int register_cpu_online(unsigned int cpu)
> > if (!firmware_has_feature(FW_FEATURE_LPAR))
> > add_write_permission_dev_attr(&dev_attr_purr);
> > device_create_file(s, &dev_attr_purr);
> > + if (firmware_has_feature(FW_FEATURE_LPAR))
> > + device_create_file(s, &dev_attr_idle_purr);
> > }
> >
> > - if (cpu_has_feature(CPU_FTR_SPURR))
> > + if (cpu_has_feature(CPU_FTR_SPURR)) {
> > device_create_file(s, &dev_attr_spurr);
> > + if (firmware_has_feature(FW_FEATURE_LPAR))
> > + device_create_file(s, &dev_attr_idle_spurr);
> > + }
> >
> > if (cpu_has_feature(CPU_FTR_DSCR))
> > device_create_file(s, &dev_attr_dscr);
> > @@ -879,11 +921,17 @@ static int unregister_cpu_online(unsigned int cpu)
> > if (cpu_has_feature(CPU_FTR_MMCRA))
> > device_remove_file(s, &dev_attr_mmcra);
> >
> > - if (cpu_has_feature(CPU_FTR_PURR))
> > + if (cpu_has_feature(CPU_FTR_PURR)) {
> > device_remove_file(s, &dev_attr_purr);
> > + if (firmware_has_feature(FW_FEATURE_LPAR))
> > + device_remove_file(s, &dev_attr_idle_purr);
> > + }
> >
> > - if (cpu_has_feature(CPU_FTR_SPURR))
> > + if (cpu_has_feature(CPU_FTR_SPURR)) {
> > device_remove_file(s, &dev_attr_spurr);
> > + if (firmware_has_feature(FW_FEATURE_LPAR))
> > + device_remove_file(s, &dev_attr_idle_spurr);
> > + }
> >
> > if (cpu_has_feature(CPU_FTR_DSCR))
> > device_remove_file(s, &dev_attr_dscr);
>
> The cpu register/unregister stuff here looks correct.

Thanks for reviewing the patch.

--
Thanks and Regards
gautham.

2020-02-25 10:23:23

by Naveen N. Rao

[permalink] [raw]
Subject: Re: [PATCH v2 4/5] powerpc/sysfs: Show idle_purr and idle_spurr for every CPU

Gautham R Shenoy wrote:
> On Fri, Feb 21, 2020 at 10:50:12AM -0600, Nathan Lynch wrote:
>> "Gautham R. Shenoy" <[email protected]> writes:
>> > diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
>> > index 80a676d..5b4b450 100644
>> > --- a/arch/powerpc/kernel/sysfs.c
>> > +++ b/arch/powerpc/kernel/sysfs.c
>> > @@ -19,6 +19,7 @@
>> > #include <asm/smp.h>
>> > #include <asm/pmc.h>
>> > #include <asm/firmware.h>
>> > +#include <asm/idle.h>
>> > #include <asm/svm.h>
>> >
>> > #include "cacheinfo.h"
>> > @@ -733,6 +734,42 @@ static void create_svm_file(void)
>> > }
>> > #endif /* CONFIG_PPC_SVM */
>> >
>> > +static void read_idle_purr(void *val)
>> > +{
>> > + u64 *ret = (u64 *)val;
>>
>> No cast from void* needed.
>
> Will fix this. Thanks.
>
>>
>>
>> > +
>> > + *ret = read_this_idle_purr();
>> > +}
>> > +
>> > +static ssize_t idle_purr_show(struct device *dev,
>> > + struct device_attribute *attr, char *buf)
>> > +{
>> > + struct cpu *cpu = container_of(dev, struct cpu, dev);
>> > + u64 val;
>> > +
>> > + smp_call_function_single(cpu->dev.id, read_idle_purr, &val, 1);
>> > + return sprintf(buf, "%llx\n", val);
>> > +}
>> > +static DEVICE_ATTR(idle_purr, 0400, idle_purr_show, NULL);
>> > +
>> > +static void read_idle_spurr(void *val)
>> > +{
>> > + u64 *ret = (u64 *)val;
>> > +
>> > + *ret = read_this_idle_spurr();
>> > +}
>> > +
>> > +static ssize_t idle_spurr_show(struct device *dev,
>> > + struct device_attribute *attr, char *buf)
>> > +{
>> > + struct cpu *cpu = container_of(dev, struct cpu, dev);
>> > + u64 val;
>> > +
>> > + smp_call_function_single(cpu->dev.id, read_idle_spurr, &val, 1);
>> > + return sprintf(buf, "%llx\n", val);
>> > +}
>> > +static DEVICE_ATTR(idle_spurr, 0400, idle_spurr_show, NULL);
>>
>> It's regrettable that we have to wake up potentially idle CPUs in order
>> to derive correct idle statistics for them, but I suppose the main user
>> (lparstat) of these interfaces already is causing this to happen by
>> polling the existing per-cpu purr and spurr attributes.
>>
>> So now lparstat will incur at minimum four syscalls and four IPIs per
>> CPU per polling interval -- one for each of purr, spurr, idle_purr and
>> idle_spurr. Correct?
>
> Yes, it is unforunate that we will end up making four syscalls and
> generating IPI noise, and this is something that I discussed with
> Naveen and Kamalesh. We have the following two constraints:
>
> 1) These values of PURR and SPURR required are per-cpu. Hence putting
> them in lparcfg is not an option.
>
> 2) sysfs semantics encourages a single value per key, the key being
> the sysfs-file. Something like the following would have made far more
> sense.
>
> cat /sys/devices/system/cpu/cpuX/purr_spurr_accounting
> purr:A
> idle_purr:B
> spurr:C
> idle_spurr:D
>
> There are some sysfs files which allow something like this. Eg:
> /sys/devices/system/cpu/cpu0/cpufreq/stats/time_in_state
>
> Thoughts on any other alternatives?

Umm... procfs?
/me ducks

>
>
>>
>> At some point it's going to make sense to batch sampling of remote CPUs'
>> SPRs.

How did you mean this? It looks like we first need to provide a separate
user interface, since with the existing sysfs interface providing
separate files, I am not sure if we can batch such reads.


- Naveen

2020-03-06 17:06:36

by Nathan Lynch

[permalink] [raw]
Subject: Re: [PATCH v2 4/5] powerpc/sysfs: Show idle_purr and idle_spurr for every CPU

"Naveen N. Rao" <[email protected]> writes:
> Gautham R Shenoy wrote:
>> On Fri, Feb 21, 2020 at 10:50:12AM -0600, Nathan Lynch wrote:
>>> It's regrettable that we have to wake up potentially idle CPUs in order
>>> to derive correct idle statistics for them, but I suppose the main user
>>> (lparstat) of these interfaces already is causing this to happen by
>>> polling the existing per-cpu purr and spurr attributes.
>>>
>>> So now lparstat will incur at minimum four syscalls and four IPIs per
>>> CPU per polling interval -- one for each of purr, spurr, idle_purr and
>>> idle_spurr. Correct?
>>
>> Yes, it is unforunate that we will end up making four syscalls and
>> generating IPI noise, and this is something that I discussed with
>> Naveen and Kamalesh. We have the following two constraints:
>>
>> 1) These values of PURR and SPURR required are per-cpu. Hence putting
>> them in lparcfg is not an option.
>>
>> 2) sysfs semantics encourages a single value per key, the key being
>> the sysfs-file. Something like the following would have made far more
>> sense.
>>
>> cat /sys/devices/system/cpu/cpuX/purr_spurr_accounting
>> purr:A
>> idle_purr:B
>> spurr:C
>> idle_spurr:D
>>
>> There are some sysfs files which allow something like this. Eg:
>> /sys/devices/system/cpu/cpu0/cpufreq/stats/time_in_state
>>
>> Thoughts on any other alternatives?
>
> Umm... procfs?
> /me ducks

I had wondered about perf events but I'm not sure that's any more suitable.


>>> At some point it's going to make sense to batch sampling of remote CPUs'
>>> SPRs.
>
> How did you mean this? It looks like we first need to provide a separate
> user interface, since with the existing sysfs interface providing
> separate files, I am not sure if we can batch such reads.

I mean in order to minimize IPI traffic something like: sample/calculate
all of a CPU's purr, idle_purr, spurr, idle_spurr in a single IPI upon a
read of any of the attributes, and cache the result for some time, so
that the anticipated subsequent reads of the other attributes use the
cached results instead of generating more IPIs.

That would keep the current sysfs interface at the cost of imposing a
certain coarseness in the results.

Anyway, that's a mitigation that could be considered if the
implementation in this patch is found to be too expensive in practice.

2020-03-06 17:40:12

by Naveen N. Rao

[permalink] [raw]
Subject: Re: [PATCH v2 4/5] powerpc/sysfs: Show idle_purr and idle_spurr for every CPU

Nathan Lynch wrote:
> "Naveen N. Rao" <[email protected]> writes:
>> Gautham R Shenoy wrote:
>>> On Fri, Feb 21, 2020 at 10:50:12AM -0600, Nathan Lynch wrote:
>>>> It's regrettable that we have to wake up potentially idle CPUs in order
>>>> to derive correct idle statistics for them, but I suppose the main user
>>>> (lparstat) of these interfaces already is causing this to happen by
>>>> polling the existing per-cpu purr and spurr attributes.
>>>>
>>>> So now lparstat will incur at minimum four syscalls and four IPIs per
>>>> CPU per polling interval -- one for each of purr, spurr, idle_purr and
>>>> idle_spurr. Correct?
>>>
>>> Yes, it is unforunate that we will end up making four syscalls and
>>> generating IPI noise, and this is something that I discussed with
>>> Naveen and Kamalesh. We have the following two constraints:
>>>
>>> 1) These values of PURR and SPURR required are per-cpu. Hence putting
>>> them in lparcfg is not an option.
>>>
>>> 2) sysfs semantics encourages a single value per key, the key being
>>> the sysfs-file. Something like the following would have made far more
>>> sense.
>>>
>>> cat /sys/devices/system/cpu/cpuX/purr_spurr_accounting
>>> purr:A
>>> idle_purr:B
>>> spurr:C
>>> idle_spurr:D
>>>
>>> There are some sysfs files which allow something like this. Eg:
>>> /sys/devices/system/cpu/cpu0/cpufreq/stats/time_in_state
>>>
>>> Thoughts on any other alternatives?
>>
>> Umm... procfs?
>> /me ducks
>
> I had wondered about perf events but I'm not sure that's any more suitable.

Yes, we considered that, but it looks like the event reads are not
"batched" in any manner. So, the IPI overhead will be similar.

>
>>>> At some point it's going to make sense to batch sampling of remote CPUs'
>>>> SPRs.
>>
>> How did you mean this? It looks like we first need to provide a separate
>> user interface, since with the existing sysfs interface providing
>> separate files, I am not sure if we can batch such reads.
>
> I mean in order to minimize IPI traffic something like: sample/calculate
> all of a CPU's purr, idle_purr, spurr, idle_spurr in a single IPI upon a
> read of any of the attributes, and cache the result for some time, so
> that the anticipated subsequent reads of the other attributes use the
> cached results instead of generating more IPIs.
>
> That would keep the current sysfs interface at the cost of imposing a
> certain coarseness in the results.

Thanks for clarifying, that makes sense. Though we need to be careful in
ensuring the sysfs semantics work as expected.

>
> Anyway, that's a mitigation that could be considered if the
> implementation in this patch is found to be too expensive in practice.

That's a good point. We can optimize later if this turns out to be a
problem in practice, if we end up using this approach.


- Naveen