Expose the resctrl information in /proc/{pid}/resctrl
so that the user space tool is able to locate the resctrl
directory path in O(1) rather than O(n) - n is the number
of tasks in the system.
Chen Yu (2):
resctrl: Add CPU_RESCTRL
x86/resctrl: Add task resctrl information display
arch/Kconfig | 4 +++
arch/x86/Kconfig | 1 +
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 47 ++++++++++++++++++++++++++
fs/proc/base.c | 7 ++++
include/linux/resctrl.h | 16 +++++++++
5 files changed, 75 insertions(+)
create mode 100644 include/linux/resctrl.h
--
2.17.1
Introduce a generic option called CPU_RESCTRL which
is selected by the arch-specific ones: CONFIG_X86_CPU_RESCTRL
now, or CONFIG_ARM64_RESCTRL in the future. The generic one will
cover the resctrl filesystem and other generic and shared
bits of functionality.
Suggested-by: Borislav Petkov <[email protected]>
Signed-off-by: Chen Yu <[email protected]>
---
arch/Kconfig | 4 ++++
arch/x86/Kconfig | 1 +
2 files changed, 5 insertions(+)
diff --git a/arch/Kconfig b/arch/Kconfig
index 5f8a5d84dbbe..3886cf0052a8 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -960,6 +960,10 @@ config RELR
config ARCH_HAS_MEM_ENCRYPT
bool
+config CPU_RESCTRL
+ bool
+ def_bool n
+
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ef85139553f..a8a12493e8c2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -455,6 +455,7 @@ config X86_CPU_RESCTRL
bool "x86 CPU resource control support"
depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
select KERNFS
+ select CPU_RESCTRL
help
Enable x86 CPU resource control support.
--
2.17.1
Monitoring tools that want to find out which resctrl CTRL
and MONITOR groups a task belongs to must currently read
the "tasks" file in every group until they locate the process
ID.
Add an additional file /proc/{pid}/resctrl to provide this
information.
For example:
cat /proc/1193/resctrl
CTRL_MON:/ctrl_grp0
MON:/ctrl_grp0/mon_groups/mon_grp0
If the resctrl filesystem has not been mounted,
reading /proc/{pid}/resctrl returns an error:
cat: /proc/1193/resctrl: No such device
Tested-by: Jinshi Chen <[email protected]>
Reviewed-by: Reinette Chatre <[email protected]>
Reviewed-by: Fenghua Yu <[email protected]>
Reviewed-by: Tony Luck <[email protected]>
Signed-off-by: Chen Yu <[email protected]>
---
v2: Reduce indentation level in proc_resctrl_show()
according to Boris's suggestion.
Create the include/linux/resctrl.h header and
declare proc_resctrl_show() in this file, so
that other architectures can use it
in the future. Each architecture should
provide its own architecture-specific
implementation of proc_resctrl_show().
---
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 47 ++++++++++++++++++++++++++
fs/proc/base.c | 7 ++++
include/linux/resctrl.h | 16 +++++++++
3 files changed, 70 insertions(+)
create mode 100644 include/linux/resctrl.h
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index a46dee8e78db..b53bf52a04f5 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -727,6 +727,53 @@ static int rdtgroup_tasks_show(struct kernfs_open_file *of,
return ret;
}
+int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *tsk)
+{
+ struct rdtgroup *rdtg;
+ int ret = 0;
+
+ mutex_lock(&rdtgroup_mutex);
+
+ /* Make sure resctrl has been mounted. */
+ if (!static_branch_unlikely(&rdt_enable_key)) {
+ ret = -ENODEV;
+ goto unlock;
+ }
+
+ list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) {
+ struct rdtgroup *crg;
+
+ /*
+ * Task information is only relevant for shareable
+ * and exclusive groups.
+ */
+ if (rdtg->mode != RDT_MODE_SHAREABLE &&
+ rdtg->mode != RDT_MODE_EXCLUSIVE)
+ continue;
+
+ if (rdtg->closid != tsk->closid)
+ continue;
+
+ seq_printf(s, "CTRL_MON:/%s\n", rdtg->kn->name);
+ list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
+ mon.crdtgrp_list) {
+ if (tsk->rmid != crg->mon.rmid)
+ continue;
+ seq_printf(s, "MON:%s%s/mon_groups/%s\n",
+ rdtg == &rdtgroup_default ? "" : "/",
+ rdtg->kn->name, crg->kn->name);
+ goto unlock;
+ }
+ goto unlock;
+ }
+ ret = -ENOENT;
+unlock:
+ mutex_unlock(&rdtgroup_mutex);
+
+ return ret;
+}
+
static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
diff --git a/fs/proc/base.c b/fs/proc/base.c
index ebea9501afb8..3ddcb0cae3bd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -94,6 +94,7 @@
#include <linux/sched/debug.h>
#include <linux/sched/stat.h>
#include <linux/posix-timers.h>
+#include <linux/resctrl.h>
#include <trace/events/oom.h>
#include "internal.h"
#include "fd.h"
@@ -3060,6 +3061,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#endif
#ifdef CONFIG_CGROUPS
ONE("cgroup", S_IRUGO, proc_cgroup_show),
+#endif
+#ifdef CONFIG_CPU_RESCTRL
+ ONE("resctrl", S_IRUGO, proc_resctrl_show),
#endif
ONE("oom_score", S_IRUGO, proc_oom_score),
REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
@@ -3460,6 +3464,9 @@ static const struct pid_entry tid_base_stuff[] = {
#endif
#ifdef CONFIG_CGROUPS
ONE("cgroup", S_IRUGO, proc_cgroup_show),
+#endif
+#ifdef CONFIG_CPU_RESCTRL
+ ONE("resctrl", S_IRUGO, proc_resctrl_show),
#endif
ONE("oom_score", S_IRUGO, proc_oom_score),
REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations),
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
new file mode 100644
index 000000000000..9f0e6a892cb3
--- /dev/null
+++ b/include/linux/resctrl.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _CPU_RESCTRL_H
+#define _CPU_RESCTRL_H
+
+#ifdef CONFIG_CPU_RESCTRL
+
+#include <linux/proc_fs.h>
+
+int proc_resctrl_show(struct seq_file *m,
+ struct pid_namespace *ns,
+ struct pid *pid,
+ struct task_struct *tsk);
+
+#endif
+
+#endif /* _CPU_RESCTRL_H */
--
2.17.1
On Fri, Nov 15, 2019 at 01:24:20PM +0800, Chen Yu wrote:
> Introduce a generic option called CPU_RESCTRL which
> is selected by the arch-specific ones CONFIG_X86_RESCTRL
> or CONFIG_ARM64_RESCTRL in the future. The generic one will
> cover the resctrl filesystem and other generic and shared
> bits of functionality.
>
> Suggested-by: Borislav Petkov <[email protected]>
I don't remember suggesting adding a separate CONFIG option...
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Fri, Nov 15, 2019 at 01:25:06PM +0800, Chen Yu wrote:
> Monitoring tools that want to find out which resctrl CTRL
> and MONITOR groups a task belongs to must currently read
> the "tasks" file in every group until they locate the process
> ID.
>
> Add an additional file /proc/{pid}/resctrl to provide this
> information.
>
> For example:
> cat /proc/1193/resctrl
> CTRL_MON:/ctrl_grp0
> MON:/ctrl_grp0/mon_groups/mon_grp0
>
> If the resctrl filesystem has not been mounted,
> reading /proc/{pid}/resctrl returns an error:
> cat: /proc/1193/resctrl: No such device
Eww, this doesn't sound very user-friendly. How is the user supposed to
know that the resctrl fs needs to be mounted for this to work?
Why does the resctrl fs need to be mounted at all to show this?
I'm guessing if it is not mounted, you have no groups so you don't have
to return an error - you simply return "". Right?
> Tested-by: Jinshi Chen <[email protected]>
> Reviewed-by: Reinette Chatre <[email protected]>
> Reviewed-by: Fenghua Yu <[email protected]>
> Reviewed-by: Tony Luck <[email protected]>
When you send a new version which has non-trivial changes, you should
drop those tags because they don't apply anymore. Unless those people
have managed to review and test the new version ...
Looking at CONFIG_PROC_PID_ARCH_STATUS for an example of proc/ calling
arch-specific functions, I guess you need to do:
select CPU_RESCTRL if PROC_FS
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
Hi Boris,
On Fri, Nov 15, 2019 at 5:25 PM Borislav Petkov <[email protected]> wrote:
>
> On Fri, Nov 15, 2019 at 01:25:06PM +0800, Chen Yu wrote:
> > Monitoring tools that want to find out which resctrl CTRL
> > and MONITOR groups a task belongs to must currently read
> > the "tasks" file in every group until they locate the process
> > ID.
> >
> > Add an additional file /proc/{pid}/resctrl to provide this
> > information.
> >
> > For example:
> > cat /proc/1193/resctrl
> > CTRL_MON:/ctrl_grp0
> > MON:/ctrl_grp0/mon_groups/mon_grp0
> >
> > If the resctrl filesystem has not been mounted,
> > reading /proc/{pid}/resctrl returns an error:
> > cat: /proc/1193/resctrl: No such device
>
> Eww, this doesn't sound very user-friendly. How is the user supposed to
> know that the resctrl fs needs to be mounted for this to work?
>
> Why does the resctrl fs need to be mounted at all to show this?
>
> I'm guessing if it is not mounted, you have no groups so you don't have
> to return an error - you simply return "". Right?
>
Right, we can return 'blank' to the user and let the user parse the information.
There is similar behavior in cgroup: for a kernel thread that does not belong
to any cgroup, /proc/{pid}/cgroup just shows 'blank' without returning an error.
> > Tested-by: Jinshi Chen <[email protected]>
> > Reviewed-by: Reinette Chatre <[email protected]>
> > Reviewed-by: Fenghua Yu <[email protected]>
> > Reviewed-by: Tony Luck <[email protected]>
>
> When you send a new version which has non-trivial changes, you should
> drop those tags because they don't apply anymore. Unless those people
> have managed to review and test the new version ...
>
Got it, thanks for telling me this.
> Looking at CONFIG_PROC_PID_ARCH_STATUS for an example of proc/ calling
> arch-specific functions, I guess you need to do:
>
> select CPU_RESCTRL if PROC_FS
>
Yes, only when PROC_FS is set, /proc/{pid}/resctrl
can be displayed. However, CPU_RESCTRL might not
depend on proc fs, it is possible that the CPU_RESCTRL
is enabled but without PROC_FS set. If I understand correctly,
CPU_RESCTRL is the 'root' config for X86_CPU_RESCTRL,
after reading this thread:
https://lists.gt.net/linux/kernel/3211659
If this is the case, shall we add the new file at kernel/resctrl/resctrl.c?
And the generic proc_resctrl_show() could be put into this file. In the future
the generic code for resctrl could be added/moved to kernel/resctrl/resctrl.c
Thanks,
Chenyu
> Thx.
>
> --
> Regards/Gruss,
> Boris.
>
> https://people.kernel.org/tglx/notes-about-netiquette
On Sat, Nov 16, 2019 at 11:01:12PM +0800, Ryan Chen wrote:
> Right, we can return 'blank' to user and let the user to parse the information.
There is nothing to parse - the task doesn't belong to any groups. That's it.
> And there is a similar behavior in cgroup that, for kernel thread that
> does not belong
> to any cgroup, /proc/{pid}/cgroup just show 'blank' without returning an error.
By 'blank' I assume you mean the empty string '' ?
> Yes, only when PROC_FS is set, /proc/{pid}/resctrl
> can be displayed. However, CPU_RESCTRL might not
> depend on proc fs, it is possible that the CPU_RESCTRL
> is enabled but without PROC_FS set. If I understand correctly,
> CPU_RESCTRL is the 'root' config for X86_CPU_RESCTRL,
> after reading this thread:
> https://lists.gt.net/linux/kernel/3211659
I'm not sure I know what you mean here. There's no CPU_RESCTRL option - you've
added it in the previous patch:
[ ~/kernel/linux> git grep -E CONFIG_CPU_RESCTRL
[ ~/kernel/linux> git grep -E "\WCPU_RESCTRL"
[ ~/kernel/linux>
And if you want to use that option in proc/, then it needs
to depend on PROC_FS, like the example I gave you with
CONFIG_PROC_PID_ARCH_STATUS.
Or do you mean something else?
> If this is the case, shall we add the new file at kernel/resctrl/resctrl.c?
> And the generic proc_resctrl_show() could be put into this file. In the future
> the generic code for resctrl could be added/moved to kernel/resctrl/resctrl.c
Not worth it for a single function. Leave it in
arch/x86/kernel/cpu/resctrl/rdtgroup.c where you had it.
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
Hi Boris,
On Mon, Nov 18, 2019 at 10:48 PM Borislav Petkov <[email protected]> wrote:
>
> On Sat, Nov 16, 2019 at 11:01:12PM +0800, Ryan Chen wrote:
> > Right, we can return 'blank' to user and let the user to parse the information.
>
> There is nothing to parse - the task doesn't belong to any groups. That's it.
>
> > And there is a similar behavior in cgroup that, for kernel thread that
> > does not belong
> > to any cgroup, /proc/{pid}/cgroup just show 'blank' without returning an error.
>
> By 'blank' I assume you mean the empty string '' ?
>
Yes, it is 'empty string'.
> > Yes, only when PROC_FS is set, /proc/{pid}/resctrl
> > can be displayed. However, CPU_RESCTRL might not
> > depend on proc fs, it is possible that the CPU_RESCTRL
> > is enabled but without PROC_FS set. If I understand correctly,
> > CPU_RESCTRL is the 'root' config for X86_CPU_RESCTRL,
> > after reading this thread:
> > https://lists.gt.net/linux/kernel/3211659
>
> I'm not sure I know what you mean here. There's no CPU_RESCTRL option - you've
> added it in the previous patch:
>
> [ ~/kernel/linux> git grep -E CONFIG_CPU_RESCTRL
> [ ~/kernel/linux> git grep -E "\WCPU_RESCTRL"
> [ ~/kernel/linux>
>
> And if you want to use that option in proc/, then it needs
> to depend on PROC_FS, like the example I gave you with
> CONFIG_PROC_PID_ARCH_STATUS.
>
> Or do you mean something else?
>
Previously I was thinking of introducing CONFIG_CPU_RESCTRL
to the kernel, so that platform-independent resctrl code could be moved under
this config. The idea was seen in the following commit log:
commit e6d429313ea5c776d (x86/resctrl: Avoid confusion over the
new X86_RESCTRL config)
But since we only touch proc fs in this patch, I think a config named
CONFIG_PROC_CPU_RESCTRL declared in fs/proc/Kconfig
might be more appropriate (similar to CONFIG_PROC_PID_ARCH_STATUS).
> > If this is the case, shall we add the new file at kernel/resctrl/resctrl.c?
> > And the generic proc_resctrl_show() could be put into this file. In the future
> > the generic code for resctrl could be added/moved to kernel/resctrl/resctrl.c
>
> Not worth it for a single function. Leave it in
> arch/x86/kernel/cpu/resctrl/rdtgroup.c where you had it.
>
Okay, got it. Let me send the v3 patch out and see if it is suitable.
Thanks,
Chenyu
> Thx.
>
> --
> Regards/Gruss,
> Boris.
>
> https://people.kernel.org/tglx/notes-about-netiquette