To change the class of service for a large group of tasks, such as an
application container, a container manager must write all of the tasks'
IDs into the tasks file interface of the new control group.
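For illustration, each such move is one write per task ID, along the
lines of this minimal sketch (the mount point, group name, and helper
name are illustrative; error handling is pared down):

  #include <stdio.h>
  #include <sys/types.h>

  /* Reassign one task by writing its PID into the destination
   * group's tasks file. */
  static int move_task(const char *group, pid_t pid)
  {
          char path[256];
          FILE *f;

          snprintf(path, sizeof(path), "/sys/fs/resctrl/%s/tasks", group);
          f = fopen(path, "w");
          if (!f)
                  return -1;
          fprintf(f, "%d\n", (int)pid);
          return fclose(f);
  }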
If a container manager tracks each container's bandwidth usage by
placing its tasks in a dedicated monitoring group, it must first move
the tasks to the default monitoring group of the new control group
before it can move them into new monitoring groups under that control
group. This is undesirable because bandwidth used during the move
cannot be attributed to the correct tasks, and the monitoring event
counters and cache usage information for the group are reset.
To address this, implement the rename operation for resctrlfs mon
groups: moving a MON group to a new parent control group switches its
tasks to the new parent's CLOSID while otherwise leaving the
monitoring group intact.
Note that this solution depends on Intel and AMD hardware allowing an
RMID to be assigned independently of the CLOSID; without that
property, the operation would be far less useful.
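With rename support, a container manager can instead move the whole
mon group in one step, e.g. (a sketch with illustrative group names,
assuming resctrl is mounted at /sys/fs/resctrl):

  #include <stdio.h>

  int main(void)
  {
          /* Move mon group "containerA" from control group "low_prio"
           * to "high_prio"; its tasks keep their RMID while switching
           * to the new CLOSID. */
          return rename("/sys/fs/resctrl/low_prio/mon_groups/containerA",
                        "/sys/fs/resctrl/high_prio/mon_groups/containerA");
  }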
Signed-off-by: Peter Newman <[email protected]>
---
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 66 ++++++++++++++++++++++++++
1 file changed, 66 insertions(+)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 03b51543c26d..d6562d98b816 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -3230,6 +3230,71 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
return ret;
}
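+/*
+ * mongrp_move() - Reparent a MON group under a new control group,
+ * switching its tasks to the new parent's CLOSID while leaving their
+ * RMID (and the group's monitoring data) untouched.
+ */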
+static void mongrp_move(struct rdtgroup *rdtgrp, struct rdtgroup *new_prdtgrp)
+{
+	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
+	struct task_struct *p, *t;
+
+	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
+	list_del(&rdtgrp->mon.crdtgrp_list);
+
+	list_add_tail(&rdtgrp->mon.crdtgrp_list,
+		      &new_prdtgrp->mon.crdtgrp_list);
+	rdtgrp->mon.parent = new_prdtgrp;
+
+	read_lock(&tasklist_lock);
+	for_each_process_thread(p, t) {
+		if (is_closid_match(t, prdtgrp) && is_rmid_match(t, rdtgrp))
+			WRITE_ONCE(t->closid, new_prdtgrp->closid);
+	}
+	read_unlock(&tasklist_lock);
+
+	update_closid_rmid(cpu_online_mask, NULL);
+}
+
+static int rdtgroup_rename(struct kernfs_node *kn,
+			   struct kernfs_node *new_parent, const char *new_name)
+{
+	struct rdtgroup *new_prdtgrp;
+	struct rdtgroup *rdtgrp;
+	int ret;
+
+	rdtgrp = kernfs_to_rdtgroup(kn);
+	new_prdtgrp = kernfs_to_rdtgroup(new_parent);
+	if (!rdtgrp || !new_prdtgrp)
+		return -EPERM;
+
+	/* Release both kernfs active_refs before obtaining rdtgroup mutex. */
+	rdtgroup_kn_get(rdtgrp, kn);
+	rdtgroup_kn_get(new_prdtgrp, new_parent);
+
+	mutex_lock(&rdtgroup_mutex);
+
+	if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) {
+		ret = -ESRCH;
+		goto out;
+	}
+
+	/* Only a mon group can be moved to a new mon_groups directory. */
+	if (rdtgrp->type != RDTMON_GROUP ||
+	    !is_mon_groups(new_parent, new_name)) {
+		ret = -EPERM;
+		goto out;
+	}
+
+	ret = kernfs_rename(kn, new_parent, new_name);
+	if (ret)
+		goto out;
+
+	mongrp_move(rdtgrp, new_prdtgrp);
+
+out:
+	mutex_unlock(&rdtgroup_mutex);
+	rdtgroup_kn_put(rdtgrp, kn);
+	rdtgroup_kn_put(new_prdtgrp, new_parent);
+	return ret;
+}
+
static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
{
if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
@@ -3247,6 +3312,7 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
.mkdir = rdtgroup_mkdir,
.rmdir = rdtgroup_rmdir,
+ .rename = rdtgroup_rename,
.show_options = rdtgroup_show_options,
};
--
2.38.1.584.g0f3c55d4c2-goog
On Tue, Nov 29, 2022 at 1:02 PM Peter Newman <[email protected]> wrote:
> +static void mongrp_move(struct rdtgroup *rdtgrp, struct rdtgroup *new_prdtgrp)
> +{
> [...]
> +	update_closid_rmid(cpu_online_mask, NULL);
I will need to refresh this patch now that we're back to building an
update mask.
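Roughly, I'd expect the refreshed loop to collect the CPUs of affected
tasks rather than broadcast to every online CPU, along the lines of
this sketch (assumed shape only, mirroring how rdt_move_group_tasks()
builds its mask; not the actual refreshed patch):

	cpumask_var_t tmpmask;

	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
		return;

	read_lock(&tasklist_lock);
	for_each_process_thread(p, t) {
		if (is_closid_match(t, prdtgrp) && is_rmid_match(t, rdtgrp)) {
			WRITE_ONCE(t->closid, new_prdtgrp->closid);
			/* Only IPI CPUs currently running affected tasks. */
			if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
				cpumask_set_cpu(task_cpu(t), tmpmask);
		}
	}
	read_unlock(&tasklist_lock);

	update_closid_rmid(tmpmask, NULL);
	free_cpumask_var(tmpmask);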
This will once again depend on
https://lore.kernel.org/lkml/[email protected]/