Hi Reinette, Fenghua,
This patch series implements the solution Reinette suggested in the
earlier RFD thread[1] for the problem of moving a container's tasks to a
different control group on systems that don't provide enough CLOSIDs to
give every container its own control group.
Without this, we were forced to write all of the task IDs from the
container into the tasks file of the new control group. Because a group
is free to dynamically create new tasks, we found this approach
unusable.
This change originally depended on the CLOSID update race fix[2] to
provide a race-free mechanism for notifying the CPUs where moved tasks
were residing. However, now that the current guidance for group task
movement is to broadcast IPIs to all CPUs, this patch can be applied
independently.
This patch series assumes that a MON group's CLOSID can simply be
changed to that of a new parent CTRL_MON group. This is allowed on Intel
and AMD, but not MPAM implementations. While we (Google) only foresee
needing this functionality on Intel and AMD systems, this series should
hopefully be a good starting point for supporting MPAM.
These patches are based on v6.1-rc5
[1] https://lore.kernel.org/lkml/[email protected]/
[2] https://lore.kernel.org/lkml/[email protected]/
Thanks!
-Peter
Peter Newman (2):
x86/resctrl: factor rdtgroup lock for multi-file ops
x86/resctrl: Implement rename op for mon groups
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 101 +++++++++++++++++++++----
1 file changed, 88 insertions(+), 13 deletions(-)
--
2.38.1.493.g58b659f92b-goog
To help implement kernfs operations on multiple files, such as rename,
factor the work needed to respectively break and unbreak active
protection on an individual file into rdtgroup_kn_{get,put}().
The existing rdtgroup_kn_lock_live() could only release the kernfs lock
for a single file before waiting on the rdtgroup_mutex.
This refactoring should not result in any functional change.
Signed-off-by: Peter Newman <[email protected]>
---
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 35 ++++++++++++++++----------
1 file changed, 22 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index e5a48f05e787..03b51543c26d 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2026,6 +2026,26 @@ static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
}
}
+static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
+{
+ atomic_inc(&rdtgrp->waitcount);
+ kernfs_break_active_protection(kn);
+}
+
+static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
+{
+ if (atomic_dec_and_test(&rdtgrp->waitcount) &&
+ (rdtgrp->flags & RDT_DELETED)) {
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
+ rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
+ rdtgroup_pseudo_lock_remove(rdtgrp);
+ kernfs_unbreak_active_protection(kn);
+ rdtgroup_remove(rdtgrp);
+ } else {
+ kernfs_unbreak_active_protection(kn);
+ }
+}
+
struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
{
struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
@@ -2033,8 +2053,7 @@ struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
if (!rdtgrp)
return NULL;
- atomic_inc(&rdtgrp->waitcount);
- kernfs_break_active_protection(kn);
+ rdtgroup_kn_get(rdtgrp, kn);
mutex_lock(&rdtgroup_mutex);
@@ -2053,17 +2072,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn)
return;
mutex_unlock(&rdtgroup_mutex);
-
- if (atomic_dec_and_test(&rdtgrp->waitcount) &&
- (rdtgrp->flags & RDT_DELETED)) {
- if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
- rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
- rdtgroup_pseudo_lock_remove(rdtgrp);
- kernfs_unbreak_active_protection(kn);
- rdtgroup_remove(rdtgrp);
- } else {
- kernfs_unbreak_active_protection(kn);
- }
+ rdtgroup_kn_put(rdtgrp, kn);
}
static int mkdir_mondata_all(struct kernfs_node *parent_kn,
--
2.38.1.493.g58b659f92b-goog
Implement the rename operation for resctrlfs mon groups to effect a
change in CLOSID for a MON group while otherwise leaving the group
intact.
This operation is useful as a solution to changing the class of service
of a running container without impacting its resource usage counters on
systems which do not implement enough CLOSIDs to give every container
it's own CTRL_MON group. Previously, on such systems, it would have been
necessary to move every task individually to a new CTRL_MON group using
the tasks file. This approach races with the creation of new tasks in
the group, making it practically unusable.
It's important to note that this solution relies on the fact that Intel
and AMD hardware allow the RMID to be assigned independently of the
CLOSID. Without this, this operation may not be as useful.
Signed-off-by: Peter Newman <[email protected]>
---
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 66 ++++++++++++++++++++++++++
1 file changed, 66 insertions(+)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 03b51543c26d..d6562d98b816 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -3230,6 +3230,71 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
return ret;
}
+static void mongrp_move(struct rdtgroup *rdtgrp, struct rdtgroup *new_prdtgrp)
+{
+ struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
+ struct task_struct *p, *t;
+
+ WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
+ list_del(&rdtgrp->mon.crdtgrp_list);
+
+ list_add_tail(&rdtgrp->mon.crdtgrp_list,
+ &new_prdtgrp->mon.crdtgrp_list);
+ rdtgrp->mon.parent = new_prdtgrp;
+
+ read_lock(&tasklist_lock);
+ for_each_process_thread(p, t) {
+ if (is_closid_match(t, prdtgrp) && is_rmid_match(t, rdtgrp))
+ WRITE_ONCE(t->closid, new_prdtgrp->closid);
+ }
+ read_unlock(&tasklist_lock);
+
+ update_closid_rmid(cpu_online_mask, NULL);
+}
+
+static int rdtgroup_rename(struct kernfs_node *kn,
+ struct kernfs_node *new_parent, const char *new_name)
+{
+ struct rdtgroup *new_prdtgrp;
+ struct rdtgroup *rdtgrp;
+ int ret;
+
+ rdtgrp = kernfs_to_rdtgroup(kn);
+ new_prdtgrp = kernfs_to_rdtgroup(new_parent);
+ if (!rdtgrp || !new_prdtgrp)
+ return -EPERM;
+
+ /* Release both kernfs active_refs before obtaining rdtgroup mutex. */
+ rdtgroup_kn_get(rdtgrp, kn);
+ rdtgroup_kn_get(new_prdtgrp, new_parent);
+
+ mutex_lock(&rdtgroup_mutex);
+
+ if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) {
+ ret = -ESRCH;
+ goto out;
+ }
+
+ /* Only a mon group can be moved to a new mon_groups directory. */
+ if (rdtgrp->type != RDTMON_GROUP ||
+ !is_mon_groups(new_parent, kn->name)) {
+ ret = -EPERM;
+ goto out;
+ }
+
+ ret = kernfs_rename(kn, new_parent, new_name);
+ if (ret)
+ goto out;
+
+ mongrp_move(rdtgrp, new_prdtgrp);
+
+out:
+ mutex_unlock(&rdtgroup_mutex);
+ rdtgroup_kn_put(rdtgrp, kn);
+ rdtgroup_kn_put(new_prdtgrp, new_parent);
+ return ret;
+}
+
static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
{
if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
@@ -3247,6 +3312,7 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
.mkdir = rdtgroup_mkdir,
.rmdir = rdtgroup_rmdir,
+ .rename = rdtgroup_rename,
.show_options = rdtgroup_show_options,
};
--
2.38.1.493.g58b659f92b-goog