2022-08-28 05:09:41

by Tejun Heo

[permalink] [raw]
Subject: [PATCHSET v2 for-6.1] kernfs, cgroup: implement kernfs_show() and cgroup_file_show()

Hello,

Greg: If this looks good to you, can you apply 0001-0008 to your tree? I'll
likely have more cgroup updates on top, so I think it'd be better if I pull
your tree and then handle the cgroup part in cgroup tree.

Currently, deactivated kernfs nodes are used for two purposes - during
removal to kill and drain nodes and during creation to make multiple
kernfs_nodes creations to succeed or fail as a group.

This patchset implements kernfs_show() which can dynamically show and hide
kernfs_nodes using the [de]activation mechanisms, and, on top, implements
cgroup_file_show() which allows toggling cgroup file visiblity.

This is for the following pending patchset to allow disabling PSI on
per-cgroup basis:

https://lore.kernel.org/all/[email protected]/t/#u

which requires hiding the corresponding cgroup interface files while
disabled.

This patchset contains the following nine patches.

0001-kernfs-Simply-by-replacing-kernfs_deref_open_node-wi.patch
0002-kernfs-Drop-unnecessary-mutex-local-variable-initial.patch
0003-kernfs-Refactor-kernfs_get_open_node.patch
0004-kernfs-Skip-kernfs_drain_open_files-more-aggressivel.patch
0005-kernfs-Improve-kernfs_drain-and-always-call-on-remov.patch
0006-kernfs-Add-KERNFS_REMOVING-flags.patch
0007-kernfs-Factor-out-kernfs_activate_one.patch
0008-kernfs-Implement-kernfs_show.patch
0009-cgroup-Implement-cgroup_file_show.patch

0001-0003 are misc prep patches. 0004-0008 implement kernsf_deactivate().
0009 implements cgroup_file_show() on top. The patches are also available in
the following git branch:

git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git kernfs-show

The main difference from the previous version[1] is that while show/hide
still use the same mechanism as [de]activation, their states are tracked
separately so that e.g. an unrelated kernfs_activate() higher up in the tree
doesn't interfere with hidden nodes in the subtree.

diffstat follows. Thanks.

fs/kernfs/dir.c | 102 +++++++++++++++++++++++++++++++----------------
fs/kernfs/file.c | 151 +++++++++++++++++++++++++++++++++-------------------------------------
fs/kernfs/kernfs-internal.h | 1
include/linux/cgroup.h | 1
include/linux/kernfs.h | 3 +
kernel/cgroup/cgroup.c | 20 +++++++++
6 files changed, 166 insertions(+), 112 deletions(-)

--
tejun

[1] http://lkml.kernel.org/r/[email protected]


2022-08-28 05:10:31

by Tejun Heo

[permalink] [raw]
Subject: [PATCH 4/9] kernfs: Skip kernfs_drain_open_files() more aggressively

Track the number of mmapped files and files that need to be released and
skip kernfs_drain_open_file() if both are zero, which are the precise
conditions which require draining open_files. The early exit test is
factored into kernfs_should_drain_open_files() which is now tested by
kernfs_drain_open_files()'s caller - kernfs_drain().

This isn't a meaningful optimization on its own but will enable future
stand-alone kernfs_deactivate() implementation.

v2: Chengming noticed that on->nr_to_release was leaking after ->open()
failure. Fix it by telling kernfs_unlink_open_file() that it's called
from the ->open() fail path and should dec the counter. Use kzalloc() to
allocate kernfs_open_node so that the tracking fields are correctly
initialized.

Signed-off-by: Tejun Heo <[email protected]>
Cc: Chengming Zhou <[email protected]>
---
fs/kernfs/dir.c | 3 +-
fs/kernfs/file.c | 65 +++++++++++++++++++++++++------------
fs/kernfs/kernfs-internal.h | 1 +
3 files changed, 48 insertions(+), 21 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 1cc88ba6de90..8ae44db920d4 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -489,7 +489,8 @@ static void kernfs_drain(struct kernfs_node *kn)
rwsem_release(&kn->dep_map, _RET_IP_);
}

- kernfs_drain_open_files(kn);
+ if (kernfs_should_drain_open_files(kn))
+ kernfs_drain_open_files(kn);

down_write(&root->kernfs_rwsem);
}
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 7060a2a714b8..9ab6c92e02da 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -23,6 +23,8 @@ struct kernfs_open_node {
atomic_t event;
wait_queue_head_t poll;
struct list_head files; /* goes through kernfs_open_file.list */
+ unsigned int nr_mmapped;
+ unsigned int nr_to_release;
};

/*
@@ -527,6 +529,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)

rc = 0;
of->mmapped = true;
+ of_on(of)->nr_mmapped++;
of->vm_ops = vma->vm_ops;
vma->vm_ops = &kernfs_vm_ops;
out_put:
@@ -562,7 +565,7 @@ static int kernfs_get_open_node(struct kernfs_node *kn,

if (!on) {
/* not there, initialize a new one */
- on = kmalloc(sizeof(*on), GFP_KERNEL);
+ on = kzalloc(sizeof(*on), GFP_KERNEL);
if (!on) {
mutex_unlock(mutex);
return -ENOMEM;
@@ -574,6 +577,8 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
}

list_add_tail(&of->list, &on->files);
+ if (kn->flags & KERNFS_HAS_RELEASE)
+ on->nr_to_release++;

mutex_unlock(mutex);
return 0;
@@ -584,6 +589,7 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
*
* @kn: target kernfs_node
* @of: associated kernfs_open_file
+ * @open_failed: ->open() failed, cancel ->release()
*
* Unlink @of from list of @kn's associated open files. If list of
* associated open files becomes empty, disassociate and free
@@ -593,7 +599,8 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
* None.
*/
static void kernfs_unlink_open_file(struct kernfs_node *kn,
- struct kernfs_open_file *of)
+ struct kernfs_open_file *of,
+ bool open_failed)
{
struct kernfs_open_node *on;
struct mutex *mutex;
@@ -606,8 +613,16 @@ static void kernfs_unlink_open_file(struct kernfs_node *kn,
return;
}

- if (of)
+ if (of) {
+ if (kn->flags & KERNFS_HAS_RELEASE) {
+ WARN_ON_ONCE(of->released == open_failed);
+ if (open_failed)
+ on->nr_to_release--;
+ }
+ if (of->mmapped)
+ on->nr_mmapped--;
list_del(&of->list);
+ }

if (list_empty(&on->files)) {
rcu_assign_pointer(kn->attr.open, NULL);
@@ -734,7 +749,7 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
return 0;

err_put_node:
- kernfs_unlink_open_file(kn, of);
+ kernfs_unlink_open_file(kn, of, true);
err_seq_release:
seq_release(inode, file);
err_free:
@@ -766,6 +781,7 @@ static void kernfs_release_file(struct kernfs_node *kn,
*/
kn->attr.ops->release(of);
of->released = true;
+ of_on(of)->nr_to_release--;
}
}

@@ -782,7 +798,7 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
mutex_unlock(mutex);
}

- kernfs_unlink_open_file(kn, of);
+ kernfs_unlink_open_file(kn, of, false);
seq_release(inode, filp);
kfree(of->prealloc_buf);
kfree(of);
@@ -790,25 +806,30 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
return 0;
}

-void kernfs_drain_open_files(struct kernfs_node *kn)
+bool kernfs_should_drain_open_files(struct kernfs_node *kn)
{
struct kernfs_open_node *on;
- struct kernfs_open_file *of;
- struct mutex *mutex;
-
- if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
- return;
+ bool ret;

/*
- * lockless opportunistic check is safe below because no one is adding to
- * ->attr.open at this point of time. This check allows early bail out
- * if ->attr.open is already NULL. kernfs_unlink_open_file makes
- * ->attr.open NULL only while holding kernfs_open_file_mutex so below
- * check under kernfs_open_file_mutex_ptr(kn) will ensure bailing out if
- * ->attr.open became NULL while waiting for the mutex.
+ * @kn being deactivated guarantees that @kn->attr.open can't change
+ * beneath us making the lockless test below safe.
*/
- if (!rcu_access_pointer(kn->attr.open))
- return;
+ WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
+
+ rcu_read_lock();
+ on = rcu_dereference(kn->attr.open);
+ ret = on && (on->nr_mmapped || on->nr_to_release);
+ rcu_read_unlock();
+
+ return ret;
+}
+
+void kernfs_drain_open_files(struct kernfs_node *kn)
+{
+ struct kernfs_open_node *on;
+ struct kernfs_open_file *of;
+ struct mutex *mutex;

mutex = kernfs_open_file_mutex_lock(kn);
on = kernfs_deref_open_node_locked(kn);
@@ -820,13 +841,17 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
list_for_each_entry(of, &on->files, list) {
struct inode *inode = file_inode(of->file);

- if (kn->flags & KERNFS_HAS_MMAP)
+ if (of->mmapped) {
unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+ of->mmapped = false;
+ on->nr_mmapped--;
+ }

if (kn->flags & KERNFS_HAS_RELEASE)
kernfs_release_file(kn, of);
}

+ WARN_ON_ONCE(on->nr_mmapped || on->nr_to_release);
mutex_unlock(mutex);
}

diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 3ae214d02d44..fc5821effd97 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -157,6 +157,7 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
*/
extern const struct file_operations kernfs_file_fops;

+bool kernfs_should_drain_open_files(struct kernfs_node *kn);
void kernfs_drain_open_files(struct kernfs_node *kn);

/*
--
2.37.2

2022-08-28 05:10:41

by Tejun Heo

[permalink] [raw]
Subject: [PATCH 6/9] kernfs: Add KERNFS_REMOVING flags

KERNFS_ACTIVATED tracks whether a given node has ever been activated. As a
node was only deactivated on removal, this was used for

1. Drain optimization (removed by the previous patch).
2. To hide !activated nodes
3. To avoid double activations
4. Reject adding children to a node being removed
5. Skip activaing a node which is being removed.

We want to decouple deactivation from removal so that nodes can be
deactivated and hidden dynamically, which makes KERNFS_ACTIVATED useless for
all of the above purposes.

#1 is already gone. #2 and #3 can instead test whether the node is currently
active. A new flag KERNFS_REMOVING is added to explicitly mark nodes which
are being removed for #4 and #5.

While this leaves KERNFS_ACTIVATED with no users, leave it be as it will be
used in a following patch.

Signed-off-by: Tejun Heo <[email protected]>
Cc: Chengming Zhou <[email protected]>
---
fs/kernfs/dir.c | 21 +++++++--------------
include/linux/kernfs.h | 1 +
2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index b3d2018a334d..f8cbd05e9b68 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -705,13 +705,7 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
goto err_unlock;
}

- /*
- * ACTIVATED is protected with kernfs_mutex but it was clear when
- * @kn was added to idr and we just wanna see it set. No need to
- * grab kernfs_mutex.
- */
- if (unlikely(!(kn->flags & KERNFS_ACTIVATED) ||
- !atomic_inc_not_zero(&kn->count)))
+ if (unlikely(!kernfs_active(kn) || !atomic_inc_not_zero(&kn->count)))
goto err_unlock;

spin_unlock(&kernfs_idr_lock);
@@ -753,10 +747,7 @@ int kernfs_add_one(struct kernfs_node *kn)
goto out_unlock;

ret = -ENOENT;
- if (parent->flags & KERNFS_EMPTY_DIR)
- goto out_unlock;
-
- if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
+ if (parent->flags & (KERNFS_REMOVING | KERNFS_EMPTY_DIR))
goto out_unlock;

kn->hash = kernfs_name_hash(kn->name, kn->ns);
@@ -1336,7 +1327,7 @@ void kernfs_activate(struct kernfs_node *kn)

pos = NULL;
while ((pos = kernfs_next_descendant_post(pos, kn))) {
- if (pos->flags & KERNFS_ACTIVATED)
+ if (kernfs_active(pos) || (pos->flags & KERNFS_REMOVING))
continue;

WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
@@ -1368,11 +1359,13 @@ static void __kernfs_remove(struct kernfs_node *kn)

pr_debug("kernfs %s: removing\n", kn->name);

- /* prevent any new usage under @kn by deactivating all nodes */
+ /* prevent new usage by marking all nodes removing and deactivating */
pos = NULL;
- while ((pos = kernfs_next_descendant_post(pos, kn)))
+ while ((pos = kernfs_next_descendant_post(pos, kn))) {
+ pos->flags |= KERNFS_REMOVING;
if (kernfs_active(pos))
atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
+ }

/* deactivate and unlink the subtree node-by-node */
do {
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 367044d7708c..b77d257c1f7e 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -112,6 +112,7 @@ enum kernfs_node_flag {
KERNFS_SUICIDED = 0x0800,
KERNFS_EMPTY_DIR = 0x1000,
KERNFS_HAS_RELEASE = 0x2000,
+ KERNFS_REMOVING = 0x4000,
};

/* @flags for kernfs_create_root() */
--
2.37.2

2022-08-28 05:14:50

by Tejun Heo

[permalink] [raw]
Subject: [PATCH 9/9] cgroup: Implement cgroup_file_show()

Add cgroup_file_show() which allows toggling visibility of a cgroup file
using the new kernfs_show(). This will be used to hide psi interface files
on cgroups where it's disabled.

Signed-off-by: Tejun Heo <[email protected]>
Cc: Chengming Zhou <[email protected]>
Cc: Johannes Weiner <[email protected]>
---
include/linux/cgroup.h | 1 +
kernel/cgroup/cgroup.c | 20 ++++++++++++++++++++
2 files changed, 21 insertions(+)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ed53bfe7c46c..f61dd135efbe 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -114,6 +114,7 @@ int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_rm_cftypes(struct cftype *cfts);
void cgroup_file_notify(struct cgroup_file *cfile);
+void cgroup_file_show(struct cgroup_file *cfile, bool show);

int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index ffaccd6373f1..217469a1ea29 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4339,6 +4339,26 @@ void cgroup_file_notify(struct cgroup_file *cfile)
spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
}

+/**
+ * cgroup_file_show - show or hide a hidden cgroup file
+ * @cfile: target cgroup_file obtained by setting cftype->file_offset
+ * @show: whether to show or hide
+ */
+void cgroup_file_show(struct cgroup_file *cfile, bool show)
+{
+ struct kernfs_node *kn;
+
+ spin_lock_irq(&cgroup_file_kn_lock);
+ kn = cfile->kn;
+ kernfs_get(kn);
+ spin_unlock_irq(&cgroup_file_kn_lock);
+
+ if (kn)
+ kernfs_show(kn, show);
+
+ kernfs_put(kn);
+}
+
/**
* css_next_child - find the next child of a given css
* @pos: the current position (%NULL to initiate traversal)
--
2.37.2

2022-08-28 05:17:40

by Tejun Heo

[permalink] [raw]
Subject: [PATCH 5/9] kernfs: Improve kernfs_drain() and always call on removal

__kernfs_remove() was skipping draining based on KERNFS_ACTIVATED - whether
the node has ever been activated since creation. Instead, update it to
always call kernfs_drain() which now drains or skips based on the precise
drain conditions. This ensures that the nodes will be deactivated and
drained regardless of their states.

This doesn't make meaningful difference now but will enable deactivating and
draining nodes dynamically by making removals safe when racing those
operations.

While at it, drop / update comments.

v2: Fix the inverted test on kernfs_should_drain_open_files() noted by
Chengming. This was fixed by the next unrelated patch in the previous
posting.

Signed-off-by: Tejun Heo <[email protected]>
Cc: Chengming Zhou <[email protected]>
---
fs/kernfs/dir.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 8ae44db920d4..b3d2018a334d 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -472,6 +472,16 @@ static void kernfs_drain(struct kernfs_node *kn)
lockdep_assert_held_write(&root->kernfs_rwsem);
WARN_ON_ONCE(kernfs_active(kn));

+ /*
+ * Skip draining if already fully drained. This avoids draining and its
+ * lockdep annotations for nodes which have never been activated
+ * allowing embedding kernfs_remove() in create error paths without
+ * worrying about draining.
+ */
+ if (atomic_read(&kn->active) == KN_DEACTIVATED_BIAS &&
+ !kernfs_should_drain_open_files(kn))
+ return;
+
up_write(&root->kernfs_rwsem);

if (kernfs_lockdep(kn)) {
@@ -480,7 +490,6 @@ static void kernfs_drain(struct kernfs_node *kn)
lock_contended(&kn->dep_map, _RET_IP_);
}

- /* but everyone should wait for draining */
wait_event(root->deactivate_waitq,
atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);

@@ -1370,23 +1379,14 @@ static void __kernfs_remove(struct kernfs_node *kn)
pos = kernfs_leftmost_descendant(kn);

/*
- * kernfs_drain() drops kernfs_rwsem temporarily and @pos's
+ * kernfs_drain() may drop kernfs_rwsem temporarily and @pos's
* base ref could have been put by someone else by the time
* the function returns. Make sure it doesn't go away
* underneath us.
*/
kernfs_get(pos);

- /*
- * Drain iff @kn was activated. This avoids draining and
- * its lockdep annotations for nodes which have never been
- * activated and allows embedding kernfs_remove() in create
- * error paths without worrying about draining.
- */
- if (kn->flags & KERNFS_ACTIVATED)
- kernfs_drain(pos);
- else
- WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
+ kernfs_drain(pos);

/*
* kernfs_unlink_sibling() succeeds once per node. Use it
--
2.37.2

2022-08-28 05:22:55

by Tejun Heo

[permalink] [raw]
Subject: [PATCH 1/9] kernfs: Simply by replacing kernfs_deref_open_node() with of_on()

kernfs_node->attr.open is an RCU pointer to kernfs_open_node. However, RCU
dereference is currently only used in kernfs_notify(). Everywhere else,
either we're holding the lock which protects it or know that the
kernfs_open_node is pinned becaused we have a pointer to a kernfs_open_file
which is hanging off of it.

kernfs_deref_open_node() is used for the latter case - accessing
kernfs_open_node from kernfs_open_file. The lifetime and visibility rules
are simple and clear here. To someone who can access a kernfs_open_file, its
kernfs_open_node is pinned and visible through of->kn->attr.open.

Replace kernfs_deref_open_node() which simpler of_on(). The former takes
both @kn and @of and RCU deref @kn->attr.open while sanity checking with
@of. The latter takes @of and uses protected deref on of->kn->attr.open.

As the return value can't be NULL, remove the error handling in the callers
too.

This shouldn't cause any functional changes.

Signed-off-by: Tejun Heo <[email protected]>
Cc: Imran Khan <[email protected]>
Reviewed-by: Chengming Zhou <[email protected]>
---
fs/kernfs/file.c | 56 +++++++++++-------------------------------------
1 file changed, 13 insertions(+), 43 deletions(-)

diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index b3ec34386b43..32b16fe00a9e 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -57,31 +57,17 @@ static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn)
}

/**
- * kernfs_deref_open_node - Get kernfs_open_node corresponding to @kn.
- *
- * @of: associated kernfs_open_file instance.
- * @kn: target kernfs_node.
- *
- * Fetch and return ->attr.open of @kn if @of->list is non empty.
- * If @of->list is not empty we can safely assume that @of is on
- * @kn->attr.open->files list and this guarantees that @kn->attr.open
- * will not vanish i.e. dereferencing outside RCU read-side critical
- * section is safe here.
- *
- * The caller needs to make sure that @of->list is not empty.
+ * of_on - Return the kernfs_open_node of the specified kernfs_open_file
+ * @of: taret kernfs_open_file
*/
-static struct kernfs_open_node *
-kernfs_deref_open_node(struct kernfs_open_file *of, struct kernfs_node *kn)
+static struct kernfs_open_node *of_on(struct kernfs_open_file *of)
{
- struct kernfs_open_node *on;
-
- on = rcu_dereference_check(kn->attr.open, !list_empty(&of->list));
-
- return on;
+ return rcu_dereference_protected(of->kn->attr.open,
+ !list_empty(&of->list));
}

/**
- * kernfs_deref_open_node_protected - Get kernfs_open_node corresponding to @kn
+ * kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn
*
* @kn: target kernfs_node.
*
@@ -96,7 +82,7 @@ kernfs_deref_open_node(struct kernfs_open_file *of, struct kernfs_node *kn)
* The caller needs to make sure that kernfs_open_file_mutex is held.
*/
static struct kernfs_open_node *
-kernfs_deref_open_node_protected(struct kernfs_node *kn)
+kernfs_deref_open_node_locked(struct kernfs_node *kn)
{
return rcu_dereference_protected(kn->attr.open,
lockdep_is_held(kernfs_open_file_mutex_ptr(kn)));
@@ -207,12 +193,8 @@ static void kernfs_seq_stop(struct seq_file *sf, void *v)
static int kernfs_seq_show(struct seq_file *sf, void *v)
{
struct kernfs_open_file *of = sf->private;
- struct kernfs_open_node *on = kernfs_deref_open_node(of, of->kn);
-
- if (!on)
- return -EINVAL;

- of->event = atomic_read(&on->event);
+ of->event = atomic_read(&of_on(of)->event);

return of->kn->attr.ops->seq_show(sf, v);
}
@@ -235,7 +217,6 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE);
const struct kernfs_ops *ops;
- struct kernfs_open_node *on;
char *buf;

buf = of->prealloc_buf;
@@ -257,14 +238,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
goto out_free;
}

- on = kernfs_deref_open_node(of, of->kn);
- if (!on) {
- len = -EINVAL;
- mutex_unlock(&of->mutex);
- goto out_free;
- }
-
- of->event = atomic_read(&on->event);
+ of->event = atomic_read(&of_on(of)->event);

ops = kernfs_ops(of->kn);
if (ops->read)
@@ -584,7 +558,7 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
struct mutex *mutex = NULL;

mutex = kernfs_open_file_mutex_lock(kn);
- on = kernfs_deref_open_node_protected(kn);
+ on = kernfs_deref_open_node_locked(kn);

if (on) {
list_add_tail(&of->list, &on->files);
@@ -629,7 +603,7 @@ static void kernfs_unlink_open_file(struct kernfs_node *kn,

mutex = kernfs_open_file_mutex_lock(kn);

- on = kernfs_deref_open_node_protected(kn);
+ on = kernfs_deref_open_node_locked(kn);
if (!on) {
mutex_unlock(mutex);
return;
@@ -839,7 +813,7 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
return;

mutex = kernfs_open_file_mutex_lock(kn);
- on = kernfs_deref_open_node_protected(kn);
+ on = kernfs_deref_open_node_locked(kn);
if (!on) {
mutex_unlock(mutex);
return;
@@ -874,11 +848,7 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
*/
__poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
{
- struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry);
- struct kernfs_open_node *on = kernfs_deref_open_node(of, kn);
-
- if (!on)
- return EPOLLERR;
+ struct kernfs_open_node *on = of_on(of);

poll_wait(of->file, &on->poll, wait);

--
2.37.2

2022-08-28 05:30:07

by Tejun Heo

[permalink] [raw]
Subject: [PATCH 7/9] kernfs: Factor out kernfs_activate_one()

Factor out kernfs_activate_one() from kernfs_activate() and reorder
operations so that KERNFS_ACTIVATED now simply indicates whether activation
was attempted on the node ignoring whether activation took place. As the
flag doesn't have a reader, the refactoring and reordering shouldn't cause
any behavior difference.

Signed-off-by: Tejun Heo <[email protected]>
---
fs/kernfs/dir.c | 27 +++++++++++++++++----------
1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index f8cbd05e9b68..c9323956c63c 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1305,6 +1305,21 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
return pos->parent;
}

+static void kernfs_activate_one(struct kernfs_node *kn)
+{
+ lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem);
+
+ kn->flags |= KERNFS_ACTIVATED;
+
+ if (kernfs_active(kn) || (kn->flags & KERNFS_REMOVING))
+ return;
+
+ WARN_ON_ONCE(kn->parent && RB_EMPTY_NODE(&kn->rb));
+ WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
+
+ atomic_sub(KN_DEACTIVATED_BIAS, &kn->active);
+}
+
/**
* kernfs_activate - activate a node which started deactivated
* @kn: kernfs_node whose subtree is to be activated
@@ -1326,16 +1341,8 @@ void kernfs_activate(struct kernfs_node *kn)
down_write(&root->kernfs_rwsem);

pos = NULL;
- while ((pos = kernfs_next_descendant_post(pos, kn))) {
- if (kernfs_active(pos) || (pos->flags & KERNFS_REMOVING))
- continue;
-
- WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
- WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);
-
- atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
- pos->flags |= KERNFS_ACTIVATED;
- }
+ while ((pos = kernfs_next_descendant_post(pos, kn)))
+ kernfs_activate_one(pos);

up_write(&root->kernfs_rwsem);
}
--
2.37.2

2022-08-28 05:36:46

by Tejun Heo

[permalink] [raw]
Subject: [PATCH 8/9] kernfs: Implement kernfs_show()

Currently, kernfs nodes can be created hidden and activated later by calling
kernfs_activate() to allow creation of multiple nodes to succeed or fail as
a unit. This is an one-way one-time-only transition. This patch introduces
kernfs_show() which can toggle visibility dynamically.

As the currently proposed use - toggling the cgroup pressure files - only
requires operating on leaf nodes, for the sake of simplicity, restrict it as
such for now.

Hiding uses the same mechanism as deactivation and likewise guarantees that
there are no in-flight operations on completion. KERNFS_ACTIVATED and
KERNFS_HIDDEN are used to manage the interactions between activations and
show/hide operations. A node is visible iff both activated & !hidden.

Signed-off-by: Tejun Heo <[email protected]>
Cc: Chengming Zhou <[email protected]>
Cc: Johannes Weiner <[email protected]>
---
fs/kernfs/dir.c | 37 ++++++++++++++++++++++++++++++++++++-
include/linux/kernfs.h | 2 ++
2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index c9323956c63c..7fb5a72cc96d 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1311,7 +1311,7 @@ static void kernfs_activate_one(struct kernfs_node *kn)

kn->flags |= KERNFS_ACTIVATED;

- if (kernfs_active(kn) || (kn->flags & KERNFS_REMOVING))
+ if (kernfs_active(kn) || (kn->flags & (KERNFS_HIDDEN | KERNFS_REMOVING)))
return;

WARN_ON_ONCE(kn->parent && RB_EMPTY_NODE(&kn->rb));
@@ -1347,6 +1347,41 @@ void kernfs_activate(struct kernfs_node *kn)
up_write(&root->kernfs_rwsem);
}

+/**
+ * kernfs_show - show or hide a node
+ * @kn: kernfs_node to show or hide
+ * @show: whether to show or hide
+ *
+ * If @show is %false, @kn is marked hidden and deactivated. A hidden node is
+ * ignored in future activaitons. If %true, the mark is removed and activation
+ * state is restored. This function won't implicitly activate a new node in a
+ * %KERNFS_ROOT_CREATE_DEACTIVATED root which hasn't been activated yet.
+ *
+ * To avoid recursion complexities, directories aren't supported for now.
+ */
+void kernfs_show(struct kernfs_node *kn, bool show)
+{
+ struct kernfs_root *root = kernfs_root(kn);
+
+ if (WARN_ON_ONCE(kernfs_type(kn) == KERNFS_DIR))
+ return;
+
+ down_write(&root->kernfs_rwsem);
+
+ if (show) {
+ kn->flags &= ~KERNFS_HIDDEN;
+ if (kn->flags & KERNFS_ACTIVATED)
+ kernfs_activate_one(kn);
+ } else {
+ kn->flags |= KERNFS_HIDDEN;
+ if (kernfs_active(kn))
+ atomic_add(KN_DEACTIVATED_BIAS, &kn->active);
+ kernfs_drain(kn);
+ }
+
+ up_write(&root->kernfs_rwsem);
+}
+
static void __kernfs_remove(struct kernfs_node *kn)
{
struct kernfs_node *pos;
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index b77d257c1f7e..73f5c120def8 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -108,6 +108,7 @@ enum kernfs_node_flag {
KERNFS_HAS_SEQ_SHOW = 0x0040,
KERNFS_HAS_MMAP = 0x0080,
KERNFS_LOCKDEP = 0x0100,
+ KERNFS_HIDDEN = 0x0200,
KERNFS_SUICIDAL = 0x0400,
KERNFS_SUICIDED = 0x0800,
KERNFS_EMPTY_DIR = 0x1000,
@@ -430,6 +431,7 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
const char *name,
struct kernfs_node *target);
void kernfs_activate(struct kernfs_node *kn);
+void kernfs_show(struct kernfs_node *kn, bool show);
void kernfs_remove(struct kernfs_node *kn);
void kernfs_break_active_protection(struct kernfs_node *kn);
void kernfs_unbreak_active_protection(struct kernfs_node *kn);
--
2.37.2

2022-08-28 08:40:18

by Chengming Zhou

[permalink] [raw]
Subject: Re: [PATCHSET v2 for-6.1] kernfs, cgroup: implement kernfs_show() and cgroup_file_show()

On 2022/8/28 13:04, Tejun Heo wrote:
> Hello,
>
> Greg: If this looks good to you, can you apply 0001-0008 to your tree? I'll
> likely have more cgroup updates on top, so I think it'd be better if I pull
> your tree and then handle the cgroup part in cgroup tree.
>
> Currently, deactivated kernfs nodes are used for two purposes - during
> removal to kill and drain nodes and during creation to make multiple
> kernfs_nodes creations to succeed or fail as a group.
>
> This patchset implements kernfs_show() which can dynamically show and hide
> kernfs_nodes using the [de]activation mechanisms, and, on top, implements
> cgroup_file_show() which allows toggling cgroup file visiblity.
>
> This is for the following pending patchset to allow disabling PSI on
> per-cgroup basis:
>
> https://lore.kernel.org/all/[email protected]/t/#u
>
> which requires hiding the corresponding cgroup interface files while
> disabled.
>
> This patchset contains the following nine patches.
>
> 0001-kernfs-Simply-by-replacing-kernfs_deref_open_node-wi.patch
> 0002-kernfs-Drop-unnecessary-mutex-local-variable-initial.patch
> 0003-kernfs-Refactor-kernfs_get_open_node.patch
> 0004-kernfs-Skip-kernfs_drain_open_files-more-aggressivel.patch
> 0005-kernfs-Improve-kernfs_drain-and-always-call-on-remov.patch
> 0006-kernfs-Add-KERNFS_REMOVING-flags.patch
> 0007-kernfs-Factor-out-kernfs_activate_one.patch
> 0008-kernfs-Implement-kernfs_show.patch
> 0009-cgroup-Implement-cgroup_file_show.patch
>
> 0001-0003 are misc prep patches. 0004-0008 implement kernsf_deactivate().
> 0009 implements cgroup_file_show() on top. The patches are also available in
> the following git branch:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git kernfs-show
>
> The main difference from the previous version[1] is that while show/hide
> still use the same mechanism as [de]activation, their states are tracked
> separately so that e.g. an unrelated kernfs_activate() higher up in the tree
> doesn't interfere with hidden nodes in the subtree.

Hello,

For this series:
Reviewed-by: Chengming Zhou <[email protected]>
Tested-by: Chengming Zhou <[email protected]>

Thanks!

>
> diffstat follows. Thanks.
>
> fs/kernfs/dir.c | 102 +++++++++++++++++++++++++++++++----------------
> fs/kernfs/file.c | 151 +++++++++++++++++++++++++++++++++-------------------------------------
> fs/kernfs/kernfs-internal.h | 1
> include/linux/cgroup.h | 1
> include/linux/kernfs.h | 3 +
> kernel/cgroup/cgroup.c | 20 +++++++++
> 6 files changed, 166 insertions(+), 112 deletions(-)
>
> --
> tejun
>
> [1] http://lkml.kernel.org/r/[email protected]
>

Subject: [tip: sched/psi] kernfs: Skip kernfs_drain_open_files() more aggressively

The following commit has been merged into the sched/psi branch of tip:

Commit-ID: bdb2fd7fc56e197a63c0b0e7e07d25d5e20e7c72
Gitweb: https://git.kernel.org/tip/bdb2fd7fc56e197a63c0b0e7e07d25d5e20e7c72
Author: Tejun Heo <[email protected]>
AuthorDate: Sat, 27 Aug 2022 19:04:35 -10:00
Committer: Greg Kroah-Hartman <[email protected]>
CommitterDate: Thu, 01 Sep 2022 18:08:44 +02:00

kernfs: Skip kernfs_drain_open_files() more aggressively

Track the number of mmapped files and files that need to be released and
skip kernfs_drain_open_file() if both are zero, which are the precise
conditions which require draining open_files. The early exit test is
factored into kernfs_should_drain_open_files() which is now tested by
kernfs_drain_open_files()'s caller - kernfs_drain().

This isn't a meaningful optimization on its own but will enable future
stand-alone kernfs_deactivate() implementation.

v2: Chengming noticed that on->nr_to_release was leaking after ->open()
failure. Fix it by telling kernfs_unlink_open_file() that it's called
from the ->open() fail path and should dec the counter. Use kzalloc() to
allocate kernfs_open_node so that the tracking fields are correctly
initialized.

Cc: Chengming Zhou <[email protected]>
Tested-by: Chengming Zhou <[email protected]>
Reviewed-by: Chengming Zhou <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Greg Kroah-Hartman <[email protected]>
---
fs/kernfs/dir.c | 3 +-
fs/kernfs/file.c | 65 ++++++++++++++++++++++++------------
fs/kernfs/kernfs-internal.h | 1 +-
3 files changed, 48 insertions(+), 21 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 1cc88ba..8ae44db 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -489,7 +489,8 @@ static void kernfs_drain(struct kernfs_node *kn)
rwsem_release(&kn->dep_map, _RET_IP_);
}

- kernfs_drain_open_files(kn);
+ if (kernfs_should_drain_open_files(kn))
+ kernfs_drain_open_files(kn);

down_write(&root->kernfs_rwsem);
}
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 7060a2a..9ab6c92 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -23,6 +23,8 @@ struct kernfs_open_node {
atomic_t event;
wait_queue_head_t poll;
struct list_head files; /* goes through kernfs_open_file.list */
+ unsigned int nr_mmapped;
+ unsigned int nr_to_release;
};

/*
@@ -527,6 +529,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)

rc = 0;
of->mmapped = true;
+ of_on(of)->nr_mmapped++;
of->vm_ops = vma->vm_ops;
vma->vm_ops = &kernfs_vm_ops;
out_put:
@@ -562,7 +565,7 @@ static int kernfs_get_open_node(struct kernfs_node *kn,

if (!on) {
/* not there, initialize a new one */
- on = kmalloc(sizeof(*on), GFP_KERNEL);
+ on = kzalloc(sizeof(*on), GFP_KERNEL);
if (!on) {
mutex_unlock(mutex);
return -ENOMEM;
@@ -574,6 +577,8 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
}

list_add_tail(&of->list, &on->files);
+ if (kn->flags & KERNFS_HAS_RELEASE)
+ on->nr_to_release++;

mutex_unlock(mutex);
return 0;
@@ -584,6 +589,7 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
*
* @kn: target kernfs_node
* @of: associated kernfs_open_file
+ * @open_failed: ->open() failed, cancel ->release()
*
* Unlink @of from list of @kn's associated open files. If list of
* associated open files becomes empty, disassociate and free
@@ -593,7 +599,8 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
* None.
*/
static void kernfs_unlink_open_file(struct kernfs_node *kn,
- struct kernfs_open_file *of)
+ struct kernfs_open_file *of,
+ bool open_failed)
{
struct kernfs_open_node *on;
struct mutex *mutex;
@@ -606,8 +613,16 @@ static void kernfs_unlink_open_file(struct kernfs_node *kn,
return;
}

- if (of)
+ if (of) {
+ if (kn->flags & KERNFS_HAS_RELEASE) {
+ WARN_ON_ONCE(of->released == open_failed);
+ if (open_failed)
+ on->nr_to_release--;
+ }
+ if (of->mmapped)
+ on->nr_mmapped--;
list_del(&of->list);
+ }

if (list_empty(&on->files)) {
rcu_assign_pointer(kn->attr.open, NULL);
@@ -734,7 +749,7 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
return 0;

err_put_node:
- kernfs_unlink_open_file(kn, of);
+ kernfs_unlink_open_file(kn, of, true);
err_seq_release:
seq_release(inode, file);
err_free:
@@ -766,6 +781,7 @@ static void kernfs_release_file(struct kernfs_node *kn,
*/
kn->attr.ops->release(of);
of->released = true;
+ of_on(of)->nr_to_release--;
}
}

@@ -782,7 +798,7 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
mutex_unlock(mutex);
}

- kernfs_unlink_open_file(kn, of);
+ kernfs_unlink_open_file(kn, of, false);
seq_release(inode, filp);
kfree(of->prealloc_buf);
kfree(of);
@@ -790,25 +806,30 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
return 0;
}

-void kernfs_drain_open_files(struct kernfs_node *kn)
+bool kernfs_should_drain_open_files(struct kernfs_node *kn)
{
struct kernfs_open_node *on;
- struct kernfs_open_file *of;
- struct mutex *mutex;
-
- if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
- return;
+ bool ret;

/*
- * lockless opportunistic check is safe below because no one is adding to
- * ->attr.open at this point of time. This check allows early bail out
- * if ->attr.open is already NULL. kernfs_unlink_open_file makes
- * ->attr.open NULL only while holding kernfs_open_file_mutex so below
- * check under kernfs_open_file_mutex_ptr(kn) will ensure bailing out if
- * ->attr.open became NULL while waiting for the mutex.
+ * @kn being deactivated guarantees that @kn->attr.open can't change
+ * beneath us making the lockless test below safe.
*/
- if (!rcu_access_pointer(kn->attr.open))
- return;
+ WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
+
+ rcu_read_lock();
+ on = rcu_dereference(kn->attr.open);
+ ret = on && (on->nr_mmapped || on->nr_to_release);
+ rcu_read_unlock();
+
+ return ret;
+}
+
+void kernfs_drain_open_files(struct kernfs_node *kn)
+{
+ struct kernfs_open_node *on;
+ struct kernfs_open_file *of;
+ struct mutex *mutex;

mutex = kernfs_open_file_mutex_lock(kn);
on = kernfs_deref_open_node_locked(kn);
@@ -820,13 +841,17 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
list_for_each_entry(of, &on->files, list) {
struct inode *inode = file_inode(of->file);

- if (kn->flags & KERNFS_HAS_MMAP)
+ if (of->mmapped) {
unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+ of->mmapped = false;
+ on->nr_mmapped--;
+ }

if (kn->flags & KERNFS_HAS_RELEASE)
kernfs_release_file(kn, of);
}

+ WARN_ON_ONCE(on->nr_mmapped || on->nr_to_release);
mutex_unlock(mutex);
}

diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 3ae214d..fc5821e 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -157,6 +157,7 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
*/
extern const struct file_operations kernfs_file_fops;

+bool kernfs_should_drain_open_files(struct kernfs_node *kn);
void kernfs_drain_open_files(struct kernfs_node *kn);

/*

Subject: [tip: sched/psi] kernfs: Improve kernfs_drain() and always call on removal

The following commit has been merged into the sched/psi branch of tip:

Commit-ID: 2d7f9f8c1815707e9ddb454648a523efc67a04d3
Gitweb: https://git.kernel.org/tip/2d7f9f8c1815707e9ddb454648a523efc67a04d3
Author: Tejun Heo <[email protected]>
AuthorDate: Sat, 27 Aug 2022 19:04:36 -10:00
Committer: Greg Kroah-Hartman <[email protected]>
CommitterDate: Thu, 01 Sep 2022 18:08:44 +02:00

kernfs: Improve kernfs_drain() and always call on removal

__kernfs_remove() was skipping draining based on KERNFS_ACTIVATED - whether
the node has ever been activated since creation. Instead, update it to
always call kernfs_drain() which now drains or skips based on the precise
drain conditions. This ensures that the nodes will be deactivated and
drained regardless of their states.

This doesn't make meaningful difference now but will enable deactivating and
draining nodes dynamically by making removals safe when racing those
operations.

While at it, drop / update comments.

v2: Fix the inverted test on kernfs_should_drain_open_files() noted by
Chengming. This was fixed by the next unrelated patch in the previous
posting.

Cc: Chengming Zhou <[email protected]>
Tested-by: Chengming Zhou <[email protected]>
Reviewed-by: Chengming Zhou <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Greg Kroah-Hartman <[email protected]>
---
fs/kernfs/dir.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 8ae44db..b3d2018 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -472,6 +472,16 @@ static void kernfs_drain(struct kernfs_node *kn)
lockdep_assert_held_write(&root->kernfs_rwsem);
WARN_ON_ONCE(kernfs_active(kn));

+ /*
+ * Skip draining if already fully drained. This avoids draining and its
+ * lockdep annotations for nodes which have never been activated
+ * allowing embedding kernfs_remove() in create error paths without
+ * worrying about draining.
+ */
+ if (atomic_read(&kn->active) == KN_DEACTIVATED_BIAS &&
+ !kernfs_should_drain_open_files(kn))
+ return;
+
up_write(&root->kernfs_rwsem);

if (kernfs_lockdep(kn)) {
@@ -480,7 +490,6 @@ static void kernfs_drain(struct kernfs_node *kn)
lock_contended(&kn->dep_map, _RET_IP_);
}

- /* but everyone should wait for draining */
wait_event(root->deactivate_waitq,
atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);

@@ -1370,23 +1379,14 @@ static void __kernfs_remove(struct kernfs_node *kn)
pos = kernfs_leftmost_descendant(kn);

/*
- * kernfs_drain() drops kernfs_rwsem temporarily and @pos's
+ * kernfs_drain() may drop kernfs_rwsem temporarily and @pos's
* base ref could have been put by someone else by the time
* the function returns. Make sure it doesn't go away
* underneath us.
*/
kernfs_get(pos);

- /*
- * Drain iff @kn was activated. This avoids draining and
- * its lockdep annotations for nodes which have never been
- * activated and allows embedding kernfs_remove() in create
- * error paths without worrying about draining.
- */
- if (kn->flags & KERNFS_ACTIVATED)
- kernfs_drain(pos);
- else
- WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
+ kernfs_drain(pos);

/*
* kernfs_unlink_sibling() succeeds once per node. Use it

Subject: [tip: sched/psi] kernfs: Add KERNFS_REMOVING flags

The following commit has been merged into the sched/psi branch of tip:

Commit-ID: c25491747b21536bd56dccb82a109754bbc8d52c
Gitweb: https://git.kernel.org/tip/c25491747b21536bd56dccb82a109754bbc8d52c
Author: Tejun Heo <[email protected]>
AuthorDate: Sat, 27 Aug 2022 19:04:37 -10:00
Committer: Greg Kroah-Hartman <[email protected]>
CommitterDate: Thu, 01 Sep 2022 18:08:44 +02:00

kernfs: Add KERNFS_REMOVING flags

KERNFS_ACTIVATED tracks whether a given node has ever been activated. As a
node was only deactivated on removal, this was used for

1. Drain optimization (removed by the previous patch).
2. To hide !activated nodes
3. To avoid double activations
4. Reject adding children to a node being removed
5. Skip activaing a node which is being removed.

We want to decouple deactivation from removal so that nodes can be
deactivated and hidden dynamically, which makes KERNFS_ACTIVATED useless for
all of the above purposes.

#1 is already gone. #2 and #3 can instead test whether the node is currently
active. A new flag KERNFS_REMOVING is added to explicitly mark nodes which
are being removed for #4 and #5.

While this leaves KERNFS_ACTIVATED with no users, leave it be as it will be
used in a following patch.

Cc: Chengming Zhou <[email protected]>
Tested-by: Chengming Zhou <[email protected]>
Reviewed-by: Chengming Zhou <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Greg Kroah-Hartman <[email protected]>
---
fs/kernfs/dir.c | 21 +++++++--------------
include/linux/kernfs.h | 1 +
2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index b3d2018..f8cbd05 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -705,13 +705,7 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
goto err_unlock;
}

- /*
- * ACTIVATED is protected with kernfs_mutex but it was clear when
- * @kn was added to idr and we just wanna see it set. No need to
- * grab kernfs_mutex.
- */
- if (unlikely(!(kn->flags & KERNFS_ACTIVATED) ||
- !atomic_inc_not_zero(&kn->count)))
+ if (unlikely(!kernfs_active(kn) || !atomic_inc_not_zero(&kn->count)))
goto err_unlock;

spin_unlock(&kernfs_idr_lock);
@@ -753,10 +747,7 @@ int kernfs_add_one(struct kernfs_node *kn)
goto out_unlock;

ret = -ENOENT;
- if (parent->flags & KERNFS_EMPTY_DIR)
- goto out_unlock;
-
- if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
+ if (parent->flags & (KERNFS_REMOVING | KERNFS_EMPTY_DIR))
goto out_unlock;

kn->hash = kernfs_name_hash(kn->name, kn->ns);
@@ -1336,7 +1327,7 @@ void kernfs_activate(struct kernfs_node *kn)

pos = NULL;
while ((pos = kernfs_next_descendant_post(pos, kn))) {
- if (pos->flags & KERNFS_ACTIVATED)
+ if (kernfs_active(pos) || (pos->flags & KERNFS_REMOVING))
continue;

WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
@@ -1368,11 +1359,13 @@ static void __kernfs_remove(struct kernfs_node *kn)

pr_debug("kernfs %s: removing\n", kn->name);

- /* prevent any new usage under @kn by deactivating all nodes */
+ /* prevent new usage by marking all nodes removing and deactivating */
pos = NULL;
- while ((pos = kernfs_next_descendant_post(pos, kn)))
+ while ((pos = kernfs_next_descendant_post(pos, kn))) {
+ pos->flags |= KERNFS_REMOVING;
if (kernfs_active(pos))
atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
+ }

/* deactivate and unlink the subtree node-by-node */
do {
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 367044d..b77d257 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -112,6 +112,7 @@ enum kernfs_node_flag {
KERNFS_SUICIDED = 0x0800,
KERNFS_EMPTY_DIR = 0x1000,
KERNFS_HAS_RELEASE = 0x2000,
+ KERNFS_REMOVING = 0x4000,
};

/* @flags for kernfs_create_root() */

Subject: [tip: sched/psi] kernfs: Simply by replacing kernfs_deref_open_node() with of_on()

The following commit has been merged into the sched/psi branch of tip:

Commit-ID: 3db48aca879db475844182a24d1760ee3d230627
Gitweb: https://git.kernel.org/tip/3db48aca879db475844182a24d1760ee3d230627
Author: Tejun Heo <[email protected]>
AuthorDate: Sat, 27 Aug 2022 19:04:32 -10:00
Committer: Greg Kroah-Hartman <[email protected]>
CommitterDate: Thu, 01 Sep 2022 18:08:43 +02:00

kernfs: Simply by replacing kernfs_deref_open_node() with of_on()

kernfs_node->attr.open is an RCU pointer to kernfs_open_node. However, RCU
dereference is currently only used in kernfs_notify(). Everywhere else,
either we're holding the lock which protects it or know that the
kernfs_open_node is pinned becaused we have a pointer to a kernfs_open_file
which is hanging off of it.

kernfs_deref_open_node() is used for the latter case - accessing
kernfs_open_node from kernfs_open_file. The lifetime and visibility rules
are simple and clear here. To someone who can access a kernfs_open_file, its
kernfs_open_node is pinned and visible through of->kn->attr.open.

Replace kernfs_deref_open_node() which simpler of_on(). The former takes
both @kn and @of and RCU deref @kn->attr.open while sanity checking with
@of. The latter takes @of and uses protected deref on of->kn->attr.open.

As the return value can't be NULL, remove the error handling in the callers
too.

This shouldn't cause any functional changes.

Cc: Imran Khan <[email protected]>
Tested-by: Chengming Zhou <[email protected]>
Reviewed-by: Chengming Zhou <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Greg Kroah-Hartman <[email protected]>
---
fs/kernfs/file.c | 56 ++++++++++-------------------------------------
1 file changed, 13 insertions(+), 43 deletions(-)

diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index b3ec343..32b16fe 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -57,31 +57,17 @@ static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn)
}

/**
- * kernfs_deref_open_node - Get kernfs_open_node corresponding to @kn.
- *
- * @of: associated kernfs_open_file instance.
- * @kn: target kernfs_node.
- *
- * Fetch and return ->attr.open of @kn if @of->list is non empty.
- * If @of->list is not empty we can safely assume that @of is on
- * @kn->attr.open->files list and this guarantees that @kn->attr.open
- * will not vanish i.e. dereferencing outside RCU read-side critical
- * section is safe here.
- *
- * The caller needs to make sure that @of->list is not empty.
+ * of_on - Return the kernfs_open_node of the specified kernfs_open_file
+ * @of: taret kernfs_open_file
*/
-static struct kernfs_open_node *
-kernfs_deref_open_node(struct kernfs_open_file *of, struct kernfs_node *kn)
+static struct kernfs_open_node *of_on(struct kernfs_open_file *of)
{
- struct kernfs_open_node *on;
-
- on = rcu_dereference_check(kn->attr.open, !list_empty(&of->list));
-
- return on;
+ return rcu_dereference_protected(of->kn->attr.open,
+ !list_empty(&of->list));
}

/**
- * kernfs_deref_open_node_protected - Get kernfs_open_node corresponding to @kn
+ * kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn
*
* @kn: target kernfs_node.
*
@@ -96,7 +82,7 @@ kernfs_deref_open_node(struct kernfs_open_file *of, struct kernfs_node *kn)
* The caller needs to make sure that kernfs_open_file_mutex is held.
*/
static struct kernfs_open_node *
-kernfs_deref_open_node_protected(struct kernfs_node *kn)
+kernfs_deref_open_node_locked(struct kernfs_node *kn)
{
return rcu_dereference_protected(kn->attr.open,
lockdep_is_held(kernfs_open_file_mutex_ptr(kn)));
@@ -207,12 +193,8 @@ static void kernfs_seq_stop(struct seq_file *sf, void *v)
static int kernfs_seq_show(struct seq_file *sf, void *v)
{
struct kernfs_open_file *of = sf->private;
- struct kernfs_open_node *on = kernfs_deref_open_node(of, of->kn);
-
- if (!on)
- return -EINVAL;

- of->event = atomic_read(&on->event);
+ of->event = atomic_read(&of_on(of)->event);

return of->kn->attr.ops->seq_show(sf, v);
}
@@ -235,7 +217,6 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE);
const struct kernfs_ops *ops;
- struct kernfs_open_node *on;
char *buf;

buf = of->prealloc_buf;
@@ -257,14 +238,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
goto out_free;
}

- on = kernfs_deref_open_node(of, of->kn);
- if (!on) {
- len = -EINVAL;
- mutex_unlock(&of->mutex);
- goto out_free;
- }
-
- of->event = atomic_read(&on->event);
+ of->event = atomic_read(&of_on(of)->event);

ops = kernfs_ops(of->kn);
if (ops->read)
@@ -584,7 +558,7 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
struct mutex *mutex = NULL;

mutex = kernfs_open_file_mutex_lock(kn);
- on = kernfs_deref_open_node_protected(kn);
+ on = kernfs_deref_open_node_locked(kn);

if (on) {
list_add_tail(&of->list, &on->files);
@@ -629,7 +603,7 @@ static void kernfs_unlink_open_file(struct kernfs_node *kn,

mutex = kernfs_open_file_mutex_lock(kn);

- on = kernfs_deref_open_node_protected(kn);
+ on = kernfs_deref_open_node_locked(kn);
if (!on) {
mutex_unlock(mutex);
return;
@@ -839,7 +813,7 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
return;

mutex = kernfs_open_file_mutex_lock(kn);
- on = kernfs_deref_open_node_protected(kn);
+ on = kernfs_deref_open_node_locked(kn);
if (!on) {
mutex_unlock(mutex);
return;
@@ -874,11 +848,7 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
*/
__poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
{
- struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry);
- struct kernfs_open_node *on = kernfs_deref_open_node(of, kn);
-
- if (!on)
- return EPOLLERR;
+ struct kernfs_open_node *on = of_on(of);

poll_wait(of->file, &on->poll, wait);

Subject: [tip: sched/psi] kernfs: Implement kernfs_show()

The following commit has been merged into the sched/psi branch of tip:

Commit-ID: 783bd07d095b722108725af11113795ee046ed0e
Gitweb: https://git.kernel.org/tip/783bd07d095b722108725af11113795ee046ed0e
Author: Tejun Heo <[email protected]>
AuthorDate: Sat, 27 Aug 2022 19:04:39 -10:00
Committer: Greg Kroah-Hartman <[email protected]>
CommitterDate: Thu, 01 Sep 2022 18:08:44 +02:00

kernfs: Implement kernfs_show()

Currently, kernfs nodes can be created hidden and activated later by calling
kernfs_activate() to allow creation of multiple nodes to succeed or fail as
a unit. This is an one-way one-time-only transition. This patch introduces
kernfs_show() which can toggle visibility dynamically.

As the currently proposed use - toggling the cgroup pressure files - only
requires operating on leaf nodes, for the sake of simplicity, restrict it as
such for now.

Hiding uses the same mechanism as deactivation and likewise guarantees that
there are no in-flight operations on completion. KERNFS_ACTIVATED and
KERNFS_HIDDEN are used to manage the interactions between activations and
show/hide operations. A node is visible iff both activated & !hidden.

Cc: Chengming Zhou <[email protected]>
Cc: Johannes Weiner <[email protected]>
Tested-by: Chengming Zhou <[email protected]>
Reviewed-by: Chengming Zhou <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Greg Kroah-Hartman <[email protected]>
---
fs/kernfs/dir.c | 37 ++++++++++++++++++++++++++++++++++++-
include/linux/kernfs.h | 2 ++
2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index c932395..7fb5a72 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1311,7 +1311,7 @@ static void kernfs_activate_one(struct kernfs_node *kn)

kn->flags |= KERNFS_ACTIVATED;

- if (kernfs_active(kn) || (kn->flags & KERNFS_REMOVING))
+ if (kernfs_active(kn) || (kn->flags & (KERNFS_HIDDEN | KERNFS_REMOVING)))
return;

WARN_ON_ONCE(kn->parent && RB_EMPTY_NODE(&kn->rb));
@@ -1347,6 +1347,41 @@ void kernfs_activate(struct kernfs_node *kn)
up_write(&root->kernfs_rwsem);
}

+/**
+ * kernfs_show - show or hide a node
+ * @kn: kernfs_node to show or hide
+ * @show: whether to show or hide
+ *
+ * If @show is %false, @kn is marked hidden and deactivated. A hidden node is
+ * ignored in future activaitons. If %true, the mark is removed and activation
+ * state is restored. This function won't implicitly activate a new node in a
+ * %KERNFS_ROOT_CREATE_DEACTIVATED root which hasn't been activated yet.
+ *
+ * To avoid recursion complexities, directories aren't supported for now.
+ */
+void kernfs_show(struct kernfs_node *kn, bool show)
+{
+ struct kernfs_root *root = kernfs_root(kn);
+
+ if (WARN_ON_ONCE(kernfs_type(kn) == KERNFS_DIR))
+ return;
+
+ down_write(&root->kernfs_rwsem);
+
+ if (show) {
+ kn->flags &= ~KERNFS_HIDDEN;
+ if (kn->flags & KERNFS_ACTIVATED)
+ kernfs_activate_one(kn);
+ } else {
+ kn->flags |= KERNFS_HIDDEN;
+ if (kernfs_active(kn))
+ atomic_add(KN_DEACTIVATED_BIAS, &kn->active);
+ kernfs_drain(kn);
+ }
+
+ up_write(&root->kernfs_rwsem);
+}
+
static void __kernfs_remove(struct kernfs_node *kn)
{
struct kernfs_node *pos;
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index b77d257..73f5c12 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -108,6 +108,7 @@ enum kernfs_node_flag {
KERNFS_HAS_SEQ_SHOW = 0x0040,
KERNFS_HAS_MMAP = 0x0080,
KERNFS_LOCKDEP = 0x0100,
+ KERNFS_HIDDEN = 0x0200,
KERNFS_SUICIDAL = 0x0400,
KERNFS_SUICIDED = 0x0800,
KERNFS_EMPTY_DIR = 0x1000,
@@ -430,6 +431,7 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
const char *name,
struct kernfs_node *target);
void kernfs_activate(struct kernfs_node *kn);
+void kernfs_show(struct kernfs_node *kn, bool show);
void kernfs_remove(struct kernfs_node *kn);
void kernfs_break_active_protection(struct kernfs_node *kn);
void kernfs_unbreak_active_protection(struct kernfs_node *kn);

Subject: [tip: sched/psi] cgroup: Implement cgroup_file_show()

The following commit has been merged into the sched/psi branch of tip:

Commit-ID: e2691f6b44ed2135bfd005ad5fbabac4f433a7a1
Gitweb: https://git.kernel.org/tip/e2691f6b44ed2135bfd005ad5fbabac4f433a7a1
Author: Tejun Heo <[email protected]>
AuthorDate: Sat, 27 Aug 2022 19:04:40 -10:00
Committer: Greg Kroah-Hartman <[email protected]>
CommitterDate: Thu, 01 Sep 2022 18:08:44 +02:00

cgroup: Implement cgroup_file_show()

Add cgroup_file_show() which allows toggling visibility of a cgroup file
using the new kernfs_show(). This will be used to hide psi interface files
on cgroups where it's disabled.

Cc: Chengming Zhou <[email protected]>
Cc: Johannes Weiner <[email protected]>
Tested-by: Chengming Zhou <[email protected]>
Reviewed-by: Chengming Zhou <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Greg Kroah-Hartman <[email protected]>
---
include/linux/cgroup.h | 1 +
kernel/cgroup/cgroup.c | 20 ++++++++++++++++++++
2 files changed, 21 insertions(+)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ed53bfe..f61dd13 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -114,6 +114,7 @@ int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_rm_cftypes(struct cftype *cfts);
void cgroup_file_notify(struct cgroup_file *cfile);
+void cgroup_file_show(struct cgroup_file *cfile, bool show);

int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index ffaccd6..217469a 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4340,6 +4340,26 @@ void cgroup_file_notify(struct cgroup_file *cfile)
}

/**
+ * cgroup_file_show - show or hide a hidden cgroup file
+ * @cfile: target cgroup_file obtained by setting cftype->file_offset
+ * @show: whether to show or hide
+ */
+void cgroup_file_show(struct cgroup_file *cfile, bool show)
+{
+ struct kernfs_node *kn;
+
+ spin_lock_irq(&cgroup_file_kn_lock);
+ kn = cfile->kn;
+ kernfs_get(kn);
+ spin_unlock_irq(&cgroup_file_kn_lock);
+
+ if (kn)
+ kernfs_show(kn, show);
+
+ kernfs_put(kn);
+}
+
+/**
* css_next_child - find the next child of a given css
* @pos: the current position (%NULL to initiate traversal)
* @parent: css whose children to walk

Subject: [tip: sched/psi] kernfs: Factor out kernfs_activate_one()

The following commit has been merged into the sched/psi branch of tip:

Commit-ID: f8eb145eb946d543e8c8df3d2db39fcd5c80dacb
Gitweb: https://git.kernel.org/tip/f8eb145eb946d543e8c8df3d2db39fcd5c80dacb
Author: Tejun Heo <[email protected]>
AuthorDate: Sat, 27 Aug 2022 19:04:38 -10:00
Committer: Greg Kroah-Hartman <[email protected]>
CommitterDate: Thu, 01 Sep 2022 18:08:44 +02:00

kernfs: Factor out kernfs_activate_one()

Factor out kernfs_activate_one() from kernfs_activate() and reorder
operations so that KERNFS_ACTIVATED now simply indicates whether activation
was attempted on the node ignoring whether activation took place. As the
flag doesn't have a reader, the refactoring and reordering shouldn't cause
any behavior difference.

Tested-by: Chengming Zhou <[email protected]>
Reviewed-by: Chengming Zhou <[email protected]>
Signed-off-by: Tejun Heo <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Greg Kroah-Hartman <[email protected]>
---
fs/kernfs/dir.c | 27 +++++++++++++++++----------
1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index f8cbd05..c932395 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1305,6 +1305,21 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
return pos->parent;
}

+static void kernfs_activate_one(struct kernfs_node *kn)
+{
+ lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem);
+
+ kn->flags |= KERNFS_ACTIVATED;
+
+ if (kernfs_active(kn) || (kn->flags & KERNFS_REMOVING))
+ return;
+
+ WARN_ON_ONCE(kn->parent && RB_EMPTY_NODE(&kn->rb));
+ WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
+
+ atomic_sub(KN_DEACTIVATED_BIAS, &kn->active);
+}
+
/**
* kernfs_activate - activate a node which started deactivated
* @kn: kernfs_node whose subtree is to be activated
@@ -1326,16 +1341,8 @@ void kernfs_activate(struct kernfs_node *kn)
down_write(&root->kernfs_rwsem);

pos = NULL;
- while ((pos = kernfs_next_descendant_post(pos, kn))) {
- if (kernfs_active(pos) || (pos->flags & KERNFS_REMOVING))
- continue;
-
- WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
- WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);
-
- atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
- pos->flags |= KERNFS_ACTIVATED;
- }
+ while ((pos = kernfs_next_descendant_post(pos, kn)))
+ kernfs_activate_one(pos);

up_write(&root->kernfs_rwsem);
}