2023-03-09 11:15:26

by Imran Khan

[permalink] [raw]
Subject: [PATCH v2 0/3] kernfs: Introduce separate rwsem to protect inode

Changes since v1:
- Remove kernfs_rwsem from kernfs_notify_workfn
- Include Matthew's "Reviewed-by" tag for Patch-3
-------------------------------------------------------------------
Original cover letter

This change set consolidates the changes discussed and/or mentioned
in [1] and [2]. I have not received any feedback about any of the
patches included in this change set, so I am rebasing them on the current
linux-next tip and bringing them all together in one place.

As mentioned in [1], changing the per-fs kernfs_rwsem into a hashed
rwsem does not work for all scenarios, so PATCH-1 here tries to address
the same issue with the help of another newly introduced per-fs rwsem.
PATCH-2 and PATCH-3 are basically resends of PATCH-1 and PATCH-2,
respectively, from [2].

It would be really helpful if I could get some feedback about this
changeset so that we can reduce the kernfs_rwsem contention and make
sysfs access more scalable for large-scale systems.

The patches in this change set are as follows:

PATCH-1: kernfs: Introduce separate rwsem to protect inode attributes.

PATCH-2: kernfs: Use a per-fs rwsem to protect per-fs list of
kernfs_super_info.

PATCH-3: kernfs: change kernfs_rename_lock into a read-write lock.

Imran Khan (3):
kernfs: Introduce separate rwsem to protect inode attributes.
Use a per-fs rwsem to protect per-fs list of kernfs_super_info.
kernfs: change kernfs_rename_lock into a read-write lock.

fs/kernfs/dir.c | 26 +++++++++++++++++---------
fs/kernfs/file.c | 2 ++
fs/kernfs/inode.c | 16 ++++++++--------
fs/kernfs/kernfs-internal.h | 2 ++
fs/kernfs/mount.c | 8 ++++----
5 files changed, 33 insertions(+), 21 deletions(-)


base-commit: 7f7a8831520f12a3cf894b0627641fad33971221

[1]:https://lore.kernel.org/all/[email protected]/
[2]:https://lore.kernel.org/all/[email protected]/
-------------------------------------------------------------------

Imran Khan (3):
kernfs: Introduce separate rwsem to protect inode attributes.
kernfs: Use a per-fs rwsem to protect per-fs list of
kernfs_super_info.
kernfs: change kernfs_rename_lock into a read-write lock.

fs/kernfs/dir.c | 26 +++++++++++++++++---------
fs/kernfs/file.c | 4 ++--
fs/kernfs/inode.c | 16 ++++++++--------
fs/kernfs/kernfs-internal.h | 2 ++
fs/kernfs/mount.c | 8 ++++----
5 files changed, 33 insertions(+), 23 deletions(-)

--
2.34.1



2023-03-09 11:15:29

by Imran Khan

[permalink] [raw]
Subject: [PATCH v2 2/3] kernfs: Use a per-fs rwsem to protect per-fs list of kernfs_super_info.

Right now the per-fs kernfs_rwsem protects the list of kernfs_super_info
instances for a kernfs_root. Since kernfs_rwsem is used to synchronize
several other operations across kernfs, and since most of these operations
don't impact kernfs_super_info, we can use a separate per-fs rwsem to
synchronize access to the list of kernfs_super_info.
This helps in reducing contention around kernfs_rwsem and also allows
operations that change/access the list of kernfs_super_info to proceed
without contending for kernfs_rwsem.

Signed-off-by: Imran Khan <[email protected]>
---
fs/kernfs/dir.c | 1 +
fs/kernfs/file.c | 4 ++--
fs/kernfs/kernfs-internal.h | 1 +
fs/kernfs/mount.c | 8 ++++----
4 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 953b2717c60e6..2cdb8516e5287 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -944,6 +944,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
idr_init(&root->ino_idr);
init_rwsem(&root->kernfs_rwsem);
init_rwsem(&root->kernfs_iattr_rwsem);
+ init_rwsem(&root->kernfs_supers_rwsem);
INIT_LIST_HEAD(&root->supers);

/*
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e4a50e4ff0d23..40c4661f15b7c 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -922,8 +922,8 @@ static void kernfs_notify_workfn(struct work_struct *work)

root = kernfs_root(kn);
/* kick fsnotify */
- down_write(&root->kernfs_rwsem);

+ down_read(&root->kernfs_supers_rwsem);
list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
struct kernfs_node *parent;
struct inode *p_inode = NULL;
@@ -960,7 +960,7 @@ static void kernfs_notify_workfn(struct work_struct *work)
iput(inode);
}

- up_write(&root->kernfs_rwsem);
+ up_read(&root->kernfs_supers_rwsem);
kernfs_put(kn);
goto repeat;
}
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 3297093c920de..a9b854cdfdb5f 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -48,6 +48,7 @@ struct kernfs_root {
wait_queue_head_t deactivate_waitq;
struct rw_semaphore kernfs_rwsem;
struct rw_semaphore kernfs_iattr_rwsem;
+ struct rw_semaphore kernfs_supers_rwsem;
};

/* +1 to avoid triggering overflow warning when negating it */
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index e08e8d9998070..d49606accb07b 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -351,9 +351,9 @@ int kernfs_get_tree(struct fs_context *fc)
}
sb->s_flags |= SB_ACTIVE;

- down_write(&root->kernfs_rwsem);
+ down_write(&root->kernfs_supers_rwsem);
list_add(&info->node, &info->root->supers);
- up_write(&root->kernfs_rwsem);
+ up_write(&root->kernfs_supers_rwsem);
}

fc->root = dget(sb->s_root);
@@ -380,9 +380,9 @@ void kernfs_kill_sb(struct super_block *sb)
struct kernfs_super_info *info = kernfs_info(sb);
struct kernfs_root *root = info->root;

- down_write(&root->kernfs_rwsem);
+ down_write(&root->kernfs_supers_rwsem);
list_del(&info->node);
- up_write(&root->kernfs_rwsem);
+ up_write(&root->kernfs_supers_rwsem);

/*
* Remove the superblock from fs_supers/s_instances
--
2.34.1


2023-03-09 11:15:33

by Imran Khan

[permalink] [raw]
Subject: [PATCH v2 3/3] kernfs: change kernfs_rename_lock into a read-write lock.

kernfs_rename_lock protects a node's ->parent and thus kernfs topology.
Thus it can be used in cases that rely on a stable kernfs topology.
Change it to a read-write lock for better scalability.

Suggested-by: Al Viro <[email protected]>
Reviewed-by: Matthew Wilcox (Oracle) <[email protected]>
Signed-off-by: Imran Khan <[email protected]>
---
fs/kernfs/dir.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 2cdb8516e5287..06e27b36216fe 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -17,7 +17,7 @@

#include "kernfs-internal.h"

-static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */
+static DEFINE_RWLOCK(kernfs_rename_lock); /* kn->parent and ->name */
/*
* Don't use rename_lock to piggy back on pr_cont_buf. We don't want to
* call pr_cont() while holding rename_lock. Because sometimes pr_cont()
@@ -196,9 +196,9 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
unsigned long flags;
int ret;

- spin_lock_irqsave(&kernfs_rename_lock, flags);
+ read_lock_irqsave(&kernfs_rename_lock, flags);
ret = kernfs_name_locked(kn, buf, buflen);
- spin_unlock_irqrestore(&kernfs_rename_lock, flags);
+ read_unlock_irqrestore(&kernfs_rename_lock, flags);
return ret;
}

@@ -224,9 +224,9 @@ int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
unsigned long flags;
int ret;

- spin_lock_irqsave(&kernfs_rename_lock, flags);
+ read_lock_irqsave(&kernfs_rename_lock, flags);
ret = kernfs_path_from_node_locked(to, from, buf, buflen);
- spin_unlock_irqrestore(&kernfs_rename_lock, flags);
+ read_unlock_irqrestore(&kernfs_rename_lock, flags);
return ret;
}
EXPORT_SYMBOL_GPL(kernfs_path_from_node);
@@ -294,10 +294,10 @@ struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
struct kernfs_node *parent;
unsigned long flags;

- spin_lock_irqsave(&kernfs_rename_lock, flags);
+ read_lock_irqsave(&kernfs_rename_lock, flags);
parent = kn->parent;
kernfs_get(parent);
- spin_unlock_irqrestore(&kernfs_rename_lock, flags);
+ read_unlock_irqrestore(&kernfs_rename_lock, flags);

return parent;
}
@@ -1731,7 +1731,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
kernfs_get(new_parent);

/* rename_lock protects ->parent and ->name accessors */
- spin_lock_irq(&kernfs_rename_lock);
+ write_lock_irq(&kernfs_rename_lock);

old_parent = kn->parent;
kn->parent = new_parent;
@@ -1742,7 +1742,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
kn->name = new_name;
}

- spin_unlock_irq(&kernfs_rename_lock);
+ write_unlock_irq(&kernfs_rename_lock);

kn->hash = kernfs_name_hash(kn->name, kn->ns);
kernfs_link_sibling(kn);
--
2.34.1