2022-06-28 20:59:37

by Andreas Gruenbacher

[permalink] [raw]
Subject: [PATCH 0/5] gfs2: debugfs PID reporting improvements

Currently, all glock holders in the "glocks" dump file are reported as
being associated with the process that acquired them, even for holders
that are actually associated with the filesystem itself (like the
journal glock holder) or with cached inodes (like iopen and flock glock
holders). This is confusing when those holders outlive the processes
that have acquired them, and it trips up utilities that analyze lock
dependencies. For example, the following two glocks were acquired by
pid 10821 during the initial mount, which has since terminated:

G: s:EX n:9/0 f:qb t:EX d:EX/0 a:0 v:0 r:3 m:200 p:0
H: s:EX f:ecH e:0 p:10821 [(ended)] init_inodes+0x5c2/0xb10 [gfs2]
G: s:EX n:2/805f f:qob t:EX d:EX/0 a:0 v:0 r:4 m:200 p:1
H: s:EX f:H e:0 p:10821 [(ended)] gfs2_fill_super+0x92b/0xcc0 [gfs2]
I: n:6/32863 t:8 f:0x00 d:0x00000201 s:24 p:0

This patch queue tries to fix this problem in two ways:

* Glock holders which are not held by the process that acquired them
are marked as GL_NOPID. For those holders, the PID is reported as 0,
and the process name is reported as "(none)".

* With this change alone, we would have a much harder time detecting
locking cycles involving iopen or flock glocks: in both cases, a
process which has a file descriptor open depends on the iopen and
flock glock of the corresponding inode / file. To keep track of
these dependencies, we introduce a new "glockfd" dump file that
reports which file descriptors of which processes are holding which
glocks.

A utility that checks for locking problems using this additional
information is forthcoming, but hasn't been completed so far.


NEW EXPORTS

This patch queue requires iterating through all file descriptors of all
processes, which is made easier by exporting find_ge_pid() and
task_lookup_next_fd_rcu(); copying Eric W. Biederman and the
linux-kernel and linux-fsdevel lists to make sure that's okay.


Thanks,
Andreas

Andreas Gruenbacher (5):
gfs2: Add glockfd debugfs file
gfs2: Add flocks to glockfd debugfs file
gfs2: Add GL_NOPID flag for process-independent glock holders
gfs2: Mark flock glock holders as GL_NOPID
gfs2: Mark the remaining process-independent glock holders as GL_NOPID

fs/file.c | 1 +
fs/gfs2/file.c | 29 +++++-
fs/gfs2/glock.c | 211 +++++++++++++++++++++++++++++++++++++++++--
fs/gfs2/glock.h | 1 +
fs/gfs2/inode.c | 6 +-
fs/gfs2/ops_fstype.c | 14 +--
fs/gfs2/super.c | 3 +-
fs/gfs2/util.c | 6 +-
kernel/pid.c | 1 +
9 files changed, 247 insertions(+), 25 deletions(-)

--
2.35.1


2022-06-28 21:00:21

by Andreas Gruenbacher

[permalink] [raw]
Subject: [PATCH 1/5] gfs2: Add glockfd debugfs file

When a process has a gfs2 file open, the file is keeping a reference on the
underlying gfs2 inode, and the inode is keeping the inode's iopen glock held in
shared mode. In other words, the process depends on the iopen glock of each
open gfs2 file. Expose those dependencies in a new "glockfd" debugfs file.

The new debugfs file contains one line for each gfs2 file descriptor,
specifying the tgid, file descriptor number, and glock name, e.g.,

1601 6 5/816d

This list is compiled by iterating all tasks on the system using find_ge_pid(),
and all file descriptors of each task using task_lookup_next_fd_rcu(). To make
that work from gfs2, export those two functions.

Signed-off-by: Andreas Gruenbacher <[email protected]>
Cc: Eric W. Biederman <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
---
fs/file.c | 1 +
fs/gfs2/glock.c | 147 ++++++++++++++++++++++++++++++++++++++++++++++++
kernel/pid.c | 1 +
3 files changed, 149 insertions(+)

diff --git a/fs/file.c b/fs/file.c
index 3bcc1ecc314a..5f9c802a5d8d 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -980,6 +980,7 @@ struct file *task_lookup_next_fd_rcu(struct task_struct *task, unsigned int *ret
*ret_fd = fd;
return file;
}
+EXPORT_SYMBOL(task_lookup_next_fd_rcu);

/*
* Lightweight file lookup - no refcnt increment if fd table isn't shared.
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c992d53013d3..63dbab6fa242 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -33,6 +33,7 @@
#include <linux/list_sort.h>
#include <linux/lockref.h>
#include <linux/rhashtable.h>
+#include <linux/fdtable.h>

#include "gfs2.h"
#include "incore.h"
@@ -2745,6 +2746,149 @@ static const struct file_operations gfs2_glstats_fops = {
.release = gfs2_glocks_release,
};

+struct gfs2_glockfd_iter {
+ struct super_block *sb;
+ unsigned int tgid;
+ struct task_struct *task;
+ unsigned int fd;
+ struct file *file;
+};
+
+static struct task_struct *gfs2_glockfd_next_task(struct gfs2_glockfd_iter *i)
+{
+ struct pid_namespace *ns = task_active_pid_ns(current);
+ struct pid *pid;
+
+ if (i->task)
+ put_task_struct(i->task);
+
+ rcu_read_lock();
+retry:
+ i->task = NULL;
+ pid = find_ge_pid(i->tgid, ns);
+ if (pid) {
+ i->tgid = pid_nr_ns(pid, ns);
+ i->task = pid_task(pid, PIDTYPE_TGID);
+ if (!i->task) {
+ i->tgid++;
+ goto retry;
+ }
+ get_task_struct(i->task);
+ }
+ rcu_read_unlock();
+ return i->task;
+}
+
+static struct file *gfs2_glockfd_next_file(struct gfs2_glockfd_iter *i)
+{
+ if (i->file) {
+ fput(i->file);
+ i->file = NULL;
+ }
+
+ rcu_read_lock();
+ for(;; i->fd++) {
+ struct inode *inode;
+
+ i->file = task_lookup_next_fd_rcu(i->task, &i->fd);
+ if (!i->file) {
+ i->fd = 0;
+ break;
+ }
+ inode = file_inode(i->file);
+ if (inode->i_sb != i->sb)
+ continue;
+ if (get_file_rcu(i->file))
+ break;
+ }
+ rcu_read_unlock();
+ return i->file;
+}
+
+static void *gfs2_glockfd_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct gfs2_glockfd_iter *i = seq->private;
+
+ if (*pos)
+ return NULL;
+ while (gfs2_glockfd_next_task(i)) {
+ if (gfs2_glockfd_next_file(i))
+ return i;
+ i->tgid++;
+ }
+ return NULL;
+}
+
+static void *gfs2_glockfd_seq_next(struct seq_file *seq, void *iter_ptr,
+ loff_t *pos)
+{
+ struct gfs2_glockfd_iter *i = seq->private;
+
+ (*pos)++;
+ i->fd++;
+ do {
+ if (gfs2_glockfd_next_file(i))
+ return i;
+ i->tgid++;
+ } while (gfs2_glockfd_next_task(i));
+ return NULL;
+}
+
+static void gfs2_glockfd_seq_stop(struct seq_file *seq, void *iter_ptr)
+{
+ struct gfs2_glockfd_iter *i = seq->private;
+
+ if (i->file)
+ fput(i->file);
+ if (i->task)
+ put_task_struct(i->task);
+}
+
+static int gfs2_glockfd_seq_show(struct seq_file *seq, void *iter_ptr)
+{
+ struct gfs2_glockfd_iter *i = seq->private;
+ struct inode *inode = file_inode(i->file);
+ struct gfs2_glock *gl;
+
+ inode_lock_shared(inode);
+ gl = GFS2_I(inode)->i_iopen_gh.gh_gl;
+ if (gl) {
+ seq_printf(seq, "%d %u %u/%llx\n",
+ i->tgid, i->fd, gl->gl_name.ln_type,
+ (unsigned long long)gl->gl_name.ln_number);
+ }
+ inode_unlock_shared(inode);
+ return 0;
+}
+
+static const struct seq_operations gfs2_glockfd_seq_ops = {
+ .start = gfs2_glockfd_seq_start,
+ .next = gfs2_glockfd_seq_next,
+ .stop = gfs2_glockfd_seq_stop,
+ .show = gfs2_glockfd_seq_show,
+};
+
+static int gfs2_glockfd_open(struct inode *inode, struct file *file)
+{
+ struct gfs2_glockfd_iter *i;
+ struct gfs2_sbd *sdp = inode->i_private;
+
+ i = __seq_open_private(file, &gfs2_glockfd_seq_ops,
+ sizeof(struct gfs2_glockfd_iter));
+ if (!i)
+ return -ENOMEM;
+ i->sb = sdp->sd_vfs;
+ return 0;
+}
+
+static const struct file_operations gfs2_glockfd_fops = {
+ .owner = THIS_MODULE,
+ .open = gfs2_glockfd_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
DEFINE_SEQ_ATTRIBUTE(gfs2_sbstats);

void gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
@@ -2754,6 +2898,9 @@ void gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
debugfs_create_file("glocks", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
&gfs2_glocks_fops);

+ debugfs_create_file("glockfd", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
+ &gfs2_glockfd_fops);
+
debugfs_create_file("glstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
&gfs2_glstats_fops);

diff --git a/kernel/pid.c b/kernel/pid.c
index 2fc0a16ec77b..3fbc5e46b721 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -519,6 +519,7 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
{
return idr_get_next(&ns->idr, &nr);
}
+EXPORT_SYMBOL_GPL(find_ge_pid);

struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
{
--
2.35.1

2022-06-28 21:04:06

by Andreas Gruenbacher

[permalink] [raw]
Subject: [PATCH 5/5] gfs2: Mark the remaining process-independent glock holders as GL_NOPID

Add the GL_NOPID flag for the remaining glock holders which are not
associated with the current process.

Signed-off-by: Andreas Gruenbacher <[email protected]>
---
fs/gfs2/inode.c | 6 ++++--
fs/gfs2/ops_fstype.c | 14 ++++++++------
fs/gfs2/super.c | 3 ++-
fs/gfs2/util.c | 6 ++++--
4 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index c8ec876f33ea..e211ed8636b5 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -143,7 +143,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,

if (blktype != GFS2_BLKST_UNLINKED)
gfs2_cancel_delete_work(io_gl);
- error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT,
+ error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED,
+ GL_EXACT | GL_NOPID,
&ip->i_iopen_gh);
gfs2_glock_put(io_gl);
if (unlikely(error))
@@ -720,7 +721,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr);
BUG_ON(error);

- error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
+ error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT | GL_NOPID,
+ &ip->i_iopen_gh);
if (error)
goto fail_gunlock2;

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index c9b423c874a3..904a2d47c4b3 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -403,7 +403,8 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,

error = gfs2_glock_nq_num(sdp,
GFS2_MOUNT_LOCK, &gfs2_nondisk_glops,
- LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE,
+ LM_ST_EXCLUSIVE,
+ LM_FLAG_NOEXP | GL_NOCACHE | GL_NOPID,
mount_gh);
if (error) {
fs_err(sdp, "can't acquire mount glock: %d\n", error);
@@ -413,7 +414,7 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
error = gfs2_glock_nq_num(sdp,
GFS2_LIVE_LOCK, &gfs2_nondisk_glops,
LM_ST_SHARED,
- LM_FLAG_NOEXP | GL_EXACT,
+ LM_FLAG_NOEXP | GL_EXACT | GL_NOPID,
&sdp->sd_live_gh);
if (error) {
fs_err(sdp, "can't acquire live glock: %d\n", error);
@@ -689,7 +690,7 @@ static int init_statfs(struct gfs2_sbd *sdp)
iput(pn);
pn = NULL;
ip = GFS2_I(sdp->sd_sc_inode);
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_NOPID,
&sdp->sd_sc_gh);
if (error) {
fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
@@ -778,7 +779,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
error = gfs2_glock_nq_num(sdp, sdp->sd_lockstruct.ls_jid,
&gfs2_journal_glops,
LM_ST_EXCLUSIVE,
- LM_FLAG_NOEXP | GL_NOCACHE,
+ LM_FLAG_NOEXP | GL_NOCACHE | GL_NOPID,
&sdp->sd_journal_gh);
if (error) {
fs_err(sdp, "can't acquire journal glock: %d\n", error);
@@ -788,7 +789,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
ip = GFS2_I(sdp->sd_jdesc->jd_inode);
sdp->sd_jinode_gl = ip->i_gl;
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
- LM_FLAG_NOEXP | GL_EXACT | GL_NOCACHE,
+ LM_FLAG_NOEXP | GL_EXACT |
+ GL_NOCACHE | GL_NOPID,
&sdp->sd_jinode_gh);
if (error) {
fs_err(sdp, "can't acquire journal inode glock: %d\n",
@@ -959,7 +961,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
pn = NULL;

ip = GFS2_I(sdp->sd_qc_inode);
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_NOPID,
&sdp->sd_qc_gh);
if (error) {
fs_err(sdp, "can't lock local \"qc\" file: %d\n", error);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index bdb773e5c88f..90db4a289269 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -346,7 +346,8 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
}

error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE,
- LM_FLAG_NOEXP, &sdp->sd_freeze_gh);
+ LM_FLAG_NOEXP | GL_NOPID,
+ &sdp->sd_freeze_gh);
if (error)
goto out;

diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 8241029a2a5d..95d733dd3c25 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -226,7 +226,8 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
*/
fs_warn(sdp, "Requesting recovery of jid %d.\n",
sdp->sd_lockstruct.ls_jid);
- gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | LM_FLAG_NOEXP,
+ gfs2_holder_reinit(LM_ST_EXCLUSIVE,
+ LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | GL_NOPID,
&sdp->sd_live_gh);
msleep(GL_GLOCK_MAX_HOLD);
/*
@@ -251,7 +252,8 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
fs_warn(sdp, "Unable to recover our journal jid %d.\n",
sdp->sd_lockstruct.ls_jid);
gfs2_glock_dq_wait(&sdp->sd_live_gh);
- gfs2_holder_reinit(LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT,
+ gfs2_holder_reinit(LM_ST_SHARED,
+ LM_FLAG_NOEXP | GL_EXACT | GL_NOPID,
&sdp->sd_live_gh);
gfs2_glock_nq(&sdp->sd_live_gh);
}
--
2.35.1

2022-06-28 21:05:08

by Andreas Gruenbacher

[permalink] [raw]
Subject: [PATCH 3/5] gfs2: Add GL_NOPID flag for process-independent glock holders

Add a GL_NOPID flag to indicate that once a glock holder has been acquired, it
won't be associated with the current process anymore. This is useful for iopen
and flock glocks which are associated with open files, as well as journal glock
holders and similar which are associated with the filesystem.

Once GL_NOPID is used for all applicable glocks (see the next patches),
processes will no longer be falsely reported as holding glocks which they are
not actually holding in the glocks dump file. Unlike before, when a process is
reported as having "(ended)", this will indicate an actual bug.

Signed-off-by: Andreas Gruenbacher <[email protected]>
---
fs/gfs2/glock.c | 41 +++++++++++++++++++++++++++++++----------
fs/gfs2/glock.h | 1 +
2 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index aa35f5d4eb54..480b3d2b00e8 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1465,6 +1465,15 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
va_end(args);
}

+static inline bool pid_is_meaningful(const struct gfs2_holder *gh)
+{
+ if (!(gh->gh_flags & GL_NOPID))
+ return true;
+ if (gh->gh_state == LM_ST_UNLOCKED)
+ return true;
+ return false;
+}
+
/**
* add_to_queue - Add a holder to the wait queue (but look for recursion)
* @gh: the holder structure to add
@@ -1501,10 +1510,17 @@ __acquires(&gl->gl_lockref.lock)
}

list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
- if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
- (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK) &&
- !test_bit(HIF_MAY_DEMOTE, &gh2->gh_iflags)))
- goto trap_recursive;
+ if (likely(gh2->gh_owner_pid != gh->gh_owner_pid))
+ continue;
+ if (gh->gh_gl->gl_ops->go_type == LM_TYPE_FLOCK)
+ continue;
+ if (test_bit(HIF_MAY_DEMOTE, &gh2->gh_iflags))
+ continue;
+ if (!pid_is_meaningful(gh2))
+ continue;
+ goto trap_recursive;
+ }
+ list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
if (try_futile &&
!(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
fail:
@@ -2319,19 +2335,24 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh,
const char *fs_id_buf)
{
- struct task_struct *gh_owner = NULL;
+ const char *comm = "(none)";
+ pid_t owner_pid = 0;
char flags_buf[32];

rcu_read_lock();
- if (gh->gh_owner_pid)
+ if (pid_is_meaningful(gh)) {
+ struct task_struct *gh_owner;
+
+ comm = "(ended)";
+ owner_pid = pid_nr(gh->gh_owner_pid);
gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
+ if (gh_owner)
+ comm = gh_owner->comm;
+ }
gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
fs_id_buf, state2str(gh->gh_state),
hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
- gh->gh_error,
- gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
- gh_owner ? gh_owner->comm : "(ended)",
- (void *)gh->gh_ip);
+ gh->gh_error, (long)owner_pid, comm, (void *)gh->gh_ip);
rcu_read_unlock();
}

diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index c0ae9100a0bc..e764ebeba54c 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -91,6 +91,7 @@ enum {
#define GL_ASYNC 0x0040
#define GL_EXACT 0x0080
#define GL_SKIP 0x0100
+#define GL_NOPID 0x0200
#define GL_NOCACHE 0x0400

/*
--
2.35.1

2022-06-28 21:54:47

by Andreas Gruenbacher

[permalink] [raw]
Subject: [PATCH 4/5] gfs2: Mark flock glock holders as GL_NOPID

Add the GL_NOPID flag for flock glock holders. Clean up the flag
setting code in do_flock.

Signed-off-by: Andreas Gruenbacher <[email protected]>
---
fs/gfs2/file.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 25f4080bc973..1383f9598011 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1472,7 +1472,9 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
int sleeptime;

state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
- flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY_1CB) | GL_EXACT;
+ flags = GL_EXACT | GL_NOPID;
+ if (!IS_SETLKW(cmd))
+ flags |= LM_FLAG_TRY_1CB;

mutex_lock(&fp->f_fl_mutex);

@@ -1500,7 +1502,8 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
error = gfs2_glock_nq(fl_gh);
if (error != GLR_TRYFAILED)
break;
- fl_gh->gh_flags = LM_FLAG_TRY | GL_EXACT;
+ fl_gh->gh_flags &= ~LM_FLAG_TRY_1CB;
+ fl_gh->gh_flags |= LM_FLAG_TRY;
msleep(sleeptime);
}
if (error) {
--
2.35.1