2023-12-01 21:15:53

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH 0/4] SUNRPC: Various RCU fixes

From: Anna Schumaker <[email protected]>

These are various fixes that I found after turning on CONFIG_LOCKDEP,
CONFIG_PROVE_RCU, and CONFIG_DEBUG_ATOMIC_SLEEP. I didn't hit any issues
when testing against a Linux server, but running against Netapp with pNFS
had several different lockdep & rcu related spews show up in my dmesg
(and that's just from running cthon, not even xfstests). These patches fix
all the issues that I found, and have a couple extra cleanups that I noticed
along the way.

Thoughts?
Anna


Anna Schumaker (4):
SUNRPC: Clean up unused variable in rpc_xprt_probe_trunked()
SUNRPC: Remove unused function rpc_clnt_xprt_switch_put()
SUNRPC: Create a helper function for accessing the rpc_clnt's
xprt_switch
SUNRPC: Fix a suspicious RCU usage warning

include/linux/sunrpc/clnt.h | 1 -
include/linux/sunrpc/xprtmultipath.h | 2 ++
net/sunrpc/clnt.c | 47 +++++++++++++---------------
net/sunrpc/xprtmultipath.c | 14 ++++++++-
4 files changed, 37 insertions(+), 27 deletions(-)

--
2.43.0



2023-12-01 21:15:56

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH 1/4] SUNRPC: Clean up unused variable in rpc_xprt_probe_trunked()

From: Anna Schumaker <[email protected]>

We don't use the rpc_xprt_switch anywhere in this function, so let's not
take an extra reference to in unnecessarily.

Signed-off-by: Anna Schumaker <[email protected]>
---
net/sunrpc/clnt.c | 3 ---
1 file changed, 3 deletions(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index daa9582ec861..4aa838543f79 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -3116,7 +3116,6 @@ static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt,
struct rpc_xprt *xprt,
struct rpc_add_xprt_test *data)
{
- struct rpc_xprt_switch *xps;
struct rpc_xprt *main_xprt;
int status = 0;

@@ -3124,7 +3123,6 @@ static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt,

rcu_read_lock();
main_xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
- xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
status = rpc_cmp_addr_port((struct sockaddr *)&xprt->addr,
(struct sockaddr *)&main_xprt->addr);
rcu_read_unlock();
@@ -3135,7 +3133,6 @@ static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt,
status = rpc_clnt_add_xprt_helper(clnt, xprt, data);
out:
xprt_put(xprt);
- xprt_switch_put(xps);
return status;
}

--
2.43.0


2023-12-01 21:15:59

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH 2/4] SUNRPC: Remove unused function rpc_clnt_xprt_switch_put()

From: Anna Schumaker <[email protected]>

Signed-off-by: Anna Schumaker <[email protected]>
---
include/linux/sunrpc/clnt.h | 1 -
net/sunrpc/clnt.c | 8 --------
2 files changed, 9 deletions(-)

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index e9d4377d03c6..5e9d1469c6fa 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -252,7 +252,6 @@ void rpc_clnt_probe_trunked_xprts(struct rpc_clnt *,

const char *rpc_proc_name(const struct rpc_task *task);

-void rpc_clnt_xprt_switch_put(struct rpc_clnt *);
void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *);
void rpc_clnt_xprt_switch_remove_xprt(struct rpc_clnt *, struct rpc_xprt *);
bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 4aa838543f79..8df944444e9b 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -3247,14 +3247,6 @@ rpc_set_connect_timeout(struct rpc_clnt *clnt,
}
EXPORT_SYMBOL_GPL(rpc_set_connect_timeout);

-void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt)
-{
- rcu_read_lock();
- xprt_switch_put(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
- rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_put);
-
void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
{
struct rpc_xprt_switch *xps;
--
2.43.0


2023-12-01 21:15:59

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH 3/4] SUNRPC: Create a helper function for accessing the rpc_clnt's xprt_switch

From: Anna Schumaker <[email protected]>

This function takes the necessary rcu read lock to dereference the
client's rpc_xprt_switch and bump the reference count so it doesn't
disappear underneath us before returning. This does mean that callers
are responsible for calling xprt_switch_put() on the returned object
when they are done with it.

Signed-off-by: Anna Schumaker <[email protected]>
---
net/sunrpc/clnt.c | 34 +++++++++++++++++++++-------------
1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8df944444e9b..0b2c4b5484f5 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -797,15 +797,24 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt,
}
EXPORT_SYMBOL_GPL(rpc_switch_client_transport);

+static struct rpc_xprt_switch *rpc_clnt_xprt_switch_get(struct rpc_clnt *clnt)
+{
+ struct rpc_xprt_switch *xps;
+
+ rcu_read_lock();
+ xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
+ rcu_read_unlock();
+
+ return xps;
+}
+
static
int _rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi,
void func(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps))
{
struct rpc_xprt_switch *xps;

- rcu_read_lock();
- xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
- rcu_read_unlock();
+ xps = rpc_clnt_xprt_switch_get(clnt);
if (xps == NULL)
return -EAGAIN;
func(xpi, xps);
@@ -2206,9 +2215,7 @@ call_connect_status(struct rpc_task *task)
struct rpc_xprt *saved = task->tk_xprt;
struct rpc_xprt_switch *xps;

- rcu_read_lock();
- xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
- rcu_read_unlock();
+ xps = rpc_clnt_xprt_switch_get(clnt);
if (xps->xps_nxprts > 1) {
long value;

@@ -3251,22 +3258,23 @@ void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
{
struct rpc_xprt_switch *xps;

- rcu_read_lock();
- xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
- rcu_read_unlock();
+ xps = rpc_clnt_xprt_switch_get(clnt);
xprt_set_online_locked(xprt, xps);
+ xprt_switch_put(xps);
}

void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
{
+ struct rpc_xprt_switch *xps;
+
if (rpc_clnt_xprt_switch_has_addr(clnt,
(const struct sockaddr *)&xprt->addr)) {
return rpc_clnt_xprt_set_online(clnt, xprt);
}
- rcu_read_lock();
- rpc_xprt_switch_add_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch),
- xprt);
- rcu_read_unlock();
+
+ xps = rpc_clnt_xprt_switch_get(clnt);
+ rpc_xprt_switch_add_xprt(xps, xprt);
+ xprt_switch_put(xps);
}
EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_add_xprt);

--
2.43.0


2023-12-01 21:16:01

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH 4/4] SUNRPC: Fix a suspicious RCU usage warning

From: Anna Schumaker <[email protected]>

I received the following warning while running cthon against an ontap
server running pNFS:

[ 57.202521] =============================
[ 57.202522] WARNING: suspicious RCU usage
[ 57.202523] 6.7.0-rc3-g2cc14f52aeb7 #41492 Not tainted
[ 57.202525] -----------------------------
[ 57.202525] net/sunrpc/xprtmultipath.c:349 RCU-list traversed in non-reader section!!
[ 57.202527]
other info that might help us debug this:

[ 57.202528]
rcu_scheduler_active = 2, debug_locks = 1
[ 57.202529] no locks held by test5/3567.
[ 57.202530]
stack backtrace:
[ 57.202532] CPU: 0 PID: 3567 Comm: test5 Not tainted 6.7.0-rc3-g2cc14f52aeb7 #41492 5b09971b4965c0aceba19f3eea324a4a806e227e
[ 57.202534] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS unknown 2/2/2022
[ 57.202536] Call Trace:
[ 57.202537] <TASK>
[ 57.202540] dump_stack_lvl+0x77/0xb0
[ 57.202551] lockdep_rcu_suspicious+0x154/0x1a0
[ 57.202556] rpc_xprt_switch_has_addr+0x17c/0x190 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6]
[ 57.202596] rpc_clnt_setup_test_and_add_xprt+0x50/0x180 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6]
[ 57.202621] ? rpc_clnt_add_xprt+0x254/0x300 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6]
[ 57.202646] rpc_clnt_add_xprt+0x27a/0x300 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6]
[ 57.202671] ? __pfx_rpc_clnt_setup_test_and_add_xprt+0x10/0x10 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6]
[ 57.202696] nfs4_pnfs_ds_connect+0x345/0x760 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9]
[ 57.202728] ? __pfx_nfs4_test_session_trunk+0x10/0x10 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9]
[ 57.202754] nfs4_fl_prepare_ds+0x75/0xc0 [nfs_layout_nfsv41_files e3a4187f18ae8a27b630f9feae6831b584a9360a]
[ 57.202760] filelayout_write_pagelist+0x4a/0x200 [nfs_layout_nfsv41_files e3a4187f18ae8a27b630f9feae6831b584a9360a]
[ 57.202765] pnfs_generic_pg_writepages+0xbe/0x230 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9]
[ 57.202788] __nfs_pageio_add_request+0x3fd/0x520 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
[ 57.202813] nfs_pageio_add_request+0x18b/0x390 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
[ 57.202831] nfs_do_writepage+0x116/0x1e0 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
[ 57.202849] nfs_writepages_callback+0x13/0x30 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
[ 57.202866] write_cache_pages+0x265/0x450
[ 57.202870] ? __pfx_nfs_writepages_callback+0x10/0x10 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
[ 57.202891] nfs_writepages+0x141/0x230 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
[ 57.202913] do_writepages+0xd2/0x230
[ 57.202917] ? filemap_fdatawrite_wbc+0x5c/0x80
[ 57.202921] filemap_fdatawrite_wbc+0x67/0x80
[ 57.202924] filemap_write_and_wait_range+0xd9/0x170
[ 57.202930] nfs_wb_all+0x49/0x180 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
[ 57.202947] nfs4_file_flush+0x72/0xb0 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9]
[ 57.202969] __se_sys_close+0x46/0xd0
[ 57.202972] do_syscall_64+0x68/0x100
[ 57.202975] ? do_syscall_64+0x77/0x100
[ 57.202976] ? do_syscall_64+0x77/0x100
[ 57.202979] entry_SYSCALL_64_after_hwframe+0x6e/0x76
[ 57.202982] RIP: 0033:0x7fe2b12e4a94
[ 57.202985] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 80 3d d5 18 0e 00 00 74 13 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 44 c3 0f 1f 00 48 83 ec 18 89 7c 24 0c e8 c3
[ 57.202987] RSP: 002b:00007ffe857ddb38 EFLAGS: 00000202 ORIG_RAX: 0000000000000003
[ 57.202989] RAX: ffffffffffffffda RBX: 00007ffe857dfd68 RCX: 00007fe2b12e4a94
[ 57.202991] RDX: 0000000000002000 RSI: 00007ffe857ddc40 RDI: 0000000000000003
[ 57.202992] RBP: 00007ffe857dfc50 R08: 7fffffffffffffff R09: 0000000065650f49
[ 57.202993] R10: 00007fe2b11f8300 R11: 0000000000000202 R12: 0000000000000000
[ 57.202994] R13: 00007ffe857dfd80 R14: 00007fe2b1445000 R15: 0000000000000000
[ 57.202999] </TASK>

The problem seems to be that two out of three callers aren't taking the
rcu_read_lock() before calling the list_for_each_entry_rcu() function in
rpc_xprt_switch_has_addr(). I fix this by making a new variant of the
function that takes the lock when necessary, and provide a bypass path
for the one function that was doing this already.

Signed-off-by: Anna Schumaker <[email protected]>
---
include/linux/sunrpc/xprtmultipath.h | 2 ++
net/sunrpc/clnt.c | 2 +-
net/sunrpc/xprtmultipath.c | 14 +++++++++++++-
3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h
index c0514c684b2c..0598552e7ccc 100644
--- a/include/linux/sunrpc/xprtmultipath.h
+++ b/include/linux/sunrpc/xprtmultipath.h
@@ -78,6 +78,8 @@ extern struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi);
extern struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi);
extern struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi);

+extern bool __rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
+ const struct sockaddr *sap);
extern bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
const struct sockaddr *sap);

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 0b2c4b5484f5..b2a81c26da32 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -3299,7 +3299,7 @@ bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,

rcu_read_lock();
xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
- ret = rpc_xprt_switch_has_addr(xps, sap);
+ ret = __rpc_xprt_switch_has_addr(xps, sap);
rcu_read_unlock();
return ret;
}
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index 701250b305db..20f9dc220383 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -336,7 +336,7 @@ struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi)
xprt_switch_find_current_entry_offline);
}

-bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
+bool __rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
const struct sockaddr *sap)
{
struct list_head *head;
@@ -356,6 +356,18 @@ bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
return false;
}

+bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
+ const struct sockaddr *sap)
+{
+ bool res;
+
+ rcu_read_lock();
+ res = __rpc_xprt_switch_has_addr(xps, sap);
+ rcu_read_unlock();
+
+ return res;
+}
+
static
struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
const struct rpc_xprt *cur, bool check_active)
--
2.43.0


2023-12-03 12:18:13

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH 1/4] SUNRPC: Clean up unused variable in rpc_xprt_probe_trunked()

On Fri, 2023-12-01 at 16:15 -0500, Anna Schumaker wrote:
> From: Anna Schumaker <[email protected]>
>
> We don't use the rpc_xprt_switch anywhere in this function, so let's not
> take an extra reference to in unnecessarily.
>
> Signed-off-by: Anna Schumaker <[email protected]>
> ---
> net/sunrpc/clnt.c | 3 ---
> 1 file changed, 3 deletions(-)
>
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index daa9582ec861..4aa838543f79 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -3116,7 +3116,6 @@ static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt,
> struct rpc_xprt *xprt,
> struct rpc_add_xprt_test *data)
> {
> - struct rpc_xprt_switch *xps;
> struct rpc_xprt *main_xprt;
> int status = 0;
>
> @@ -3124,7 +3123,6 @@ static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt,
>
> rcu_read_lock();
> main_xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
> - xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
> status = rpc_cmp_addr_port((struct sockaddr *)&xprt->addr,
> (struct sockaddr *)&main_xprt->addr);
> rcu_read_unlock();
> @@ -3135,7 +3133,6 @@ static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt,
> status = rpc_clnt_add_xprt_helper(clnt, xprt, data);
> out:
> xprt_put(xprt);
> - xprt_switch_put(xps);
> return status;
> }
>

Seems right to me. If there is some hidden reason that we need to take
that reference here, then that needs to be documented in a comment.

Reviewed-by: Jeff Layton <[email protected]>

2023-12-03 12:19:18

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH 2/4] SUNRPC: Remove unused function rpc_clnt_xprt_switch_put()

On Fri, 2023-12-01 at 16:15 -0500, Anna Schumaker wrote:
> From: Anna Schumaker <[email protected]>
>
> Signed-off-by: Anna Schumaker <[email protected]>
> ---
> include/linux/sunrpc/clnt.h | 1 -
> net/sunrpc/clnt.c | 8 --------
> 2 files changed, 9 deletions(-)
>
> diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
> index e9d4377d03c6..5e9d1469c6fa 100644
> --- a/include/linux/sunrpc/clnt.h
> +++ b/include/linux/sunrpc/clnt.h
> @@ -252,7 +252,6 @@ void rpc_clnt_probe_trunked_xprts(struct rpc_clnt *,
>
> const char *rpc_proc_name(const struct rpc_task *task);
>
> -void rpc_clnt_xprt_switch_put(struct rpc_clnt *);
> void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *);
> void rpc_clnt_xprt_switch_remove_xprt(struct rpc_clnt *, struct rpc_xprt *);
> bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index 4aa838543f79..8df944444e9b 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -3247,14 +3247,6 @@ rpc_set_connect_timeout(struct rpc_clnt *clnt,
> }
> EXPORT_SYMBOL_GPL(rpc_set_connect_timeout);
>
> -void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt)
> -{
> - rcu_read_lock();
> - xprt_switch_put(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
> - rcu_read_unlock();
> -}
> -EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_put);
> -
> void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
> {
> struct rpc_xprt_switch *xps;

Nice catch.

Reviewed-by: Jeff Layton <[email protected]>

2023-12-03 12:25:01

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH 3/4] SUNRPC: Create a helper function for accessing the rpc_clnt's xprt_switch

On Fri, 2023-12-01 at 16:15 -0500, Anna Schumaker wrote:
> From: Anna Schumaker <[email protected]>
>
> This function takes the necessary rcu read lock to dereference the
> client's rpc_xprt_switch and bump the reference count so it doesn't
> disappear underneath us before returning. This does mean that callers
> are responsible for calling xprt_switch_put() on the returned object
> when they are done with it.
>
> Signed-off-by: Anna Schumaker <[email protected]>
> ---
> net/sunrpc/clnt.c | 34 +++++++++++++++++++++-------------
> 1 file changed, 21 insertions(+), 13 deletions(-)
>
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index 8df944444e9b..0b2c4b5484f5 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -797,15 +797,24 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt,
> }
> EXPORT_SYMBOL_GPL(rpc_switch_client_transport);
>
> +static struct rpc_xprt_switch *rpc_clnt_xprt_switch_get(struct rpc_clnt *clnt)
> +{
> + struct rpc_xprt_switch *xps;
> +
> + rcu_read_lock();
> + xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
> + rcu_read_unlock();
> +
> + return xps;
> +}
> +
> static
> int _rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi,
> void func(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps))
> {
> struct rpc_xprt_switch *xps;
>
> - rcu_read_lock();
> - xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
> - rcu_read_unlock();
> + xps = rpc_clnt_xprt_switch_get(clnt);
> if (xps == NULL)
> return -EAGAIN;
> func(xpi, xps);
> @@ -2206,9 +2215,7 @@ call_connect_status(struct rpc_task *task)
> struct rpc_xprt *saved = task->tk_xprt;
> struct rpc_xprt_switch *xps;
>
> - rcu_read_lock();
> - xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
> - rcu_read_unlock();
> + xps = rpc_clnt_xprt_switch_get(clnt);
> if (xps->xps_nxprts > 1) {
> long value;
>
> @@ -3251,22 +3258,23 @@ void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
> {
> struct rpc_xprt_switch *xps;
>
> - rcu_read_lock();
> - xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
> - rcu_read_unlock();
> + xps = rpc_clnt_xprt_switch_get(clnt);
> xprt_set_online_locked(xprt, xps);
> + xprt_switch_put(xps);
> }

FWIW, it looks like the above fixes a real bug. It's almost certainly
not safe to dereference xps once you drop the rcu_read_lock there.

>
> void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
> {
> + struct rpc_xprt_switch *xps;
> +
> if (rpc_clnt_xprt_switch_has_addr(clnt,
> (const struct sockaddr *)&xprt->addr)) {
> return rpc_clnt_xprt_set_online(clnt, xprt);
> }
> - rcu_read_lock();
> - rpc_xprt_switch_add_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch),
> - xprt);
> - rcu_read_unlock();
> +
> + xps = rpc_clnt_xprt_switch_get(clnt);
> + rpc_xprt_switch_add_xprt(xps, xprt);
> + xprt_switch_put(xps);
> }
> EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_add_xprt);
>

Reviewed-by: Jeff Layton <[email protected]>

2023-12-03 12:30:43

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH 4/4] SUNRPC: Fix a suspicious RCU usage warning

On Fri, 2023-12-01 at 16:15 -0500, Anna Schumaker wrote:
> From: Anna Schumaker <[email protected]>
>
> I received the following warning while running cthon against an ontap
> server running pNFS:
>
> [ 57.202521] =============================
> [ 57.202522] WARNING: suspicious RCU usage
> [ 57.202523] 6.7.0-rc3-g2cc14f52aeb7 #41492 Not tainted
> [ 57.202525] -----------------------------
> [ 57.202525] net/sunrpc/xprtmultipath.c:349 RCU-list traversed in non-reader section!!
> [ 57.202527]
> other info that might help us debug this:
>
> [ 57.202528]
> rcu_scheduler_active = 2, debug_locks = 1
> [ 57.202529] no locks held by test5/3567.
> [ 57.202530]
> stack backtrace:
> [ 57.202532] CPU: 0 PID: 3567 Comm: test5 Not tainted 6.7.0-rc3-g2cc14f52aeb7 #41492 5b09971b4965c0aceba19f3eea324a4a806e227e
> [ 57.202534] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS unknown 2/2/2022
> [ 57.202536] Call Trace:
> [ 57.202537] <TASK>
> [ 57.202540] dump_stack_lvl+0x77/0xb0
> [ 57.202551] lockdep_rcu_suspicious+0x154/0x1a0
> [ 57.202556] rpc_xprt_switch_has_addr+0x17c/0x190 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6]
> [ 57.202596] rpc_clnt_setup_test_and_add_xprt+0x50/0x180 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6]
> [ 57.202621] ? rpc_clnt_add_xprt+0x254/0x300 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6]
> [ 57.202646] rpc_clnt_add_xprt+0x27a/0x300 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6]
> [ 57.202671] ? __pfx_rpc_clnt_setup_test_and_add_xprt+0x10/0x10 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6]
> [ 57.202696] nfs4_pnfs_ds_connect+0x345/0x760 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9]
> [ 57.202728] ? __pfx_nfs4_test_session_trunk+0x10/0x10 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9]
> [ 57.202754] nfs4_fl_prepare_ds+0x75/0xc0 [nfs_layout_nfsv41_files e3a4187f18ae8a27b630f9feae6831b584a9360a]
> [ 57.202760] filelayout_write_pagelist+0x4a/0x200 [nfs_layout_nfsv41_files e3a4187f18ae8a27b630f9feae6831b584a9360a]
> [ 57.202765] pnfs_generic_pg_writepages+0xbe/0x230 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9]
> [ 57.202788] __nfs_pageio_add_request+0x3fd/0x520 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
> [ 57.202813] nfs_pageio_add_request+0x18b/0x390 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
> [ 57.202831] nfs_do_writepage+0x116/0x1e0 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
> [ 57.202849] nfs_writepages_callback+0x13/0x30 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
> [ 57.202866] write_cache_pages+0x265/0x450
> [ 57.202870] ? __pfx_nfs_writepages_callback+0x10/0x10 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
> [ 57.202891] nfs_writepages+0x141/0x230 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
> [ 57.202913] do_writepages+0xd2/0x230
> [ 57.202917] ? filemap_fdatawrite_wbc+0x5c/0x80
> [ 57.202921] filemap_fdatawrite_wbc+0x67/0x80
> [ 57.202924] filemap_write_and_wait_range+0xd9/0x170
> [ 57.202930] nfs_wb_all+0x49/0x180 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
> [ 57.202947] nfs4_file_flush+0x72/0xb0 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9]
> [ 57.202969] __se_sys_close+0x46/0xd0
> [ 57.202972] do_syscall_64+0x68/0x100
> [ 57.202975] ? do_syscall_64+0x77/0x100
> [ 57.202976] ? do_syscall_64+0x77/0x100
> [ 57.202979] entry_SYSCALL_64_after_hwframe+0x6e/0x76
> [ 57.202982] RIP: 0033:0x7fe2b12e4a94
> [ 57.202985] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 80 3d d5 18 0e 00 00 74 13 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 44 c3 0f 1f 00 48 83 ec 18 89 7c 24 0c e8 c3
> [ 57.202987] RSP: 002b:00007ffe857ddb38 EFLAGS: 00000202 ORIG_RAX: 0000000000000003
> [ 57.202989] RAX: ffffffffffffffda RBX: 00007ffe857dfd68 RCX: 00007fe2b12e4a94
> [ 57.202991] RDX: 0000000000002000 RSI: 00007ffe857ddc40 RDI: 0000000000000003
> [ 57.202992] RBP: 00007ffe857dfc50 R08: 7fffffffffffffff R09: 0000000065650f49
> [ 57.202993] R10: 00007fe2b11f8300 R11: 0000000000000202 R12: 0000000000000000
> [ 57.202994] R13: 00007ffe857dfd80 R14: 00007fe2b1445000 R15: 0000000000000000
> [ 57.202999] </TASK>
>
> The problem seems to be that two out of three callers aren't taking the
> rcu_read_lock() before calling the list_for_each_entry_rcu() function in
> rpc_xprt_switch_has_addr(). I fix this by making a new variant of the
> function that takes the lock when necessary, and provide a bypass path
> for the one function that was doing this already.
>
> Signed-off-by: Anna Schumaker <[email protected]>
> ---
> include/linux/sunrpc/xprtmultipath.h | 2 ++
> net/sunrpc/clnt.c | 2 +-
> net/sunrpc/xprtmultipath.c | 14 +++++++++++++-
> 3 files changed, 16 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h
> index c0514c684b2c..0598552e7ccc 100644
> --- a/include/linux/sunrpc/xprtmultipath.h
> +++ b/include/linux/sunrpc/xprtmultipath.h
> @@ -78,6 +78,8 @@ extern struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi);
> extern struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi);
> extern struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi);
>
> +extern bool __rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
> + const struct sockaddr *sap);
> extern bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
> const struct sockaddr *sap);
>
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index 0b2c4b5484f5..b2a81c26da32 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -3299,7 +3299,7 @@ bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,
>
> rcu_read_lock();
> xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
> - ret = rpc_xprt_switch_has_addr(xps, sap);
> + ret = __rpc_xprt_switch_has_addr(xps, sap);
> rcu_read_unlock();
> return ret;
> }
> diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
> index 701250b305db..20f9dc220383 100644
> --- a/net/sunrpc/xprtmultipath.c
> +++ b/net/sunrpc/xprtmultipath.c
> @@ -336,7 +336,7 @@ struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi)
> xprt_switch_find_current_entry_offline);
> }
>
> -bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
> +bool __rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
> const struct sockaddr *sap)
> {
> struct list_head *head;
> @@ -356,6 +356,18 @@ bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
> return false;
> }
>
> +bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
> + const struct sockaddr *sap)
> +{
> + bool res;
> +
> + rcu_read_lock();
> + res = __rpc_xprt_switch_has_addr(xps, sap);
> + rcu_read_unlock();
> +
> + return res;
> +}
> +
> static
> struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
> const struct rpc_xprt *cur, bool check_active)

Adding an new wrapper here is probably more trouble than it's worth.

Why not have rpc_xprt_switch_has_addr take and drop the rcu_read_lock
itself? It is safe (and reasonably cheap) to take it recursively, so
that should be fine from existing callers that already hold it.

Either way, this patch fixes a real bug, so if you choose go to with it:

Reviewed-by: Jeff Layton <[email protected]>

2023-12-04 17:00:29

by Anna Schumaker

[permalink] [raw]
Subject: Re: [PATCH 4/4] SUNRPC: Fix a suspicious RCU usage warning

Hi Jeff,

On Sun, Dec 3, 2023 at 7:30 AM Jeff Layton <[email protected]> wrote:
>
> On Fri, 2023-12-01 at 16:15 -0500, Anna Schumaker wrote:
> > From: Anna Schumaker <[email protected]>
> >
> > I received the following warning while running cthon against an ontap
> > server running pNFS:
> >
> > [ 57.202521] =============================
> > [ 57.202522] WARNING: suspicious RCU usage
> > [ 57.202523] 6.7.0-rc3-g2cc14f52aeb7 #41492 Not tainted
> > [ 57.202525] -----------------------------
> > [ 57.202525] net/sunrpc/xprtmultipath.c:349 RCU-list traversed in non-reader section!!
> > [ 57.202527]
> > other info that might help us debug this:
> >
> > [ 57.202528]
> > rcu_scheduler_active = 2, debug_locks = 1
> > [ 57.202529] no locks held by test5/3567.
> > [ 57.202530]
> > stack backtrace:
> > [ 57.202532] CPU: 0 PID: 3567 Comm: test5 Not tainted 6.7.0-rc3-g2cc14f52aeb7 #41492 5b09971b4965c0aceba19f3eea324a4a806e227e
> > [ 57.202534] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS unknown 2/2/2022
> > [ 57.202536] Call Trace:
> > [ 57.202537] <TASK>
> > [ 57.202540] dump_stack_lvl+0x77/0xb0
> > [ 57.202551] lockdep_rcu_suspicious+0x154/0x1a0
> > [ 57.202556] rpc_xprt_switch_has_addr+0x17c/0x190 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6]
> > [ 57.202596] rpc_clnt_setup_test_and_add_xprt+0x50/0x180 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6]
> > [ 57.202621] ? rpc_clnt_add_xprt+0x254/0x300 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6]
> > [ 57.202646] rpc_clnt_add_xprt+0x27a/0x300 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6]
> > [ 57.202671] ? __pfx_rpc_clnt_setup_test_and_add_xprt+0x10/0x10 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6]
> > [ 57.202696] nfs4_pnfs_ds_connect+0x345/0x760 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9]
> > [ 57.202728] ? __pfx_nfs4_test_session_trunk+0x10/0x10 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9]
> > [ 57.202754] nfs4_fl_prepare_ds+0x75/0xc0 [nfs_layout_nfsv41_files e3a4187f18ae8a27b630f9feae6831b584a9360a]
> > [ 57.202760] filelayout_write_pagelist+0x4a/0x200 [nfs_layout_nfsv41_files e3a4187f18ae8a27b630f9feae6831b584a9360a]
> > [ 57.202765] pnfs_generic_pg_writepages+0xbe/0x230 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9]
> > [ 57.202788] __nfs_pageio_add_request+0x3fd/0x520 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
> > [ 57.202813] nfs_pageio_add_request+0x18b/0x390 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
> > [ 57.202831] nfs_do_writepage+0x116/0x1e0 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
> > [ 57.202849] nfs_writepages_callback+0x13/0x30 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
> > [ 57.202866] write_cache_pages+0x265/0x450
> > [ 57.202870] ? __pfx_nfs_writepages_callback+0x10/0x10 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
> > [ 57.202891] nfs_writepages+0x141/0x230 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
> > [ 57.202913] do_writepages+0xd2/0x230
> > [ 57.202917] ? filemap_fdatawrite_wbc+0x5c/0x80
> > [ 57.202921] filemap_fdatawrite_wbc+0x67/0x80
> > [ 57.202924] filemap_write_and_wait_range+0xd9/0x170
> > [ 57.202930] nfs_wb_all+0x49/0x180 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902]
> > [ 57.202947] nfs4_file_flush+0x72/0xb0 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9]
> > [ 57.202969] __se_sys_close+0x46/0xd0
> > [ 57.202972] do_syscall_64+0x68/0x100
> > [ 57.202975] ? do_syscall_64+0x77/0x100
> > [ 57.202976] ? do_syscall_64+0x77/0x100
> > [ 57.202979] entry_SYSCALL_64_after_hwframe+0x6e/0x76
> > [ 57.202982] RIP: 0033:0x7fe2b12e4a94
> > [ 57.202985] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 80 3d d5 18 0e 00 00 74 13 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 44 c3 0f 1f 00 48 83 ec 18 89 7c 24 0c e8 c3
> > [ 57.202987] RSP: 002b:00007ffe857ddb38 EFLAGS: 00000202 ORIG_RAX: 0000000000000003
> > [ 57.202989] RAX: ffffffffffffffda RBX: 00007ffe857dfd68 RCX: 00007fe2b12e4a94
> > [ 57.202991] RDX: 0000000000002000 RSI: 00007ffe857ddc40 RDI: 0000000000000003
> > [ 57.202992] RBP: 00007ffe857dfc50 R08: 7fffffffffffffff R09: 0000000065650f49
> > [ 57.202993] R10: 00007fe2b11f8300 R11: 0000000000000202 R12: 0000000000000000
> > [ 57.202994] R13: 00007ffe857dfd80 R14: 00007fe2b1445000 R15: 0000000000000000
> > [ 57.202999] </TASK>
> >
> > The problem seems to be that two out of three callers aren't taking the
> > rcu_read_lock() before calling the list_for_each_entry_rcu() function in
> > rpc_xprt_switch_has_addr(). I fix this by making a new variant of the
> > function that takes the lock when necessary, and provide a bypass path
> > for the one function that was doing this already.
> >
> > Signed-off-by: Anna Schumaker <[email protected]>
> > ---
> > include/linux/sunrpc/xprtmultipath.h | 2 ++
> > net/sunrpc/clnt.c | 2 +-
> > net/sunrpc/xprtmultipath.c | 14 +++++++++++++-
> > 3 files changed, 16 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h
> > index c0514c684b2c..0598552e7ccc 100644
> > --- a/include/linux/sunrpc/xprtmultipath.h
> > +++ b/include/linux/sunrpc/xprtmultipath.h
> > @@ -78,6 +78,8 @@ extern struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi);
> > extern struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi);
> > extern struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi);
> >
> > +extern bool __rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
> > + const struct sockaddr *sap);
> > extern bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
> > const struct sockaddr *sap);
> >
> > diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> > index 0b2c4b5484f5..b2a81c26da32 100644
> > --- a/net/sunrpc/clnt.c
> > +++ b/net/sunrpc/clnt.c
> > @@ -3299,7 +3299,7 @@ bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,
> >
> > rcu_read_lock();
> > xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
> > - ret = rpc_xprt_switch_has_addr(xps, sap);
> > + ret = __rpc_xprt_switch_has_addr(xps, sap);
> > rcu_read_unlock();
> > return ret;
> > }
> > diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
> > index 701250b305db..20f9dc220383 100644
> > --- a/net/sunrpc/xprtmultipath.c
> > +++ b/net/sunrpc/xprtmultipath.c
> > @@ -336,7 +336,7 @@ struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi)
> > xprt_switch_find_current_entry_offline);
> > }
> >
> > -bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
> > +bool __rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
> > const struct sockaddr *sap)
> > {
> > struct list_head *head;
> > @@ -356,6 +356,18 @@ bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
> > return false;
> > }
> >
> > +bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
> > + const struct sockaddr *sap)
> > +{
> > + bool res;
> > +
> > + rcu_read_lock();
> > + res = __rpc_xprt_switch_has_addr(xps, sap);
> > + rcu_read_unlock();
> > +
> > + return res;
> > +}
> > +
> > static
> > struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
> > const struct rpc_xprt *cur, bool check_active)
>
> Adding an new wrapper here is probably more trouble than it's worth.
>
> Why not have rpc_xprt_switch_has_addr take and drop the rcu_read_lock
> itself? It is safe (and reasonably cheap) to take it recursively, so
> that should be fine from existing callers that already hold it.

Good suggestion! I'm too used to thinking in terms of avoiding
recursive locks. I'll fix up the patch and resend.

Thanks for the review!
Anna

>
> Either way, this patch fixes a real bug, so if you choose go to with it:
>
> Reviewed-by: Jeff Layton <[email protected]>