If memory allocation fails in nfs4_schedule_state_manager() when mounting
NFSv4.1/NFSv4.2, nfs4_run_state_manager() will not be called, and the
current construction state will never be marked as ready or failed, so
nfs_wait_client_init_complete() will wait forever, as shown below:
  syscall(mount)
    ...
    nfs4_init_client
      nfs4_discover_server_trunking
        nfs41_discover_server_trunking
          nfs4_schedule_state_manager
            kthread_run /* nfs4_run_state_manager() will not be called */
              kthread_create
                kthread_create_on_node
                  __kthread_create_on_node
                    create = kmalloc() = NULL
                    return ERR_PTR(-ENOMEM)
          nfs_wait_client_init_complete /* wait forever */
Fix this by checking the return value of nfs4_schedule_state_manager(),
which indicates whether the kernel thread was created successfully.
Signed-off-by: ChenXiaoSong <[email protected]>
---
fs/nfs/nfs4_fs.h | 2 +-
fs/nfs/nfs4state.c | 15 ++++++++++-----
2 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index cfef738d765e..74c6d1504010 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -502,7 +502,7 @@ extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *, struct nfs4_stat
extern void nfs4_schedule_lease_recovery(struct nfs_client *);
extern int nfs4_wait_clnt_recover(struct nfs_client *clp);
extern int nfs4_client_recover_expired_lease(struct nfs_client *clp);
-extern void nfs4_schedule_state_manager(struct nfs_client *);
+extern int nfs4_schedule_state_manager(struct nfs_client *);
extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp);
extern int nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
extern int nfs4_schedule_migration_recovery(const struct nfs_server *);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index a2d2d5d1b088..127027f777c8 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -376,8 +376,9 @@ int nfs41_discover_server_trunking(struct nfs_client *clp,
else
set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
}
- nfs4_schedule_state_manager(clp);
- status = nfs_wait_client_init_complete(clp);
+ status = nfs4_schedule_state_manager(clp);
+ if (!status)
+ status = nfs_wait_client_init_complete(clp);
if (status < 0)
nfs_put_client(clp);
return status;
@@ -1201,11 +1202,12 @@ static void nfs4_clear_state_manager_bit(struct nfs_client *clp)
/*
* Schedule the nfs_client asynchronous state management routine
*/
-void nfs4_schedule_state_manager(struct nfs_client *clp)
+int nfs4_schedule_state_manager(struct nfs_client *clp)
{
struct task_struct *task;
char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
struct rpc_clnt *cl = clp->cl_rpcclient;
+ int ret = 0;
while (cl != cl->cl_parent)
cl = cl->cl_parent;
@@ -1213,7 +1215,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) {
wake_up_var(&clp->cl_state);
- return;
+ goto out;
}
set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
__module_get(THIS_MODULE);
@@ -1228,13 +1230,16 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
rcu_read_unlock();
task = kthread_run(nfs4_run_state_manager, clp, "%s", buf);
if (IS_ERR(task)) {
+ ret = PTR_ERR(task);
printk(KERN_ERR "%s: kthread_run: %ld\n",
- __func__, PTR_ERR(task));
+ __func__, ret);
nfs4_clear_state_manager_bit(clp);
clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
nfs_put_client(clp);
module_put(THIS_MODULE);
}
+out:
+ return ret;
}
/*
--
2.31.1
Hi ChenXiaoSong,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on trondmy-nfs/linux-next]
[also build test WARNING on linus/master v6.1-rc4 next-20221111]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/ChenXiaoSong/NFSv4-1-handle-memory-allocation-failure-in-nfs4_schedule_state_manager/20221111-231029
base: git://git.linux-nfs.org/projects/trondmy/linux-nfs.git linux-next
patch link: https://lore.kernel.org/r/20221111161033.899541-1-chenxiaosong2%40huawei.com
patch subject: [PATCH] NFSv4.1: handle memory allocation failure in nfs4_schedule_state_manager()
config: arm-netwinder_defconfig
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 463da45892e2d2a262277b91b96f5f8c05dc25d0)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install arm cross compiling tool for clang build
# apt-get install binutils-arm-linux-gnueabi
# https://github.com/intel-lab-lkp/linux/commit/8c841c9d48729579480bc452fdceff3dfdbf31c4
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review ChenXiaoSong/NFSv4-1-handle-memory-allocation-failure-in-nfs4_schedule_state_manager/20221111-231029
git checkout 8c841c9d48729579480bc452fdceff3dfdbf31c4
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash fs/nfs/
If you fix the issue, kindly add the following tag where applicable
| Reported-by: kernel test robot <[email protected]>
All warnings (new ones prefixed by >>):
>> fs/nfs/nfs4state.c:1235:14: warning: format specifies type 'long' but the argument has type 'int' [-Wformat]
__func__, ret);
^~~
include/linux/printk.h:457:60: note: expanded from macro 'printk'
#define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__)
~~~ ^~~~~~~~~~~
include/linux/printk.h:429:19: note: expanded from macro 'printk_index_wrap'
_p_func(_fmt, ##__VA_ARGS__); \
~~~~ ^~~~~~~~~~~
1 warning generated.
vim +1235 fs/nfs/nfs4state.c
1201
1202 /*
1203 * Schedule the nfs_client asynchronous state management routine
1204 */
1205 int nfs4_schedule_state_manager(struct nfs_client *clp)
1206 {
1207 struct task_struct *task;
1208 char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
1209 struct rpc_clnt *cl = clp->cl_rpcclient;
1210 int ret = 0;
1211
1212 while (cl != cl->cl_parent)
1213 cl = cl->cl_parent;
1214
1215 set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
1216 if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) {
1217 wake_up_var(&clp->cl_state);
1218 goto out;
1219 }
1220 set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
1221 __module_get(THIS_MODULE);
1222 refcount_inc(&clp->cl_count);
1223
1224 /* The rcu_read_lock() is not strictly necessary, as the state
1225 * manager is the only thread that ever changes the rpc_xprt
1226 * after it's initialized. At this point, we're single threaded. */
1227 rcu_read_lock();
1228 snprintf(buf, sizeof(buf), "%s-manager",
1229 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
1230 rcu_read_unlock();
1231 task = kthread_run(nfs4_run_state_manager, clp, "%s", buf);
1232 if (IS_ERR(task)) {
1233 ret = PTR_ERR(task);
1234 printk(KERN_ERR "%s: kthread_run: %ld\n",
> 1235 __func__, ret);
1236 nfs4_clear_state_manager_bit(clp);
1237 clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
1238 nfs_put_client(clp);
1239 module_put(THIS_MODULE);
1240 }
1241 out:
1242 return ret;
1243 }
1244
--
0-DAY CI Kernel Test Service
https://01.org/lkp