These patches clean up some issues surrouding nfs_page_group_lock:
- normalize wait/nonblock argument
- make nonblocking calls really nonblocking
- handle errors
- ensure that we don't call blocking nfs_page_group_lock when holding the
inode spinlock
This cleanup was inspired by Fengguang Wu's report that we were sleeping with
locks held in nfs_lock_and_join_requests.
Weston Andros Adamson (5):
nfs: change nfs_page_group_lock argument
nfs: fix nonblocking calls to nfs_page_group_lock
nfs: use blocking page_group_lock in add_request
nfs: fix error handling in lock_and_join_requests
nfs: don't sleep with inode lock in lock_and_join_requests
fs/nfs/pagelist.c | 59 ++++++++++++++++++++++++++++++------------------
fs/nfs/write.c | 21 +++++++++++++----
include/linux/nfs_page.h | 1 +
3 files changed, 55 insertions(+), 26 deletions(-)
--
1.8.5.2 (Apple Git-48)
__nfs_pageio_add_request was calling nfs_page_group_lock nonblocking, but
this can return -EAGAIN which would end up passing -EIO to the application.
There is no reason not to block in this path, so change the two calls to
do so. Also, there is no need to check the return value of
nfs_page_group_lock when nonblock=false, so remove the error handling code.
Signed-off-by: Weston Andros Adamson <[email protected]>
Reviewed-by: Peng Tao <[email protected]>
---
fs/nfs/pagelist.c | 13 ++-----------
1 file changed, 2 insertions(+), 11 deletions(-)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index f393c72..af707e0 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -872,13 +872,8 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
struct nfs_page *subreq;
unsigned int bytes_left = 0;
unsigned int offset, pgbase;
- int ret;
- ret = nfs_page_group_lock(req, true);
- if (ret < 0) {
- desc->pg_error = ret;
- return 0;
- }
+ nfs_page_group_lock(req, false);
subreq = req;
bytes_left = subreq->wb_bytes;
@@ -900,11 +895,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
if (desc->pg_recoalesce)
return 0;
/* retry add_request for this subreq */
- ret = nfs_page_group_lock(req, true);
- if (ret < 0) {
- desc->pg_error = ret;
- return 0;
- }
+ nfs_page_group_lock(req, false);
continue;
}
--
1.8.5.2 (Apple Git-48)
Flip the meaning of the second argument from 'wait' to 'nonblock' to
match related functions. Update all five calls to reflect this change.
Signed-off-by: Weston Andros Adamson <[email protected]>
Reviewed-by: Peng Tao <[email protected]>
---
fs/nfs/pagelist.c | 11 ++++++-----
fs/nfs/write.c | 4 ++--
2 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 9425118..38eaf83 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -145,13 +145,14 @@ static int nfs_wait_bit_uninterruptible(void *word)
/*
* nfs_page_group_lock - lock the head of the page group
* @req - request in group that is to be locked
+ * @nonblock - if true don't block waiting for lock
*
* this lock must be held if modifying the page group list
*
* returns result from wait_on_bit_lock: 0 on success, < 0 on error
*/
int
-nfs_page_group_lock(struct nfs_page *req, bool wait)
+nfs_page_group_lock(struct nfs_page *req, bool nonblock)
{
struct nfs_page *head = req->wb_head;
int ret;
@@ -162,7 +163,7 @@ nfs_page_group_lock(struct nfs_page *req, bool wait)
ret = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
nfs_wait_bit_uninterruptible,
TASK_UNINTERRUPTIBLE);
- } while (wait && ret != 0);
+ } while (!nonblock && ret != 0);
WARN_ON_ONCE(ret > 0);
return ret;
@@ -226,7 +227,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
{
bool ret;
- nfs_page_group_lock(req, true);
+ nfs_page_group_lock(req, false);
ret = nfs_page_group_sync_on_bit_locked(req, bit);
nfs_page_group_unlock(req);
@@ -868,7 +869,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
unsigned int offset, pgbase;
int ret;
- ret = nfs_page_group_lock(req, false);
+ ret = nfs_page_group_lock(req, true);
if (ret < 0) {
desc->pg_error = ret;
return 0;
@@ -894,7 +895,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
if (desc->pg_recoalesce)
return 0;
/* retry add_request for this subreq */
- ret = nfs_page_group_lock(req, false);
+ ret = nfs_page_group_lock(req, true);
if (ret < 0) {
desc->pg_error = ret;
return 0;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 1065de2..321e561 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -241,7 +241,7 @@ static bool nfs_page_group_covers_page(struct nfs_page *req)
unsigned int pos = 0;
unsigned int len = nfs_page_length(req->wb_page);
- nfs_page_group_lock(req, true);
+ nfs_page_group_lock(req, false);
do {
tmp = nfs_page_group_search_locked(req->wb_head, pos);
@@ -479,7 +479,7 @@ try_again:
}
/* lock each request in the page group */
- ret = nfs_page_group_lock(head, false);
+ ret = nfs_page_group_lock(head, true);
if (ret < 0)
return ERR_PTR(ret);
subreq = head;
--
1.8.5.2 (Apple Git-48)
nfs_page_group_lock was calling wait_on_bit_lock even when told not to
block. Fix by first trying test_and_set_bit, followed by wait_on_bit_lock
if and only if blocking is allowed. Return -EAGAIN if nonblocking and the
test_and_set of the bit was already locked.
Signed-off-by: Weston Andros Adamson <[email protected]>
Reviewed-by: Peng Tao <[email protected]>
---
fs/nfs/pagelist.c | 23 ++++++++++++++---------
1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 38eaf83..f393c72 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -149,24 +149,29 @@ static int nfs_wait_bit_uninterruptible(void *word)
*
* this lock must be held if modifying the page group list
*
- * returns result from wait_on_bit_lock: 0 on success, < 0 on error
+ * return 0 on success, < 0 on error: -EDELAY if nonblocking or the
+ * result from wait_on_bit_lock
+ *
+ * NOTE: calling with nonblock=false should always have set the
+ * lock bit (see fs/buffer.c and other uses of wait_on_bit_lock
+ * with TASK_UNINTERRUPTIBLE), so there is no need to check the result.
*/
int
nfs_page_group_lock(struct nfs_page *req, bool nonblock)
{
struct nfs_page *head = req->wb_head;
- int ret;
WARN_ON_ONCE(head != head->wb_head);
- do {
- ret = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
- nfs_wait_bit_uninterruptible,
- TASK_UNINTERRUPTIBLE);
- } while (!nonblock && ret != 0);
+ if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags))
+ return 0;
- WARN_ON_ONCE(ret > 0);
- return ret;
+ if (!nonblock)
+ return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
+ nfs_wait_bit_uninterruptible,
+ TASK_UNINTERRUPTIBLE);
+
+ return -EAGAIN;
}
/*
--
1.8.5.2 (Apple Git-48)
This fixes handling of errors from nfs_page_group_lock in
nfs_lock_and_join_requests. It now releases the inode lock and the
reference to the head request.
Reported-by: Peng Tao <[email protected]>
Signed-off-by: Weston Andros Adamson <[email protected]>
Reviewed-by: Peng Tao <[email protected]>
---
fs/nfs/write.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 321e561..0e3186b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -480,8 +480,11 @@ try_again:
/* lock each request in the page group */
ret = nfs_page_group_lock(head, true);
- if (ret < 0)
+ if (ret < 0) {
+ spin_unlock(&inode->i_lock);
+ nfs_release_request(head);
return ERR_PTR(ret);
+ }
subreq = head;
do {
/*
--
1.8.5.2 (Apple Git-48)
This handles the 'nonblock=false' case in nfs_lock_and_join_requests.
If the group is already locked and blocking is allowed, drop the inode lock
and wait for the group lock to be cleared before trying it all again.
This should fix warnings found in peterz's tree (sched/wait branch), where
might_sleep() checks are added to wait.[ch].
Reported-by: Fengguang Wu <[email protected]>
Signed-off-by: Weston Andros Adamson <[email protected]>
Reviewed-by: Peng Tao <[email protected]>
---
fs/nfs/pagelist.c | 18 ++++++++++++++++++
fs/nfs/write.c | 12 +++++++++++-
include/linux/nfs_page.h | 1 +
3 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index af707e0..e0c2e72 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -175,6 +175,24 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock)
}
/*
+ * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it
+ * @req - a request in the group
+ *
+ * This is a blocking call to wait for the group lock to be cleared.
+ */
+void
+nfs_page_group_lock_wait(struct nfs_page *req)
+{
+ struct nfs_page *head = req->wb_head;
+
+ WARN_ON_ONCE(head != head->wb_head);
+
+ wait_on_bit(&head->wb_flags, PG_HEADLOCK,
+ nfs_wait_bit_uninterruptible,
+ TASK_UNINTERRUPTIBLE);
+}
+
+/*
* nfs_page_group_unlock - unlock the head of the page group
* @req - request in group that is to be unlocked
*/
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0e3186b..f744f02 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -478,13 +478,23 @@ try_again:
return NULL;
}
- /* lock each request in the page group */
+ /* holding inode lock, so always make a non-blocking call to try the
+ * page group lock */
ret = nfs_page_group_lock(head, true);
if (ret < 0) {
spin_unlock(&inode->i_lock);
+
+ if (!nonblock && ret == -EAGAIN) {
+ nfs_page_group_lock_wait(head);
+ nfs_release_request(head);
+ goto try_again;
+ }
+
nfs_release_request(head);
return ERR_PTR(ret);
}
+
+ /* lock each request in the page group */
subreq = head;
do {
/*
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 6ad2bbc..6c3e06e 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -123,6 +123,7 @@ extern int nfs_wait_on_request(struct nfs_page *);
extern void nfs_unlock_request(struct nfs_page *req);
extern void nfs_unlock_and_release_request(struct nfs_page *);
extern int nfs_page_group_lock(struct nfs_page *, bool);
+extern void nfs_page_group_lock_wait(struct nfs_page *);
extern void nfs_page_group_unlock(struct nfs_page *);
extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
--
1.8.5.2 (Apple Git-48)