Simplify the loop in nfs_update_request by moving into a separate function
the code that attempts to update an existing cached NFS write.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/write.c | 105 ++++++++++++++++++++++++++++----------------------------
1 files changed, 53 insertions(+), 52 deletions(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 8f12157..a675dc9 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -34,9 +34,8 @@
/*
* Local function declarations
*/
-static struct nfs_page * nfs_update_request(struct nfs_open_context*,
- struct page *,
- unsigned int, unsigned int);
+static struct nfs_page * nfs_setup_write_request(struct nfs_open_context *ctx,
+ struct page *page, unsigned int offset, unsigned int bytes);
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
struct inode *inode, int ioflags);
static void nfs_redirty_request(struct nfs_page *req);
@@ -178,7 +177,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
int ret;
for (;;) {
- req = nfs_update_request(ctx, page, offset, count);
+ req = nfs_setup_write_request(ctx, page, offset, count);
if (!IS_ERR(req))
break;
ret = PTR_ERR(req);
@@ -566,6 +565,41 @@ static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pg
}
#endif
+static int nfs_try_to_update_request(struct nfs_page *req,
+ unsigned int offset,
+ unsigned int end)
+{
+ unsigned int rqend;
+
+ rqend = req->wb_offset + req->wb_bytes;
+
+ /*
+ * If the page doesn't match, or the offsets
+ * are non-contiguous, tell the caller to
+ * wait on the conflicting request.
+ */
+ if (req->wb_page == NULL
+ || !nfs_dirty_request(req)
+ || offset > rqend
+ || end < req->wb_offset)
+ return -EBUSY;
+
+ if (!nfs_set_page_tag_locked(req))
+ return -EAGAIN;
+
+ /* Okay, the request matches. Update the region */
+ if (offset < req->wb_offset) {
+ req->wb_offset = offset;
+ req->wb_pgbase = offset;
+ }
+ if (end > rqend)
+ req->wb_bytes = end - req->wb_offset;
+ else
+ req->wb_bytes = rqend - req->wb_offset;
+
+ return 0;
+}
+
/*
* Try to update any existing write request, or create one if there is none.
* In order to match, the request's credentials must match those of
@@ -573,16 +607,15 @@ static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pg
*
* Note: Should always be called with the Page Lock held!
*/
-static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
+static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
struct page *page, unsigned int offset, unsigned int bytes)
{
- struct address_space *mapping = page->mapping;
- struct inode *inode = mapping->host;
+ struct inode *inode = page->mapping->host;
struct nfs_page *req, *new = NULL;
- pgoff_t rqend, end;
+ unsigned int end;
+ int error;
end = offset + bytes;
-
for (;;) {
/* Loop over all inode entries and see if we find
* A request for the page we wish to update
@@ -590,26 +623,16 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
spin_lock(&inode->i_lock);
req = nfs_page_find_request_locked(page);
if (req) {
- if (!nfs_set_page_tag_locked(req)) {
- int error;
-
- spin_unlock(&inode->i_lock);
+ error = nfs_try_to_update_request(req, offset, end);
+ spin_unlock(&inode->i_lock);
+ if (error == 0)
+ break;
+ if (error == -EAGAIN)
error = nfs_wait_on_request(req);
- nfs_release_request(req);
- if (error < 0) {
- if (new) {
- radix_tree_preload_end();
- nfs_release_request(new);
- }
- return ERR_PTR(error);
- }
+ nfs_release_request(req);
+ if (error == 0)
continue;
- }
- spin_unlock(&inode->i_lock);
- if (new) {
- radix_tree_preload_end();
- nfs_release_request(new);
- }
+ req = ERR_PTR(error);
break;
}
@@ -632,32 +655,10 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
}
}
- /* We have a request for our page.
- * If the creds don't match, or the
- * page addresses don't match,
- * tell the caller to wait on the conflicting
- * request.
- */
- rqend = req->wb_offset + req->wb_bytes;
- if (req->wb_context != ctx
- || req->wb_page != page
- || !nfs_dirty_request(req)
- || offset > rqend || end < req->wb_offset) {
- nfs_clear_page_tag_locked(req);
- return ERR_PTR(-EBUSY);
+ if (new) {
+ radix_tree_preload_end();
+ nfs_release_request(new);
}
-
- /* Okay, the request matches. Update the region */
- if (offset < req->wb_offset) {
- req->wb_offset = offset;
- req->wb_pgbase = offset;
- req->wb_bytes = max(end, rqend) - req->wb_offset;
- goto out;
- }
-
- if (end > rqend)
- req->wb_bytes = end - req->wb_offset;
-
out:
return req;
}
Hi Trond-
On Jun 16, 2008, at 3:23 PM, Trond Myklebust wrote:
> Simplify the loop in nfs_update_request by moving into a separate
> function
> the code that attempts to update an existing cached NFS write.
>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
>
> fs/nfs/write.c | 105 +++++++++++++++++++++++++++
> +----------------------------
> 1 files changed, 53 insertions(+), 52 deletions(-)
>
> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> index 8f12157..a675dc9 100644
> --- a/fs/nfs/write.c
> +++ b/fs/nfs/write.c
> @@ -34,9 +34,8 @@
> /*
> * Local function declarations
> */
> -static struct nfs_page * nfs_update_request(struct nfs_open_context*,
> - struct page *,
> - unsigned int, unsigned int);
> +static struct nfs_page * nfs_setup_write_request(struct
> nfs_open_context *ctx,
> + struct page *page, unsigned int offset, unsigned int bytes);
> static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
> struct inode *inode, int ioflags);
> static void nfs_redirty_request(struct nfs_page *req);
> @@ -178,7 +177,7 @@ static int nfs_writepage_setup(struct
> nfs_open_context *ctx, struct page *page,
> int ret;
>
> for (;;) {
> - req = nfs_update_request(ctx, page, offset, count);
> + req = nfs_setup_write_request(ctx, page, offset, count);
> if (!IS_ERR(req))
> break;
> ret = PTR_ERR(req);
> @@ -566,6 +565,41 @@ static inline int nfs_scan_commit(struct inode
> *inode, struct list_head *dst, pg
> }
> #endif
>
> +static int nfs_try_to_update_request(struct nfs_page *req,
> + unsigned int offset,
> + unsigned int end)
> +{
> + unsigned int rqend;
> +
> + rqend = req->wb_offset + req->wb_bytes;
> +
> + /*
> + * If the page doesn't match, or the offsets
> + * are non-contiguous, tell the caller to
> + * wait on the conflicting request.
> + */
> + if (req->wb_page == NULL
> + || !nfs_dirty_request(req)
> + || offset > rqend
> + || end < req->wb_offset)
> + return -EBUSY;
I don't see how this is equivalent to what it replaces in
nfs_update_request:
> - if (req->wb_context != ctx
> - || req->wb_page != page
What happened to the context check, for example?
> +
> + if (!nfs_set_page_tag_locked(req))
> + return -EAGAIN;
This -EAGAIN (and the resulting logic in nfs_setup_write_request) is
headache-inducing. Is there a more straightforward way to break this
up? I'm not sure this is a readability win.
Overall I'd like to see this clean up broken into steps. It's a
challenge to "prove" that you haven't changed behavior somehow with
this one patch.
> +
> + /* Okay, the request matches. Update the region */
> + if (offset < req->wb_offset) {
> + req->wb_offset = offset;
> + req->wb_pgbase = offset;
> + }
> + if (end > rqend)
> + req->wb_bytes = end - req->wb_offset;
> + else
> + req->wb_bytes = rqend - req->wb_offset;
> +
> + return 0;
> +}
> +
> /*
> * Try to update any existing write request, or create one if there
> is none.
> * In order to match, the request's credentials must match those of
> @@ -573,16 +607,15 @@ static inline int nfs_scan_commit(struct inode
> *inode, struct list_head *dst, pg
> *
> * Note: Should always be called with the Page Lock held!
> */
> -static struct nfs_page * nfs_update_request(struct
> nfs_open_context* ctx,
> +static struct nfs_page * nfs_setup_write_request(struct
> nfs_open_context* ctx,
> struct page *page, unsigned int offset, unsigned int bytes)
> {
> - struct address_space *mapping = page->mapping;
> - struct inode *inode = mapping->host;
> + struct inode *inode = page->mapping->host;
> struct nfs_page *req, *new = NULL;
> - pgoff_t rqend, end;
> + unsigned int end;
> + int error;
>
> end = offset + bytes;
> -
> for (;;) {
> /* Loop over all inode entries and see if we find
> * A request for the page we wish to update
> @@ -590,26 +623,16 @@ static struct nfs_page *
> nfs_update_request(struct nfs_open_context* ctx,
> spin_lock(&inode->i_lock);
> req = nfs_page_find_request_locked(page);
> if (req) {
> - if (!nfs_set_page_tag_locked(req)) {
> - int error;
> -
> - spin_unlock(&inode->i_lock);
> + error = nfs_try_to_update_request(req, offset, end);
> + spin_unlock(&inode->i_lock);
> + if (error == 0)
> + break;
> + if (error == -EAGAIN)
> error = nfs_wait_on_request(req);
> - nfs_release_request(req);
> - if (error < 0) {
> - if (new) {
> - radix_tree_preload_end();
> - nfs_release_request(new);
> - }
> - return ERR_PTR(error);
> - }
> + nfs_release_request(req);
> + if (error == 0)
> continue;
> - }
> - spin_unlock(&inode->i_lock);
> - if (new) {
> - radix_tree_preload_end();
> - nfs_release_request(new);
> - }
> + req = ERR_PTR(error);
> break;
> }
>
> @@ -632,32 +655,10 @@ static struct nfs_page *
> nfs_update_request(struct nfs_open_context* ctx,
> }
> }
>
> - /* We have a request for our page.
> - * If the creds don't match, or the
> - * page addresses don't match,
> - * tell the caller to wait on the conflicting
> - * request.
> - */
> - rqend = req->wb_offset + req->wb_bytes;
> - if (req->wb_context != ctx
> - || req->wb_page != page
> - || !nfs_dirty_request(req)
> - || offset > rqend || end < req->wb_offset) {
> - nfs_clear_page_tag_locked(req);
> - return ERR_PTR(-EBUSY);
> + if (new) {
> + radix_tree_preload_end();
> + nfs_release_request(new);
> }
> -
> - /* Okay, the request matches. Update the region */
> - if (offset < req->wb_offset) {
> - req->wb_offset = offset;
> - req->wb_pgbase = offset;
> - req->wb_bytes = max(end, rqend) - req->wb_offset;
> - goto out;
> - }
> -
> - if (end > rqend)
> - req->wb_bytes = end - req->wb_offset;
> -
> out:
There's only one "goto out;" left in nfs_setup_write_request(). There
are other return sites in nfs_setup_write_request() that simply
"return req;" -- for what purpose is this one goto still here?
>
> return req;
> }
--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com
On Tue, 2008-06-17 at 17:35 -0400, Chuck Lever wrote:
> Hi Trond-
>
> On Jun 16, 2008, at 3:23 PM, Trond Myklebust wrote:
> > Simplify the loop in nfs_update_request by moving into a separate
> > function
> > the code that attempts to update an existing cached NFS write.
> >
> > Signed-off-by: Trond Myklebust <[email protected]>
> > ---
> >
> > fs/nfs/write.c | 105 +++++++++++++++++++++++++++
> > +----------------------------
> > 1 files changed, 53 insertions(+), 52 deletions(-)
> >
> > diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> > index 8f12157..a675dc9 100644
> > --- a/fs/nfs/write.c
> > +++ b/fs/nfs/write.c
> > @@ -34,9 +34,8 @@
> > /*
> > * Local function declarations
> > */
> > -static struct nfs_page * nfs_update_request(struct nfs_open_context*,
> > - struct page *,
> > - unsigned int, unsigned int);
> > +static struct nfs_page * nfs_setup_write_request(struct
> > nfs_open_context *ctx,
> > + struct page *page, unsigned int offset, unsigned int bytes);
> > static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
> > struct inode *inode, int ioflags);
> > static void nfs_redirty_request(struct nfs_page *req);
> > @@ -178,7 +177,7 @@ static int nfs_writepage_setup(struct
> > nfs_open_context *ctx, struct page *page,
> > int ret;
> >
> > for (;;) {
> > - req = nfs_update_request(ctx, page, offset, count);
> > + req = nfs_setup_write_request(ctx, page, offset, count);
> > if (!IS_ERR(req))
> > break;
> > ret = PTR_ERR(req);
> > @@ -566,6 +565,41 @@ static inline int nfs_scan_commit(struct inode
> > *inode, struct list_head *dst, pg
> > }
> > #endif
> >
> > +static int nfs_try_to_update_request(struct nfs_page *req,
> > + unsigned int offset,
> > + unsigned int end)
> > +{
> > + unsigned int rqend;
> > +
> > + rqend = req->wb_offset + req->wb_bytes;
> > +
> > + /*
> > + * If the page doesn't match, or the offsets
> > + * are non-contiguous, tell the caller to
> > + * wait on the conflicting request.
> > + */
> > + if (req->wb_page == NULL
> > + || !nfs_dirty_request(req)
> > + || offset > rqend
> > + || end < req->wb_offset)
> > + return -EBUSY;
>
> I don't see how this is equivalent to what it replaces in
> nfs_update_request:
>
> > - if (req->wb_context != ctx
> > - || req->wb_page != page
>
>
> What happened to the context check, for example?
That is a deliberate change.
We hold the page lock across nfs_vm_page_mkwrite and/or
nfs_write_begin()/nfs_write_end(), so nobody else can add a new nfs_page
to page->private during that time.
For that reason, the check that is done in nfs_flush_incompatible()
together with the check for req->wb_page==NULL (done in
try_to_update_page() while still holding the inode->i_lock) means that
the above two checks are redundant.
...actually, come to think of it, the check for req->wb_page == NULL is
redundant too, since nfs_clear_request() is called at the end of
nfs_inode_remove_request(), and hence nfs_page_find_request_locked()
would fail to find anything.
> > +
> > + if (!nfs_set_page_tag_locked(req))
> > + return -EAGAIN;
>
> This -EAGAIN (and the resulting logic in nfs_setup_write_request) is
> headache-inducing. Is there a more straightforward way to break this
> up?
No. If the page is being written out, then we _must_ release all locks,
wait on the request, and try again. This is not a change compared to the
previous incarnation.
> > +
> > + /* Okay, the request matches. Update the region */
> > + if (offset < req->wb_offset) {
> > + req->wb_offset = offset;
> > + req->wb_pgbase = offset;
> > + }
> > + if (end > rqend)
> > + req->wb_bytes = end - req->wb_offset;
> > + else
> > + req->wb_bytes = rqend - req->wb_offset;
> > +
> > + return 0;
> > +}
> > +
> > /*
> > * Try to update any existing write request, or create one if there
> > is none.
> > * In order to match, the request's credentials must match those of
> > @@ -573,16 +607,15 @@ static inline int nfs_scan_commit(struct inode
> > *inode, struct list_head *dst, pg
> > *
> > * Note: Should always be called with the Page Lock held!
> > */
> > -static struct nfs_page * nfs_update_request(struct
> > nfs_open_context* ctx,
> > +static struct nfs_page * nfs_setup_write_request(struct
> > nfs_open_context* ctx,
> > struct page *page, unsigned int offset, unsigned int bytes)
> > {
> > - struct address_space *mapping = page->mapping;
> > - struct inode *inode = mapping->host;
> > + struct inode *inode = page->mapping->host;
> > struct nfs_page *req, *new = NULL;
> > - pgoff_t rqend, end;
> > + unsigned int end;
> > + int error;
> >
> > end = offset + bytes;
> > -
> > for (;;) {
> > /* Loop over all inode entries and see if we find
> > * A request for the page we wish to update
> > @@ -590,26 +623,16 @@ static struct nfs_page *
> > nfs_update_request(struct nfs_open_context* ctx,
> > spin_lock(&inode->i_lock);
> > req = nfs_page_find_request_locked(page);
> > if (req) {
> > - if (!nfs_set_page_tag_locked(req)) {
> > - int error;
> > -
> > - spin_unlock(&inode->i_lock);
> > + error = nfs_try_to_update_request(req, offset, end);
> > + spin_unlock(&inode->i_lock);
> > + if (error == 0)
> > + break;
> > + if (error == -EAGAIN)
> > error = nfs_wait_on_request(req);
> > - nfs_release_request(req);
> > - if (error < 0) {
> > - if (new) {
> > - radix_tree_preload_end();
> > - nfs_release_request(new);
> > - }
> > - return ERR_PTR(error);
> > - }
> > + nfs_release_request(req);
> > + if (error == 0)
> > continue;
> > - }
> > - spin_unlock(&inode->i_lock);
> > - if (new) {
> > - radix_tree_preload_end();
> > - nfs_release_request(new);
> > - }
> > + req = ERR_PTR(error);
> > break;
> > }
> >
> > @@ -632,32 +655,10 @@ static struct nfs_page *
> > nfs_update_request(struct nfs_open_context* ctx,
> > }
> > }
> >
> > - /* We have a request for our page.
> > - * If the creds don't match, or the
> > - * page addresses don't match,
> > - * tell the caller to wait on the conflicting
> > - * request.
> > - */
> > - rqend = req->wb_offset + req->wb_bytes;
> > - if (req->wb_context != ctx
> > - || req->wb_page != page
> > - || !nfs_dirty_request(req)
> > - || offset > rqend || end < req->wb_offset) {
> > - nfs_clear_page_tag_locked(req);
> > - return ERR_PTR(-EBUSY);
> > + if (new) {
> > + radix_tree_preload_end();
> > + nfs_release_request(new);
> > }
> > -
> > - /* Okay, the request matches. Update the region */
> > - if (offset < req->wb_offset) {
> > - req->wb_offset = offset;
> > - req->wb_pgbase = offset;
> > - req->wb_bytes = max(end, rqend) - req->wb_offset;
> > - goto out;
> > - }
> > -
> > - if (end > rqend)
> > - req->wb_bytes = end - req->wb_offset;
> > -
> > out:
>
> There's only one "goto out;" left in nfs_setup_write_request(). There
> are other return sites in nfs_setup_write_request() that simply
> "return req;" -- for what purpose is this one goto still here?
It could be replaced by a single 'return req'.
--
Trond Myklebust
Linux NFS client maintainer
NetApp
[email protected]
http://www.netapp.com
Trond Myklebust wrote:
> On Tue, 2008-06-17 at 17:35 -0400, Chuck Lever wrote:
>> Hi Trond-
>>
>> On Jun 16, 2008, at 3:23 PM, Trond Myklebust wrote:
>>> Simplify the loop in nfs_update_request by moving into a separate
>>> function
>>> the code that attempts to update an existing cached NFS write.
>>>
>>> Signed-off-by: Trond Myklebust <[email protected]>
>>> ---
>>>
>>> fs/nfs/write.c | 105 +++++++++++++++++++++++++++
>>> +----------------------------
>>> 1 files changed, 53 insertions(+), 52 deletions(-)
>>>
>>> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
>>> index 8f12157..a675dc9 100644
>>> --- a/fs/nfs/write.c
>>> +++ b/fs/nfs/write.c
>>> @@ -34,9 +34,8 @@
>>> /*
>>> * Local function declarations
>>> */
>>> -static struct nfs_page * nfs_update_request(struct nfs_open_context*,
>>> - struct page *,
>>> - unsigned int, unsigned int);
>>> +static struct nfs_page * nfs_setup_write_request(struct
>>> nfs_open_context *ctx,
>>> + struct page *page, unsigned int offset, unsigned int bytes);
>>> static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
>>> struct inode *inode, int ioflags);
>>> static void nfs_redirty_request(struct nfs_page *req);
>>> @@ -178,7 +177,7 @@ static int nfs_writepage_setup(struct
>>> nfs_open_context *ctx, struct page *page,
>>> int ret;
>>>
>>> for (;;) {
>>> - req = nfs_update_request(ctx, page, offset, count);
>>> + req = nfs_setup_write_request(ctx, page, offset, count);
>>> if (!IS_ERR(req))
>>> break;
>>> ret = PTR_ERR(req);
>>> @@ -566,6 +565,41 @@ static inline int nfs_scan_commit(struct inode
>>> *inode, struct list_head *dst, pg
>>> }
>>> #endif
>>>
>>> +static int nfs_try_to_update_request(struct nfs_page *req,
>>> + unsigned int offset,
>>> + unsigned int end)
>>> +{
>>> + unsigned int rqend;
>>> +
>>> + rqend = req->wb_offset + req->wb_bytes;
>>> +
>>> + /*
>>> + * If the page doesn't match, or the offsets
>>> + * are non-contiguous, tell the caller to
>>> + * wait on the conflicting request.
>>> + */
>>> + if (req->wb_page == NULL
>>> + || !nfs_dirty_request(req)
>>> + || offset > rqend
>>> + || end < req->wb_offset)
>>> + return -EBUSY;
>> I don't see how this is equivalent to what it replaces in
>> nfs_update_request:
>>
>>> - if (req->wb_context != ctx
>>> - || req->wb_page != page
>>
>> What happened to the context check, for example?
>
> That is a deliberate change.
>
> We hold the page lock across nfs_vm_page_mkwrite and/or
> nfs_write_begin()/nfs_write_end(), so nobody else can add a new nfs_page
> to page->private during that time.
>
> For that reason, the check that is done in nfs_flush_incompatible()
> together with the check for req->wb_page==NULL (done in
> try_to_update_page() while still holding the inode->i_lock) means that
> the above two checks are redundant.
>
> ...actually, come to think of it, the check for req->wb_page == NULL is
> redundant too, since nfs_clear_request() is called at the end of
> nfs_inode_remove_request(), and hence nfs_page_find_request_locked()
> would fail to find anything.
>
>>> +
>>> + if (!nfs_set_page_tag_locked(req))
>>> + return -EAGAIN;
>> This -EAGAIN (and the resulting logic in nfs_setup_write_request) is
>> headache-inducing. Is there a more straightforward way to break this
>> up?
>
> No. If the page is being written out, then we _must_ release all locks,
> wait on the request, and try again. This is not a change compared to the
> previous incarnation.
Right, understood... but I think the logic added here is harder to
understand and thus more prone to break when something changes than the
previous incarnation was.
>>> +
>>> + /* Okay, the request matches. Update the region */
>>> + if (offset < req->wb_offset) {
>>> + req->wb_offset = offset;
>>> + req->wb_pgbase = offset;
>>> + }
>>> + if (end > rqend)
>>> + req->wb_bytes = end - req->wb_offset;
>>> + else
>>> + req->wb_bytes = rqend - req->wb_offset;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> /*
>>> * Try to update any existing write request, or create one if there
>>> is none.
>>> * In order to match, the request's credentials must match those of
>>> @@ -573,16 +607,15 @@ static inline int nfs_scan_commit(struct inode
>>> *inode, struct list_head *dst, pg
>>> *
>>> * Note: Should always be called with the Page Lock held!
>>> */
>>> -static struct nfs_page * nfs_update_request(struct
>>> nfs_open_context* ctx,
>>> +static struct nfs_page * nfs_setup_write_request(struct
>>> nfs_open_context* ctx,
>>> struct page *page, unsigned int offset, unsigned int bytes)
>>> {
>>> - struct address_space *mapping = page->mapping;
>>> - struct inode *inode = mapping->host;
>>> + struct inode *inode = page->mapping->host;
>>> struct nfs_page *req, *new = NULL;
>>> - pgoff_t rqend, end;
>>> + unsigned int end;
>>> + int error;
>>>
>>> end = offset + bytes;
>>> -
>>> for (;;) {
>>> /* Loop over all inode entries and see if we find
>>> * A request for the page we wish to update
>>> @@ -590,26 +623,16 @@ static struct nfs_page *
>>> nfs_update_request(struct nfs_open_context* ctx,
>>> spin_lock(&inode->i_lock);
>>> req = nfs_page_find_request_locked(page);
>>> if (req) {
>>> - if (!nfs_set_page_tag_locked(req)) {
>>> - int error;
>>> -
>>> - spin_unlock(&inode->i_lock);
>>> + error = nfs_try_to_update_request(req, offset, end);
>>> + spin_unlock(&inode->i_lock);
>>> + if (error == 0)
>>> + break;
>>> + if (error == -EAGAIN)
>>> error = nfs_wait_on_request(req);
>>> - nfs_release_request(req);
>>> - if (error < 0) {
>>> - if (new) {
>>> - radix_tree_preload_end();
>>> - nfs_release_request(new);
>>> - }
>>> - return ERR_PTR(error);
>>> - }
>>> + nfs_release_request(req);
>>> + if (error == 0)
>>> continue;
>>> - }
>>> - spin_unlock(&inode->i_lock);
>>> - if (new) {
>>> - radix_tree_preload_end();
>>> - nfs_release_request(new);
>>> - }
>>> + req = ERR_PTR(error);
>>> break;
>>> }
>>>
>>> @@ -632,32 +655,10 @@ static struct nfs_page *
>>> nfs_update_request(struct nfs_open_context* ctx,
>>> }
>>> }
>>>
>>> - /* We have a request for our page.
>>> - * If the creds don't match, or the
>>> - * page addresses don't match,
>>> - * tell the caller to wait on the conflicting
>>> - * request.
>>> - */
>>> - rqend = req->wb_offset + req->wb_bytes;
>>> - if (req->wb_context != ctx
>>> - || req->wb_page != page
>>> - || !nfs_dirty_request(req)
>>> - || offset > rqend || end < req->wb_offset) {
>>> - nfs_clear_page_tag_locked(req);
>>> - return ERR_PTR(-EBUSY);
>>> + if (new) {
>>> + radix_tree_preload_end();
>>> + nfs_release_request(new);
>>> }
>>> -
>>> - /* Okay, the request matches. Update the region */
>>> - if (offset < req->wb_offset) {
>>> - req->wb_offset = offset;
>>> - req->wb_pgbase = offset;
>>> - req->wb_bytes = max(end, rqend) - req->wb_offset;
>>> - goto out;
>>> - }
>>> -
>>> - if (end > rqend)
>>> - req->wb_bytes = end - req->wb_offset;
>>> -
>>> out:
>> There's only one "goto out;" left in nfs_setup_write_request(). There
>> are other return sites in nfs_setup_write_request() that simply
>> "return req;" -- for what purpose is this one goto still here?
>
> It could be replaced by a single 'return req'.
On Wed, 2008-06-18 at 16:40 -0400, Chuck Lever wrote:
> > No. If the page is being written out, then we _must_ release all locks,
> > wait on the request, and try again. This is not a change compared to the
> > previous incarnation.
>
> Right, understood... but I think the logic added here is harder to
> understand and thus more prone to break when something changes than the
> previous incarnation was.
I don't understand: The behaviour and the results are exactly the same
as before. All that has happened is that the existing code to update a
request got shuffled into a separate function. If the request is locked,
so that we can't update it, we still wait for the request to become
unlocked, then release it and try the lookup again. There is no new
logic...
The only 'change' is a cosmetic one: the new routine has somehow to
signal to the caller that the lock attempt failed, so it returns an
error code EAGAIN to do so.
One alternative might be to move the code that waits on the lock into
nfs_try_to_update_request(), but I don't like that idea for two reasons:
1. waiting on the lock isn't related to updating the request
2. Once the request has been unlocked, you _still_ have to return
to nfs_setup_write_request() because the old request might have
completed, and been removed from the inode altogether. IOW: you
still don't get rid of the need to signal the EAGAIN.
--
Trond Myklebust
Linux NFS client maintainer
NetApp
[email protected]
http://www.netapp.com
Trond Myklebust wrote:
> On Wed, 2008-06-18 at 16:40 -0400, Chuck Lever wrote:
>>> No. If the page is being written out, then we _must_ release all locks,
>>> wait on the request, and try again. This is not a change compared to the
>>> previous incarnation.
>> Right, understood... but I think the logic added here is harder to
>> understand and thus more prone to break when something changes than the
>> previous incarnation was.
>
> I don't understand: The behaviour and the results are exactly the same
> as before. All that has happened is that the existing code to update a
> request got shuffled into a separate function. If the request is locked,
> so that we can't update it, we still wait for the request to become
> unlocked, then release it and try the lookup again. There is no new
> logic...
Sorry, I'm not being clear. I do realize that this patch is intended as
a clean up and not a behavioral change.
What I'm trying to say is the new function with the EAGAIN return code
is spaghetti. Don't get me wrong, I like Italian food.
I do agree that nfs_update_request() in its current state deserves some
clarification and clean up. I'm not sure I like the new code any more
than the old, however.
> One alternative might be to move the code that waits on the lock into
> nfs_try_to_update_request(), but I don't like that idea for two reasons:
>
> 1. waiting on the lock isn't related to updating the request
> 2. Once the request has been unlocked, you _still_ have to return
> to nfs_setup_write_request() because the old request might have
> completed, and been removed from the inode altogether. IOW: you
> still don't get rid of the need to signal the EAGAIN.
On Wed, 2008-06-18 at 18:05 -0400, Chuck Lever wrote:
> What I'm trying to say is the new function with the EAGAIN return code
> is spaghetti. Don't get me wrong, I like Italian food.
How about this version, then?
---------------------------------------------
From: Trond Myklebust <[email protected]>
Date: Fri, 13 Jun 2008 12:12:32 -0400
NFS: Clean up nfs_update_request()
Simplify the loop in nfs_update_request by moving into a separate function
the code that attempts to update an existing cached NFS write.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/write.c | 190 +++++++++++++++++++++++++++-----------------------------
1 files changed, 93 insertions(+), 97 deletions(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 21d8a48..48f9605 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -34,9 +34,6 @@
/*
* Local function declarations
*/
-static struct nfs_page * nfs_update_request(struct nfs_open_context*,
- struct page *,
- unsigned int, unsigned int);
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
struct inode *inode, int ioflags);
static void nfs_redirty_request(struct nfs_page *req);
@@ -169,30 +166,6 @@ static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int
SetPageUptodate(page);
}
-static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
- unsigned int offset, unsigned int count)
-{
- struct nfs_page *req;
- int ret;
-
- for (;;) {
- req = nfs_update_request(ctx, page, offset, count);
- if (!IS_ERR(req))
- break;
- ret = PTR_ERR(req);
- if (ret != -EBUSY)
- return ret;
- ret = nfs_wb_page(page->mapping->host, page);
- if (ret != 0)
- return ret;
- }
- /* Update file length */
- nfs_grow_file(page, offset, count);
- nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
- nfs_clear_page_tag_locked(req);
- return 0;
-}
-
static int wb_priority(struct writeback_control *wbc)
{
if (wbc->for_reclaim)
@@ -361,6 +334,10 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
struct nfs_inode *nfsi = NFS_I(inode);
int error;
+ /* Lock the request! */
+ nfs_lock_request_dontget(req);
+
+ spin_lock(&inode->i_lock);
error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
BUG_ON(error);
if (!nfsi->npages) {
@@ -374,6 +351,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
kref_get(&req->wb_kref);
radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
NFS_PAGE_TAG_LOCKED);
+ spin_unlock(&inode->i_lock);
}
/*
@@ -565,101 +543,119 @@ static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pg
#endif
/*
- * Try to update any existing write request, or create one if there is none.
- * In order to match, the request's credentials must match those of
- * the calling process.
+ * Search for an existing write request, and attempt to update
+ * it to reflect a new dirty region on a given page.
*
- * Note: Should always be called with the Page Lock held!
+ * If the attempt fails, then the existing request is flushed out
+ * to disk.
*/
-static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
- struct page *page, unsigned int offset, unsigned int bytes)
+static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
+ struct page *page,
+ unsigned int offset,
+ unsigned int bytes)
{
- struct address_space *mapping = page->mapping;
- struct inode *inode = mapping->host;
- struct nfs_page *req, *new = NULL;
- pgoff_t rqend, end;
+ struct nfs_page *req;
+ unsigned int rqend;
+ unsigned int end;
+ int error;
+
+ if (!PagePrivate(page))
+ return NULL;
end = offset + bytes;
+ spin_lock(&inode->i_lock);
for (;;) {
- /* Loop over all inode entries and see if we find
- * A request for the page we wish to update
- */
- spin_lock(&inode->i_lock);
req = nfs_page_find_request_locked(page);
- if (req) {
- if (!nfs_set_page_tag_locked(req)) {
- int error;
-
- spin_unlock(&inode->i_lock);
- error = nfs_wait_on_request(req);
- nfs_release_request(req);
- if (error < 0) {
- if (new) {
- radix_tree_preload_end();
- nfs_release_request(new);
- }
- return ERR_PTR(error);
- }
- continue;
- }
- spin_unlock(&inode->i_lock);
- if (new) {
- radix_tree_preload_end();
- nfs_release_request(new);
- }
- break;
- }
+ if (req == NULL)
+ goto out_unlock;
- if (new) {
- nfs_lock_request_dontget(new);
- nfs_inode_add_request(inode, new);
- spin_unlock(&inode->i_lock);
- radix_tree_preload_end();
- req = new;
- goto out;
- }
- spin_unlock(&inode->i_lock);
+ rqend = req->wb_offset + req->wb_bytes;
+ /*
+ * Tell the caller to flush out the request if
+ * the offsets are non-contiguous.
+ */
+ if (!nfs_dirty_request(req)
+ || offset > rqend
+ || end < req->wb_offset)
+ goto out_flushme;
- new = nfs_create_request(ctx, inode, page, offset, bytes);
- if (IS_ERR(new))
- return new;
- if (radix_tree_preload(GFP_NOFS)) {
- nfs_release_request(new);
- return ERR_PTR(-ENOMEM);
- }
- }
+ if (nfs_set_page_tag_locked(req))
+ break;
- /* We have a request for our page.
- * If the creds don't match, or the
- * page addresses don't match,
- * tell the caller to wait on the conflicting
- * request.
- */
- rqend = req->wb_offset + req->wb_bytes;
- if (req->wb_context != ctx
- || req->wb_page != page
- || !nfs_dirty_request(req)
- || offset > rqend || end < req->wb_offset) {
- nfs_clear_page_tag_locked(req);
- return ERR_PTR(-EBUSY);
+ /* The request is locked, so wait and then retry */
+ spin_unlock(&inode->i_lock);
+ error = nfs_wait_on_request(req);
+ nfs_release_request(req);
+ if (error != 0)
+ goto out_err;
+ spin_lock(&inode->i_lock);
}
/* Okay, the request matches. Update the region */
if (offset < req->wb_offset) {
req->wb_offset = offset;
req->wb_pgbase = offset;
- req->wb_bytes = max(end, rqend) - req->wb_offset;
- goto out;
}
-
if (end > rqend)
req->wb_bytes = end - req->wb_offset;
+ else
+ req->wb_bytes = rqend - req->wb_offset;
+out_unlock:
+ spin_unlock(&inode->i_lock);
+ return req;
+out_flushme:
+ spin_unlock(&inode->i_lock);
+ nfs_release_request(req);
+ error = nfs_wb_page(inode, page);
+out_err:
+ return ERR_PTR(error);
+}
+
+/*
+ * Try to update an existing write request, or create one if there is none.
+ *
+ * Note: Should always be called with the Page Lock held to prevent races
+ * if we have to add a new request. Also assumes that the caller has
+ * already called nfs_flush_incompatible() if necessary.
+ */
+static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
+ struct page *page, unsigned int offset, unsigned int bytes)
+{
+ struct inode *inode = page->mapping->host;
+ struct nfs_page *req;
+ req = nfs_try_to_update_request(inode, page, offset, bytes);
+ if (req != NULL)
+ goto out;
+ req = nfs_create_request(ctx, inode, page, offset, bytes);
+ if (IS_ERR(req))
+ goto out;
+ if (radix_tree_preload(GFP_NOFS)) {
+ nfs_release_request(req);
+ return ERR_PTR(-ENOMEM);
+ }
+ nfs_inode_add_request(inode, req);
+ radix_tree_preload_end();
out:
return req;
}
+static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
+ unsigned int offset, unsigned int count)
+{
+ struct nfs_page *req;
+
+ req = nfs_setup_write_request(ctx, page, offset, count);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ /* Update file length */
+ nfs_grow_file(page, offset, count);
+ nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+ nfs_clear_page_tag_locked(req);
+ return 0;
+}
+
int nfs_flush_incompatible(struct file *file, struct page *page)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
On Jun 19, 2008, at 5:22 PM, Trond Myklebust wrote:
> On Wed, 2008-06-18 at 18:05 -0400, Chuck Lever wrote:
>> What I'm trying to say is the new function with the EAGAIN return
>> code
>> is spaghetti. Don't get me wrong, I like Italian food.
>
> How about this version, then?
At first glance this looks a whole lot nicer. I'll take a closer look
later today.
> ---------------------------------------------
> From: Trond Myklebust <[email protected]>
> Date: Fri, 13 Jun 2008 12:12:32 -0400
> NFS: Clean up nfs_update_request()
>
> Simplify the loop in nfs_update_request by moving into a separate
> function
> the code that attempts to update an existing cached NFS write.
>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
>
> fs/nfs/write.c | 190 ++++++++++++++++++++++++++
> +-----------------------------
> 1 files changed, 93 insertions(+), 97 deletions(-)
>
> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> index 21d8a48..48f9605 100644
> --- a/fs/nfs/write.c
> +++ b/fs/nfs/write.c
> @@ -34,9 +34,6 @@
> /*
> * Local function declarations
> */
> -static struct nfs_page * nfs_update_request(struct nfs_open_context*,
> - struct page *,
> - unsigned int, unsigned int);
> static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
> struct inode *inode, int ioflags);
> static void nfs_redirty_request(struct nfs_page *req);
> @@ -169,30 +166,6 @@ static void nfs_mark_uptodate(struct page
> *page, unsigned int base, unsigned int
> SetPageUptodate(page);
> }
>
> -static int nfs_writepage_setup(struct nfs_open_context *ctx, struct
> page *page,
> - unsigned int offset, unsigned int count)
> -{
> - struct nfs_page *req;
> - int ret;
> -
> - for (;;) {
> - req = nfs_update_request(ctx, page, offset, count);
> - if (!IS_ERR(req))
> - break;
> - ret = PTR_ERR(req);
> - if (ret != -EBUSY)
> - return ret;
> - ret = nfs_wb_page(page->mapping->host, page);
> - if (ret != 0)
> - return ret;
> - }
> - /* Update file length */
> - nfs_grow_file(page, offset, count);
> - nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
> - nfs_clear_page_tag_locked(req);
> - return 0;
> -}
> -
> static int wb_priority(struct writeback_control *wbc)
> {
> if (wbc->for_reclaim)
> @@ -361,6 +334,10 @@ static void nfs_inode_add_request(struct inode
> *inode, struct nfs_page *req)
> struct nfs_inode *nfsi = NFS_I(inode);
> int error;
>
> + /* Lock the request! */
> + nfs_lock_request_dontget(req);
> +
> + spin_lock(&inode->i_lock);
> error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
> BUG_ON(error);
> if (!nfsi->npages) {
> @@ -374,6 +351,7 @@ static void nfs_inode_add_request(struct inode
> *inode, struct nfs_page *req)
> kref_get(&req->wb_kref);
> radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
> NFS_PAGE_TAG_LOCKED);
> + spin_unlock(&inode->i_lock);
> }
>
> /*
> @@ -565,101 +543,119 @@ static inline int nfs_scan_commit(struct
> inode *inode, struct list_head *dst, pg
> #endif
>
> /*
> - * Try to update any existing write request, or create one if there
> is none.
> - * In order to match, the request's credentials must match those of
> - * the calling process.
> + * Search for an existing write request, and attempt to update
> + * it to reflect a new dirty region on a given page.
> *
> - * Note: Should always be called with the Page Lock held!
> + * If the attempt fails, then the existing request is flushed out
> + * to disk.
> */
> -static struct nfs_page * nfs_update_request(struct
> nfs_open_context* ctx,
> - struct page *page, unsigned int offset, unsigned int bytes)
> +static struct nfs_page *nfs_try_to_update_request(struct inode
> *inode,
> + struct page *page,
> + unsigned int offset,
> + unsigned int bytes)
> {
> - struct address_space *mapping = page->mapping;
> - struct inode *inode = mapping->host;
> - struct nfs_page *req, *new = NULL;
> - pgoff_t rqend, end;
> + struct nfs_page *req;
> + unsigned int rqend;
> + unsigned int end;
> + int error;
> +
> + if (!PagePrivate(page))
> + return NULL;
>
> end = offset + bytes;
> + spin_lock(&inode->i_lock);
>
> for (;;) {
> - /* Loop over all inode entries and see if we find
> - * A request for the page we wish to update
> - */
> - spin_lock(&inode->i_lock);
> req = nfs_page_find_request_locked(page);
> - if (req) {
> - if (!nfs_set_page_tag_locked(req)) {
> - int error;
> -
> - spin_unlock(&inode->i_lock);
> - error = nfs_wait_on_request(req);
> - nfs_release_request(req);
> - if (error < 0) {
> - if (new) {
> - radix_tree_preload_end();
> - nfs_release_request(new);
> - }
> - return ERR_PTR(error);
> - }
> - continue;
> - }
> - spin_unlock(&inode->i_lock);
> - if (new) {
> - radix_tree_preload_end();
> - nfs_release_request(new);
> - }
> - break;
> - }
> + if (req == NULL)
> + goto out_unlock;
>
> - if (new) {
> - nfs_lock_request_dontget(new);
> - nfs_inode_add_request(inode, new);
> - spin_unlock(&inode->i_lock);
> - radix_tree_preload_end();
> - req = new;
> - goto out;
> - }
> - spin_unlock(&inode->i_lock);
> + rqend = req->wb_offset + req->wb_bytes;
> + /*
> + * Tell the caller to flush out the request if
> + * the offsets are non-contiguous.
> + */
> + if (!nfs_dirty_request(req)
> + || offset > rqend
> + || end < req->wb_offset)
> + goto out_flushme;
>
> - new = nfs_create_request(ctx, inode, page, offset, bytes);
> - if (IS_ERR(new))
> - return new;
> - if (radix_tree_preload(GFP_NOFS)) {
> - nfs_release_request(new);
> - return ERR_PTR(-ENOMEM);
> - }
> - }
> + if (nfs_set_page_tag_locked(req))
> + break;
>
> - /* We have a request for our page.
> - * If the creds don't match, or the
> - * page addresses don't match,
> - * tell the caller to wait on the conflicting
> - * request.
> - */
> - rqend = req->wb_offset + req->wb_bytes;
> - if (req->wb_context != ctx
> - || req->wb_page != page
> - || !nfs_dirty_request(req)
> - || offset > rqend || end < req->wb_offset) {
> - nfs_clear_page_tag_locked(req);
> - return ERR_PTR(-EBUSY);
> + /* The request is locked, so wait and then retry */
> + spin_unlock(&inode->i_lock);
> + error = nfs_wait_on_request(req);
> + nfs_release_request(req);
> + if (error != 0)
> + goto out_err;
> + spin_lock(&inode->i_lock);
> }
>
> /* Okay, the request matches. Update the region */
> if (offset < req->wb_offset) {
> req->wb_offset = offset;
> req->wb_pgbase = offset;
> - req->wb_bytes = max(end, rqend) - req->wb_offset;
> - goto out;
> }
> -
> if (end > rqend)
> req->wb_bytes = end - req->wb_offset;
> + else
> + req->wb_bytes = rqend - req->wb_offset;
> +out_unlock:
> + spin_unlock(&inode->i_lock);
> + return req;
> +out_flushme:
> + spin_unlock(&inode->i_lock);
> + nfs_release_request(req);
> + error = nfs_wb_page(inode, page);
> +out_err:
> + return ERR_PTR(error);
> +}
> +
> +/*
> + * Try to update an existing write request, or create one if there
> is none.
> + *
> + * Note: Should always be called with the Page Lock held to prevent
> races
> + * if we have to add a new request. Also assumes that the caller has
> + * already called nfs_flush_incompatible() if necessary.
> + */
> +static struct nfs_page * nfs_setup_write_request(struct
> nfs_open_context* ctx,
> + struct page *page, unsigned int offset, unsigned int bytes)
> +{
> + struct inode *inode = page->mapping->host;
> + struct nfs_page *req;
>
> + req = nfs_try_to_update_request(inode, page, offset, bytes);
> + if (req != NULL)
> + goto out;
> + req = nfs_create_request(ctx, inode, page, offset, bytes);
> + if (IS_ERR(req))
> + goto out;
> + if (radix_tree_preload(GFP_NOFS)) {
> + nfs_release_request(req);
> + return ERR_PTR(-ENOMEM);
> + }
> + nfs_inode_add_request(inode, req);
> + radix_tree_preload_end();
> out:
> return req;
> }
>
> +static int nfs_writepage_setup(struct nfs_open_context *ctx, struct
> page *page,
> + unsigned int offset, unsigned int count)
> +{
> + struct nfs_page *req;
> +
> + req = nfs_setup_write_request(ctx, page, offset, count);
> + if (IS_ERR(req))
> + return PTR_ERR(req);
> + /* Update file length */
> + nfs_grow_file(page, offset, count);
> + nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
> + nfs_clear_page_tag_locked(req);
> + return 0;
> +}
> +
> int nfs_flush_incompatible(struct file *file, struct page *page)
> {
> struct nfs_open_context *ctx = nfs_file_open_context(file);
>
--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com
Trond Myklebust wrote:
> On Wed, 2008-06-18 at 18:05 -0400, Chuck Lever wrote:
>> What I'm trying to say is the new function with the EAGAIN return code
>> is spaghetti. Don't get me wrong, I like Italian food.
>
> How about this version, then?
I like this much better; it is much more straightforward.
Minor comments below.
> ---------------------------------------------
> From: Trond Myklebust <[email protected]>
> Date: Fri, 13 Jun 2008 12:12:32 -0400
> NFS: Clean up nfs_update_request()
>
> Simplify the loop in nfs_update_request by moving into a separate function
> the code that attempts to update an existing cached NFS write.
>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
>
> fs/nfs/write.c | 190 +++++++++++++++++++++++++++-----------------------------
> 1 files changed, 93 insertions(+), 97 deletions(-)
>
> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> index 21d8a48..48f9605 100644
> --- a/fs/nfs/write.c
> +++ b/fs/nfs/write.c
> @@ -34,9 +34,6 @@
> /*
> * Local function declarations
> */
> -static struct nfs_page * nfs_update_request(struct nfs_open_context*,
> - struct page *,
> - unsigned int, unsigned int);
> static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
> struct inode *inode, int ioflags);
> static void nfs_redirty_request(struct nfs_page *req);
> @@ -169,30 +166,6 @@ static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int
> SetPageUptodate(page);
> }
>
> -static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
> - unsigned int offset, unsigned int count)
> -{
> - struct nfs_page *req;
> - int ret;
> -
> - for (;;) {
> - req = nfs_update_request(ctx, page, offset, count);
> - if (!IS_ERR(req))
> - break;
> - ret = PTR_ERR(req);
> - if (ret != -EBUSY)
> - return ret;
> - ret = nfs_wb_page(page->mapping->host, page);
> - if (ret != 0)
> - return ret;
> - }
> - /* Update file length */
> - nfs_grow_file(page, offset, count);
> - nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
> - nfs_clear_page_tag_locked(req);
> - return 0;
> -}
> -
> static int wb_priority(struct writeback_control *wbc)
> {
> if (wbc->for_reclaim)
> @@ -361,6 +334,10 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
> struct nfs_inode *nfsi = NFS_I(inode);
> int error;
>
> + /* Lock the request! */
> + nfs_lock_request_dontget(req);
> +
> + spin_lock(&inode->i_lock);
> error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
> BUG_ON(error);
> if (!nfsi->npages) {
> @@ -374,6 +351,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
> kref_get(&req->wb_kref);
> radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
> NFS_PAGE_TAG_LOCKED);
> + spin_unlock(&inode->i_lock);
> }
>
> /*
> @@ -565,101 +543,119 @@ static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pg
> #endif
>
> /*
> - * Try to update any existing write request, or create one if there is none.
> - * In order to match, the request's credentials must match those of
> - * the calling process.
> + * Search for an existing write request, and attempt to update
> + * it to reflect a new dirty region on a given page.
> *
> - * Note: Should always be called with the Page Lock held!
> + * If the attempt fails, then the existing request is flushed out
> + * to disk.
> */
> -static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
> - struct page *page, unsigned int offset, unsigned int bytes)
> +static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
> + struct page *page,
> + unsigned int offset,
> + unsigned int bytes)
> {
> - struct address_space *mapping = page->mapping;
> - struct inode *inode = mapping->host;
> - struct nfs_page *req, *new = NULL;
> - pgoff_t rqend, end;
> + struct nfs_page *req;
> + unsigned int rqend;
> + unsigned int end;
> + int error;
> +
> + if (!PagePrivate(page))
> + return NULL;
>
> end = offset + bytes;
> + spin_lock(&inode->i_lock);
>
> for (;;) {
> - /* Loop over all inode entries and see if we find
> - * A request for the page we wish to update
> - */
> - spin_lock(&inode->i_lock);
> req = nfs_page_find_request_locked(page);
> - if (req) {
> - if (!nfs_set_page_tag_locked(req)) {
> - int error;
> -
> - spin_unlock(&inode->i_lock);
> - error = nfs_wait_on_request(req);
> - nfs_release_request(req);
> - if (error < 0) {
> - if (new) {
> - radix_tree_preload_end();
> - nfs_release_request(new);
> - }
> - return ERR_PTR(error);
> - }
> - continue;
> - }
> - spin_unlock(&inode->i_lock);
> - if (new) {
> - radix_tree_preload_end();
> - nfs_release_request(new);
> - }
> - break;
> - }
> + if (req == NULL)
> + goto out_unlock;
>
> - if (new) {
> - nfs_lock_request_dontget(new);
> - nfs_inode_add_request(inode, new);
> - spin_unlock(&inode->i_lock);
> - radix_tree_preload_end();
> - req = new;
> - goto out;
> - }
> - spin_unlock(&inode->i_lock);
> + rqend = req->wb_offset + req->wb_bytes;
> + /*
> + * Tell the caller to flush out the request if
> + * the offsets are non-contiguous.
> + */
> + if (!nfs_dirty_request(req)
> + || offset > rqend
> + || end < req->wb_offset)
> + goto out_flushme;
I asked about this in my previous comments. The removal of the context
check here deserves some explication in the patch description, IMO. It
would need nothing more than copying your reply to my question into the
patch description.
>
> - new = nfs_create_request(ctx, inode, page, offset, bytes);
> - if (IS_ERR(new))
> - return new;
> - if (radix_tree_preload(GFP_NOFS)) {
> - nfs_release_request(new);
> - return ERR_PTR(-ENOMEM);
> - }
> - }
> + if (nfs_set_page_tag_locked(req))
> + break;
>
> - /* We have a request for our page.
> - * If the creds don't match, or the
> - * page addresses don't match,
> - * tell the caller to wait on the conflicting
> - * request.
> - */
> - rqend = req->wb_offset + req->wb_bytes;
> - if (req->wb_context != ctx
> - || req->wb_page != page
> - || !nfs_dirty_request(req)
> - || offset > rqend || end < req->wb_offset) {
> - nfs_clear_page_tag_locked(req);
> - return ERR_PTR(-EBUSY);
> + /* The request is locked, so wait and then retry */
> + spin_unlock(&inode->i_lock);
> + error = nfs_wait_on_request(req);
> + nfs_release_request(req);
> + if (error != 0)
> + goto out_err;
> + spin_lock(&inode->i_lock);
> }
>
> /* Okay, the request matches. Update the region */
> if (offset < req->wb_offset) {
> req->wb_offset = offset;
> req->wb_pgbase = offset;
> - req->wb_bytes = max(end, rqend) - req->wb_offset;
> - goto out;
> }
> -
> if (end > rqend)
> req->wb_bytes = end - req->wb_offset;
> + else
> + req->wb_bytes = rqend - req->wb_offset;
> +out_unlock:
> + spin_unlock(&inode->i_lock);
> + return req;
> +out_flushme:
> + spin_unlock(&inode->i_lock);
> + nfs_release_request(req);
> + error = nfs_wb_page(inode, page);
> +out_err:
> + return ERR_PTR(error);
> +}
> +
> +/*
> + * Try to update an existing write request, or create one if there is none.
> + *
> + * Note: Should always be called with the Page Lock held to prevent races
> + * if we have to add a new request. Also assumes that the caller has
> + * already called nfs_flush_incompatible() if necessary.
> + */
> +static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
> + struct page *page, unsigned int offset, unsigned int bytes)
> +{
> + struct inode *inode = page->mapping->host;
> + struct nfs_page *req;
>
> + req = nfs_try_to_update_request(inode, page, offset, bytes);
> + if (req != NULL)
> + goto out;
> + req = nfs_create_request(ctx, inode, page, offset, bytes);
> + if (IS_ERR(req))
> + goto out;
> + if (radix_tree_preload(GFP_NOFS)) {
> + nfs_release_request(req);
> + return ERR_PTR(-ENOMEM);
> + }
> + nfs_inode_add_request(inode, req);
> + radix_tree_preload_end();
Nit: You might gain additional code clarity here by moving these
radix_tree operations into nfs_inode_add_request(). It appears that
nfs_setup_write_request() is the only caller of nfs_inode_add_request().
> out:
> return req;
> }
>
> +static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
> + unsigned int offset, unsigned int count)
> +{
> + struct nfs_page *req;
> +
> + req = nfs_setup_write_request(ctx, page, offset, count);
> + if (IS_ERR(req))
> + return PTR_ERR(req);
> + /* Update file length */
> + nfs_grow_file(page, offset, count);
> + nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
> + nfs_clear_page_tag_locked(req);
> + return 0;
> +}
> +
> int nfs_flush_incompatible(struct file *file, struct page *page)
> {
> struct nfs_open_context *ctx = nfs_file_open_context(file);
>