The following fixes result from Andy's latest patchset to change
nfs_inode.layout into a pointer and rework the layout allocation scheme.
[PATCH 1/2] SQUASHME: pnfs-submit: check has_layout in __nfs4_close
This fixes a NULL dereference in __nfs4_close when the layout is NULL.
[PATCH 2/2] SQUASHME: pnfs-submit: clean up nfs_lock_alloc_layout
This fixes a BUG_ON where nfs_lock_alloc_layout did not lock the inode
if the layout was already allocated.
pnfs_layout_roc_iomode may hit NULL deref if layout is NULL.
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfs/nfs4state.c | 7 +++++--
1 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 8734688..724a963 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -594,8 +594,11 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state,
nfs4_put_open_state(state);
nfs4_put_state_owner(owner);
} else {
- u32 roc_iomode = pnfs_layout_roc_iomode(NFS_I(state->inode));
- if (roc_iomode) {
+ u32 roc_iomode;
+ struct nfs_inode *nfsi = NFS_I(state->inode);
+
+ if (has_layout(nfsi) &&
+ (roc_iomode = pnfs_layout_roc_iomode(nfsi)) != 0) {
struct nfs4_pnfs_layout_segment range = {
.iomode = roc_iomode,
.offset = 0,
--
1.7.1.1
Fix a bug where the function returned without taking the i_lock
if the layout hdr was already allocated.
Simplify by moving inode locking to caller.
Rename function as it no longer grabs the lock.
Clean up the implementation so it's clearer what's going on
and what are the likely cases vs. the unlikely ones.
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfs/pnfs.c | 42 ++++++++++++++++++++++--------------------
1 files changed, 22 insertions(+), 20 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 4ba7595..053a5c1 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -938,36 +938,37 @@ alloc_init_layout(struct inode *ino)
}
/*
- * Lock and possibly allocate the inode layout
+ * Retrieve and possibly allocate the inode layout
*
- * If successful, ino->i_lock is taken, and the caller must unlock.
+ * ino->i_lock must be taken by the caller.
*/
static struct pnfs_layout_type *
-nfs_lock_alloc_layout(struct inode *ino)
+pnfs_alloc_layout(struct inode *ino)
{
+ struct nfs_inode *nfsi = NFS_I(ino);
struct pnfs_layout_type *new = NULL;
- dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, NFS_I(ino)->layout);
+ dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
- if (NFS_I(ino)->layout == NULL) {
- new = alloc_init_layout(ino);
- if (new == NULL)
- return NULL;
- spin_lock(&ino->i_lock);
- if (NFS_I(ino)->layout == NULL) {
- NFS_I(ino)->layout = new;
- new = NULL;
- }
- }
- if (new) {
+ BUG_ON(!spin_is_locked(&ino->i_lock));
+ if (likely(nfsi->layout))
+ return nfsi->layout;
+
+ spin_unlock(&ino->i_lock);
+ new = alloc_init_layout(ino);
+ spin_lock(&ino->i_lock);
+
+ if (likely(nfsi->layout == NULL)) { /* Won the race? */
+ nfsi->layout = new;
+ } else if (new) {
/* Reference the layout accross i_lock release and grab */
- get_layout(NFS_I(ino)->layout);
+ get_layout(nfsi->layout);
spin_unlock(&ino->i_lock);
NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops->free_layout(new);
spin_lock(&ino->i_lock);
- put_layout_locked(NFS_I(ino)->layout);
+ put_layout_locked(nfsi->layout);
}
- return NFS_I(ino)->layout;
+ return nfsi->layout;
}
/*
@@ -1055,10 +1056,11 @@ _pnfs_update_layout(struct inode *ino,
if (take_ref)
*lsegpp = NULL;
- lo = nfs_lock_alloc_layout(ino);
+ spin_lock(&ino->i_lock);
+ lo = pnfs_alloc_layout(ino);
if (lo == NULL) {
dprintk("%s ERROR: can't get pnfs_layout_type\n", __func__);
- goto out;
+ goto out_unlock;
}
/* Check to see if the layout for the given range already exists */
--
1.7.1.1
On Mon, Jul 12, 2010 at 2:40 PM, Benny Halevy <[email protected]> wrote:
> Fix a bug where the function returned without taking the i_lock
> if the layout hdr was already allocated.
> Simplify by moving inode locking to caller.
No, the original function I sent had no such bug.
>
> Rename function as it no longer grabs the lock.
> Clean up the implementation so it's clearer what's going on
> and what are the likely cases vs. the unlikely ones.
I do not think this is any clearer!
>
> Signed-off-by: Benny Halevy <[email protected]>
> ---
> =A0fs/nfs/pnfs.c | =A0 42 ++++++++++++++++++++++--------------------
> =A01 files changed, 22 insertions(+), 20 deletions(-)
>
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 4ba7595..053a5c1 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -938,36 +938,37 @@ alloc_init_layout(struct inode *ino)
> =A0}
>
> =A0/*
> - * Lock and possibly allocate the inode layout
> + * Retrieve and possibly allocate the inode layout
> =A0*
> - * If successful, ino->i_lock is taken, and the caller must unlock.
> + * ino->i_lock must be taken by the caller.
> =A0*/
> =A0static struct pnfs_layout_type *
> -nfs_lock_alloc_layout(struct inode *ino)
> +pnfs_alloc_layout(struct inode *ino)
> =A0{
> + =A0 =A0 =A0 struct nfs_inode *nfsi =3D NFS_I(ino);
> =A0 =A0 =A0 =A0struct pnfs_layout_type *new =3D NULL;
>
> - =A0 =A0 =A0 dprintk("%s Begin ino=3D%p layout=3D%p\n", __func__, in=
o, NFS_I(ino)->layout);
> + =A0 =A0 =A0 dprintk("%s Begin ino=3D%p layout=3D%p\n", __func__, in=
o, nfsi->layout);
>
> - =A0 =A0 =A0 if (NFS_I(ino)->layout =3D=3D NULL) {
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 new =3D alloc_init_layout(ino);
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (new =3D=3D NULL)
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 return NULL;
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_lock(&ino->i_lock);
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (NFS_I(ino)->layout =3D=3D NULL) {
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 NFS_I(ino)->layout =3D =
new;
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 new =3D NULL;
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
> - =A0 =A0 =A0 }
> - =A0 =A0 =A0 if (new) {
> + =A0 =A0 =A0 BUG_ON(!spin_is_locked(&ino->i_lock));
> + =A0 =A0 =A0 if (likely(nfsi->layout))
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return nfsi->layout;
> +
> + =A0 =A0 =A0 spin_unlock(&ino->i_lock);
> + =A0 =A0 =A0 new =3D alloc_init_layout(ino);
> + =A0 =A0 =A0 spin_lock(&ino->i_lock);
> +
> + =A0 =A0 =A0 if (likely(nfsi->layout =3D=3D NULL)) { =A0 =A0 /* Won =
the race? */
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 nfsi->layout =3D new;
> + =A0 =A0 =A0 } else if (new) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0/* Reference the layout accross i_lock=
release and grab */
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 get_layout(NFS_I(ino)->layout);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 get_layout(nfsi->layout);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0spin_unlock(&ino->i_lock);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0NFS_SERVER(ino)->pnfs_curr_ld->ld_io_o=
ps->free_layout(new);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0spin_lock(&ino->i_lock);
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 put_layout_locked(NFS_I(ino)->layout);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 put_layout_locked(nfsi->layout);
> =A0 =A0 =A0 =A0}
> - =A0 =A0 =A0 return NFS_I(ino)->layout;
> + =A0 =A0 =A0 return nfsi->layout;
> =A0}
>
> =A0/*
> @@ -1055,10 +1056,11 @@ _pnfs_update_layout(struct inode *ino,
>
> =A0 =A0 =A0 =A0if (take_ref)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0*lsegpp =3D NULL;
> - =A0 =A0 =A0 lo =3D nfs_lock_alloc_layout(ino);
> + =A0 =A0 =A0 spin_lock(&ino->i_lock);
> + =A0 =A0 =A0 lo =3D pnfs_alloc_layout(ino);
> =A0 =A0 =A0 =A0if (lo =3D=3D NULL) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0dprintk("%s ERROR: can't get pnfs_layo=
ut_type\n", __func__);
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out;
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out_unlock;
> =A0 =A0 =A0 =A0}
>
> =A0 =A0 =A0 =A0/* Check to see if the layout for the given range alre=
ady exists */
> --
> 1.7.1.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
On Jul. 13, 2010, 16:44 +0300, "William A. (Andy) Adamson" <[email protected]> wrote:
> On Mon, Jul 12, 2010 at 2:40 PM, Benny Halevy <[email protected]> wrote:
>> Fix a bug where the function returned without taking the i_lock
>> if the layout hdr was already allocated.
>> Simplify by moving inode locking to caller.
>
> No, the original function I sent had no such bug.
>
True. It was my bad.
>>
>> Rename function as it no longer grabs the lock.
>> Clean up the implementation so it's clearer what's going on
>> and what are the likely cases vs. the unlikely ones.
>
> I do not think this is any clearer!
>
I think that getting the lock by the caller is simpler
than having the callee take it, but it doesn't matter that much.
My main problems with your patch were:
a. usage of the 'new' variable, setting it to NULL if it was used
rather than using simple if/else logic.
b. if alloc_init_layout failed after releasing the lock
the function always returned NULL, even if someone else
was able to allocate it in parallel (very unlikely, but possible)
c. the fast path had to go through 2 unlikely if's
Benny
>>
>> Signed-off-by: Benny Halevy <[email protected]>
>> ---
>> fs/nfs/pnfs.c | 42 ++++++++++++++++++++++--------------------
>> 1 files changed, 22 insertions(+), 20 deletions(-)
>>
>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> index 4ba7595..053a5c1 100644
>> --- a/fs/nfs/pnfs.c
>> +++ b/fs/nfs/pnfs.c
>> @@ -938,36 +938,37 @@ alloc_init_layout(struct inode *ino)
>> }
>>
>> /*
>> - * Lock and possibly allocate the inode layout
>> + * Retrieve and possibly allocate the inode layout
>> *
>> - * If successful, ino->i_lock is taken, and the caller must unlock.
>> + * ino->i_lock must be taken by the caller.
>> */
>> static struct pnfs_layout_type *
>> -nfs_lock_alloc_layout(struct inode *ino)
>> +pnfs_alloc_layout(struct inode *ino)
>> {
>> + struct nfs_inode *nfsi = NFS_I(ino);
>> struct pnfs_layout_type *new = NULL;
>>
>> - dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, NFS_I(ino)->layout);
>> + dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
>>
>> - if (NFS_I(ino)->layout == NULL) {
>> - new = alloc_init_layout(ino);
>> - if (new == NULL)
>> - return NULL;
>> - spin_lock(&ino->i_lock);
>> - if (NFS_I(ino)->layout == NULL) {
>> - NFS_I(ino)->layout = new;
>> - new = NULL;
>> - }
>> - }
>> - if (new) {
>> + BUG_ON(!spin_is_locked(&ino->i_lock));
>> + if (likely(nfsi->layout))
>> + return nfsi->layout;
>> +
>> + spin_unlock(&ino->i_lock);
>> + new = alloc_init_layout(ino);
>> + spin_lock(&ino->i_lock);
>> +
>> + if (likely(nfsi->layout == NULL)) { /* Won the race? */
>> + nfsi->layout = new;
>> + } else if (new) {
>> /* Reference the layout accross i_lock release and grab */
>> - get_layout(NFS_I(ino)->layout);
>> + get_layout(nfsi->layout);
>> spin_unlock(&ino->i_lock);
>> NFS_SERVER(ino)->pnfs_curr_ld->ld_io_ops->free_layout(new);
>> spin_lock(&ino->i_lock);
>> - put_layout_locked(NFS_I(ino)->layout);
>> + put_layout_locked(nfsi->layout);
>> }
>> - return NFS_I(ino)->layout;
>> + return nfsi->layout;
>> }
>>
>> /*
>> @@ -1055,10 +1056,11 @@ _pnfs_update_layout(struct inode *ino,
>>
>> if (take_ref)
>> *lsegpp = NULL;
>> - lo = nfs_lock_alloc_layout(ino);
>> + spin_lock(&ino->i_lock);
>> + lo = pnfs_alloc_layout(ino);
>> if (lo == NULL) {
>> dprintk("%s ERROR: can't get pnfs_layout_type\n", __func__);
>> - goto out;
>> + goto out_unlock;
>> }
>>
>> /* Check to see if the layout for the given range already exists */
>> --
>> 1.7.1.1
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Jul 13, 2010 at 10:02 AM, Benny Halevy <[email protected]> wrote:
> On Jul. 13, 2010, 16:44 +0300, "William A. (Andy) Adamson" <[email protected]> wrote:
>> On Mon, Jul 12, 2010 at 2:40 PM, Benny Halevy <[email protected]> wrote:
>>> Fix a bug where the function returned without taking the i_lock
>>> if the layout hdr was already allocated.
>>> Simplify by moving inode locking to caller.
>>
>> No, the original function I sent had no such bug.
>>
>
> True. It was my bad.
>
>>>
>>> Rename function as it no longer grabs the lock.
>>> Clean up the implementation so it's clearer what's going on
>>> and what are the likely cases vs. the unlikely ones.
>>
>> I do not think this is any clearer!
>>
>
> I think that getting the lock by the caller is simpler
> than having the callee take it, but it doesn't matter that much.
>
> My main problems with your patch were:
> a. usage of the 'new' variable, setting it to NULL if it was used
> rather than using simple if/else logic.
>
> b. if alloc_init_layout failed after releasing the lock
> the function always returned NULL, even if someone else
> was able to allocate it in parallel (very unlikely, but possible)
:)
>
> c. the fast path had to go through 2 unlikely if's
I am absolutely not concerned with the fast path, as this almost always
occurs once per inode until umount (unless the server reboots, or there is
a network partition, a migration, or use of another replica).
OK, I guess it doesn't matter that much. It just seems like a
re-arrange for very little reason.
-->Andy
>
> Benny
>
>>>
>>> Signed-off-by: Benny Halevy <[email protected]>
>>> ---
>>> =A0fs/nfs/pnfs.c | =A0 42 ++++++++++++++++++++++-------------------=
-
>>> =A01 files changed, 22 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>>> index 4ba7595..053a5c1 100644
>>> --- a/fs/nfs/pnfs.c
>>> +++ b/fs/nfs/pnfs.c
>>> @@ -938,36 +938,37 @@ alloc_init_layout(struct inode *ino)
>>> =A0}
>>>
>>> =A0/*
>>> - * Lock and possibly allocate the inode layout
>>> + * Retrieve and possibly allocate the inode layout
>>> =A0*
>>> - * If successful, ino->i_lock is taken, and the caller must unlock.
>>> + * ino->i_lock must be taken by the caller.
>>> =A0*/
>>> =A0static struct pnfs_layout_type *
>>> -nfs_lock_alloc_layout(struct inode *ino)
>>> +pnfs_alloc_layout(struct inode *ino)
>>> =A0{
>>> + =A0 =A0 =A0 struct nfs_inode *nfsi =3D NFS_I(ino);
>>> =A0 =A0 =A0 =A0struct pnfs_layout_type *new =3D NULL;
>>>
>>> - =A0 =A0 =A0 dprintk("%s Begin ino=3D%p layout=3D%p\n", __func__, =
ino, NFS_I(ino)->layout);
>>> + =A0 =A0 =A0 dprintk("%s Begin ino=3D%p layout=3D%p\n", __func__, =
ino, nfsi->layout);
>>>
>>> - =A0 =A0 =A0 if (NFS_I(ino)->layout =3D=3D NULL) {
>>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 new =3D alloc_init_layout(ino);
>>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (new =3D=3D NULL)
>>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 return NULL;
>>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_lock(&ino->i_lock);
>>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (NFS_I(ino)->layout =3D=3D NULL) {
>>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 NFS_I(ino)->layout =3D=
new;
>>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 new =3D NULL;
>>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>>> - =A0 =A0 =A0 }
>>> - =A0 =A0 =A0 if (new) {
>>> + =A0 =A0 =A0 BUG_ON(!spin_is_locked(&ino->i_lock));
>>> + =A0 =A0 =A0 if (likely(nfsi->layout))
>>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return nfsi->layout;
>>> +
>>> + =A0 =A0 =A0 spin_unlock(&ino->i_lock);
>>> + =A0 =A0 =A0 new =3D alloc_init_layout(ino);
>>> + =A0 =A0 =A0 spin_lock(&ino->i_lock);
>>> +
>>> + =A0 =A0 =A0 if (likely(nfsi->layout =3D=3D NULL)) { =A0 =A0 /* Wo=
n the race? */
>>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 nfsi->layout =3D new;
>>> + =A0 =A0 =A0 } else if (new) {
>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0/* Reference the layout accross i_lo=
ck release and grab */
>>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 get_layout(NFS_I(ino)->layout);
>>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 get_layout(nfsi->layout);
>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0spin_unlock(&ino->i_lock);
>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0NFS_SERVER(ino)->pnfs_curr_ld->ld_io=
_ops->free_layout(new);
>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0spin_lock(&ino->i_lock);
>>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 put_layout_locked(NFS_I(ino)->layout)=
;
>>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 put_layout_locked(nfsi->layout);
>>> =A0 =A0 =A0 =A0}
>>> - =A0 =A0 =A0 return NFS_I(ino)->layout;
>>> + =A0 =A0 =A0 return nfsi->layout;
>>> =A0}
>>>
>>> =A0/*
>>> @@ -1055,10 +1056,11 @@ _pnfs_update_layout(struct inode *ino,
>>>
>>> =A0 =A0 =A0 =A0if (take_ref)
>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0*lsegpp =3D NULL;
>>> - =A0 =A0 =A0 lo =3D nfs_lock_alloc_layout(ino);
>>> + =A0 =A0 =A0 spin_lock(&ino->i_lock);
>>> + =A0 =A0 =A0 lo =3D pnfs_alloc_layout(ino);
>>> =A0 =A0 =A0 =A0if (lo =3D=3D NULL) {
>>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0dprintk("%s ERROR: can't get pnfs_la=
yout_type\n", __func__);
>>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out;
>>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out_unlock;
>>> =A0 =A0 =A0 =A0}
>>>
>>> =A0 =A0 =A0 =A0/* Check to see if the layout for the given range al=
ready exists */
>>> --
>>> 1.7.1.1
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>>> the body of a message to [email protected]
>>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>