2016-06-10 20:49:07

by Benjamin Coddington

[permalink] [raw]
Subject: [PATCH] pnfs/blocklayout: put deviceid node after releasing bl_ext_lock

The last put of deviceid nodes for SCSI layouts may sleep, so we shouldn't
hold any spinlocks. Make sure we put them outside the bl_ext_lock.

Signed-off-by: Benjamin Coddington <[email protected]>
---
fs/nfs/blocklayout/extent_tree.c | 27 +++++++++++++++++++++------
1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 720b3ff55fa9..992bcb19c11e 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -121,6 +121,16 @@ ext_try_to_merge_right(struct rb_root *root, struct pnfs_block_extent *be)
return be;
}

+static void __ext_put_deviceids(struct list_head *head)
+{
+ struct pnfs_block_extent *be, *tmp;
+
+ list_for_each_entry_safe(be, tmp, head, be_list) {
+ nfs4_put_deviceid_node(be->be_device);
+ kfree(be);
+ }
+}
+
static void
__ext_tree_insert(struct rb_root *root,
struct pnfs_block_extent *new, bool merge_ok)
@@ -163,7 +173,8 @@ free_new:
}

static int
-__ext_tree_remove(struct rb_root *root, sector_t start, sector_t end)
+__ext_tree_remove(struct rb_root *root,
+ sector_t start, sector_t end, struct list_head *tmp)
{
struct pnfs_block_extent *be;
sector_t len1 = 0, len2 = 0;
@@ -223,8 +234,7 @@ __ext_tree_remove(struct rb_root *root, sector_t start, sector_t end)
struct pnfs_block_extent *next = ext_tree_next(be);

rb_erase(&be->be_node, root);
- nfs4_put_deviceid_node(be->be_device);
- kfree(be);
+ list_add_tail(&be->be_list, tmp);
be = next;
}

@@ -350,16 +360,18 @@ int ext_tree_remove(struct pnfs_block_layout *bl, bool rw,
sector_t start, sector_t end)
{
int err, err2;
+ LIST_HEAD(tmp);

spin_lock(&bl->bl_ext_lock);
- err = __ext_tree_remove(&bl->bl_ext_ro, start, end);
+ err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
if (rw) {
- err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end);
+ err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end, &tmp);
if (!err)
err = err2;
}
spin_unlock(&bl->bl_ext_lock);

+ __ext_put_deviceids(&tmp);
return err;
}

@@ -396,12 +408,13 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
sector_t end = start + len;
struct pnfs_block_extent *be;
int err = 0;
+ LIST_HEAD(tmp);

spin_lock(&bl->bl_ext_lock);
/*
* First remove all COW extents or holes from written to range.
*/
- err = __ext_tree_remove(&bl->bl_ext_ro, start, end);
+ err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
if (err)
goto out;

@@ -459,6 +472,8 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
}
out:
spin_unlock(&bl->bl_ext_lock);
+
+ __ext_put_deviceids(&tmp);
return err;
}

--
2.5.5



2016-06-13 14:53:00

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH] pnfs/blocklayout: put deviceid node after releasing bl_ext_lock

On Fri, Jun 10, 2016 at 04:37:35PM -0400, Benjamin Coddington wrote:
> The last put of deviceid nodes for SCSI layouts may sleep, so we shouldn't
> hold any spinlocks. Make sure we put them outside the bl_ext_lock.
>
> Signed-off-by: Benjamin Coddington <[email protected]>

Thanks Benjamin,

the patch looks fine:

Reviewed-by: Christoph Hellwig <[email protected]>

Although maybe it's worth addressing a few naming nitpicks:

> +static void __ext_put_deviceids(struct list_head *head)

No need for the __ here.

> static int
> -__ext_tree_remove(struct rb_root *root, sector_t start, sector_t end)
> +__ext_tree_remove(struct rb_root *root,
> + sector_t start, sector_t end, struct list_head *tmp)
> {

Maybe instead of tmp this could be called to_free?

> + LIST_HEAD(tmp);
>
> spin_lock(&bl->bl_ext_lock);
> - err = __ext_tree_remove(&bl->bl_ext_ro, start, end);
> + err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
> if (rw) {
> - err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end);
> + err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end, &tmp);

Same here,

> @@ -396,12 +408,13 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
> sector_t end = start + len;
> struct pnfs_block_extent *be;
> int err = 0;
> + LIST_HEAD(tmp);

and here.

2016-06-13 17:41:04

by Benjamin Coddington

[permalink] [raw]
Subject: Re: [PATCH] pnfs/blocklayout: put deviceid node after releasing bl_ext_lock

> On Fri, Jun 10, 2016 at 04:37:35PM -0400, Benjamin Coddington wrote:
>> The last put of deviceid nodes for SCSI layouts may sleep, so we
>> shouldn't
>> hold any spinlocks. Make sure we put them outside the bl_ext_lock.
>>
>> Signed-off-by: Benjamin Coddington <[email protected]>
>
> Thanks Benjamin,
>
> the patch looks fine:
>
> Reviewed-by: Christoph Hellwig <[email protected]>
>
> Although maybe it's worth addressing a few naming nitpicks:
>
>> +static void __ext_put_deviceids(struct list_head *head)
>
> No need for the __ here.
>
>> static int
>> -__ext_tree_remove(struct rb_root *root, sector_t start, sector_t
>> end)
>> +__ext_tree_remove(struct rb_root *root,
>> + sector_t start, sector_t end, struct list_head *tmp)
>> {
>
> Maybe instead of tmp this could be called to_free?
>
>> + LIST_HEAD(tmp);
>>
>> spin_lock(&bl->bl_ext_lock);
>> - err = __ext_tree_remove(&bl->bl_ext_ro, start, end);
>> + err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
>> if (rw) {
>> - err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end);
>> + err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end, &tmp);
>
> Same here,
>
>> @@ -396,12 +408,13 @@ ext_tree_mark_written(struct pnfs_block_layout
>> *bl, sector_t start,
>> sector_t end = start + len;
>> struct pnfs_block_extent *be;
>> int err = 0;
>> + LIST_HEAD(tmp);
>
> and here.

OK. I'll send a V2.

Ben

2016-07-19 12:25:45

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH] pnfs/blocklayout: put deviceid node after releasing bl_ext_lock

Hi Ben,

> On Jun 10, 2016, at 16:37, Benjamin Coddington <[email protected]> wrot=
e:
>=20
> The last put of deviceid nodes for SCSI layouts may sleep, so we shouldn'=
t
> hold any spinlocks. Make sure we put them outside the bl_ext_lock.
>=20
> Signed-off-by: Benjamin Coddington <[email protected]>
> ---
> fs/nfs/blocklayout/extent_tree.c | 27 +++++++++++++++++++++------
> 1 file changed, 21 insertions(+), 6 deletions(-)
>=20
> diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent=
_tree.c
> index 720b3ff55fa9..992bcb19c11e 100644
> --- a/fs/nfs/blocklayout/extent_tree.c
> +++ b/fs/nfs/blocklayout/extent_tree.c
> @@ -121,6 +121,16 @@ ext_try_to_merge_right(struct rb_root *root, struct =
pnfs_block_extent *be)
> =09return be;
> }
>=20
> +static void __ext_put_deviceids(struct list_head *head)
> +{
> +=09struct pnfs_block_extent *be, *tmp;
> +
> +=09list_for_each_entry_safe(be, tmp, head, be_list) {
> +=09=09nfs4_put_deviceid_node(be->be_device);
> +=09=09kfree(be);
> +=09}

This definitely needs a list_del()=85 I only noticed this morning after hun=
ting for the OOM that Christoph says turned up after this weekend.

> +}
> +
> static void
> __ext_tree_insert(struct rb_root *root,
> =09=09struct pnfs_block_extent *new, bool merge_ok)
> @@ -163,7 +173,8 @@ free_new:
> }
>=20
> static int
> -__ext_tree_remove(struct rb_root *root, sector_t start, sector_t end)
> +__ext_tree_remove(struct rb_root *root,
> +=09=09sector_t start, sector_t end, struct list_head *tmp)
> {
> =09struct pnfs_block_extent *be;
> =09sector_t len1 =3D 0, len2 =3D 0;
> @@ -223,8 +234,7 @@ __ext_tree_remove(struct rb_root *root, sector_t star=
t, sector_t end)
> =09=09=09struct pnfs_block_extent *next =3D ext_tree_next(be);
>=20
> =09=09=09rb_erase(&be->be_node, root);
> -=09=09=09nfs4_put_deviceid_node(be->be_device);
> -=09=09=09kfree(be);
> +=09=09=09list_add_tail(&be->be_list, tmp);
> =09=09=09be =3D next;
> =09=09}
>=20
> @@ -350,16 +360,18 @@ int ext_tree_remove(struct pnfs_block_layout *bl, b=
ool rw,
> =09=09sector_t start, sector_t end)
> {
> =09int err, err2;
> +=09LIST_HEAD(tmp);
>=20
> =09spin_lock(&bl->bl_ext_lock);
> -=09err =3D __ext_tree_remove(&bl->bl_ext_ro, start, end);
> +=09err =3D __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
> =09if (rw) {
> -=09=09err2 =3D __ext_tree_remove(&bl->bl_ext_rw, start, end);
> +=09=09err2 =3D __ext_tree_remove(&bl->bl_ext_rw, start, end, &tmp);
> =09=09if (!err)
> =09=09=09err =3D err2;
> =09}
> =09spin_unlock(&bl->bl_ext_lock);
>=20
> +=09__ext_put_deviceids(&tmp);
> =09return err;
> }
>=20
> @@ -396,12 +408,13 @@ ext_tree_mark_written(struct pnfs_block_layout *bl,=
sector_t start,
> =09sector_t end =3D start + len;
> =09struct pnfs_block_extent *be;
> =09int err =3D 0;
> +=09LIST_HEAD(tmp);
>=20
> =09spin_lock(&bl->bl_ext_lock);
> =09/*
> =09 * First remove all COW extents or holes from written to range.
> =09 */
> -=09err =3D __ext_tree_remove(&bl->bl_ext_ro, start, end);
> +=09err =3D __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
> =09if (err)
> =09=09goto out;
>=20
> @@ -459,6 +472,8 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, s=
ector_t start,
> =09}
> out:
> =09spin_unlock(&bl->bl_ext_lock);
> +
> +=09__ext_put_deviceids(&tmp);
> =09return err;
> }
>=20
> --=20
> 2.5.5
>=20
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>=20

2016-07-19 13:08:27

by Benjamin Coddington

[permalink] [raw]
Subject: Re: [PATCH] pnfs/blocklayout: put deviceid node after releasing bl_ext_lock

On 19 Jul 2016, at 8:25, Trond Myklebust wrote:

> Hi Ben,
>
>> On Jun 10, 2016, at 16:37, Benjamin Coddington <[email protected]>
>> wrote:
>>
>> The last put of deviceid nodes for SCSI layouts may sleep, so we
>> shouldn't
>> hold any spinlocks. Make sure we put them outside the bl_ext_lock.
>>
>> Signed-off-by: Benjamin Coddington <[email protected]>
>> ---
>> fs/nfs/blocklayout/extent_tree.c | 27 +++++++++++++++++++++------
>> 1 file changed, 21 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/nfs/blocklayout/extent_tree.c
>> b/fs/nfs/blocklayout/extent_tree.c
>> index 720b3ff55fa9..992bcb19c11e 100644
>> --- a/fs/nfs/blocklayout/extent_tree.c
>> +++ b/fs/nfs/blocklayout/extent_tree.c
>> @@ -121,6 +121,16 @@ ext_try_to_merge_right(struct rb_root *root,
>> struct pnfs_block_extent *be)
>> return be;
>> }
>>
>> +static void __ext_put_deviceids(struct list_head *head)
>> +{
>> + struct pnfs_block_extent *be, *tmp;
>> +
>> + list_for_each_entry_safe(be, tmp, head, be_list) {
>> + nfs4_put_deviceid_node(be->be_device);
>> + kfree(be);
>> + }
>
> This definitely needs a list_del()… I only noticed this morning
> after hunting for the OOM that Christoph says turned up after this
> weekend.

I'm afraid I don't understand why, though. List traversal seems fine
without it since we're using list_for_each_entry_safe.. I must be
missing
something.

Ben

>> +}
>> +
>> static void
>> __ext_tree_insert(struct rb_root *root,
>> struct pnfs_block_extent *new, bool merge_ok)
>> @@ -163,7 +173,8 @@ free_new:
>> }
>>
>> static int
>> -__ext_tree_remove(struct rb_root *root, sector_t start, sector_t
>> end)
>> +__ext_tree_remove(struct rb_root *root,
>> + sector_t start, sector_t end, struct list_head *tmp)
>> {
>> struct pnfs_block_extent *be;
>> sector_t len1 = 0, len2 = 0;
>> @@ -223,8 +234,7 @@ __ext_tree_remove(struct rb_root *root, sector_t
>> start, sector_t end)
>> struct pnfs_block_extent *next = ext_tree_next(be);
>>
>> rb_erase(&be->be_node, root);
>> - nfs4_put_deviceid_node(be->be_device);
>> - kfree(be);
>> + list_add_tail(&be->be_list, tmp);
>> be = next;
>> }
>>
>> @@ -350,16 +360,18 @@ int ext_tree_remove(struct pnfs_block_layout
>> *bl, bool rw,
>> sector_t start, sector_t end)
>> {
>> int err, err2;
>> + LIST_HEAD(tmp);
>>
>> spin_lock(&bl->bl_ext_lock);
>> - err = __ext_tree_remove(&bl->bl_ext_ro, start, end);
>> + err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
>> if (rw) {
>> - err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end);
>> + err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end, &tmp);
>> if (!err)
>> err = err2;
>> }
>> spin_unlock(&bl->bl_ext_lock);
>>
>> + __ext_put_deviceids(&tmp);
>> return err;
>> }
>>
>> @@ -396,12 +408,13 @@ ext_tree_mark_written(struct pnfs_block_layout
>> *bl, sector_t start,
>> sector_t end = start + len;
>> struct pnfs_block_extent *be;
>> int err = 0;
>> + LIST_HEAD(tmp);
>>
>> spin_lock(&bl->bl_ext_lock);
>> /*
>> * First remove all COW extents or holes from written to range.
>> */
>> - err = __ext_tree_remove(&bl->bl_ext_ro, start, end);
>> + err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
>> if (err)
>> goto out;
>>
>> @@ -459,6 +472,8 @@ ext_tree_mark_written(struct pnfs_block_layout
>> *bl, sector_t start,
>> }
>> out:
>> spin_unlock(&bl->bl_ext_lock);
>> +
>> + __ext_put_deviceids(&tmp);
>> return err;
>> }
>>
>> --
>> 2.5.5
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs"
>> in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>