The last put of deviceid nodes for SCSI layouts may sleep, so we shouldn't
hold any spinlocks. Make sure we put them outside the bl_ext_lock.
Signed-off-by: Benjamin Coddington <[email protected]>
---
fs/nfs/blocklayout/extent_tree.c | 27 +++++++++++++++++++++------
1 file changed, 21 insertions(+), 6 deletions(-)
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 720b3ff55fa9..992bcb19c11e 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -121,6 +121,16 @@ ext_try_to_merge_right(struct rb_root *root, struct pnfs_block_extent *be)
return be;
}
+static void __ext_put_deviceids(struct list_head *head)
+{
+ struct pnfs_block_extent *be, *tmp;
+
+ list_for_each_entry_safe(be, tmp, head, be_list) {
+ nfs4_put_deviceid_node(be->be_device);
+ kfree(be);
+ }
+}
+
static void
__ext_tree_insert(struct rb_root *root,
struct pnfs_block_extent *new, bool merge_ok)
@@ -163,7 +173,8 @@ free_new:
}
static int
-__ext_tree_remove(struct rb_root *root, sector_t start, sector_t end)
+__ext_tree_remove(struct rb_root *root,
+ sector_t start, sector_t end, struct list_head *tmp)
{
struct pnfs_block_extent *be;
sector_t len1 = 0, len2 = 0;
@@ -223,8 +234,7 @@ __ext_tree_remove(struct rb_root *root, sector_t start, sector_t end)
struct pnfs_block_extent *next = ext_tree_next(be);
rb_erase(&be->be_node, root);
- nfs4_put_deviceid_node(be->be_device);
- kfree(be);
+ list_add_tail(&be->be_list, tmp);
be = next;
}
@@ -350,16 +360,18 @@ int ext_tree_remove(struct pnfs_block_layout *bl, bool rw,
sector_t start, sector_t end)
{
int err, err2;
+ LIST_HEAD(tmp);
spin_lock(&bl->bl_ext_lock);
- err = __ext_tree_remove(&bl->bl_ext_ro, start, end);
+ err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
if (rw) {
- err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end);
+ err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end, &tmp);
if (!err)
err = err2;
}
spin_unlock(&bl->bl_ext_lock);
+ __ext_put_deviceids(&tmp);
return err;
}
@@ -396,12 +408,13 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
sector_t end = start + len;
struct pnfs_block_extent *be;
int err = 0;
+ LIST_HEAD(tmp);
spin_lock(&bl->bl_ext_lock);
/*
* First remove all COW extents or holes from written to range.
*/
- err = __ext_tree_remove(&bl->bl_ext_ro, start, end);
+ err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
if (err)
goto out;
@@ -459,6 +472,8 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
}
out:
spin_unlock(&bl->bl_ext_lock);
+
+ __ext_put_deviceids(&tmp);
return err;
}
--
2.5.5
On Fri, Jun 10, 2016 at 04:37:35PM -0400, Benjamin Coddington wrote:
> The last put of deviceid nodes for SCSI layouts may sleep, so we shouldn't
> hold any spinlocks. Make sure we put them outside the bl_ext_lock.
>
> Signed-off-by: Benjamin Coddington <[email protected]>
Thanks Benjamin,
the patch looks fine:
Reviewed-by: Christoph Hellwig <[email protected]>
Although maybe it's worth addressing a few naming nitpicks:
> +static void __ext_put_deviceids(struct list_head *head)
No need for the __ here.
> static int
> -__ext_tree_remove(struct rb_root *root, sector_t start, sector_t end)
> +__ext_tree_remove(struct rb_root *root,
> + sector_t start, sector_t end, struct list_head *tmp)
> {
Maybe instead of tmp this could be called to_free?
> + LIST_HEAD(tmp);
>
> spin_lock(&bl->bl_ext_lock);
> - err = __ext_tree_remove(&bl->bl_ext_ro, start, end);
> + err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
> if (rw) {
> - err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end);
> + err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end, &tmp);
Same here,
> @@ -396,12 +408,13 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
> sector_t end = start + len;
> struct pnfs_block_extent *be;
> int err = 0;
> + LIST_HEAD(tmp);
and here.
> On Fri, Jun 10, 2016 at 04:37:35PM -0400, Benjamin Coddington wrote:
>> The last put of deviceid nodes for SCSI layouts may sleep, so we
>> shouldn't
>> hold any spinlocks. Make sure we put them outside the bl_ext_lock.
>>
>> Signed-off-by: Benjamin Coddington <[email protected]>
>
> Thanks Benjamin,
>
> the patch looks fine:
>
> Reviewed-by: Christoph Hellwig <[email protected]>
>
> Although maybe it's worth addressing a few naming nitpicks:
>
>> +static void __ext_put_deviceids(struct list_head *head)
>
> No need for the __ here.
>
>> static int
>> -__ext_tree_remove(struct rb_root *root, sector_t start, sector_t
>> end)
>> +__ext_tree_remove(struct rb_root *root,
>> + sector_t start, sector_t end, struct list_head *tmp)
>> {
>
> Maybe instead of tmp this could be called to_free?
>
>> + LIST_HEAD(tmp);
>>
>> spin_lock(&bl->bl_ext_lock);
>> - err = __ext_tree_remove(&bl->bl_ext_ro, start, end);
>> + err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
>> if (rw) {
>> - err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end);
>> + err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end, &tmp);
>
> Same here,
>
>> @@ -396,12 +408,13 @@ ext_tree_mark_written(struct pnfs_block_layout
>> *bl, sector_t start,
>> sector_t end = start + len;
>> struct pnfs_block_extent *be;
>> int err = 0;
>> + LIST_HEAD(tmp);
>
> and here.
OK. I'll send a V2.
Ben
Hi Ben,
> On Jun 10, 2016, at 16:37, Benjamin Coddington <[email protected]> wrot=
e:
>=20
> The last put of deviceid nodes for SCSI layouts may sleep, so we shouldn'=
t
> hold any spinlocks. Make sure we put them outside the bl_ext_lock.
>=20
> Signed-off-by: Benjamin Coddington <[email protected]>
> ---
> fs/nfs/blocklayout/extent_tree.c | 27 +++++++++++++++++++++------
> 1 file changed, 21 insertions(+), 6 deletions(-)
>=20
> diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent=
_tree.c
> index 720b3ff55fa9..992bcb19c11e 100644
> --- a/fs/nfs/blocklayout/extent_tree.c
> +++ b/fs/nfs/blocklayout/extent_tree.c
> @@ -121,6 +121,16 @@ ext_try_to_merge_right(struct rb_root *root, struct =
pnfs_block_extent *be)
> =09return be;
> }
>=20
> +static void __ext_put_deviceids(struct list_head *head)
> +{
> +=09struct pnfs_block_extent *be, *tmp;
> +
> +=09list_for_each_entry_safe(be, tmp, head, be_list) {
> +=09=09nfs4_put_deviceid_node(be->be_device);
> +=09=09kfree(be);
> +=09}
This definitely needs a list_del()=85 I only noticed this morning after hun=
ting for the OOM that Christoph says turned up after this weekend.
> +}
> +
> static void
> __ext_tree_insert(struct rb_root *root,
> =09=09struct pnfs_block_extent *new, bool merge_ok)
> @@ -163,7 +173,8 @@ free_new:
> }
>=20
> static int
> -__ext_tree_remove(struct rb_root *root, sector_t start, sector_t end)
> +__ext_tree_remove(struct rb_root *root,
> +=09=09sector_t start, sector_t end, struct list_head *tmp)
> {
> =09struct pnfs_block_extent *be;
> =09sector_t len1 =3D 0, len2 =3D 0;
> @@ -223,8 +234,7 @@ __ext_tree_remove(struct rb_root *root, sector_t star=
t, sector_t end)
> =09=09=09struct pnfs_block_extent *next =3D ext_tree_next(be);
>=20
> =09=09=09rb_erase(&be->be_node, root);
> -=09=09=09nfs4_put_deviceid_node(be->be_device);
> -=09=09=09kfree(be);
> +=09=09=09list_add_tail(&be->be_list, tmp);
> =09=09=09be =3D next;
> =09=09}
>=20
> @@ -350,16 +360,18 @@ int ext_tree_remove(struct pnfs_block_layout *bl, b=
ool rw,
> =09=09sector_t start, sector_t end)
> {
> =09int err, err2;
> +=09LIST_HEAD(tmp);
>=20
> =09spin_lock(&bl->bl_ext_lock);
> -=09err =3D __ext_tree_remove(&bl->bl_ext_ro, start, end);
> +=09err =3D __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
> =09if (rw) {
> -=09=09err2 =3D __ext_tree_remove(&bl->bl_ext_rw, start, end);
> +=09=09err2 =3D __ext_tree_remove(&bl->bl_ext_rw, start, end, &tmp);
> =09=09if (!err)
> =09=09=09err =3D err2;
> =09}
> =09spin_unlock(&bl->bl_ext_lock);
>=20
> +=09__ext_put_deviceids(&tmp);
> =09return err;
> }
>=20
> @@ -396,12 +408,13 @@ ext_tree_mark_written(struct pnfs_block_layout *bl,=
sector_t start,
> =09sector_t end =3D start + len;
> =09struct pnfs_block_extent *be;
> =09int err =3D 0;
> +=09LIST_HEAD(tmp);
>=20
> =09spin_lock(&bl->bl_ext_lock);
> =09/*
> =09 * First remove all COW extents or holes from written to range.
> =09 */
> -=09err =3D __ext_tree_remove(&bl->bl_ext_ro, start, end);
> +=09err =3D __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
> =09if (err)
> =09=09goto out;
>=20
> @@ -459,6 +472,8 @@ ext_tree_mark_written(struct pnfs_block_layout *bl, s=
ector_t start,
> =09}
> out:
> =09spin_unlock(&bl->bl_ext_lock);
> +
> +=09__ext_put_deviceids(&tmp);
> =09return err;
> }
>=20
> --=20
> 2.5.5
>=20
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>=20
On 19 Jul 2016, at 8:25, Trond Myklebust wrote:
> Hi Ben,
>
>> On Jun 10, 2016, at 16:37, Benjamin Coddington <[email protected]>
>> wrote:
>>
>> The last put of deviceid nodes for SCSI layouts may sleep, so we
>> shouldn't
>> hold any spinlocks. Make sure we put them outside the bl_ext_lock.
>>
>> Signed-off-by: Benjamin Coddington <[email protected]>
>> ---
>> fs/nfs/blocklayout/extent_tree.c | 27 +++++++++++++++++++++------
>> 1 file changed, 21 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/nfs/blocklayout/extent_tree.c
>> b/fs/nfs/blocklayout/extent_tree.c
>> index 720b3ff55fa9..992bcb19c11e 100644
>> --- a/fs/nfs/blocklayout/extent_tree.c
>> +++ b/fs/nfs/blocklayout/extent_tree.c
>> @@ -121,6 +121,16 @@ ext_try_to_merge_right(struct rb_root *root,
>> struct pnfs_block_extent *be)
>> return be;
>> }
>>
>> +static void __ext_put_deviceids(struct list_head *head)
>> +{
>> + struct pnfs_block_extent *be, *tmp;
>> +
>> + list_for_each_entry_safe(be, tmp, head, be_list) {
>> + nfs4_put_deviceid_node(be->be_device);
>> + kfree(be);
>> + }
>
> This definitely needs a list_del()… I only noticed this morning
> after hunting for the OOM that Christoph says turned up after this
> weekend.
I'm afraid I don't understand why, though. List traversal seems fine
without it since we're using list_for_each_entry_safe.. I must be
missing
something.
Ben
>> +}
>> +
>> static void
>> __ext_tree_insert(struct rb_root *root,
>> struct pnfs_block_extent *new, bool merge_ok)
>> @@ -163,7 +173,8 @@ free_new:
>> }
>>
>> static int
>> -__ext_tree_remove(struct rb_root *root, sector_t start, sector_t
>> end)
>> +__ext_tree_remove(struct rb_root *root,
>> + sector_t start, sector_t end, struct list_head *tmp)
>> {
>> struct pnfs_block_extent *be;
>> sector_t len1 = 0, len2 = 0;
>> @@ -223,8 +234,7 @@ __ext_tree_remove(struct rb_root *root, sector_t
>> start, sector_t end)
>> struct pnfs_block_extent *next = ext_tree_next(be);
>>
>> rb_erase(&be->be_node, root);
>> - nfs4_put_deviceid_node(be->be_device);
>> - kfree(be);
>> + list_add_tail(&be->be_list, tmp);
>> be = next;
>> }
>>
>> @@ -350,16 +360,18 @@ int ext_tree_remove(struct pnfs_block_layout
>> *bl, bool rw,
>> sector_t start, sector_t end)
>> {
>> int err, err2;
>> + LIST_HEAD(tmp);
>>
>> spin_lock(&bl->bl_ext_lock);
>> - err = __ext_tree_remove(&bl->bl_ext_ro, start, end);
>> + err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
>> if (rw) {
>> - err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end);
>> + err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end, &tmp);
>> if (!err)
>> err = err2;
>> }
>> spin_unlock(&bl->bl_ext_lock);
>>
>> + __ext_put_deviceids(&tmp);
>> return err;
>> }
>>
>> @@ -396,12 +408,13 @@ ext_tree_mark_written(struct pnfs_block_layout
>> *bl, sector_t start,
>> sector_t end = start + len;
>> struct pnfs_block_extent *be;
>> int err = 0;
>> + LIST_HEAD(tmp);
>>
>> spin_lock(&bl->bl_ext_lock);
>> /*
>> * First remove all COW extents or holes from written to range.
>> */
>> - err = __ext_tree_remove(&bl->bl_ext_ro, start, end);
>> + err = __ext_tree_remove(&bl->bl_ext_ro, start, end, &tmp);
>> if (err)
>> goto out;
>>
>> @@ -459,6 +472,8 @@ ext_tree_mark_written(struct pnfs_block_layout
>> *bl, sector_t start,
>> }
>> out:
>> spin_unlock(&bl->bl_ext_lock);
>> +
>> + __ext_put_deviceids(&tmp);
>> return err;
>> }
>>
>> --
>> 2.5.5
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs"
>> in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>