2017-06-08 16:56:17

by Olga Kornievskaia

[permalink] [raw]
Subject: [PATCH 1/1] PNFS fix dangling DS mount

There is a regression by commit 8d40b0f14846 ("NFS filelayout:call
GETDEVICEINFO after pnfs_layout_process completes"). It leaves the
DS mount dangling.

Previously, filelayout_alloc_sec() would call filelayout_check_layout()
which would call nfs4_find_get_deviceid which ups the count on the
device_id. It's only called once and it's matched by the
filelayout_free_lseg() that calls nfs4_fl_put_deviceid().

After that patch, each read/write ends up calling nfs4_find_get_deviceid
and there is no balance for that. Instead, do nfs4_fl_put_deviceid()
in the filelayout's .pg_cleanup and remove it from filelayout_free_lseg.

Fixes: 8d40b0f14846 ("NFS filelayout:call GETDEVICEINFO after pnfs_layout_process completes")
Signed-off-by: Olga Kornievskaia <[email protected]>
---
fs/nfs/filelayout/filelayout.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index acd30ba..3c6d043 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -763,7 +763,6 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);

dprintk("--> %s\n", __func__);
- nfs4_fl_put_deviceid(fl->dsaddr);
/* This assumes a single RW lseg */
if (lseg->pls_range.iomode == IOMODE_RW) {
struct nfs4_filelayout *flo;
@@ -989,18 +988,29 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
nfs_pageio_reset_write_mds(pgio);
}

+static void filelayout_pg_cleanup(struct nfs_pageio_descriptor *desc)
+{
+ if (desc->pg_lseg) {
+ struct nfs4_filelayout_segment *fl =
+ FILELAYOUT_LSEG(desc->pg_lseg);
+
+ nfs4_fl_put_deviceid(fl->dsaddr);
+ }
+ pnfs_generic_pg_cleanup(desc);
+}
+
static const struct nfs_pageio_ops filelayout_pg_read_ops = {
.pg_init = filelayout_pg_init_read,
.pg_test = filelayout_pg_test,
.pg_doio = pnfs_generic_pg_readpages,
- .pg_cleanup = pnfs_generic_pg_cleanup,
+ .pg_cleanup = filelayout_pg_cleanup,
};

static const struct nfs_pageio_ops filelayout_pg_write_ops = {
.pg_init = filelayout_pg_init_write,
.pg_test = filelayout_pg_test,
.pg_doio = pnfs_generic_pg_writepages,
- .pg_cleanup = pnfs_generic_pg_cleanup,
+ .pg_cleanup = filelayout_pg_cleanup,
};

static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
--
1.8.3.1



2017-06-09 20:58:17

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH 1/1] PNFS fix dangling DS mount

On Thu, Jun 8, 2017 at 12:55 PM, Olga Kornievskaia <[email protected]> wrote:
> There is a regression by commit 8d40b0f14846 ("NFS filelayout:call
> GETDEVICEINFO after pnfs_layout_process completes"). It leaves the
> DS mount dangling.
>
> Previously, filelayout_alloc_sec() would call filelayout_check_layout()
> which would call nfs4_find_get_deviceid which ups the count on the
> device_id. It's only called once and it's matched by the
> filelayout_free_lseg() that calls nfs4_fl_put_deviceid().
>
> After that patch, each read/write ends up calling nfs4_find_get_deviceid
> and there is no balance for that. Instead, do nfs4_fl_put_deviceid()
> in the filelayout's .pg_cleanup and remove it from filelayout_free_lseg.
>
> Fixes: 8d40b0f14846 ("NFS filelayout:call GETDEVICEINFO after pnfs_layout_process completes")
> Signed-off-by: Olga Kornievskaia <[email protected]>
> ---
> fs/nfs/filelayout/filelayout.c | 16 +++++++++++++---
> 1 file changed, 13 insertions(+), 3 deletions(-)
>
> diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
> index acd30ba..3c6d043 100644
> --- a/fs/nfs/filelayout/filelayout.c
> +++ b/fs/nfs/filelayout/filelayout.c
> @@ -763,7 +763,6 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
> struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
>
> dprintk("--> %s\n", __func__);
> - nfs4_fl_put_deviceid(fl->dsaddr);
> /* This assumes a single RW lseg */
> if (lseg->pls_range.iomode == IOMODE_RW) {
> struct nfs4_filelayout *flo;
> @@ -989,18 +988,29 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
> nfs_pageio_reset_write_mds(pgio);
> }
>
> +static void filelayout_pg_cleanup(struct nfs_pageio_descriptor *desc)
> +{
> + if (desc->pg_lseg) {
> + struct nfs4_filelayout_segment *fl =
> + FILELAYOUT_LSEG(desc->pg_lseg);
> +
> + nfs4_fl_put_deviceid(fl->dsaddr);
> + }
> + pnfs_generic_pg_cleanup(desc);
> +}
> +
> static const struct nfs_pageio_ops filelayout_pg_read_ops = {
> .pg_init = filelayout_pg_init_read,
> .pg_test = filelayout_pg_test,
> .pg_doio = pnfs_generic_pg_readpages,
> - .pg_cleanup = pnfs_generic_pg_cleanup,
> + .pg_cleanup = filelayout_pg_cleanup,
> };
>
> static const struct nfs_pageio_ops filelayout_pg_write_ops = {
> .pg_init = filelayout_pg_init_write,
> .pg_test = filelayout_pg_test,
> .pg_doio = pnfs_generic_pg_writepages,
> - .pg_cleanup = pnfs_generic_pg_cleanup,
> + .pg_cleanup = filelayout_pg_cleanup,
> };
>
> static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
> --
> 1.8.3.1

Needs to do a bit more. It fixes a dangling mount on non-error case.
But on server reboots it leads to ref count imbalance again.

2017-06-13 18:36:24

by Olga Kornievskaia

[permalink] [raw]
Subject: [PATCH 1/1] PNFS fix dangling DS mount

There is a regression by commit 8d40b0f14846 ("NFS filelayout:call
GETDEVICEINFO after pnfs_layout_process completes"). It leaves the
DS mount dangling.

Previously, filelayout_alloc_sec() would call filelayout_check_layout()
which would call nfs4_find_get_deviceid which ups the count on the
device_id. It's only called once and it's matched by the
filelayout_free_lseg() that calls nfs4_fl_put_deviceid().

After that patch, each read/write ends up calling nfs4_find_get_deviceid
and there is no balance for that. Instead, do nfs4_fl_put_deviceid()
in the filelayout's .pg_cleanup and remove it from filelayout_free_lseg.

But we still want a reference to hold over the lifetime of the segment.
So start the recount at 2 and keep the nfs4_fl_put_deviceid() in
the filelayout_free_lseg().

Fixes: 8d40b0f14846 ("NFS filelayout:call GETDEVICEINFO after pnfs_layout_process completes")
Signed-off-by: Olga Kornievskaia <[email protected]>
---
fs/nfs/filelayout/filelayout.c | 15 +++++++++++++--
fs/nfs/pnfs_dev.c | 2 +-
2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index acd30ba..33ebd1a 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -989,18 +989,29 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
nfs_pageio_reset_write_mds(pgio);
}

+static void filelayout_pg_cleanup(struct nfs_pageio_descriptor *desc)
+{
+ if (desc->pg_lseg) {
+ struct nfs4_filelayout_segment *fl =
+ FILELAYOUT_LSEG(desc->pg_lseg);
+
+ nfs4_fl_put_deviceid(fl->dsaddr);
+ }
+ pnfs_generic_pg_cleanup(desc);
+}
+
static const struct nfs_pageio_ops filelayout_pg_read_ops = {
.pg_init = filelayout_pg_init_read,
.pg_test = filelayout_pg_test,
.pg_doio = pnfs_generic_pg_readpages,
- .pg_cleanup = pnfs_generic_pg_cleanup,
+ .pg_cleanup = filelayout_pg_cleanup,
};

static const struct nfs_pageio_ops filelayout_pg_write_ops = {
.pg_init = filelayout_pg_init_write,
.pg_test = filelayout_pg_test,
.pg_doio = pnfs_generic_pg_writepages,
- .pg_cleanup = pnfs_generic_pg_cleanup,
+ .pg_cleanup = filelayout_pg_cleanup,
};

static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index 2961fcd..671d65f 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -255,7 +255,7 @@ struct nfs4_deviceid_node *
d->nfs_client = server->nfs_client;
d->flags = 0;
d->deviceid = *id;
- atomic_set(&d->ref, 1);
+ atomic_set(&d->ref, 2);
}
EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node);

--
1.8.3.1


2017-06-20 19:37:19

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH 1/1] PNFS fix dangling DS mount

T24gVHVlLCAyMDE3LTA2LTEzIGF0IDE0OjM2IC0wNDAwLCBPbGdhIEtvcm5pZXZza2FpYSB3cm90
ZToNCj4gVGhlcmUgaXMgYSByZWdyZXNzaW9uIGJ5IGNvbW1pdCA4ZDQwYjBmMTQ4NDYgKCJORlMg
ZmlsZWxheW91dDpjYWxsDQo+IEdFVERFVklDRUlORk8gYWZ0ZXIgcG5mc19sYXlvdXRfcHJvY2Vz
cyBjb21wbGV0ZXMiKS4gSXQgbGVhdmVzIHRoZQ0KPiBEUyBtb3VudCBkYW5nbGluZy4NCj4gDQo+
IFByZXZpb3VzbHksIGZpbGVsYXlvdXRfYWxsb2Nfc2VjKCkgd291bGQgY2FsbA0KPiBmaWxlbGF5
b3V0X2NoZWNrX2xheW91dCgpDQo+IHdoaWNoIHdvdWxkIGNhbGwgbmZzNF9maW5kX2dldF9kZXZp
Y2VpZCB3aGljaCB1cHMgdGhlIGNvdW50IG9uIHRoZQ0KPiBkZXZpY2VfaWQuIEl0J3Mgb25seSBj
YWxsZWQgb25jZSBhbmQgaXQncyBtYXRjaGVkIGJ5IHRoZQ0KPiBmaWxlbGF5b3V0X2ZyZWVfbHNl
ZygpIHRoYXQgY2FsbHMgbmZzNF9mbF9wdXRfZGV2aWNlaWQoKS4NCj4gDQo+IEFmdGVyIHRoYXQg
cGF0Y2gsIGVhY2ggcmVhZC93cml0ZSBlbmRzIHVwIGNhbGxpbmcNCj4gbmZzNF9maW5kX2dldF9k
ZXZpY2VpZA0KPiBhbmQgdGhlcmUgaXMgbm8gYmFsYW5jZSBmb3IgdGhhdC4gSW5zdGVhZCwgZG8g
bmZzNF9mbF9wdXRfZGV2aWNlaWQoKQ0KPiBpbiB0aGUgZmlsZWxheW91dCdzIC5wZ19jbGVhbnVw
IGFuZCByZW1vdmUgaXQgZnJvbQ0KPiBmaWxlbGF5b3V0X2ZyZWVfbHNlZy4NCj4gDQo+IEJ1dCB3
ZSBzdGlsbCB3YW50IGEgcmVmZXJlbmNlIHRvIGhvbGQgb3ZlciB0aGUgbGlmZXRpbWUgb2YgdGhl
DQo+IHNlZ21lbnQuDQo+IFNvIHN0YXJ0IHRoZSByZWNvdW50IGF0IDIgYW5kIGtlZXAgdGhlIG5m
czRfZmxfcHV0X2RldmljZWlkKCkgaW4NCj4gdGhlIGZpbGVsYXlvdXRfZnJlZV9sc2VnKCkuDQo+
IA0KPiBGaXhlczogOGQ0MGIwZjE0ODQ2ICgiTkZTIGZpbGVsYXlvdXQ6Y2FsbCBHRVRERVZJQ0VJ
TkZPIGFmdGVyDQo+IHBuZnNfbGF5b3V0X3Byb2Nlc3MgY29tcGxldGVzIikNCj4gU2lnbmVkLW9m
Zi1ieTogT2xnYSBLb3JuaWV2c2thaWEgPGtvbGdhQG5ldGFwcC5jb20+DQo+IC0tLQ0KPiDCoGZz
L25mcy9maWxlbGF5b3V0L2ZpbGVsYXlvdXQuYyB8IDE1ICsrKysrKysrKysrKystLQ0KPiDCoGZz
L25mcy9wbmZzX2Rldi5jwqDCoMKgwqDCoMKgwqDCoMKgwqDCoMKgwqDCoHzCoMKgMiArLQ0KPiDC
oDIgZmlsZXMgY2hhbmdlZCwgMTQgaW5zZXJ0aW9ucygrKSwgMyBkZWxldGlvbnMoLSkNCj4gDQo+
IGRpZmYgLS1naXQgYS9mcy9uZnMvZmlsZWxheW91dC9maWxlbGF5b3V0LmMNCj4gYi9mcy9uZnMv
ZmlsZWxheW91dC9maWxlbGF5b3V0LmMNCj4gaW5kZXggYWNkMzBiYS4uMzNlYmQxYSAxMDA2NDQN
Cj4gLS0tIGEvZnMvbmZzL2ZpbGVsYXlvdXQvZmlsZWxheW91dC5jDQo+ICsrKyBiL2ZzL25mcy9m
aWxlbGF5b3V0L2ZpbGVsYXlvdXQuYw0KPiBAQCAtOTg5LDE4ICs5ODksMjkgQEAgc3RhdGljIHZv
aWQgX2ZpbGVsYXlvdXRfZnJlZV9sc2VnKHN0cnVjdA0KPiBuZnM0X2ZpbGVsYXlvdXRfc2VnbWVu
dCAqZmwpDQo+IMKgCW5mc19wYWdlaW9fcmVzZXRfd3JpdGVfbWRzKHBnaW8pOw0KPiDCoH0NCj4g
wqANCj4gK3N0YXRpYyB2b2lkIGZpbGVsYXlvdXRfcGdfY2xlYW51cChzdHJ1Y3QgbmZzX3BhZ2Vp
b19kZXNjcmlwdG9yDQo+ICpkZXNjKQ0KPiArew0KPiArCWlmIChkZXNjLT5wZ19sc2VnKSB7DQo+
ICsJCXN0cnVjdCBuZnM0X2ZpbGVsYXlvdXRfc2VnbWVudCAqZmwgPQ0KPiArCQkJRklMRUxBWU9V
VF9MU0VHKGRlc2MtPnBnX2xzZWcpOw0KPiArDQo+ICsJCW5mczRfZmxfcHV0X2RldmljZWlkKGZs
LT5kc2FkZHIpOw0KPiArCX0NCj4gKwlwbmZzX2dlbmVyaWNfcGdfY2xlYW51cChkZXNjKTsNCj4g
K30NCj4gKw0KPiDCoHN0YXRpYyBjb25zdCBzdHJ1Y3QgbmZzX3BhZ2Vpb19vcHMgZmlsZWxheW91
dF9wZ19yZWFkX29wcyA9IHsNCj4gwqAJLnBnX2luaXQgPSBmaWxlbGF5b3V0X3BnX2luaXRfcmVh
ZCwNCj4gwqAJLnBnX3Rlc3QgPSBmaWxlbGF5b3V0X3BnX3Rlc3QsDQo+IMKgCS5wZ19kb2lvID0g
cG5mc19nZW5lcmljX3BnX3JlYWRwYWdlcywNCj4gLQkucGdfY2xlYW51cCA9IHBuZnNfZ2VuZXJp
Y19wZ19jbGVhbnVwLA0KPiArCS5wZ19jbGVhbnVwID0gZmlsZWxheW91dF9wZ19jbGVhbnVwLA0K
PiDCoH07DQo+IMKgDQo+IMKgc3RhdGljIGNvbnN0IHN0cnVjdCBuZnNfcGFnZWlvX29wcyBmaWxl
bGF5b3V0X3BnX3dyaXRlX29wcyA9IHsNCj4gwqAJLnBnX2luaXQgPSBmaWxlbGF5b3V0X3BnX2lu
aXRfd3JpdGUsDQo+IMKgCS5wZ190ZXN0ID0gZmlsZWxheW91dF9wZ190ZXN0LA0KPiDCoAkucGdf
ZG9pbyA9IHBuZnNfZ2VuZXJpY19wZ193cml0ZXBhZ2VzLA0KPiAtCS5wZ19jbGVhbnVwID0gcG5m
c19nZW5lcmljX3BnX2NsZWFudXAsDQo+ICsJLnBnX2NsZWFudXAgPSBmaWxlbGF5b3V0X3BnX2Ns
ZWFudXAsDQo+IMKgfTsNCj4gwqANCj4gwqBzdGF0aWMgdTMyIHNlbGVjdF9idWNrZXRfaW5kZXgo
c3RydWN0IG5mczRfZmlsZWxheW91dF9zZWdtZW50ICpmbCwNCj4gdTMyIGopDQo+IGRpZmYgLS1n
aXQgYS9mcy9uZnMvcG5mc19kZXYuYyBiL2ZzL25mcy9wbmZzX2Rldi5jDQo+IGluZGV4IDI5NjFm
Y2QuLjY3MWQ2NWYgMTAwNjQ0DQo+IC0tLSBhL2ZzL25mcy9wbmZzX2Rldi5jDQo+ICsrKyBiL2Zz
L25mcy9wbmZzX2Rldi5jDQo+IEBAIC0yNTUsNyArMjU1LDcgQEAgc3RydWN0IG5mczRfZGV2aWNl
aWRfbm9kZSAqDQo+IMKgCWQtPm5mc19jbGllbnQgPSBzZXJ2ZXItPm5mc19jbGllbnQ7DQo+IMKg
CWQtPmZsYWdzID0gMDsNCj4gwqAJZC0+ZGV2aWNlaWQgPSAqaWQ7DQo+IC0JYXRvbWljX3NldCgm
ZC0+cmVmLCAxKTsNCj4gKwlhdG9taWNfc2V0KCZkLT5yZWYsIDIpOw0KPiDCoH0NCj4gwqBFWFBP
UlRfU1lNQk9MX0dQTChuZnM0X2luaXRfZGV2aWNlaWRfbm9kZSk7DQo+IMKgDQoNCkkgd2FzIGdv
aW5nIHRvIGFwcGx5IHRoaXMsIGJ1dCB1cG9uIHJldmlldyBpdCBsb29rcyBhcyBpZiBpdCB3aWxs
IGNhdXNlDQphIGRldmljZWlkIHJlZmNvdW50IGxlYWsgZm9yIGJvdGggcE5GUyBibG9ja3MgYW5k
IGZsZXhmaWxlcy4gUGxlYXNlDQpmaXguDQoNCi0tIA0KVHJvbmQgTXlrbGVidXN0DQpMaW51eCBO
RlMgY2xpZW50IG1haW50YWluZXIsIFByaW1hcnlEYXRhDQp0cm9uZC5teWtsZWJ1c3RAcHJpbWFy
eWRhdGEuY29tDQo=


2017-06-20 20:40:35

by Olga Kornievskaia

[permalink] [raw]
Subject: [PATCH 1/1] PNFS fix dangling DS mount

There is a regression by commit 8d40b0f14846 ("NFS filelayout:call
GETDEVICEINFO after pnfs_layout_process completes"). It leaves the
DS mount dangling.

Previously, filelayout_alloc_sec() would call filelayout_check_layout()
which would call nfs4_find_get_deviceid which ups the count on the
device_id. It's only called once and it's matched by the
filelayout_free_lseg() that calls nfs4_fl_put_deviceid().

After that patch, each read/write ends up calling nfs4_find_get_deviceid
and there is no balance for that. Instead, do nfs4_fl_put_deviceid()
in the filelayout's .pg_cleanup and remove it from filelayout_free_lseg.

But we still want a reference to hold over the lifetime of the segment.
So up the ref count when we allocate new device node and free it in
the filelayout_free_lseg().

Fixes: 8d40b0f14846 ("NFS filelayout:call GETDEVICEINFO after pnfs_layout_process completes")
Signed-off-by: Olga Kornievskaia <[email protected]>
---
fs/nfs/filelayout/filelayout.c | 15 +++++++++++++--
fs/nfs/filelayout/filelayoutdev.c | 1 +
2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 1cf85d6..ce82da1 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -991,18 +991,29 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
nfs_pageio_reset_write_mds(pgio);
}

+static void filelayout_pg_cleanup(struct nfs_pageio_descriptor *desc)
+{
+ if (desc->pg_lseg) {
+ struct nfs4_filelayout_segment *fl =
+ FILELAYOUT_LSEG(desc->pg_lseg);
+
+ nfs4_fl_put_deviceid(fl->dsaddr);
+ }
+ pnfs_generic_pg_cleanup(desc);
+}
+
static const struct nfs_pageio_ops filelayout_pg_read_ops = {
.pg_init = filelayout_pg_init_read,
.pg_test = filelayout_pg_test,
.pg_doio = pnfs_generic_pg_readpages,
- .pg_cleanup = pnfs_generic_pg_cleanup,
+ .pg_cleanup = filelayout_pg_cleanup,
};

static const struct nfs_pageio_ops filelayout_pg_write_ops = {
.pg_init = filelayout_pg_init_write,
.pg_test = filelayout_pg_test,
.pg_doio = pnfs_generic_pg_writepages,
- .pg_cleanup = pnfs_generic_pg_cleanup,
+ .pg_cleanup = filelayout_pg_cleanup,
};

static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index d913e81..862fe1f 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -147,6 +147,7 @@ struct nfs4_file_layout_dsaddr *
stripe_indices = NULL;
dsaddr->ds_num = num;
nfs4_init_deviceid_node(&dsaddr->id_node, server, &pdev->dev_id);
+ nfs4_get_deviceid(&dsaddr->id_node);

INIT_LIST_HEAD(&dsaddrs);

--
1.8.3.1