2016-05-30 16:35:09

by Jeff Layton

[permalink] [raw]
Subject: [RFC PATCH] nfs: allow nfs client to handle servers that hand out multiple layout types

Allow the client to deal with servers that hand out multiple layout
types for the same filesystem. When this happens, we pick the "best" one,
based on a hardcoded assumed order in the client code.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfs/client.c | 2 +-
fs/nfs/nfs4proc.c | 2 +-
fs/nfs/nfs4xdr.c | 41 +++++++++++++-------------
fs/nfs/pnfs.c | 76 ++++++++++++++++++++++++++++++++++++++-----------
include/linux/nfs_xdr.h | 2 +-
5 files changed, 85 insertions(+), 38 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 0c96528db94a..53b41f4bd45a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -787,7 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
}

fsinfo.fattr = fattr;
- fsinfo.layouttype = 0;
+ fsinfo.layouttypes = 0;
error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
if (error < 0)
goto out_error;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index de97567795a5..9446aef89b48 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4252,7 +4252,7 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s
if (error == 0) {
/* block layout checks this! */
server->pnfs_blksize = fsinfo->blksize;
- set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
+ set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttypes);
}

return error;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 661e753fe1c9..876a80802c1d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4723,33 +4723,36 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
* Decode potentially multiple layout types. Currently we only support
* one layout driver per file system.
*/
-static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
- uint32_t *layouttype)
+static int decode_pnfs_layout_types(struct xdr_stream *xdr, u32 *layouttypes)
{
__be32 *p;
int num;
+ u32 type;

p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_overflow;
num = be32_to_cpup(p);

- /* pNFS is not supported by the underlying file system */
- if (num == 0) {
- *layouttype = 0;
- return 0;
- }
- if (num > 1)
- printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
- "drivers per filesystem not supported\n", __func__);
+ *layouttypes = 0;

- /* Decode and set first layout type, move xdr->p past unused types */
- p = xdr_inline_decode(xdr, num * 4);
- if (unlikely(!p))
- goto out_overflow;
- *layouttype = be32_to_cpup(p);
+ for (; num; --num) {
+ p = xdr_inline_decode(xdr, 4);
+
+ if (unlikely(!p))
+ goto out_overflow;
+
+ type = be32_to_cpup(p);
+
+ /* Ignore any that we don't understand */
+ if (unlikely(type >= LAYOUT_TYPE_MAX))
+ continue;
+
+ *layouttypes |= 1 << type;
+ }
return 0;
out_overflow:
+ *layouttypes = 0;
print_overflow_msg(__func__, xdr);
return -EIO;
}
@@ -4759,7 +4762,7 @@ out_overflow:
* Note we must ensure that layouttype is set in any non-error case.
*/
static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
- uint32_t *layouttype)
+ __u32 *layouttypes)
{
int status = 0;

@@ -4767,10 +4770,10 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
return -EIO;
if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {
- status = decode_first_pnfs_layout_type(xdr, layouttype);
+ status = decode_pnfs_layout_types(xdr, layouttypes);
bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
} else
- *layouttype = 0;
+ *layouttypes = 0;
return status;
}

@@ -4851,7 +4854,7 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
if (status != 0)
goto xdr_error;
- status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
+ status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttypes);
if (status != 0)
goto xdr_error;

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0c7e0d45a4de..20b7b1f4e041 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -70,7 +70,7 @@ out:
}

static struct pnfs_layoutdriver_type *
-find_pnfs_driver(u32 id)
+__find_pnfs_driver(u32 id)
{
struct pnfs_layoutdriver_type *local;

@@ -84,6 +84,22 @@ find_pnfs_driver(u32 id)
return local;
}

+static struct pnfs_layoutdriver_type *
+find_pnfs_driver(u32 id)
+{
+ struct pnfs_layoutdriver_type *ld_type;
+
+ ld_type = __find_pnfs_driver(id);
+ if (!ld_type) {
+ request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
+ ld_type = __find_pnfs_driver(id);
+ if (!ld_type)
+ dprintk("%s: No pNFS module found for %u.\n",
+ __func__, id);
+ }
+ return ld_type;
+}
+
void
unset_pnfs_layoutdriver(struct nfs_server *nfss)
{
@@ -102,44 +118,72 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
* Try to set the server's pnfs module to the pnfs layout type specified by id.
* Currently only one pNFS layout driver per filesystem is supported.
*
- * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
+ * @layouttypes: bitfield showing what layout types server supports
*/
void
set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
- u32 id)
+ u32 layouttypes)
{
struct pnfs_layoutdriver_type *ld_type = NULL;

- if (id == 0)
+ if (layouttypes == 0)
goto out_no_driver;
if (!(server->nfs_client->cl_exchange_flags &
(EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
- printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",
- __func__, id, server->nfs_client->cl_exchange_flags);
+ printk(KERN_ERR "NFS: %s: layouttypes 0x%x cl_exchange_flags 0x%x\n",
+ __func__, layouttypes, server->nfs_client->cl_exchange_flags);
goto out_no_driver;
}
- ld_type = find_pnfs_driver(id);
- if (!ld_type) {
- request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
- ld_type = find_pnfs_driver(id);
- if (!ld_type) {
- dprintk("%s: No pNFS module found for %u.\n",
- __func__, id);
+
+ /*
+ * See if one of the layout types that we got handed is usable. We
+ * attempt in a hardcoded order of preference, in order of (assumed)
+ * decreasing speeds and functionality.
+ *
+ * FIXME: should this order be configurable in some fashion?
+ */
+ if (layouttypes & (1 << LAYOUT_SCSI)) {
+ ld_type = find_pnfs_driver(LAYOUT_SCSI);
+ if (ld_type)
+ goto set_driver;
+ }
+
+ if (layouttypes & (1 << LAYOUT_BLOCK_VOLUME)) {
+ ld_type = find_pnfs_driver(LAYOUT_BLOCK_VOLUME);
+ if (ld_type)
+ goto set_driver;
+ }
+
+ if (layouttypes & (1 << LAYOUT_OSD2_OBJECTS)) {
+ ld_type = find_pnfs_driver(LAYOUT_OSD2_OBJECTS);
+ if (ld_type)
+ goto set_driver;
+ }
+
+ if (layouttypes & (1 << LAYOUT_FLEX_FILES)) {
+ ld_type = find_pnfs_driver(LAYOUT_FLEX_FILES);
+ if (ld_type)
+ goto set_driver;
+ }
+
+ if (layouttypes & (1 << LAYOUT_NFSV4_1_FILES)) {
+ ld_type = find_pnfs_driver(LAYOUT_NFSV4_1_FILES);
+ if (!ld_type)
goto out_no_driver;
- }
}
+set_driver:
server->pnfs_curr_ld = ld_type;
if (ld_type->set_layoutdriver
&& ld_type->set_layoutdriver(server, mntfh)) {
printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
- "driver %u.\n", __func__, id);
+ "driver %u.\n", __func__, ld_type->id);
module_put(ld_type->owner);
goto out_no_driver;
}
/* Bump the MDS count */
atomic_inc(&server->nfs_client->cl_mds_count);

- dprintk("%s: pNFS module for %u set\n", __func__, id);
+ dprintk("%s: pNFS module for %u set\n", __func__, ld_type->id);
return;

out_no_driver:
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index c304a11b5b1a..1f6bb59f05f2 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -139,7 +139,7 @@ struct nfs_fsinfo {
__u64 maxfilesize;
struct timespec time_delta; /* server time granularity */
__u32 lease_time; /* in seconds */
- __u32 layouttype; /* supported pnfs layout driver */
+ __u32 layouttypes; /* supported pnfs layout drivers */
__u32 blksize; /* preferred pnfs io block size */
__u32 clone_blksize; /* granularity of a CLONE operation */
};
--
2.5.5



2016-05-31 16:03:52

by Trond Myklebust

[permalink] [raw]
Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers that hand out multiple layout types

DQoNCk9uIDUvMzAvMTYsIDEyOjM1LCAiSmVmZiBMYXl0b24iIDxqbGF5dG9uQHBvb2NoaWVyZWRz
Lm5ldD4gd3JvdGU6DQoNCj5BbGxvdyB0aGUgY2xpZW50IHRvIGRlYWwgd2l0aCBzZXJ2ZXJzIHRo
YXQgaGFuZCBvdXQgbXVsdGlwbGUgbGF5b3V0DQo+dHlwZXMgZm9yIHRoZSBzYW1lIGZpbGVzeXN0
ZW0uIFdoZW4gdGhpcyBoYXBwZW5zLCB3ZSBwaWNrIHRoZSAiYmVzdCIgb25lLA0KPmJhc2VkIG9u
IGEgaGFyZGNvZGVkIGFzc3VtZWQgb3JkZXIgaW4gdGhlIGNsaWVudCBjb2RlLg0KPg0KPlNpZ25l
ZC1vZmYtYnk6IEplZmYgTGF5dG9uIDxqZWZmLmxheXRvbkBwcmltYXJ5ZGF0YS5jb20+DQo+LS0t
DQo+IGZzL25mcy9jbGllbnQuYyAgICAgICAgIHwgIDIgKy0NCj4gZnMvbmZzL25mczRwcm9jLmMg
ICAgICAgfCAgMiArLQ0KPiBmcy9uZnMvbmZzNHhkci5jICAgICAgICB8IDQxICsrKysrKysrKysr
KystLS0tLS0tLS0tLS0tDQo+IGZzL25mcy9wbmZzLmMgICAgICAgICAgIHwgNzYgKysrKysrKysr
KysrKysrKysrKysrKysrKysrKysrKysrKysrKystLS0tLS0tLS0tLQ0KPiBpbmNsdWRlL2xpbnV4
L25mc194ZHIuaCB8ICAyICstDQo+IDUgZmlsZXMgY2hhbmdlZCwgODUgaW5zZXJ0aW9ucygrKSwg
MzggZGVsZXRpb25zKC0pDQo+DQo+ZGlmZiAtLWdpdCBhL2ZzL25mcy9jbGllbnQuYyBiL2ZzL25m
cy9jbGllbnQuYw0KPmluZGV4IDBjOTY1MjhkYjk0YS4uNTNiNDFmNGJkNDVhIDEwMDY0NA0KPi0t
LSBhL2ZzL25mcy9jbGllbnQuYw0KPisrKyBiL2ZzL25mcy9jbGllbnQuYw0KPkBAIC03ODcsNyAr
Nzg3LDcgQEAgaW50IG5mc19wcm9iZV9mc2luZm8oc3RydWN0IG5mc19zZXJ2ZXIgKnNlcnZlciwg
c3RydWN0IG5mc19maCAqbW50ZmgsIHN0cnVjdCBuZnMNCj4gCX0NCj4gDQo+IAlmc2luZm8uZmF0
dHIgPSBmYXR0cjsNCj4tCWZzaW5mby5sYXlvdXR0eXBlID0gMDsNCj4rCWZzaW5mby5sYXlvdXR0
eXBlcyA9IDA7DQo+IAllcnJvciA9IGNscC0+cnBjX29wcy0+ZnNpbmZvKHNlcnZlciwgbW50Zmgs
ICZmc2luZm8pOw0KPiAJaWYgKGVycm9yIDwgMCkNCj4gCQlnb3RvIG91dF9lcnJvcjsNCj5kaWZm
IC0tZ2l0IGEvZnMvbmZzL25mczRwcm9jLmMgYi9mcy9uZnMvbmZzNHByb2MuYw0KPmluZGV4IGRl
OTc1Njc3OTVhNS4uOTQ0NmFlZjg5YjQ4IDEwMDY0NA0KPi0tLSBhL2ZzL25mcy9uZnM0cHJvYy5j
DQo+KysrIGIvZnMvbmZzL25mczRwcm9jLmMNCj5AQCAtNDI1Miw3ICs0MjUyLDcgQEAgc3RhdGlj
IGludCBuZnM0X3Byb2NfZnNpbmZvKHN0cnVjdCBuZnNfc2VydmVyICpzZXJ2ZXIsIHN0cnVjdCBu
ZnNfZmggKmZoYW5kbGUsIHMNCj4gCWlmIChlcnJvciA9PSAwKSB7DQo+IAkJLyogYmxvY2sgbGF5
b3V0IGNoZWNrcyB0aGlzISAqLw0KPiAJCXNlcnZlci0+cG5mc19ibGtzaXplID0gZnNpbmZvLT5i
bGtzaXplOw0KPi0JCXNldF9wbmZzX2xheW91dGRyaXZlcihzZXJ2ZXIsIGZoYW5kbGUsIGZzaW5m
by0+bGF5b3V0dHlwZSk7DQo+KwkJc2V0X3BuZnNfbGF5b3V0ZHJpdmVyKHNlcnZlciwgZmhhbmRs
ZSwgZnNpbmZvLT5sYXlvdXR0eXBlcyk7DQo+IAl9DQo+IA0KPiAJcmV0dXJuIGVycm9yOw0KPmRp
ZmYgLS1naXQgYS9mcy9uZnMvbmZzNHhkci5jIGIvZnMvbmZzL25mczR4ZHIuYw0KPmluZGV4IDY2
MWU3NTNmZTFjOS4uODc2YTgwODAyYzFkIDEwMDY0NA0KPi0tLSBhL2ZzL25mcy9uZnM0eGRyLmMN
Cj4rKysgYi9mcy9uZnMvbmZzNHhkci5jDQo+QEAgLTQ3MjMsMzMgKzQ3MjMsMzYgQEAgc3RhdGlj
IGludCBkZWNvZGVfZ2V0ZmF0dHIoc3RydWN0IHhkcl9zdHJlYW0gKnhkciwgc3RydWN0IG5mc19m
YXR0ciAqZmF0dHIsDQo+ICAqIERlY29kZSBwb3RlbnRpYWxseSBtdWx0aXBsZSBsYXlvdXQgdHlw
ZXMuIEN1cnJlbnRseSB3ZSBvbmx5IHN1cHBvcnQNCj4gICogb25lIGxheW91dCBkcml2ZXIgcGVy
IGZpbGUgc3lzdGVtLg0KPiAgKi8NCj4tc3RhdGljIGludCBkZWNvZGVfZmlyc3RfcG5mc19sYXlv
dXRfdHlwZShzdHJ1Y3QgeGRyX3N0cmVhbSAqeGRyLA0KPi0JCQkJCSB1aW50MzJfdCAqbGF5b3V0
dHlwZSkNCj4rc3RhdGljIGludCBkZWNvZGVfcG5mc19sYXlvdXRfdHlwZXMoc3RydWN0IHhkcl9z
dHJlYW0gKnhkciwgdTMyICpsYXlvdXR0eXBlcykNCj4gew0KPiAJX19iZTMyICpwOw0KPiAJaW50
IG51bTsNCj4rCXUzMiB0eXBlOw0KPiANCj4gCXAgPSB4ZHJfaW5saW5lX2RlY29kZSh4ZHIsIDQp
Ow0KPiAJaWYgKHVubGlrZWx5KCFwKSkNCj4gCQlnb3RvIG91dF9vdmVyZmxvdzsNCj4gCW51bSA9
IGJlMzJfdG9fY3B1cChwKTsNCj4gDQo+LQkvKiBwTkZTIGlzIG5vdCBzdXBwb3J0ZWQgYnkgdGhl
IHVuZGVybHlpbmcgZmlsZSBzeXN0ZW0gKi8NCj4tCWlmIChudW0gPT0gMCkgew0KPi0JCSpsYXlv
dXR0eXBlID0gMDsNCj4tCQlyZXR1cm4gMDsNCj4tCX0NCj4tCWlmIChudW0gPiAxKQ0KPi0JCXBy
aW50ayhLRVJOX0lORk8gIk5GUzogJXM6IFdhcm5pbmc6IE11bHRpcGxlIHBORlMgbGF5b3V0ICIN
Cj4tCQkJImRyaXZlcnMgcGVyIGZpbGVzeXN0ZW0gbm90IHN1cHBvcnRlZFxuIiwgX19mdW5jX18p
Ow0KPisJKmxheW91dHR5cGVzID0gMDsNCj4gDQo+LQkvKiBEZWNvZGUgYW5kIHNldCBmaXJzdCBs
YXlvdXQgdHlwZSwgbW92ZSB4ZHItPnAgcGFzdCB1bnVzZWQgdHlwZXMgKi8NCj4tCXAgPSB4ZHJf
aW5saW5lX2RlY29kZSh4ZHIsIG51bSAqIDQpOw0KPi0JaWYgKHVubGlrZWx5KCFwKSkNCj4tCQln
b3RvIG91dF9vdmVyZmxvdzsNCj4tCSpsYXlvdXR0eXBlID0gYmUzMl90b19jcHVwKHApOw0KPisJ
Zm9yICg7IG51bTsgLS1udW0pIHsNCj4rCQlwID0geGRyX2lubGluZV9kZWNvZGUoeGRyLCA0KTsN
Cj4rDQo+KwkJaWYgKHVubGlrZWx5KCFwKSkNCj4rCQkJZ290byBvdXRfb3ZlcmZsb3c7DQo+Kw0K
PisJCXR5cGUgPSBiZTMyX3RvX2NwdXAocCk7DQo+Kw0KPisJCS8qIElnbm9yZSBhbnkgdGhhdCB3
ZSBkb24ndCB1bmRlcnN0YW5kICovDQo+KwkJaWYgKHVubGlrZWx5KHR5cGUgPj0gTEFZT1VUX1RZ
UEVfTUFYKSkNCg0KVGhpcyB3aWxsIGluIGVmZmVjdCBoYXJkIGNvZGUgdGhlIGxheW91dHMgdGhh
dCB0aGUgY2xpZW50IHN1cHBvcnRzLiBMQVlPVVRfVFlQRV9NQVggaXMgc29tZXRoaW5nIHRoYXQg
YXBwbGllcyB0byBrbmZzZCBvbmx5IGZvciBub3cuIExldOKAmXMgbm90IGxlYWsgaXQgaW50byB0
aGUgY2xpZW50LiBJIHN1Z2dlc3QganVzdCBtYWtpbmcgdGhpcyA4KnNpemVvZigqbGF5b3V0dHlw
ZXMpLg0KDQo+KwkJCWNvbnRpbnVlOw0KPisNCj4rCQkqbGF5b3V0dHlwZXMgfD0gMSA8PCB0eXBl
Ow0KPisJfQ0KPiAJcmV0dXJuIDA7DQo+IG91dF9vdmVyZmxvdzoNCj4rCSpsYXlvdXR0eXBlcyA9
IDA7DQo+IAlwcmludF9vdmVyZmxvd19tc2coX19mdW5jX18sIHhkcik7DQo+IAlyZXR1cm4gLUVJ
TzsNCj4gfQ0KPkBAIC00NzU5LDcgKzQ3NjIsNyBAQCBvdXRfb3ZlcmZsb3c6DQo+ICAqIE5vdGUg
d2UgbXVzdCBlbnN1cmUgdGhhdCBsYXlvdXR0eXBlIGlzIHNldCBpbiBhbnkgbm9uLWVycm9yIGNh
c2UuDQo+ICAqLw0KPiBzdGF0aWMgaW50IGRlY29kZV9hdHRyX3BuZnN0eXBlKHN0cnVjdCB4ZHJf
c3RyZWFtICp4ZHIsIHVpbnQzMl90ICpiaXRtYXAsDQo+LQkJCQl1aW50MzJfdCAqbGF5b3V0dHlw
ZSkNCj4rCQkJCV9fdTMyICpsYXlvdXR0eXBlcykNCj4gew0KPiAJaW50IHN0YXR1cyA9IDA7DQo+
IA0KPkBAIC00NzY3LDEwICs0NzcwLDEwIEBAIHN0YXRpYyBpbnQgZGVjb2RlX2F0dHJfcG5mc3R5
cGUoc3RydWN0IHhkcl9zdHJlYW0gKnhkciwgdWludDMyX3QgKmJpdG1hcCwNCj4gCWlmICh1bmxp
a2VseShiaXRtYXBbMV0gJiAoRkFUVFI0X1dPUkQxX0ZTX0xBWU9VVF9UWVBFUyAtIDFVKSkpDQo+
IAkJcmV0dXJuIC1FSU87DQo+IAlpZiAoYml0bWFwWzFdICYgRkFUVFI0X1dPUkQxX0ZTX0xBWU9V
VF9UWVBFUykgew0KPi0JCXN0YXR1cyA9IGRlY29kZV9maXJzdF9wbmZzX2xheW91dF90eXBlKHhk
ciwgbGF5b3V0dHlwZSk7DQo+KwkJc3RhdHVzID0gZGVjb2RlX3BuZnNfbGF5b3V0X3R5cGVzKHhk
ciwgbGF5b3V0dHlwZXMpOw0KPiAJCWJpdG1hcFsxXSAmPSB+RkFUVFI0X1dPUkQxX0ZTX0xBWU9V
VF9UWVBFUzsNCj4gCX0gZWxzZQ0KPi0JCSpsYXlvdXR0eXBlID0gMDsNCj4rCQkqbGF5b3V0dHlw
ZXMgPSAwOw0KPiAJcmV0dXJuIHN0YXR1czsNCj4gfQ0KPiANCj5AQCAtNDg1MSw3ICs0ODU0LDcg
QEAgc3RhdGljIGludCBkZWNvZGVfZnNpbmZvKHN0cnVjdCB4ZHJfc3RyZWFtICp4ZHIsIHN0cnVj
dCBuZnNfZnNpbmZvICpmc2luZm8pDQo+IAlzdGF0dXMgPSBkZWNvZGVfYXR0cl90aW1lX2RlbHRh
KHhkciwgYml0bWFwLCAmZnNpbmZvLT50aW1lX2RlbHRhKTsNCj4gCWlmIChzdGF0dXMgIT0gMCkN
Cj4gCQlnb3RvIHhkcl9lcnJvcjsNCj4tCXN0YXR1cyA9IGRlY29kZV9hdHRyX3BuZnN0eXBlKHhk
ciwgYml0bWFwLCAmZnNpbmZvLT5sYXlvdXR0eXBlKTsNCj4rCXN0YXR1cyA9IGRlY29kZV9hdHRy
X3BuZnN0eXBlKHhkciwgYml0bWFwLCAmZnNpbmZvLT5sYXlvdXR0eXBlcyk7DQo+IAlpZiAoc3Rh
dHVzICE9IDApDQo+IAkJZ290byB4ZHJfZXJyb3I7DQo+IA0KPmRpZmYgLS1naXQgYS9mcy9uZnMv
cG5mcy5jIGIvZnMvbmZzL3BuZnMuYw0KPmluZGV4IDBjN2UwZDQ1YTRkZS4uMjBiN2IxZjRlMDQx
IDEwMDY0NA0KPi0tLSBhL2ZzL25mcy9wbmZzLmMNCj4rKysgYi9mcy9uZnMvcG5mcy5jDQo+QEAg
LTcwLDcgKzcwLDcgQEAgb3V0Og0KPiB9DQo+IA0KPiBzdGF0aWMgc3RydWN0IHBuZnNfbGF5b3V0
ZHJpdmVyX3R5cGUgKg0KPi1maW5kX3BuZnNfZHJpdmVyKHUzMiBpZCkNCj4rX19maW5kX3BuZnNf
ZHJpdmVyKHUzMiBpZCkNCj4gew0KPiAJc3RydWN0IHBuZnNfbGF5b3V0ZHJpdmVyX3R5cGUgKmxv
Y2FsOw0KPiANCj5AQCAtODQsNiArODQsMjIgQEAgZmluZF9wbmZzX2RyaXZlcih1MzIgaWQpDQo+
IAlyZXR1cm4gbG9jYWw7DQo+IH0NCj4gDQo+K3N0YXRpYyBzdHJ1Y3QgcG5mc19sYXlvdXRkcml2
ZXJfdHlwZSAqDQo+K2ZpbmRfcG5mc19kcml2ZXIodTMyIGlkKQ0KPit7DQo+KwlzdHJ1Y3QgcG5m
c19sYXlvdXRkcml2ZXJfdHlwZSAqbGRfdHlwZTsNCj4rDQo+KwlsZF90eXBlID0gX19maW5kX3Bu
ZnNfZHJpdmVyKGlkKTsNCj4rCWlmICghbGRfdHlwZSkgew0KPisJCXJlcXVlc3RfbW9kdWxlKCIl
cy0ldSIsIExBWU9VVF9ORlNWNF8xX01PRFVMRV9QUkVGSVgsIGlkKTsNCj4rCQlsZF90eXBlID0g
X19maW5kX3BuZnNfZHJpdmVyKGlkKTsNCj4rCQlpZiAoIWxkX3R5cGUpDQo+KwkJCWRwcmludGso
IiVzOiBObyBwTkZTIG1vZHVsZSBmb3VuZCBmb3IgJXUuXG4iLA0KPisJCQkJX19mdW5jX18sIGlk
KTsNCj4rCX0NCj4rCXJldHVybiBsZF90eXBlOw0KPit9DQo+Kw0KPiB2b2lkDQo+IHVuc2V0X3Bu
ZnNfbGF5b3V0ZHJpdmVyKHN0cnVjdCBuZnNfc2VydmVyICpuZnNzKQ0KPiB7DQo+QEAgLTEwMiw0
NCArMTE4LDcyIEBAIHVuc2V0X3BuZnNfbGF5b3V0ZHJpdmVyKHN0cnVjdCBuZnNfc2VydmVyICpu
ZnNzKQ0KPiAgKiBUcnkgdG8gc2V0IHRoZSBzZXJ2ZXIncyBwbmZzIG1vZHVsZSB0byB0aGUgcG5m
cyBsYXlvdXQgdHlwZSBzcGVjaWZpZWQgYnkgaWQuDQo+ICAqIEN1cnJlbnRseSBvbmx5IG9uZSBw
TkZTIGxheW91dCBkcml2ZXIgcGVyIGZpbGVzeXN0ZW0gaXMgc3VwcG9ydGVkLg0KPiAgKg0KPi0g
KiBAaWQgbGF5b3V0IHR5cGUuIFplcm8gKGlsbGVnYWwgbGF5b3V0IHR5cGUpIGluZGljYXRlcyBw
TkZTIG5vdCBpbiB1c2UuDQo+KyAqIEBsYXlvdXR0eXBlczogYml0ZmllbGQgc2hvd2luZyB3aGF0
IGxheW91dCB0eXBlcyBzZXJ2ZXIgc3VwcG9ydHMNCj4gICovDQo+IHZvaWQNCj4gc2V0X3BuZnNf
bGF5b3V0ZHJpdmVyKHN0cnVjdCBuZnNfc2VydmVyICpzZXJ2ZXIsIGNvbnN0IHN0cnVjdCBuZnNf
ZmggKm1udGZoLA0KPi0JCSAgICAgIHUzMiBpZCkNCj4rCQkgICAgICB1MzIgbGF5b3V0dHlwZXMp
DQo+IHsNCj4gCXN0cnVjdCBwbmZzX2xheW91dGRyaXZlcl90eXBlICpsZF90eXBlID0gTlVMTDsN
Cj4gDQo+LQlpZiAoaWQgPT0gMCkNCj4rCWlmIChsYXlvdXR0eXBlcyA9PSAwKQ0KPiAJCWdvdG8g
b3V0X25vX2RyaXZlcjsNCj4gCWlmICghKHNlcnZlci0+bmZzX2NsaWVudC0+Y2xfZXhjaGFuZ2Vf
ZmxhZ3MgJg0KPiAJCSAoRVhDSEdJRDRfRkxBR19VU0VfTk9OX1BORlMgfCBFWENIR0lENF9GTEFH
X1VTRV9QTkZTX01EUykpKSB7DQo+LQkJcHJpbnRrKEtFUk5fRVJSICJORlM6ICVzOiBpZCAldSBj
bF9leGNoYW5nZV9mbGFncyAweCV4XG4iLA0KPi0JCQlfX2Z1bmNfXywgaWQsIHNlcnZlci0+bmZz
X2NsaWVudC0+Y2xfZXhjaGFuZ2VfZmxhZ3MpOw0KPisJCXByaW50ayhLRVJOX0VSUiAiTkZTOiAl
czogbGF5b3V0dHlwZXMgMHgleCBjbF9leGNoYW5nZV9mbGFncyAweCV4XG4iLA0KPisJCQlfX2Z1
bmNfXywgbGF5b3V0dHlwZXMsIHNlcnZlci0+bmZzX2NsaWVudC0+Y2xfZXhjaGFuZ2VfZmxhZ3Mp
Ow0KPiAJCWdvdG8gb3V0X25vX2RyaXZlcjsNCj4gCX0NCj4tCWxkX3R5cGUgPSBmaW5kX3BuZnNf
ZHJpdmVyKGlkKTsNCj4tCWlmICghbGRfdHlwZSkgew0KPi0JCXJlcXVlc3RfbW9kdWxlKCIlcy0l
dSIsIExBWU9VVF9ORlNWNF8xX01PRFVMRV9QUkVGSVgsIGlkKTsNCj4tCQlsZF90eXBlID0gZmlu
ZF9wbmZzX2RyaXZlcihpZCk7DQo+LQkJaWYgKCFsZF90eXBlKSB7DQo+LQkJCWRwcmludGsoIiVz
OiBObyBwTkZTIG1vZHVsZSBmb3VuZCBmb3IgJXUuXG4iLA0KPi0JCQkJX19mdW5jX18sIGlkKTsN
Cj4rDQo+KwkvKg0KPisJICogU2VlIGlmIG9uZSBvZiB0aGUgbGF5b3V0IHR5cGVzIHRoYXQgd2Ug
Z290IGhhbmRlZCBpcyB1c2FibGUuIFdlDQo+KwkgKiBhdHRlbXB0IGluIGEgaGFyZGNvZGVkIG9y
ZGVyIG9mIHByZWZlcmVuY2UsIGluIG9yZGVyIG9mIChhc3N1bWVkKQ0KPisJICogZGVjcmVhc2lu
ZyBzcGVlZHMgYW5kIGZ1bmN0aW9uYWxpdHkuDQo+KwkgKg0KPisJICogRklYTUU6IHNob3VsZCB0
aGlzIG9yZGVyIGJlIGNvbmZpZ3VyYWJsZSBpbiBzb21lIGZhc2hpb24/DQo+KwkgKi8NCj4rCWlm
IChsYXlvdXR0eXBlcyAmICgxIDw8IExBWU9VVF9TQ1NJKSkgew0KPisJCWxkX3R5cGUgPSBmaW5k
X3BuZnNfZHJpdmVyKExBWU9VVF9TQ1NJKTsNCj4rCQlpZiAobGRfdHlwZSkNCj4rCQkJZ290byBz
ZXRfZHJpdmVyOw0KPisJfQ0KPisNCj4rCWlmIChsYXlvdXR0eXBlcyAmICgxIDw8IExBWU9VVF9C
TE9DS19WT0xVTUUpKSB7DQo+KwkJbGRfdHlwZSA9IGZpbmRfcG5mc19kcml2ZXIoTEFZT1VUX0JM
T0NLX1ZPTFVNRSk7DQo+KwkJaWYgKGxkX3R5cGUpDQo+KwkJCWdvdG8gc2V0X2RyaXZlcjsNCj4r
CX0NCj4rDQo+KwlpZiAobGF5b3V0dHlwZXMgJiAoMSA8PCBMQVlPVVRfT1NEMl9PQkpFQ1RTKSkg
ew0KPisJCWxkX3R5cGUgPSBmaW5kX3BuZnNfZHJpdmVyKExBWU9VVF9PU0QyX09CSkVDVFMpOw0K
PisJCWlmIChsZF90eXBlKQ0KPisJCQlnb3RvIHNldF9kcml2ZXI7DQo+Kwl9DQo+Kw0KPisJaWYg
KGxheW91dHR5cGVzICYgKDEgPDwgTEFZT1VUX0ZMRVhfRklMRVMpKSB7DQo+KwkJbGRfdHlwZSA9
IGZpbmRfcG5mc19kcml2ZXIoTEFZT1VUX0ZMRVhfRklMRVMpOw0KPisJCWlmIChsZF90eXBlKQ0K
PisJCQlnb3RvIHNldF9kcml2ZXI7DQo+Kwl9DQo+Kw0KPisJaWYgKGxheW91dHR5cGVzICYgKDEg
PDwgTEFZT1VUX05GU1Y0XzFfRklMRVMpKSB7DQo+KwkJbGRfdHlwZSA9IGZpbmRfcG5mc19kcml2
ZXIoTEFZT1VUX05GU1Y0XzFfRklMRVMpOw0KPisJCWlmICghbGRfdHlwZSkNCj4gCQkJZ290byBv
dXRfbm9fZHJpdmVyOw0KPi0JCX0NCj4gCX0NCj4rc2V0X2RyaXZlcjoNCj4gCXNlcnZlci0+cG5m
c19jdXJyX2xkID0gbGRfdHlwZTsNCj4gCWlmIChsZF90eXBlLT5zZXRfbGF5b3V0ZHJpdmVyDQo+
IAkgICAgJiYgbGRfdHlwZS0+c2V0X2xheW91dGRyaXZlcihzZXJ2ZXIsIG1udGZoKSkgew0KPiAJ
CXByaW50ayhLRVJOX0VSUiAiTkZTOiAlczogRXJyb3IgaW5pdGlhbGl6aW5nIHBORlMgbGF5b3V0
ICINCj4tCQkJImRyaXZlciAldS5cbiIsIF9fZnVuY19fLCBpZCk7DQo+KwkJCSJkcml2ZXIgJXUu
XG4iLCBfX2Z1bmNfXywgbGRfdHlwZS0+aWQpOw0KPiAJCW1vZHVsZV9wdXQobGRfdHlwZS0+b3du
ZXIpOw0KPiAJCWdvdG8gb3V0X25vX2RyaXZlcjsNCj4gCX0NCj4gCS8qIEJ1bXAgdGhlIE1EUyBj
b3VudCAqLw0KPiAJYXRvbWljX2luYygmc2VydmVyLT5uZnNfY2xpZW50LT5jbF9tZHNfY291bnQp
Ow0KPiANCj4tCWRwcmludGsoIiVzOiBwTkZTIG1vZHVsZSBmb3IgJXUgc2V0XG4iLCBfX2Z1bmNf
XywgaWQpOw0KPisJZHByaW50aygiJXM6IHBORlMgbW9kdWxlIGZvciAldSBzZXRcbiIsIF9fZnVu
Y19fLCBsZF90eXBlLT5pZCk7DQo+IAlyZXR1cm47DQo+IA0KPiBvdXRfbm9fZHJpdmVyOg0KPmRp
ZmYgLS1naXQgYS9pbmNsdWRlL2xpbnV4L25mc194ZHIuaCBiL2luY2x1ZGUvbGludXgvbmZzX3hk
ci5oDQo+aW5kZXggYzMwNGExMWI1YjFhLi4xZjZiYjU5ZjA1ZjIgMTAwNjQ0DQo+LS0tIGEvaW5j
bHVkZS9saW51eC9uZnNfeGRyLmgNCj4rKysgYi9pbmNsdWRlL2xpbnV4L25mc194ZHIuaA0KPkBA
IC0xMzksNyArMTM5LDcgQEAgc3RydWN0IG5mc19mc2luZm8gew0KPiAJX191NjQJCQltYXhmaWxl
c2l6ZTsNCj4gCXN0cnVjdCB0aW1lc3BlYwkJdGltZV9kZWx0YTsgLyogc2VydmVyIHRpbWUgZ3Jh
bnVsYXJpdHkgKi8NCj4gCV9fdTMyCQkJbGVhc2VfdGltZTsgLyogaW4gc2Vjb25kcyAqLw0KPi0J
X191MzIJCQlsYXlvdXR0eXBlOyAvKiBzdXBwb3J0ZWQgcG5mcyBsYXlvdXQgZHJpdmVyICovDQo+
KwlfX3UzMgkJCWxheW91dHR5cGVzOyAvKiBzdXBwb3J0ZWQgcG5mcyBsYXlvdXQgZHJpdmVycyAq
Lw0KPiAJX191MzIJCQlibGtzaXplOyAvKiBwcmVmZXJyZWQgcG5mcyBpbyBibG9jayBzaXplICov
DQo+IAlfX3UzMgkJCWNsb25lX2Jsa3NpemU7IC8qIGdyYW51bGFyaXR5IG9mIGEgQ0xPTkUgb3Bl
cmF0aW9uICovDQo+IH07DQo+LS0gDQo+Mi41LjUNCj4NCg0K


2016-05-31 21:09:47

by Jeff Layton

[permalink] [raw]
Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers that hand out multiple layout types

On Tue, 2016-05-31 at 16:03 +0000, Trond Myklebust wrote:
>
> On 5/30/16, 12:35, "Jeff Layton" <[email protected]> wrote:
>
> > Allow the client to deal with servers that hand out multiple layout
> > types for the same filesystem. When this happens, we pick the "best" one,
> > based on a hardcoded assumed order in the client code.
> >
> > Signed-off-by: Jeff Layton <[email protected]>
> > ---
> > fs/nfs/client.c         |  2 +-
> > fs/nfs/nfs4proc.c       |  2 +-
> > fs/nfs/nfs4xdr.c        | 41 +++++++++++++-------------
> > fs/nfs/pnfs.c           | 76 ++++++++++++++++++++++++++++++++++++++-----------
> > include/linux/nfs_xdr.h |  2 +-
> > 5 files changed, 85 insertions(+), 38 deletions(-)
> >
> > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > index 0c96528db94a..53b41f4bd45a 100644
> > --- a/fs/nfs/client.c
> > +++ b/fs/nfs/client.c
> > @@ -787,7 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
> > }
> >
> > fsinfo.fattr = fattr;
> > - fsinfo.layouttype = 0;
> > + fsinfo.layouttypes = 0;
> > error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
> > if (error < 0)
> > goto out_error;
> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > index de97567795a5..9446aef89b48 100644
> > --- a/fs/nfs/nfs4proc.c
> > +++ b/fs/nfs/nfs4proc.c
> > @@ -4252,7 +4252,7 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s
> > if (error == 0) {
> > /* block layout checks this! */
> > server->pnfs_blksize = fsinfo->blksize;
> > - set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
> > + set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttypes);
> > }
> >
> > return error;
> > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> > index 661e753fe1c9..876a80802c1d 100644
> > --- a/fs/nfs/nfs4xdr.c
> > +++ b/fs/nfs/nfs4xdr.c
> > @@ -4723,33 +4723,36 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
> >  * Decode potentially multiple layout types. Currently we only support
> >  * one layout driver per file system.
> >  */
> > -static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
> > -  uint32_t *layouttype)
> > +static int decode_pnfs_layout_types(struct xdr_stream *xdr, u32 *layouttypes)
> > {
> > __be32 *p;
> > int num;
> > + u32 type;
> >
> > p = xdr_inline_decode(xdr, 4);
> > if (unlikely(!p))
> > goto out_overflow;
> > num = be32_to_cpup(p);
> >
> > - /* pNFS is not supported by the underlying file system */
> > - if (num == 0) {
> > - *layouttype = 0;
> > - return 0;
> > - }
> > - if (num > 1)
> > - printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
> > - "drivers per filesystem not supported\n", __func__);
> > + *layouttypes = 0;
> >
> > - /* Decode and set first layout type, move xdr->p past unused types */
> > - p = xdr_inline_decode(xdr, num * 4);
> > - if (unlikely(!p))
> > - goto out_overflow;
> > - *layouttype = be32_to_cpup(p);
> > + for (; num; --num) {
> > + p = xdr_inline_decode(xdr, 4);
> > +
> > + if (unlikely(!p))
> > + goto out_overflow;
> > +
> > + type = be32_to_cpup(p);
> > +
> > + /* Ignore any that we don't understand */
> > + if (unlikely(type >= LAYOUT_TYPE_MAX))
>
> This will in effect hard code the layouts that the client supports.
> LAYOUT_TYPE_MAX is something that applies to knfsd only for now.
> Let’s not leak it into the client. I suggest just making this
> 8*sizeof(*layouttypes).
>

Fair enough. I'll make that change.

That said...LAYOUT_TYPE_MAX is a value in the pnfs_layouttype enum, and
that enum is used in both the client and the server code, AFAICT. If we
add a new LAYOUT_* value to that enum for the client, then we'll need
to increase that value anyway. So, I'm not sure I understand how this
limits the client in any way...


> > + continue;
> > +
> > + *layouttypes |= 1 << type;
> > + }
> > return 0;
> > out_overflow:
> > + *layouttypes = 0;
> > print_overflow_msg(__func__, xdr);
> > return -EIO;
> > }
> > @@ -4759,7 +4762,7 @@ out_overflow:
> >  * Note we must ensure that layouttype is set in any non-error case.
> >  */
> > static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
> > - uint32_t *layouttype)
> > + __u32 *layouttypes)
> > {
> > int status = 0;
> >
> > @@ -4767,10 +4770,10 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
> > if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
> > return -EIO;
> > if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {
> > - status = decode_first_pnfs_layout_type(xdr, layouttype);
> > + status = decode_pnfs_layout_types(xdr, layouttypes);
> > bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
> > } else
> > - *layouttype = 0;
> > + *layouttypes = 0;
> > return status;
> > }
> >
> > @@ -4851,7 +4854,7 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
> > status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
> > if (status != 0)
> > goto xdr_error;
> > - status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
> > + status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttypes);
> > if (status != 0)
> > goto xdr_error;
> >
> > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> > index 0c7e0d45a4de..20b7b1f4e041 100644
> > --- a/fs/nfs/pnfs.c
> > +++ b/fs/nfs/pnfs.c
> > @@ -70,7 +70,7 @@ out:
> > }
> >
> > static struct pnfs_layoutdriver_type *
> > -find_pnfs_driver(u32 id)
> > +__find_pnfs_driver(u32 id)
> > {
> > struct pnfs_layoutdriver_type *local;
> >
> > @@ -84,6 +84,22 @@ find_pnfs_driver(u32 id)
> > return local;
> > }
> >
> > +static struct pnfs_layoutdriver_type *
> > +find_pnfs_driver(u32 id)
> > +{
> > + struct pnfs_layoutdriver_type *ld_type;
> > +
> > + ld_type = __find_pnfs_driver(id);
> > + if (!ld_type) {
> > + request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
> > + ld_type = __find_pnfs_driver(id);
> > + if (!ld_type)
> > + dprintk("%s: No pNFS module found for %u.\n",
> > + __func__, id);
> > + }
> > + return ld_type;
> > +}
> > +
> > void
> > unset_pnfs_layoutdriver(struct nfs_server *nfss)
> > {
> > @@ -102,44 +118,72 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
> >  * Try to set the server's pnfs module to the pnfs layout type specified by id.
> >  * Currently only one pNFS layout driver per filesystem is supported.
> >  *
> > - * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
> > + * @layouttypes: bitfield showing what layout types server supports
> >  */
> > void
> > set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
> > -       u32 id)
> > +       u32 layouttypes)
> > {
> > struct pnfs_layoutdriver_type *ld_type = NULL;
> >
> > - if (id == 0)
> > + if (layouttypes == 0)
> > goto out_no_driver;
> > if (!(server->nfs_client->cl_exchange_flags &
> >  (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
> > - printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",
> > - __func__, id, server->nfs_client->cl_exchange_flags);
> > + printk(KERN_ERR "NFS: %s: layouttypes 0x%x cl_exchange_flags 0x%x\n",
> > + __func__, layouttypes, server->nfs_client->cl_exchange_flags);
> > goto out_no_driver;
> > }
> > - ld_type = find_pnfs_driver(id);
> > - if (!ld_type) {
> > - request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
> > - ld_type = find_pnfs_driver(id);
> > - if (!ld_type) {
> > - dprintk("%s: No pNFS module found for %u.\n",
> > - __func__, id);
> > +
> > + /*
> > +  * See if one of the layout types that we got handed is usable. We
> > +  * attempt in a hardcoded order of preference, in order of (assumed)
> > +  * decreasing speeds and functionality.
> > +  *
> > +  * FIXME: should this order be configurable in some fashion?
> > +  */
> > + if (layouttypes & (1 << LAYOUT_SCSI)) {
> > + ld_type = find_pnfs_driver(LAYOUT_SCSI);
> > + if (ld_type)
> > + goto set_driver;
> > + }
> > +
> > + if (layouttypes & (1 << LAYOUT_BLOCK_VOLUME)) {
> > + ld_type = find_pnfs_driver(LAYOUT_BLOCK_VOLUME);
> > + if (ld_type)
> > + goto set_driver;
> > + }
> > +
> > + if (layouttypes & (1 << LAYOUT_OSD2_OBJECTS)) {
> > + ld_type = find_pnfs_driver(LAYOUT_OSD2_OBJECTS);
> > + if (ld_type)
> > + goto set_driver;
> > + }
> > +
> > + if (layouttypes & (1 << LAYOUT_FLEX_FILES)) {
> > + ld_type = find_pnfs_driver(LAYOUT_FLEX_FILES);
> > + if (ld_type)
> > + goto set_driver;
> > + }
> > +
> > + if (layouttypes & (1 << LAYOUT_NFSV4_1_FILES)) {
> > + ld_type = find_pnfs_driver(LAYOUT_NFSV4_1_FILES);
> > + if (!ld_type)
> > goto out_no_driver;
> > - }
> > }
> > +set_driver:
> > server->pnfs_curr_ld = ld_type;
> > if (ld_type->set_layoutdriver
> >     && ld_type->set_layoutdriver(server, mntfh)) {
> > printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
> > - "driver %u.\n", __func__, id);
> > + "driver %u.\n", __func__, ld_type->id);
> > module_put(ld_type->owner);
> > goto out_no_driver;
> > }
> > /* Bump the MDS count */
> > atomic_inc(&server->nfs_client->cl_mds_count);
> >
> > - dprintk("%s: pNFS module for %u set\n", __func__, id);
> > + dprintk("%s: pNFS module for %u set\n", __func__, ld_type->id);
> > return;
> >
> > out_no_driver:
> > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> > index c304a11b5b1a..1f6bb59f05f2 100644
> > --- a/include/linux/nfs_xdr.h
> > +++ b/include/linux/nfs_xdr.h
> > @@ -139,7 +139,7 @@ struct nfs_fsinfo {
> > __u64 maxfilesize;
> > struct timespec time_delta; /* server time granularity */
> > __u32 lease_time; /* in seconds */
> > - __u32 layouttype; /* supported pnfs layout driver */
> > + __u32 layouttypes; /* supported pnfs layout drivers */
> > __u32 blksize; /* preferred pnfs io block size */
> > __u32 clone_blksize; /* granularity of a CLONE operation */
> > };
> > -- 
> > 2.5.5
> >
>
> NrybXǧv^)޺{.n+{"^nrzh&Gh(階ݢj"mzޖfh~m
--
Jeff Layton <[email protected]>

2016-05-31 21:41:54

by Trond Myklebust

[permalink] [raw]
Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers that hand out multiple layout types

DQoNCk9uIDUvMzEvMTYsIDE3OjA5LCAiSmVmZiBMYXl0b24iIDxqbGF5dG9uQHBvb2NoaWVyZWRz
Lm5ldD4gd3JvdGU6DQoNCj5PbiBUdWUsIDIwMTYtMDUtMzEgYXQgMTY6MDMgKzAwMDAsIFRyb25k
IE15a2xlYnVzdCB3cm90ZToNCj4+IA0KPj4gT24gNS8zMC8xNiwgMTI6MzUsICJKZWZmIExheXRv
biIgPGpsYXl0b25AcG9vY2hpZXJlZHMubmV0PiB3cm90ZToNCj4+IA0KPj4gPiBBbGxvdyB0aGUg
Y2xpZW50IHRvIGRlYWwgd2l0aCBzZXJ2ZXJzIHRoYXQgaGFuZCBvdXQgbXVsdGlwbGUgbGF5b3V0
DQo+PiA+IHR5cGVzIGZvciB0aGUgc2FtZSBmaWxlc3lzdGVtLiBXaGVuIHRoaXMgaGFwcGVucywg
d2UgcGljayB0aGUgImJlc3QiIG9uZSwNCj4+ID4gYmFzZWQgb24gYSBoYXJkY29kZWQgYXNzdW1l
ZCBvcmRlciBpbiB0aGUgY2xpZW50IGNvZGUuDQo+PiA+IA0KPj4gPiBTaWduZWQtb2ZmLWJ5OiBK
ZWZmIExheXRvbiA8amVmZi5sYXl0b25AcHJpbWFyeWRhdGEuY29tPg0KPj4gPiAtLS0NCj4+ID4g
ZnMvbmZzL2NsaWVudC5jICAgICAgICAgfCAgMiArLQ0KPj4gPiBmcy9uZnMvbmZzNHByb2MuYyAg
ICAgICB8ICAyICstDQo+PiA+IGZzL25mcy9uZnM0eGRyLmMgICAgICAgIHwgNDEgKysrKysrKysr
KysrKy0tLS0tLS0tLS0tLS0NCj4+ID4gZnMvbmZzL3BuZnMuYyAgICAgICAgICAgfCA3NiArKysr
KysrKysrKysrKysrKysrKysrKysrKysrKysrKysrKysrKy0tLS0tLS0tLS0tDQo+PiA+IGluY2x1
ZGUvbGludXgvbmZzX3hkci5oIHwgIDIgKy0NCj4+ID4gNSBmaWxlcyBjaGFuZ2VkLCA4NSBpbnNl
cnRpb25zKCspLCAzOCBkZWxldGlvbnMoLSkNCj4+ID4gDQo+PiA+IGRpZmYgLS1naXQgYS9mcy9u
ZnMvY2xpZW50LmMgYi9mcy9uZnMvY2xpZW50LmMNCj4+ID4gaW5kZXggMGM5NjUyOGRiOTRhLi41
M2I0MWY0YmQ0NWEgMTAwNjQ0DQo+PiA+IC0tLSBhL2ZzL25mcy9jbGllbnQuYw0KPj4gPiArKysg
Yi9mcy9uZnMvY2xpZW50LmMNCj4+ID4gQEAgLTc4Nyw3ICs3ODcsNyBAQCBpbnQgbmZzX3Byb2Jl
X2ZzaW5mbyhzdHJ1Y3QgbmZzX3NlcnZlciAqc2VydmVyLCBzdHJ1Y3QgbmZzX2ZoICptbnRmaCwg
c3RydWN0IG5mcw0KPj4gPiAJfQ0KPj4gPiANCj4+ID4gCWZzaW5mby5mYXR0ciA9IGZhdHRyOw0K
Pj4gPiAtCWZzaW5mby5sYXlvdXR0eXBlID0gMDsNCj4+ID4gKwlmc2luZm8ubGF5b3V0dHlwZXMg
PSAwOw0KPj4gPiAJZXJyb3IgPSBjbHAtPnJwY19vcHMtPmZzaW5mbyhzZXJ2ZXIsIG1udGZoLCAm
ZnNpbmZvKTsNCj4+ID4gCWlmIChlcnJvciA8IDApDQo+PiA+IAkJZ290byBvdXRfZXJyb3I7DQo+
PiA+IGRpZmYgLS1naXQgYS9mcy9uZnMvbmZzNHByb2MuYyBiL2ZzL25mcy9uZnM0cHJvYy5jDQo+
PiA+IGluZGV4IGRlOTc1Njc3OTVhNS4uOTQ0NmFlZjg5YjQ4IDEwMDY0NA0KPj4gPiAtLS0gYS9m
cy9uZnMvbmZzNHByb2MuYw0KPj4gPiArKysgYi9mcy9uZnMvbmZzNHByb2MuYw0KPj4gPiBAQCAt
NDI1Miw3ICs0MjUyLDcgQEAgc3RhdGljIGludCBuZnM0X3Byb2NfZnNpbmZvKHN0cnVjdCBuZnNf
c2VydmVyICpzZXJ2ZXIsIHN0cnVjdCBuZnNfZmggKmZoYW5kbGUsIHMNCj4+ID4gCWlmIChlcnJv
ciA9PSAwKSB7DQo+PiA+IAkJLyogYmxvY2sgbGF5b3V0IGNoZWNrcyB0aGlzISAqLw0KPj4gPiAJ
CXNlcnZlci0+cG5mc19ibGtzaXplID0gZnNpbmZvLT5ibGtzaXplOw0KPj4gPiAtCQlzZXRfcG5m
c19sYXlvdXRkcml2ZXIoc2VydmVyLCBmaGFuZGxlLCBmc2luZm8tPmxheW91dHR5cGUpOw0KPj4g
PiArCQlzZXRfcG5mc19sYXlvdXRkcml2ZXIoc2VydmVyLCBmaGFuZGxlLCBmc2luZm8tPmxheW91
dHR5cGVzKTsNCj4+ID4gCX0NCj4+ID4gDQo+PiA+IAlyZXR1cm4gZXJyb3I7DQo+PiA+IGRpZmYg
LS1naXQgYS9mcy9uZnMvbmZzNHhkci5jIGIvZnMvbmZzL25mczR4ZHIuYw0KPj4gPiBpbmRleCA2
NjFlNzUzZmUxYzkuLjg3NmE4MDgwMmMxZCAxMDA2NDQNCj4+ID4gLS0tIGEvZnMvbmZzL25mczR4
ZHIuYw0KPj4gPiArKysgYi9mcy9uZnMvbmZzNHhkci5jDQo+PiA+IEBAIC00NzIzLDMzICs0NzIz
LDM2IEBAIHN0YXRpYyBpbnQgZGVjb2RlX2dldGZhdHRyKHN0cnVjdCB4ZHJfc3RyZWFtICp4ZHIs
IHN0cnVjdCBuZnNfZmF0dHIgKmZhdHRyLA0KPj4gPiAgKiBEZWNvZGUgcG90ZW50aWFsbHkgbXVs
dGlwbGUgbGF5b3V0IHR5cGVzLiBDdXJyZW50bHkgd2Ugb25seSBzdXBwb3J0DQo+PiA+ICAqIG9u
ZSBsYXlvdXQgZHJpdmVyIHBlciBmaWxlIHN5c3RlbS4NCj4+ID4gICovDQo+PiA+IC1zdGF0aWMg
aW50IGRlY29kZV9maXJzdF9wbmZzX2xheW91dF90eXBlKHN0cnVjdCB4ZHJfc3RyZWFtICp4ZHIs
DQo+PiA+IC0JCQkJCSB1aW50MzJfdCAqbGF5b3V0dHlwZSkNCj4+ID4gK3N0YXRpYyBpbnQgZGVj
b2RlX3BuZnNfbGF5b3V0X3R5cGVzKHN0cnVjdCB4ZHJfc3RyZWFtICp4ZHIsIHUzMiAqbGF5b3V0
dHlwZXMpDQo+PiA+IHsNCj4+ID4gCV9fYmUzMiAqcDsNCj4+ID4gCWludCBudW07DQo+PiA+ICsJ
dTMyIHR5cGU7DQo+PiA+IA0KPj4gPiAJcCA9IHhkcl9pbmxpbmVfZGVjb2RlKHhkciwgNCk7DQo+
PiA+IAlpZiAodW5saWtlbHkoIXApKQ0KPj4gPiAJCWdvdG8gb3V0X292ZXJmbG93Ow0KPj4gPiAJ
bnVtID0gYmUzMl90b19jcHVwKHApOw0KPj4gPiANCj4+ID4gLQkvKiBwTkZTIGlzIG5vdCBzdXBw
b3J0ZWQgYnkgdGhlIHVuZGVybHlpbmcgZmlsZSBzeXN0ZW0gKi8NCj4+ID4gLQlpZiAobnVtID09
IDApIHsNCj4+ID4gLQkJKmxheW91dHR5cGUgPSAwOw0KPj4gPiAtCQlyZXR1cm4gMDsNCj4+ID4g
LQl9DQo+PiA+IC0JaWYgKG51bSA+IDEpDQo+PiA+IC0JCXByaW50ayhLRVJOX0lORk8gIk5GUzog
JXM6IFdhcm5pbmc6IE11bHRpcGxlIHBORlMgbGF5b3V0ICINCj4+ID4gLQkJCSJkcml2ZXJzIHBl
ciBmaWxlc3lzdGVtIG5vdCBzdXBwb3J0ZWRcbiIsIF9fZnVuY19fKTsNCj4+ID4gKwkqbGF5b3V0
dHlwZXMgPSAwOw0KPj4gPiANCj4+ID4gLQkvKiBEZWNvZGUgYW5kIHNldCBmaXJzdCBsYXlvdXQg
dHlwZSwgbW92ZSB4ZHItPnAgcGFzdCB1bnVzZWQgdHlwZXMgKi8NCj4+ID4gLQlwID0geGRyX2lu
bGluZV9kZWNvZGUoeGRyLCBudW0gKiA0KTsNCj4+ID4gLQlpZiAodW5saWtlbHkoIXApKQ0KPj4g
PiAtCQlnb3RvIG91dF9vdmVyZmxvdzsNCj4+ID4gLQkqbGF5b3V0dHlwZSA9IGJlMzJfdG9fY3B1
cChwKTsNCj4+ID4gKwlmb3IgKDsgbnVtOyAtLW51bSkgew0KPj4gPiArCQlwID0geGRyX2lubGlu
ZV9kZWNvZGUoeGRyLCA0KTsNCj4+ID4gKw0KPj4gPiArCQlpZiAodW5saWtlbHkoIXApKQ0KPj4g
PiArCQkJZ290byBvdXRfb3ZlcmZsb3c7DQo+PiA+ICsNCj4+ID4gKwkJdHlwZSA9IGJlMzJfdG9f
Y3B1cChwKTsNCj4+ID4gKw0KPj4gPiArCQkvKiBJZ25vcmUgYW55IHRoYXQgd2UgZG9uJ3QgdW5k
ZXJzdGFuZCAqLw0KPj4gPiArCQlpZiAodW5saWtlbHkodHlwZSA+PSBMQVlPVVRfVFlQRV9NQVgp
KQ0KPj4gDQo+PiBUaGlzIHdpbGwgaW4gZWZmZWN0IGhhcmQgY29kZSB0aGUgbGF5b3V0cyB0aGF0
IHRoZSBjbGllbnQgc3VwcG9ydHMuDQo+PiBMQVlPVVRfVFlQRV9NQVggaXMgc29tZXRoaW5nIHRo
YXQgYXBwbGllcyB0byBrbmZzZCBvbmx5IGZvciBub3cuDQo+PiBMZXTigJlzIG5vdCBsZWFrIGl0
IGludG8gdGhlIGNsaWVudC4gSSBzdWdnZXN0IGp1c3QgbWFraW5nIHRoaXMNCj4+IDgqc2l6ZW9m
KCpsYXlvdXR0eXBlcykuDQo+PiANCj4NCj5GYWlyIGVub3VnaC4gSSdsbCBtYWtlIHRoYXQgY2hh
bmdlLg0KPg0KPlRoYXQgc2FpZC4uLkxBWU9VVF9UWVBFX01BWCBpcyBhIHZhbHVlIGluIHRoZSBw
bmZzX2xheW91dHR5cGUgZW51bSwgYW5kDQo+dGhhdCBlbnVtIGlzIHVzZWQgaW4gYm90aCB0aGUg
Y2xpZW50IGFuZCB0aGUgc2VydmVyIGNvZGUsIEFGQUlDVC4gSWYgd2UNCj5hZGQgYSBuZXcgTEFZ
T1VUXyogdmFsdWUgdG8gdGhhdCBlbnVtIGZvciB0aGUgY2xpZW50LCB0aGVuIHdlJ2xsIG5lZWQN
Cj50byBpbmNyZWFzZSB0aGF0IHZhbHVlIGFueXdheS4gU28sIEknbSBub3Qgc3VyZSBJIHVuZGVy
c3RhbmQgaG93IHRoaXMNCj5saW1pdHMgdGhlIGNsaWVudCBpbiBhbnkgd2F5Li4uDQoNCk5vLCB0
aGUgY2xpZW50IGRvZXNu4oCZdCB1c2UgZW51bSBwbmZzX2xheW91dHR5cGUgYW55d2hlcmUuIElm
IHlvdSBsb29rIGF0IHNldF9wbmZzX2xheW91dGRyaXZlcigpLCB5b3XigJlsbCBub3RlIHRoYXQg
d2UgY3VycmVudGx5IHN1cHBvcnQgYWxsIHZhbHVlcyBmb3IgdGhlIGxheW91dCB0eXBlLg0KDQo+
DQo+DQo+PiA+ICsJCQljb250aW51ZTsNCj4+ID4gKw0KPj4gPiArCQkqbGF5b3V0dHlwZXMgfD0g
MSA8PCB0eXBlOw0KPj4gPiArCX0NCj4+ID4gCXJldHVybiAwOw0KPj4gPiBvdXRfb3ZlcmZsb3c6
DQo+PiA+ICsJKmxheW91dHR5cGVzID0gMDsNCj4+ID4gCXByaW50X292ZXJmbG93X21zZyhfX2Z1
bmNfXywgeGRyKTsNCj4+ID4gCXJldHVybiAtRUlPOw0KPj4gPiB9DQo+PiA+IEBAIC00NzU5LDcg
KzQ3NjIsNyBAQCBvdXRfb3ZlcmZsb3c6DQo+PiA+ICAqIE5vdGUgd2UgbXVzdCBlbnN1cmUgdGhh
dCBsYXlvdXR0eXBlIGlzIHNldCBpbiBhbnkgbm9uLWVycm9yIGNhc2UuDQo+PiA+ICAqLw0KPj4g
PiBzdGF0aWMgaW50IGRlY29kZV9hdHRyX3BuZnN0eXBlKHN0cnVjdCB4ZHJfc3RyZWFtICp4ZHIs
IHVpbnQzMl90ICpiaXRtYXAsDQo+PiA+IC0JCQkJdWludDMyX3QgKmxheW91dHR5cGUpDQo+PiA+
ICsJCQkJX191MzIgKmxheW91dHR5cGVzKQ0KPj4gPiB7DQo+PiA+IAlpbnQgc3RhdHVzID0gMDsN
Cj4+ID4gDQo+PiA+IEBAIC00NzY3LDEwICs0NzcwLDEwIEBAIHN0YXRpYyBpbnQgZGVjb2RlX2F0
dHJfcG5mc3R5cGUoc3RydWN0IHhkcl9zdHJlYW0gKnhkciwgdWludDMyX3QgKmJpdG1hcCwNCj4+
ID4gCWlmICh1bmxpa2VseShiaXRtYXBbMV0gJiAoRkFUVFI0X1dPUkQxX0ZTX0xBWU9VVF9UWVBF
UyAtIDFVKSkpDQo+PiA+IAkJcmV0dXJuIC1FSU87DQo+PiA+IAlpZiAoYml0bWFwWzFdICYgRkFU
VFI0X1dPUkQxX0ZTX0xBWU9VVF9UWVBFUykgew0KPj4gPiAtCQlzdGF0dXMgPSBkZWNvZGVfZmly
c3RfcG5mc19sYXlvdXRfdHlwZSh4ZHIsIGxheW91dHR5cGUpOw0KPj4gPiArCQlzdGF0dXMgPSBk
ZWNvZGVfcG5mc19sYXlvdXRfdHlwZXMoeGRyLCBsYXlvdXR0eXBlcyk7DQo+PiA+IAkJYml0bWFw
WzFdICY9IH5GQVRUUjRfV09SRDFfRlNfTEFZT1VUX1RZUEVTOw0KPj4gPiAJfSBlbHNlDQo+PiA+
IC0JCSpsYXlvdXR0eXBlID0gMDsNCj4+ID4gKwkJKmxheW91dHR5cGVzID0gMDsNCj4+ID4gCXJl
dHVybiBzdGF0dXM7DQo+PiA+IH0NCj4+ID4gDQo+PiA+IEBAIC00ODUxLDcgKzQ4NTQsNyBAQCBz
dGF0aWMgaW50IGRlY29kZV9mc2luZm8oc3RydWN0IHhkcl9zdHJlYW0gKnhkciwgc3RydWN0IG5m
c19mc2luZm8gKmZzaW5mbykNCj4+ID4gCXN0YXR1cyA9IGRlY29kZV9hdHRyX3RpbWVfZGVsdGEo
eGRyLCBiaXRtYXAsICZmc2luZm8tPnRpbWVfZGVsdGEpOw0KPj4gPiAJaWYgKHN0YXR1cyAhPSAw
KQ0KPj4gPiAJCWdvdG8geGRyX2Vycm9yOw0KPj4gPiAtCXN0YXR1cyA9IGRlY29kZV9hdHRyX3Bu
ZnN0eXBlKHhkciwgYml0bWFwLCAmZnNpbmZvLT5sYXlvdXR0eXBlKTsNCj4+ID4gKwlzdGF0dXMg
PSBkZWNvZGVfYXR0cl9wbmZzdHlwZSh4ZHIsIGJpdG1hcCwgJmZzaW5mby0+bGF5b3V0dHlwZXMp
Ow0KPj4gPiAJaWYgKHN0YXR1cyAhPSAwKQ0KPj4gPiAJCWdvdG8geGRyX2Vycm9yOw0KPj4gPiAN
Cj4+ID4gZGlmZiAtLWdpdCBhL2ZzL25mcy9wbmZzLmMgYi9mcy9uZnMvcG5mcy5jDQo+PiA+IGlu
ZGV4IDBjN2UwZDQ1YTRkZS4uMjBiN2IxZjRlMDQxIDEwMDY0NA0KPj4gPiAtLS0gYS9mcy9uZnMv
cG5mcy5jDQo+PiA+ICsrKyBiL2ZzL25mcy9wbmZzLmMNCj4+ID4gQEAgLTcwLDcgKzcwLDcgQEAg
b3V0Og0KPj4gPiB9DQo+PiA+IA0KPj4gPiBzdGF0aWMgc3RydWN0IHBuZnNfbGF5b3V0ZHJpdmVy
X3R5cGUgKg0KPj4gPiAtZmluZF9wbmZzX2RyaXZlcih1MzIgaWQpDQo+PiA+ICtfX2ZpbmRfcG5m
c19kcml2ZXIodTMyIGlkKQ0KPj4gPiB7DQo+PiA+IAlzdHJ1Y3QgcG5mc19sYXlvdXRkcml2ZXJf
dHlwZSAqbG9jYWw7DQo+PiA+IA0KPj4gPiBAQCAtODQsNiArODQsMjIgQEAgZmluZF9wbmZzX2Ry
aXZlcih1MzIgaWQpDQo+PiA+IAlyZXR1cm4gbG9jYWw7DQo+PiA+IH0NCj4+ID4gDQo+PiA+ICtz
dGF0aWMgc3RydWN0IHBuZnNfbGF5b3V0ZHJpdmVyX3R5cGUgKg0KPj4gPiArZmluZF9wbmZzX2Ry
aXZlcih1MzIgaWQpDQo+PiA+ICt7DQo+PiA+ICsJc3RydWN0IHBuZnNfbGF5b3V0ZHJpdmVyX3R5
cGUgKmxkX3R5cGU7DQo+PiA+ICsNCj4+ID4gKwlsZF90eXBlID0gX19maW5kX3BuZnNfZHJpdmVy
KGlkKTsNCj4+ID4gKwlpZiAoIWxkX3R5cGUpIHsNCj4+ID4gKwkJcmVxdWVzdF9tb2R1bGUoIiVz
LSV1IiwgTEFZT1VUX05GU1Y0XzFfTU9EVUxFX1BSRUZJWCwgaWQpOw0KPj4gPiArCQlsZF90eXBl
ID0gX19maW5kX3BuZnNfZHJpdmVyKGlkKTsNCj4+ID4gKwkJaWYgKCFsZF90eXBlKQ0KPj4gPiAr
CQkJZHByaW50aygiJXM6IE5vIHBORlMgbW9kdWxlIGZvdW5kIGZvciAldS5cbiIsDQo+PiA+ICsJ
CQkJX19mdW5jX18sIGlkKTsNCj4+ID4gKwl9DQo+PiA+ICsJcmV0dXJuIGxkX3R5cGU7DQo+PiA+
ICt9DQo+PiA+ICsNCj4+ID4gdm9pZA0KPj4gPiB1bnNldF9wbmZzX2xheW91dGRyaXZlcihzdHJ1
Y3QgbmZzX3NlcnZlciAqbmZzcykNCj4+ID4gew0KPj4gPiBAQCAtMTAyLDQ0ICsxMTgsNzIgQEAg
dW5zZXRfcG5mc19sYXlvdXRkcml2ZXIoc3RydWN0IG5mc19zZXJ2ZXIgKm5mc3MpDQo+PiA+ICAq
IFRyeSB0byBzZXQgdGhlIHNlcnZlcidzIHBuZnMgbW9kdWxlIHRvIHRoZSBwbmZzIGxheW91dCB0
eXBlIHNwZWNpZmllZCBieSBpZC4NCj4+ID4gICogQ3VycmVudGx5IG9ubHkgb25lIHBORlMgbGF5
b3V0IGRyaXZlciBwZXIgZmlsZXN5c3RlbSBpcyBzdXBwb3J0ZWQuDQo+PiA+ICAqDQo+PiA+IC0g
KiBAaWQgbGF5b3V0IHR5cGUuIFplcm8gKGlsbGVnYWwgbGF5b3V0IHR5cGUpIGluZGljYXRlcyBw
TkZTIG5vdCBpbiB1c2UuDQo+PiA+ICsgKiBAbGF5b3V0dHlwZXM6IGJpdGZpZWxkIHNob3dpbmcg
d2hhdCBsYXlvdXQgdHlwZXMgc2VydmVyIHN1cHBvcnRzDQo+PiA+ICAqLw0KPj4gPiB2b2lkDQo+
PiA+IHNldF9wbmZzX2xheW91dGRyaXZlcihzdHJ1Y3QgbmZzX3NlcnZlciAqc2VydmVyLCBjb25z
dCBzdHJ1Y3QgbmZzX2ZoICptbnRmaCwNCj4+ID4gLQkJICAgICAgdTMyIGlkKQ0KPj4gPiArCQkg
ICAgICB1MzIgbGF5b3V0dHlwZXMpDQo+PiA+IHsNCj4+ID4gCXN0cnVjdCBwbmZzX2xheW91dGRy
aXZlcl90eXBlICpsZF90eXBlID0gTlVMTDsNCj4+ID4gDQo+PiA+IC0JaWYgKGlkID09IDApDQo+
PiA+ICsJaWYgKGxheW91dHR5cGVzID09IDApDQo+PiA+IAkJZ290byBvdXRfbm9fZHJpdmVyOw0K
Pj4gPiAJaWYgKCEoc2VydmVyLT5uZnNfY2xpZW50LT5jbF9leGNoYW5nZV9mbGFncyAmDQo+PiA+
IAkJIChFWENIR0lENF9GTEFHX1VTRV9OT05fUE5GUyB8IEVYQ0hHSUQ0X0ZMQUdfVVNFX1BORlNf
TURTKSkpIHsNCj4+ID4gLQkJcHJpbnRrKEtFUk5fRVJSICJORlM6ICVzOiBpZCAldSBjbF9leGNo
YW5nZV9mbGFncyAweCV4XG4iLA0KPj4gPiAtCQkJX19mdW5jX18sIGlkLCBzZXJ2ZXItPm5mc19j
bGllbnQtPmNsX2V4Y2hhbmdlX2ZsYWdzKTsNCj4+ID4gKwkJcHJpbnRrKEtFUk5fRVJSICJORlM6
ICVzOiBsYXlvdXR0eXBlcyAweCV4IGNsX2V4Y2hhbmdlX2ZsYWdzIDB4JXhcbiIsDQo+PiA+ICsJ
CQlfX2Z1bmNfXywgbGF5b3V0dHlwZXMsIHNlcnZlci0+bmZzX2NsaWVudC0+Y2xfZXhjaGFuZ2Vf
ZmxhZ3MpOw0KPj4gPiAJCWdvdG8gb3V0X25vX2RyaXZlcjsNCj4+ID4gCX0NCj4+ID4gLQlsZF90
eXBlID0gZmluZF9wbmZzX2RyaXZlcihpZCk7DQo+PiA+IC0JaWYgKCFsZF90eXBlKSB7DQo+PiA+
IC0JCXJlcXVlc3RfbW9kdWxlKCIlcy0ldSIsIExBWU9VVF9ORlNWNF8xX01PRFVMRV9QUkVGSVgs
IGlkKTsNCj4+ID4gLQkJbGRfdHlwZSA9IGZpbmRfcG5mc19kcml2ZXIoaWQpOw0KPj4gPiAtCQlp
ZiAoIWxkX3R5cGUpIHsNCj4+ID4gLQkJCWRwcmludGsoIiVzOiBObyBwTkZTIG1vZHVsZSBmb3Vu
ZCBmb3IgJXUuXG4iLA0KPj4gPiAtCQkJCV9fZnVuY19fLCBpZCk7DQo+PiA+ICsNCj4+ID4gKwkv
Kg0KPj4gPiArCSAqIFNlZSBpZiBvbmUgb2YgdGhlIGxheW91dCB0eXBlcyB0aGF0IHdlIGdvdCBo
YW5kZWQgaXMgdXNhYmxlLiBXZQ0KPj4gPiArCSAqIGF0dGVtcHQgaW4gYSBoYXJkY29kZWQgb3Jk
ZXIgb2YgcHJlZmVyZW5jZSwgaW4gb3JkZXIgb2YgKGFzc3VtZWQpDQo+PiA+ICsJICogZGVjcmVh
c2luZyBzcGVlZHMgYW5kIGZ1bmN0aW9uYWxpdHkuDQo+PiA+ICsJICoNCj4+ID4gKwkgKiBGSVhN
RTogc2hvdWxkIHRoaXMgb3JkZXIgYmUgY29uZmlndXJhYmxlIGluIHNvbWUgZmFzaGlvbj8NCj4+
ID4gKwkgKi8NCj4+ID4gKwlpZiAobGF5b3V0dHlwZXMgJiAoMSA8PCBMQVlPVVRfU0NTSSkpIHsN
Cj4+ID4gKwkJbGRfdHlwZSA9IGZpbmRfcG5mc19kcml2ZXIoTEFZT1VUX1NDU0kpOw0KPj4gPiAr
CQlpZiAobGRfdHlwZSkNCj4+ID4gKwkJCWdvdG8gc2V0X2RyaXZlcjsNCj4+ID4gKwl9DQo+PiA+
ICsNCj4+ID4gKwlpZiAobGF5b3V0dHlwZXMgJiAoMSA8PCBMQVlPVVRfQkxPQ0tfVk9MVU1FKSkg
ew0KPj4gPiArCQlsZF90eXBlID0gZmluZF9wbmZzX2RyaXZlcihMQVlPVVRfQkxPQ0tfVk9MVU1F
KTsNCj4+ID4gKwkJaWYgKGxkX3R5cGUpDQo+PiA+ICsJCQlnb3RvIHNldF9kcml2ZXI7DQo+PiA+
ICsJfQ0KPj4gPiArDQo+PiA+ICsJaWYgKGxheW91dHR5cGVzICYgKDEgPDwgTEFZT1VUX09TRDJf
T0JKRUNUUykpIHsNCj4+ID4gKwkJbGRfdHlwZSA9IGZpbmRfcG5mc19kcml2ZXIoTEFZT1VUX09T
RDJfT0JKRUNUUyk7DQo+PiA+ICsJCWlmIChsZF90eXBlKQ0KPj4gPiArCQkJZ290byBzZXRfZHJp
dmVyOw0KPj4gPiArCX0NCj4+ID4gKw0KPj4gPiArCWlmIChsYXlvdXR0eXBlcyAmICgxIDw8IExB
WU9VVF9GTEVYX0ZJTEVTKSkgew0KPj4gPiArCQlsZF90eXBlID0gZmluZF9wbmZzX2RyaXZlcihM
QVlPVVRfRkxFWF9GSUxFUyk7DQo+PiA+ICsJCWlmIChsZF90eXBlKQ0KPj4gPiArCQkJZ290byBz
ZXRfZHJpdmVyOw0KPj4gPiArCX0NCj4+ID4gKw0KPj4gPiArCWlmIChsYXlvdXR0eXBlcyAmICgx
IDw8IExBWU9VVF9ORlNWNF8xX0ZJTEVTKSkgew0KPj4gPiArCQlsZF90eXBlID0gZmluZF9wbmZz
X2RyaXZlcihMQVlPVVRfTkZTVjRfMV9GSUxFUyk7DQo+PiA+ICsJCWlmICghbGRfdHlwZSkNCj4+
ID4gCQkJZ290byBvdXRfbm9fZHJpdmVyOw0KPj4gPiAtCQl9DQo+PiA+IAl9DQo+PiA+ICtzZXRf
ZHJpdmVyOg0KPj4gPiAJc2VydmVyLT5wbmZzX2N1cnJfbGQgPSBsZF90eXBlOw0KPj4gPiAJaWYg
KGxkX3R5cGUtPnNldF9sYXlvdXRkcml2ZXINCj4+ID4gCSAgICAmJiBsZF90eXBlLT5zZXRfbGF5
b3V0ZHJpdmVyKHNlcnZlciwgbW50ZmgpKSB7DQo+PiA+IAkJcHJpbnRrKEtFUk5fRVJSICJORlM6
ICVzOiBFcnJvciBpbml0aWFsaXppbmcgcE5GUyBsYXlvdXQgIg0KPj4gPiAtCQkJImRyaXZlciAl
dS5cbiIsIF9fZnVuY19fLCBpZCk7DQo+PiA+ICsJCQkiZHJpdmVyICV1LlxuIiwgX19mdW5jX18s
IGxkX3R5cGUtPmlkKTsNCj4+ID4gCQltb2R1bGVfcHV0KGxkX3R5cGUtPm93bmVyKTsNCj4+ID4g
CQlnb3RvIG91dF9ub19kcml2ZXI7DQo+PiA+IAl9DQo+PiA+IAkvKiBCdW1wIHRoZSBNRFMgY291
bnQgKi8NCj4+ID4gCWF0b21pY19pbmMoJnNlcnZlci0+bmZzX2NsaWVudC0+Y2xfbWRzX2NvdW50
KTsNCj4+ID4gDQo+PiA+IC0JZHByaW50aygiJXM6IHBORlMgbW9kdWxlIGZvciAldSBzZXRcbiIs
IF9fZnVuY19fLCBpZCk7DQo+PiA+ICsJZHByaW50aygiJXM6IHBORlMgbW9kdWxlIGZvciAldSBz
ZXRcbiIsIF9fZnVuY19fLCBsZF90eXBlLT5pZCk7DQo+PiA+IAlyZXR1cm47DQo+PiA+IA0KPj4g
PiBvdXRfbm9fZHJpdmVyOg0KPj4gPiBkaWZmIC0tZ2l0IGEvaW5jbHVkZS9saW51eC9uZnNfeGRy
LmggYi9pbmNsdWRlL2xpbnV4L25mc194ZHIuaA0KPj4gPiBpbmRleCBjMzA0YTExYjViMWEuLjFm
NmJiNTlmMDVmMiAxMDA2NDQNCj4+ID4gLS0tIGEvaW5jbHVkZS9saW51eC9uZnNfeGRyLmgNCj4+
ID4gKysrIGIvaW5jbHVkZS9saW51eC9uZnNfeGRyLmgNCj4+ID4gQEAgLTEzOSw3ICsxMzksNyBA
QCBzdHJ1Y3QgbmZzX2ZzaW5mbyB7DQo+PiA+IAlfX3U2NAkJCW1heGZpbGVzaXplOw0KPj4gPiAJ
c3RydWN0IHRpbWVzcGVjCQl0aW1lX2RlbHRhOyAvKiBzZXJ2ZXIgdGltZSBncmFudWxhcml0eSAq
Lw0KPj4gPiAJX191MzIJCQlsZWFzZV90aW1lOyAvKiBpbiBzZWNvbmRzICovDQo+PiA+IC0JX191
MzIJCQlsYXlvdXR0eXBlOyAvKiBzdXBwb3J0ZWQgcG5mcyBsYXlvdXQgZHJpdmVyICovDQo+PiA+
ICsJX191MzIJCQlsYXlvdXR0eXBlczsgLyogc3VwcG9ydGVkIHBuZnMgbGF5b3V0IGRyaXZlcnMg
Ki8NCj4+ID4gCV9fdTMyCQkJYmxrc2l6ZTsgLyogcHJlZmVycmVkIHBuZnMgaW8gYmxvY2sgc2l6
ZSAqLw0KPj4gPiAJX191MzIJCQljbG9uZV9ibGtzaXplOyAvKiBncmFudWxhcml0eSBvZiBhIENM
T05FIG9wZXJhdGlvbiAqLw0KPj4gPiB9Ow0KPj4gPiAtLSANCj4+ID4gMi41LjUNCj4+ID4gDQo+
PiANCj4+IE5yeWJYx6d2Xineunsubit7Il5ucno/aCY/R2g/KOmajt2iaiI/P2163pZmaH5tDQo+
LS0gDQo+SmVmZiBMYXl0b24gPGpsYXl0b25AcG9vY2hpZXJlZHMubmV0Pg0KPg0KDQo=


2016-05-31 21:54:05

by Jeff Layton

[permalink] [raw]
Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers that hand out multiple layout types

On Tue, 2016-05-31 at 21:41 +0000, Trond Myklebust wrote:
>
>
> On 5/31/16, 17:09, "Jeff Layton" <[email protected]> wrote:
>
> >On Tue, 2016-05-31 at 16:03 +0000, Trond Myklebust wrote:
> >> 
> >> On 5/30/16, 12:35, "Jeff Layton" <[email protected]> wrote:
> >> 
> >> > Allow the client to deal with servers that hand out multiple layout
> >> > types for the same filesystem. When this happens, we pick the "best" one,
> >> > based on a hardcoded assumed order in the client code.
> >> > 
> >> > Signed-off-by: Jeff Layton <[email protected]>
> >> > ---
> >> > fs/nfs/client.c | 2 +-
> >> > fs/nfs/nfs4proc.c | 2 +-
> >> > fs/nfs/nfs4xdr.c | 41 +++++++++++++-------------
> >> > fs/nfs/pnfs.c | 76 ++++++++++++++++++++++++++++++++++++++-----------
> >> > include/linux/nfs_xdr.h | 2 +-
> >> > 5 files changed, 85 insertions(+), 38 deletions(-)
> >> > 
> >> > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> >> > index 0c96528db94a..53b41f4bd45a 100644
> >> > --- a/fs/nfs/client.c
> >> > +++ b/fs/nfs/client.c
> >> > @@ -787,7 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
> >> > }
> >> > 
> >> > fsinfo.fattr = fattr;
> >> > - fsinfo.layouttype = 0;
> >> > + fsinfo.layouttypes = 0;
> >> > error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
> >> > if (error < 0)
> >> > goto out_error;
> >> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> >> > index de97567795a5..9446aef89b48 100644
> >> > --- a/fs/nfs/nfs4proc.c
> >> > +++ b/fs/nfs/nfs4proc.c
> >> > @@ -4252,7 +4252,7 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s
> >> > if (error == 0) {
> >> > /* block layout checks this! */
> >> > server->pnfs_blksize = fsinfo->blksize;
> >> > -  set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
> >> > +  set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttypes);
> >> > }
> >> > 
> >> > return error;
> >> > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> >> > index 661e753fe1c9..876a80802c1d 100644
> >> > --- a/fs/nfs/nfs4xdr.c
> >> > +++ b/fs/nfs/nfs4xdr.c
> >> > @@ -4723,33 +4723,36 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
> >> > * Decode potentially multiple layout types. Currently we only support
> >> > * one layout driver per file system.
> >> > */
> >> > -static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
> >> > -  uint32_t *layouttype)
> >> > +static int decode_pnfs_layout_types(struct xdr_stream *xdr, u32 *layouttypes)
> >> > {
> >> > __be32 *p;
> >> > int num;
> >> > + u32 type;
> >> > 
> >> > p = xdr_inline_decode(xdr, 4);
> >> > if (unlikely(!p))
> >> > goto out_overflow;
> >> > num = be32_to_cpup(p);
> >> > 
> >> > - /* pNFS is not supported by the underlying file system */
> >> > - if (num == 0) {
> >> > -  *layouttype = 0;
> >> > -  return 0;
> >> > - }
> >> > - if (num > 1)
> >> > -  printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
> >> > -  "drivers per filesystem not supported\n", __func__);
> >> > + *layouttypes = 0;
> >> > 
> >> > - /* Decode and set first layout type, move xdr->p past unused types */
> >> > - p = xdr_inline_decode(xdr, num * 4);
> >> > - if (unlikely(!p))
> >> > -  goto out_overflow;
> >> > - *layouttype = be32_to_cpup(p);
> >> > + for (; num; --num) {
> >> > +  p = xdr_inline_decode(xdr, 4);
> >> > +
> >> > +  if (unlikely(!p))
> >> > +  goto out_overflow;
> >> > +
> >> > +  type = be32_to_cpup(p);
> >> > +
> >> > +  /* Ignore any that we don't understand */
> >> > +  if (unlikely(type >= LAYOUT_TYPE_MAX))
> >> 
> >> This will in effect hard code the layouts that the client supports.
> >> LAYOUT_TYPE_MAX is something that applies to knfsd only for now.
> >> Let’s not leak it into the client. I suggest just making this
> >> 8*sizeof(*layouttypes).
> >> 
> >
> >Fair enough. I'll make that change.
> >
> >That said...LAYOUT_TYPE_MAX is a value in the pnfs_layouttype enum, and
> >that enum is used in both the client and the server code, AFAICT. If we
> >add a new LAYOUT_* value to that enum for the client, then we'll need
> >to increase that value anyway. So, I'm not sure I understand how this
> >limits the client in any way...
>
> No, the client doesn’t use enum pnfs_layouttype anywhere. If you look
> at set_pnfs_layoutdriver(), you’ll note that we currently support all
> values for the layout type.
>

Ok, I see. So if someone were to (for instance) create a 3rd party
layout driver module that had used a value above LAYOUT_TYPE_MAX then
this would prevent it from working.

Hmmm...so even if I make the change that you're suggesting, this will
still limit the client to working with layout types that are below a
value of 32. Is that also a problem? If so, then maybe I should respin
this to be more like the one Tigran had: make an array or something to
hold those values.

Thoughts?

> >
> >
> >> > +  continue;
> >> > +
> >> > +  *layouttypes |= 1 << type;
> >> > + }
> >> > return 0;
> >> > out_overflow:
> >> > + *layouttypes = 0;
> >> > print_overflow_msg(__func__, xdr);
> >> > return -EIO;
> >> > }
> >> > @@ -4759,7 +4762,7 @@ out_overflow:
> >> > * Note we must ensure that layouttype is set in any non-error case.
> >> > */
> >> > static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
> >> > -  uint32_t *layouttype)
> >> > +  __u32 *layouttypes)
> >> > {
> >> > int status = 0;
> >> > 
> >> > @@ -4767,10 +4770,10 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
> >> > if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
> >> > return -EIO;
> >> > if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {
> >> > -  status = decode_first_pnfs_layout_type(xdr, layouttype);
> >> > +  status = decode_pnfs_layout_types(xdr, layouttypes);
> >> > bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
> >> > } else
> >> > -  *layouttype = 0;
> >> > +  *layouttypes = 0;
> >> > return status;
> >> > }
> >> > 
> >> > @@ -4851,7 +4854,7 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
> >> > status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
> >> > if (status != 0)
> >> > goto xdr_error;
> >> > - status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
> >> > + status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttypes);
> >> > if (status != 0)
> >> > goto xdr_error;
> >> > 
> >> > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> >> > index 0c7e0d45a4de..20b7b1f4e041 100644
> >> > --- a/fs/nfs/pnfs.c
> >> > +++ b/fs/nfs/pnfs.c
> >> > @@ -70,7 +70,7 @@ out:
> >> > }
> >> > 
> >> > static struct pnfs_layoutdriver_type *
> >> > -find_pnfs_driver(u32 id)
> >> > +__find_pnfs_driver(u32 id)
> >> > {
> >> > struct pnfs_layoutdriver_type *local;
> >> > 
> >> > @@ -84,6 +84,22 @@ find_pnfs_driver(u32 id)
> >> > return local;
> >> > }
> >> > 
> >> > +static struct pnfs_layoutdriver_type *
> >> > +find_pnfs_driver(u32 id)
> >> > +{
> >> > + struct pnfs_layoutdriver_type *ld_type;
> >> > +
> >> > + ld_type = __find_pnfs_driver(id);
> >> > + if (!ld_type) {
> >> > +  request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
> >> > +  ld_type = __find_pnfs_driver(id);
> >> > +  if (!ld_type)
> >> > +  dprintk("%s: No pNFS module found for %u.\n",
> >> > +  __func__, id);
> >> > + }
> >> > + return ld_type;
> >> > +}
> >> > +
> >> > void
> >> > unset_pnfs_layoutdriver(struct nfs_server *nfss)
> >> > {
> >> > @@ -102,44 +118,72 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
> >> > * Try to set the server's pnfs module to the pnfs layout type specified by id.
> >> > * Currently only one pNFS layout driver per filesystem is supported.
> >> > *
> >> > - * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
> >> > + * @layouttypes: bitfield showing what layout types server supports
> >> > */
> >> > void
> >> > set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
> >> > -  u32 id)
> >> > +  u32 layouttypes)
> >> > {
> >> > struct pnfs_layoutdriver_type *ld_type = NULL;
> >> > 
> >> > - if (id == 0)
> >> > + if (layouttypes == 0)
> >> > goto out_no_driver;
> >> > if (!(server->nfs_client->cl_exchange_flags &
> >> > (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
> >> > -  printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",
> >> > -  __func__, id, server->nfs_client->cl_exchange_flags);
> >> > +  printk(KERN_ERR "NFS: %s: layouttypes 0x%x cl_exchange_flags 0x%x\n",
> >> > +  __func__, layouttypes, server->nfs_client->cl_exchange_flags);
> >> > goto out_no_driver;
> >> > }
> >> > - ld_type = find_pnfs_driver(id);
> >> > - if (!ld_type) {
> >> > -  request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
> >> > -  ld_type = find_pnfs_driver(id);
> >> > -  if (!ld_type) {
> >> > -  dprintk("%s: No pNFS module found for %u.\n",
> >> > -  __func__, id);
> >> > +
> >> > + /*
> >> > +  * See if one of the layout types that we got handed is usable. We
> >> > +  * attempt in a hardcoded order of preference, in order of (assumed)
> >> > +  * decreasing speeds and functionality.
> >> > +  *
> >> > +  * FIXME: should this order be configurable in some fashion?
> >> > +  */
> >> > + if (layouttypes & (1 << LAYOUT_SCSI)) {
> >> > +  ld_type = find_pnfs_driver(LAYOUT_SCSI);
> >> > +  if (ld_type)
> >> > +  goto set_driver;
> >> > + }
> >> > +
> >> > + if (layouttypes & (1 << LAYOUT_BLOCK_VOLUME)) {
> >> > +  ld_type = find_pnfs_driver(LAYOUT_BLOCK_VOLUME);
> >> > +  if (ld_type)
> >> > +  goto set_driver;
> >> > + }
> >> > +
> >> > + if (layouttypes & (1 << LAYOUT_OSD2_OBJECTS)) {
> >> > +  ld_type = find_pnfs_driver(LAYOUT_OSD2_OBJECTS);
> >> > +  if (ld_type)
> >> > +  goto set_driver;
> >> > + }
> >> > +
> >> > + if (layouttypes & (1 << LAYOUT_FLEX_FILES)) {
> >> > +  ld_type = find_pnfs_driver(LAYOUT_FLEX_FILES);
> >> > +  if (ld_type)
> >> > +  goto set_driver;
> >> > + }
> >> > +
> >> > + if (layouttypes & (1 << LAYOUT_NFSV4_1_FILES)) {
> >> > +  ld_type = find_pnfs_driver(LAYOUT_NFSV4_1_FILES);
> >> > +  if (!ld_type)
> >> > goto out_no_driver;
> >> > -  }
> >> > }
> >> > +set_driver:
> >> > server->pnfs_curr_ld = ld_type;
> >> > if (ld_type->set_layoutdriver
> >> > && ld_type->set_layoutdriver(server, mntfh)) {
> >> > printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
> >> > -  "driver %u.\n", __func__, id);
> >> > +  "driver %u.\n", __func__, ld_type->id);
> >> > module_put(ld_type->owner);
> >> > goto out_no_driver;
> >> > }
> >> > /* Bump the MDS count */
> >> > atomic_inc(&server->nfs_client->cl_mds_count);
> >> > 
> >> > - dprintk("%s: pNFS module for %u set\n", __func__, id);
> >> > + dprintk("%s: pNFS module for %u set\n", __func__, ld_type->id);
> >> > return;
> >> > 
> >> > out_no_driver:
> >> > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> >> > index c304a11b5b1a..1f6bb59f05f2 100644
> >> > --- a/include/linux/nfs_xdr.h
> >> > +++ b/include/linux/nfs_xdr.h
> >> > @@ -139,7 +139,7 @@ struct nfs_fsinfo {
> >> > __u64  maxfilesize;
> >> > struct timespec  time_delta; /* server time granularity */
> >> > __u32  lease_time; /* in seconds */
> >> > - __u32  layouttype; /* supported pnfs layout driver */
> >> > + __u32  layouttypes; /* supported pnfs layout drivers */
> >> > __u32  blksize; /* preferred pnfs io block size */
> >> > __u32  clone_blksize; /* granularity of a CLONE operation */
> >> > };
> >> > -- 
> >> > 2.5.5
> >> > 
> >> 
> >> NrybXǧv^)޺{.n+{"^nrz?h&?Gh?(階ݢj"??mzޖfh~m
> >-- 
> >Jeff Layton <[email protected]>
> >
>
>
> Disclaimer
> The information contained in this communication from the sender is confidential. It is intended solely for use by the recipient and others authorized to receive it. If you are not the recipient, you are hereby notified that any disclosure, copying, distribution or taking action in relation of the contents of this information is strictly prohibited and may be unlawful.
--
Jeff Layton <[email protected]>

2016-06-01 21:53:07

by Jeff Layton

[permalink] [raw]
Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers that hand out multiple layout types

On Tue, 2016-05-31 at 17:54 -0400, Jeff Layton wrote:
> On Tue, 2016-05-31 at 21:41 +0000, Trond Myklebust wrote:
> >
> >
> > On 5/31/16, 17:09, "Jeff Layton" <[email protected]> wrote:
> >
> > > On Tue, 2016-05-31 at 16:03 +0000, Trond Myklebust wrote:
> > > >  
> > > > On 5/30/16, 12:35, "Jeff Layton" <[email protected]> wrote:
> > > >  
> > > > > Allow the client to deal with servers that hand out multiple layout
> > > > > types for the same filesystem. When this happens, we pick the "best" one,
> > > > > based on a hardcoded assumed order in the client code.
> > > > >  
> > > > > Signed-off-by: Jeff Layton <[email protected]>
> > > > > ---
> > > > > fs/nfs/client.c | 2 +-
> > > > > fs/nfs/nfs4proc.c | 2 +-
> > > > > fs/nfs/nfs4xdr.c | 41 +++++++++++++-------------
> > > > > fs/nfs/pnfs.c | 76 ++++++++++++++++++++++++++++++++++++++-----------
> > > > > include/linux/nfs_xdr.h | 2 +-
> > > > > 5 files changed, 85 insertions(+), 38 deletions(-)
> > > > >  
> > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > > > > index 0c96528db94a..53b41f4bd45a 100644
> > > > > --- a/fs/nfs/client.c
> > > > > +++ b/fs/nfs/client.c
> > > > > @@ -787,7 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
> > > > > }
> > > > >  
> > > > > fsinfo.fattr = fattr;
> > > > > - fsinfo.layouttype = 0;
> > > > > + fsinfo.layouttypes = 0;
> > > > > error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
> > > > > if (error < 0)
> > > > > goto out_error;
> > > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > > > index de97567795a5..9446aef89b48 100644
> > > > > --- a/fs/nfs/nfs4proc.c
> > > > > +++ b/fs/nfs/nfs4proc.c
> > > > > @@ -4252,7 +4252,7 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s
> > > > > if (error == 0) {
> > > > > /* block layout checks this! */
> > > > > server->pnfs_blksize = fsinfo->blksize;
> > > > > -  set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
> > > > > +  set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttypes);
> > > > > }
> > > > >  
> > > > > return error;
> > > > > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> > > > > index 661e753fe1c9..876a80802c1d 100644
> > > > > --- a/fs/nfs/nfs4xdr.c
> > > > > +++ b/fs/nfs/nfs4xdr.c
> > > > > @@ -4723,33 +4723,36 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
> > > > > * Decode potentially multiple layout types. Currently we only support
> > > > > * one layout driver per file system.
> > > > > */
> > > > > -static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
> > > > > -  uint32_t *layouttype)
> > > > > +static int decode_pnfs_layout_types(struct xdr_stream *xdr, u32 *layouttypes)
> > > > > {
> > > > > __be32 *p;
> > > > > int num;
> > > > > + u32 type;
> > > > >  
> > > > > p = xdr_inline_decode(xdr, 4);
> > > > > if (unlikely(!p))
> > > > > goto out_overflow;
> > > > > num = be32_to_cpup(p);
> > > > >  
> > > > > - /* pNFS is not supported by the underlying file system */
> > > > > - if (num == 0) {
> > > > > -  *layouttype = 0;
> > > > > -  return 0;
> > > > > - }
> > > > > - if (num > 1)
> > > > > -  printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
> > > > > -  "drivers per filesystem not supported\n", __func__);
> > > > > + *layouttypes = 0;
> > > > >  
> > > > > - /* Decode and set first layout type, move xdr->p past unused types */
> > > > > - p = xdr_inline_decode(xdr, num * 4);
> > > > > - if (unlikely(!p))
> > > > > -  goto out_overflow;
> > > > > - *layouttype = be32_to_cpup(p);
> > > > > + for (; num; --num) {
> > > > > +  p = xdr_inline_decode(xdr, 4);
> > > > > +
> > > > > +  if (unlikely(!p))
> > > > > +  goto out_overflow;
> > > > > +
> > > > > +  type = be32_to_cpup(p);
> > > > > +
> > > > > +  /* Ignore any that we don't understand */
> > > > > +  if (unlikely(type >= LAYOUT_TYPE_MAX))
> > > >  
> > > > This will in effect hard code the layouts that the client supports.
> > > > LAYOUT_TYPE_MAX is something that applies to knfsd only for now.
> > > > Let’s not leak it into the client. I suggest just making this
> > > > 8*sizeof(*layouttypes).
> > > >  
> > >
> > > Fair enough. I'll make that change.
> > >
> > > That said...LAYOUT_TYPE_MAX is a value in the pnfs_layouttype enum, and
> > > that enum is used in both the client and the server code, AFAICT. If we
> > > add a new LAYOUT_* value to that enum for the client, then we'll need
> > > to increase that value anyway. So, I'm not sure I understand how this
> > > limits the client in any way...
> >
> > No, the client doesn’t use enum pnfs_layouttype anywhere. If you look
> > at set_pnfs_layoutdriver(), you’ll note that we currently support all
> > values for the layout type.
> >
>
> Ok, I see. So if someone were to (for instance) create a 3rd party
> layout driver module that had used a value above LAYOUT_TYPE_MAX then
> this would prevent it from working.
>
> Hmmm...so even if I make the change that you're suggesting, this will
> still limit the client to working with layout types that are below a
> value of 32. Is that also a problem? If so, then maybe I should respin
> this to be more like the one Tigran had: make an array or something to
> hold those values.
>
> Thoughts?
>

Yecchhhhh...ok after thinking about this, the whole out-of-tree layout
driver possibility really throws a wrench into this plan...

Suppose someone creates such a layout driver, drops the module onto the
client and the core kernel knows nothing about it.  With the current
patch, it'd be ignored. I don't think that's what we want though.

Where should that driver fit in the selection order in
set_pnfs_layoutdriver?

Tigran's patch had the client start with the second element and only
pick the first one in the list if nothing else worked. That's sort of
icky though.

Another idea might be to just attempt unrecognized ones as the driver
of last resort, when no other driver has worked?

Alternately, we could add a mount option or something that would affect
the selection order? If so, how should such an option work?

I'm really open to suggestions here -- I've no idea what the right
thing to do is at this point...sigh.

--
Jeff Layton <[email protected]>

2016-06-02 07:12:35

by Mkrtchyan, Tigran

[permalink] [raw]
Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers that hand out multiple layout types



----- Original Message -----
> From: "Jeff Layton" <[email protected]>
> To: "Trond Myklebust" <[email protected]>, [email protected]
> Cc: "tigran mkrtchyan" <[email protected]>, "Anna Schumaker" <[email protected]>, [email protected]
> Sent: Wednesday, June 1, 2016 11:53:03 PM
> Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers that hand out multiple layout types

> On Tue, 2016-05-31 at 17:54 -0400, Jeff Layton wrote:
>> On Tue, 2016-05-31 at 21:41 +0000, Trond Myklebust wrote:
>> >
>> >
>> > On 5/31/16, 17:09, "Jeff Layton" <[email protected]> wrote:
>> >
>> > > On Tue, 2016-05-31 at 16:03 +0000, Trond Myklebust wrote:
>> > > >  
>> > > > On 5/30/16, 12:35, "Jeff Layton" <[email protected]> wrote:
>> > > >  
>> > > > > Allow the client to deal with servers that hand out multiple layout
>> > > > > types for the same filesystem. When this happens, we pick the "best" one,
>> > > > > based on a hardcoded assumed order in the client code.
>> > > > >  
>> > > > > Signed-off-by: Jeff Layton <[email protected]>
>> > > > > ---
>> > > > > fs/nfs/client.c | 2 +-
>> > > > > fs/nfs/nfs4proc.c | 2 +-
>> > > > > fs/nfs/nfs4xdr.c | 41 +++++++++++++-------------
>> > > > > fs/nfs/pnfs.c | 76 ++++++++++++++++++++++++++++++++++++++-----------
>> > > > > include/linux/nfs_xdr.h | 2 +-
>> > > > > 5 files changed, 85 insertions(+), 38 deletions(-)
>> > > > >  
>> > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
>> > > > > index 0c96528db94a..53b41f4bd45a 100644
>> > > > > --- a/fs/nfs/client.c
>> > > > > +++ b/fs/nfs/client.c
>> > > > > @@ -787,7 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct
>> > > > > nfs_fh *mntfh, struct nfs
>> > > > > }
>> > > > >  
>> > > > > fsinfo.fattr = fattr;
>> > > > > - fsinfo.layouttype = 0;
>> > > > > + fsinfo.layouttypes = 0;
>> > > > > error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
>> > > > > if (error < 0)
>> > > > > goto out_error;
>> > > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> > > > > index de97567795a5..9446aef89b48 100644
>> > > > > --- a/fs/nfs/nfs4proc.c
>> > > > > +++ b/fs/nfs/nfs4proc.c
>> > > > > @@ -4252,7 +4252,7 @@ static int nfs4_proc_fsinfo(struct nfs_server *server,
>> > > > > struct nfs_fh *fhandle, s
>> > > > > if (error == 0) {
>> > > > > /* block layout checks this! */
>> > > > > server->pnfs_blksize = fsinfo->blksize;
>> > > > > -  set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
>> > > > > +  set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttypes);
>> > > > > }
>> > > > >  
>> > > > > return error;
>> > > > > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>> > > > > index 661e753fe1c9..876a80802c1d 100644
>> > > > > --- a/fs/nfs/nfs4xdr.c
>> > > > > +++ b/fs/nfs/nfs4xdr.c
>> > > > > @@ -4723,33 +4723,36 @@ static int decode_getfattr(struct xdr_stream *xdr,
>> > > > > struct nfs_fattr *fattr,
>> > > > > * Decode potentially multiple layout types. Currently we only support
>> > > > > * one layout driver per file system.
>> > > > > */
>> > > > > -static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
>> > > > > -  uint32_t *layouttype)
>> > > > > +static int decode_pnfs_layout_types(struct xdr_stream *xdr, u32 *layouttypes)
>> > > > > {
>> > > > > __be32 *p;
>> > > > > int num;
>> > > > > + u32 type;
>> > > > >  
>> > > > > p = xdr_inline_decode(xdr, 4);
>> > > > > if (unlikely(!p))
>> > > > > goto out_overflow;
>> > > > > num = be32_to_cpup(p);
>> > > > >  
>> > > > > - /* pNFS is not supported by the underlying file system */
>> > > > > - if (num == 0) {
>> > > > > -  *layouttype = 0;
>> > > > > -  return 0;
>> > > > > - }
>> > > > > - if (num > 1)
>> > > > > -  printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
>> > > > > -  "drivers per filesystem not supported\n", __func__);
>> > > > > + *layouttypes = 0;
>> > > > >  
>> > > > > - /* Decode and set first layout type, move xdr->p past unused types */
>> > > > > - p = xdr_inline_decode(xdr, num * 4);
>> > > > > - if (unlikely(!p))
>> > > > > -  goto out_overflow;
>> > > > > - *layouttype = be32_to_cpup(p);
>> > > > > + for (; num; --num) {
>> > > > > +  p = xdr_inline_decode(xdr, 4);
>> > > > > +
>> > > > > +  if (unlikely(!p))
>> > > > > +  goto out_overflow;
>> > > > > +
>> > > > > +  type = be32_to_cpup(p);
>> > > > > +
>> > > > > +  /* Ignore any that we don't understand */
>> > > > > +  if (unlikely(type >= LAYOUT_TYPE_MAX))
>> > > >  
>> > > > This will in effect hard code the layouts that the client supports.
>> > > > LAYOUT_TYPE_MAX is something that applies to knfsd only for now.
>> > > > Let’s not leak it into the client. I suggest just making this
>> > > > 8*sizeof(*layouttypes).
>> > > >  
>> > >
>> > > Fair enough. I'll make that change.
>> > >
>> > > That said...LAYOUT_TYPE_MAX is a value in the pnfs_layouttype enum, and
>> > > that enum is used in both the client and the server code, AFAICT. If we
>> > > add a new LAYOUT_* value to that enum for the client, then we'll need
>> > > to increase that value anyway. So, I'm not sure I understand how this
>> > > limits the client in any way...
>> >
>> > No, the client doesn’t use enum pnfs_layouttype anywhere. If you look
>> > at set_pnfs_layoutdriver(), you’ll note that we currently support all
>> > values for the layout type.
>> >
>>
>> Ok, I see. So if someone were to (for instance) create a 3rd party
>> layout driver module that had used a value above LAYOUT_TYPE_MAX then
>> this would prevent it from working.
>>
>> Hmmm...so even if I make the change that you're suggesting, this will
>> still limit the client to working with layout types that are below a
>> value of 32. Is that also a problem? If so, then maybe I should respin
>> this to be more like the one Tigran had: make an array or something to
>> hold those values.
>>
>> Thoughts?
>>
>
> Yecchhhhh...ok after thinking about this, the whole out-of-tree layout
> driver possibility really throws a wrench into this plan...
>
> Suppose someone creates such a layout driver, drops the module onto the
> client and the core kernel knows nothing about it.  With the current
> patch, it'd be ignored. I don't think that's what we want though.
>
> Where should that driver fit in the selection order in
> set_pnfs_layoutdriver?
>
> Tigran's patch had the client start with the second element and only
> pick the first one in the list if nothing else worked. That's sort of
> icky though.
>
> Another idea might be to just attempt unrecognized ones as the driver
> of last resort, when no other driver has worked?
>
> Alternately, we could add a mount option or something that would affect
> the selection order? If so, how should such an option work?
>
> I'm really open to suggestions here -- I've no idea what the right
> thing to do is at this point...sigh.


There are two things in my patch what I don't like:

- an int array to store layouts, which mostly will be used by a single element only
- server must know client implementation to achieve desired result

In your approach other two problems:

- max layout type id 32
- hard coded supported layout types and the order

Any of them will help in adoption of flexfile layout, especially if we get it into
RHEL7.

In discussion with Christoph Hellwig back in March, I have proposed a mount option:

mount -o preferred_layout=nfs4_file,vers=4.1

or may be even an nfs kernel module option.

This will allow server to send layout in any order, but let client to re-order
them by it's own rules.

BTW, in Storage Resource Management (SRM) protocol, used to distribute scientific data around
the world, we do transfer protocol negotiation other way around: client sends to server
ordered list of supported protocols (something like layoutget) and server picks first matching one.


Regards,
Tigran.
>
> --
> Jeff Layton <[email protected]>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2016-06-02 11:04:24

by Jeff Layton

[permalink] [raw]
Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers that hand out multiple layout types

On Thu, 2016-06-02 at 09:12 +0200, Mkrtchyan, Tigran wrote:
>
> ----- Original Message -----
> > From: "Jeff Layton" <[email protected]>
> > To: "Trond Myklebust" <[email protected]>, [email protected]
> > Cc: "tigran mkrtchyan" <[email protected]>, "Anna Schumaker" <[email protected]>, [email protected]
> > Sent: Wednesday, June 1, 2016 11:53:03 PM
> > Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers that hand out multiple layout types
>
> > On Tue, 2016-05-31 at 17:54 -0400, Jeff Layton wrote:
> > > On Tue, 2016-05-31 at 21:41 +0000, Trond Myklebust wrote:
> > > >
> > > >
> > > > On 5/31/16, 17:09, "Jeff Layton" <[email protected]> wrote:
> > > >
> > > > > On Tue, 2016-05-31 at 16:03 +0000, Trond Myklebust wrote:
> > > > > >  
> > > > > > On 5/30/16, 12:35, "Jeff Layton" <[email protected]> wrote:
> > > > > >  
> > > > > > > Allow the client to deal with servers that hand out multiple layout
> > > > > > > types for the same filesystem. When this happens, we pick the "best" one,
> > > > > > > based on a hardcoded assumed order in the client code.
> > > > > > >  
> > > > > > > Signed-off-by: Jeff Layton <[email protected]>
> > > > > > > ---
> > > > > > > fs/nfs/client.c | 2 +-
> > > > > > > fs/nfs/nfs4proc.c | 2 +-
> > > > > > > fs/nfs/nfs4xdr.c | 41 +++++++++++++-------------
> > > > > > > fs/nfs/pnfs.c | 76 ++++++++++++++++++++++++++++++++++++++-----------
> > > > > > > include/linux/nfs_xdr.h | 2 +-
> > > > > > > 5 files changed, 85 insertions(+), 38 deletions(-)
> > > > > > >  
> > > > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > > > > > > index 0c96528db94a..53b41f4bd45a 100644
> > > > > > > --- a/fs/nfs/client.c
> > > > > > > +++ b/fs/nfs/client.c
> > > > > > > @@ -787,7 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct
> > > > > > > nfs_fh *mntfh, struct nfs
> > > > > > > }
> > > > > > >  
> > > > > > > fsinfo.fattr = fattr;
> > > > > > > - fsinfo.layouttype = 0;
> > > > > > > + fsinfo.layouttypes = 0;
> > > > > > > error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
> > > > > > > if (error < 0)
> > > > > > > goto out_error;
> > > > > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > > > > > index de97567795a5..9446aef89b48 100644
> > > > > > > --- a/fs/nfs/nfs4proc.c
> > > > > > > +++ b/fs/nfs/nfs4proc.c
> > > > > > > @@ -4252,7 +4252,7 @@ static int nfs4_proc_fsinfo(struct nfs_server *server,
> > > > > > > struct nfs_fh *fhandle, s
> > > > > > > if (error == 0) {
> > > > > > > /* block layout checks this! */
> > > > > > > server->pnfs_blksize = fsinfo->blksize;
> > > > > > > -  set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
> > > > > > > +  set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttypes);
> > > > > > > }
> > > > > > >  
> > > > > > > return error;
> > > > > > > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> > > > > > > index 661e753fe1c9..876a80802c1d 100644
> > > > > > > --- a/fs/nfs/nfs4xdr.c
> > > > > > > +++ b/fs/nfs/nfs4xdr.c
> > > > > > > @@ -4723,33 +4723,36 @@ static int decode_getfattr(struct xdr_stream *xdr,
> > > > > > > struct nfs_fattr *fattr,
> > > > > > > * Decode potentially multiple layout types. Currently we only support
> > > > > > > * one layout driver per file system.
> > > > > > > */
> > > > > > > -static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
> > > > > > > -  uint32_t *layouttype)
> > > > > > > +static int decode_pnfs_layout_types(struct xdr_stream *xdr, u32 *layouttypes)
> > > > > > > {
> > > > > > > __be32 *p;
> > > > > > > int num;
> > > > > > > + u32 type;
> > > > > > >  
> > > > > > > p = xdr_inline_decode(xdr, 4);
> > > > > > > if (unlikely(!p))
> > > > > > > goto out_overflow;
> > > > > > > num = be32_to_cpup(p);
> > > > > > >  
> > > > > > > - /* pNFS is not supported by the underlying file system */
> > > > > > > - if (num == 0) {
> > > > > > > -  *layouttype = 0;
> > > > > > > -  return 0;
> > > > > > > - }
> > > > > > > - if (num > 1)
> > > > > > > -  printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
> > > > > > > -  "drivers per filesystem not supported\n", __func__);
> > > > > > > + *layouttypes = 0;
> > > > > > >  
> > > > > > > - /* Decode and set first layout type, move xdr->p past unused types */
> > > > > > > - p = xdr_inline_decode(xdr, num * 4);
> > > > > > > - if (unlikely(!p))
> > > > > > > -  goto out_overflow;
> > > > > > > - *layouttype = be32_to_cpup(p);
> > > > > > > + for (; num; --num) {
> > > > > > > +  p = xdr_inline_decode(xdr, 4);
> > > > > > > +
> > > > > > > +  if (unlikely(!p))
> > > > > > > +  goto out_overflow;
> > > > > > > +
> > > > > > > +  type = be32_to_cpup(p);
> > > > > > > +
> > > > > > > +  /* Ignore any that we don't understand */
> > > > > > > +  if (unlikely(type >= LAYOUT_TYPE_MAX))
> > > > > >  
> > > > > > This will in effect hard code the layouts that the client supports.
> > > > > > LAYOUT_TYPE_MAX is something that applies to knfsd only for now.
> > > > > > Let’s not leak it into the client. I suggest just making this
> > > > > > 8*sizeof(*layouttypes).
> > > > > >  
> > > > >
> > > > > Fair enough. I'll make that change.
> > > > >
> > > > > That said...LAYOUT_TYPE_MAX is a value in the pnfs_layouttype enum, and
> > > > > that enum is used in both the client and the server code, AFAICT. If we
> > > > > add a new LAYOUT_* value to that enum for the client, then we'll need
> > > > > to increase that value anyway. So, I'm not sure I understand how this
> > > > > limits the client in any way...
> > > >
> > > > No, the client doesn’t use enum pnfs_layouttype anywhere. If you look
> > > > at set_pnfs_layoutdriver(), you’ll note that we currently support all
> > > > values for the layout type.
> > > >
> > >
> > > Ok, I see. So if someone were to (for instance) create a 3rd party
> > > layout driver module that had used a value above LAYOUT_TYPE_MAX then
> > > this would prevent it from working.
> > >
> > > Hmmm...so even if I make the change that you're suggesting, this will
> > > still limit the client to working with layout types that are below a
> > > value of 32. Is that also a problem? If so, then maybe I should respin
> > > this to be more like the one Tigran had: make an array or something to
> > > hold those values.
> > >
> > > Thoughts?
> > >
> >
> > Yecchhhhh...ok after thinking about this, the whole out-of-tree layout
> > driver possibility really throws a wrench into this plan...
> >
> > Suppose someone creates such a layout driver, drops the module onto the
> > client and the core kernel knows nothing about it.  With the current
> > patch, it'd be ignored. I don't think that's what we want though.
> >
> > Where should that driver fit in the selection order in
> > set_pnfs_layoutdriver?
> >
> > Tigran's patch had the client start with the second element and only
> > pick the first one in the list if nothing else worked. That's sort of
> > icky though.
> >
> > Another idea might be to just attempt unrecognized ones as the driver
> > of last resort, when no other driver has worked?
> >
> > Alternately, we could add a mount option or something that would affect
> > the selection order? If so, how should such an option work?
> >
> > I'm really open to suggestions here -- I've no idea what the right
> > thing to do is at this point...sigh.
>
>
> There are two things in my patch what I don't like:
>
>   - an int array to store layouts, which mostly will be used by a single element only
>   - server must know client implementation to achieve desired result
>

Meh, the array is not too big a deal. We only allocate a fsinfo struct
to handle the call. Once we've selected the layout type, it gets
discarded. The second problem is the bigger one, IMO.

> In your approach other two problems:
>
>   - max layout type id 32
>   - hard coded supported layout types and the order
>

Right, both are problems. For now, I'm not too worried about getting
_official_ layout type values that are above 32, but the spec says:

   Types within the range 0x00000001-0x7FFFFFFF are
   globally unique and are assigned according to the description in
   Section 22.4; they are maintained by IANA.  Types within the range
   0x80000000-0xFFFFFFFF are site specific and for private use only.

So both of the above problems in my RFC patch make it difficult to
experiment with new layout types.

> Any of them will help in adoption of flexfile layout, especially if we get it into
> RHEL7.
>
> In discussion with Christoph Hellwig back in March, I have proposed a mount option:
>
>    mount -o preferred_layout=nfs4_file,vers=4.1
>
> or may be even an nfs kernel module option.
>

> This will allow server to send layout in any order, but let client to re-order
> them by it's own rules.
>

Yeah, I was thinking something along the same lines.

The problem with a mount option is that you can transit to different
filesystems in multiple ways with NFS these days (referrals, etc...).
Propagating and handling mount options in those cases can quickly
become quite messy.

A module option to set the selection order might be best. For instance:

    nfs4.pnfs_layout_order=0x80000006:scsi:block:object:flexfile:file

Then the client could pick the best one based on that order. The
default could be the order that I set up in the proposed patch (or
something else, fwiw).

Either way, I'd like Trond and/or Anna to weigh in on what they'd find
acceptable before we go down either of those roads.

--
Jeff Layton <[email protected]>

2016-06-07 12:26:18

by Mkrtchyan, Tigran

[permalink] [raw]
Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers that hand out multiple layout types



----- Original Message -----
> From: "Jeff Layton" <[email protected]>
> To: "Mkrtchyan, Tigran" <[email protected]>
> Cc: "Trond Myklebust" <[email protected]>, [email protected], "Anna Schumaker"
> <[email protected]>, [email protected]
> Sent: Thursday, June 2, 2016 1:04:19 PM
> Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers that hand out multiple layout types

> On Thu, 2016-06-02 at 09:12 +0200, Mkrtchyan, Tigran wrote:
>>
>> ----- Original Message -----
>> > From: "Jeff Layton" <[email protected]>
>> > To: "Trond Myklebust" <[email protected]>, [email protected]
>> > Cc: "tigran mkrtchyan" <[email protected]>, "Anna Schumaker"
>> > <[email protected]>, [email protected]
>> > Sent: Wednesday, June 1, 2016 11:53:03 PM
>> > Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers that hand out
>> > multiple layout types
>>
>> > On Tue, 2016-05-31 at 17:54 -0400, Jeff Layton wrote:
>> > > On Tue, 2016-05-31 at 21:41 +0000, Trond Myklebust wrote:
>> > > >
>> > > >
>> > > > On 5/31/16, 17:09, "Jeff Layton" <[email protected]> wrote:
>> > > >
>> > > > > On Tue, 2016-05-31 at 16:03 +0000, Trond Myklebust wrote:
>> > > > > >  
>> > > > > > On 5/30/16, 12:35, "Jeff Layton" <[email protected]> wrote:
>> > > > > >  
>> > > > > > > Allow the client to deal with servers that hand out multiple layout
>> > > > > > > types for the same filesystem. When this happens, we pick the "best" one,
>> > > > > > > based on a hardcoded assumed order in the client code.
>> > > > > > >  
>> > > > > > > Signed-off-by: Jeff Layton <[email protected]>
>> > > > > > > ---
>> > > > > > > fs/nfs/client.c | 2 +-
>> > > > > > > fs/nfs/nfs4proc.c | 2 +-
>> > > > > > > fs/nfs/nfs4xdr.c | 41 +++++++++++++-------------
>> > > > > > > fs/nfs/pnfs.c | 76 ++++++++++++++++++++++++++++++++++++++-----------
>> > > > > > > include/linux/nfs_xdr.h | 2 +-
>> > > > > > > 5 files changed, 85 insertions(+), 38 deletions(-)
>> > > > > > >  
>> > > > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
>> > > > > > > index 0c96528db94a..53b41f4bd45a 100644
>> > > > > > > --- a/fs/nfs/client.c
>> > > > > > > +++ b/fs/nfs/client.c
>> > > > > > > @@ -787,7 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct
>> > > > > > > nfs_fh *mntfh, struct nfs
>> > > > > > > }
>> > > > > > >  
>> > > > > > > fsinfo.fattr = fattr;
>> > > > > > > - fsinfo.layouttype = 0;
>> > > > > > > + fsinfo.layouttypes = 0;
>> > > > > > > error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
>> > > > > > > if (error < 0)
>> > > > > > > goto out_error;
>> > > > > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> > > > > > > index de97567795a5..9446aef89b48 100644
>> > > > > > > --- a/fs/nfs/nfs4proc.c
>> > > > > > > +++ b/fs/nfs/nfs4proc.c
>> > > > > > > @@ -4252,7 +4252,7 @@ static int nfs4_proc_fsinfo(struct nfs_server *server,
>> > > > > > > struct nfs_fh *fhandle, s
>> > > > > > > if (error == 0) {
>> > > > > > > /* block layout checks this! */
>> > > > > > > server->pnfs_blksize = fsinfo->blksize;
>> > > > > > > -  set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
>> > > > > > > +  set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttypes);
>> > > > > > > }
>> > > > > > >  
>> > > > > > > return error;
>> > > > > > > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>> > > > > > > index 661e753fe1c9..876a80802c1d 100644
>> > > > > > > --- a/fs/nfs/nfs4xdr.c
>> > > > > > > +++ b/fs/nfs/nfs4xdr.c
>> > > > > > > @@ -4723,33 +4723,36 @@ static int decode_getfattr(struct xdr_stream *xdr,
>> > > > > > > struct nfs_fattr *fattr,
>> > > > > > > * Decode potentially multiple layout types. Currently we only support
>> > > > > > > * one layout driver per file system.
>> > > > > > > */
>> > > > > > > -static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
>> > > > > > > -  uint32_t *layouttype)
>> > > > > > > +static int decode_pnfs_layout_types(struct xdr_stream *xdr, u32 *layouttypes)
>> > > > > > > {
>> > > > > > > __be32 *p;
>> > > > > > > int num;
>> > > > > > > + u32 type;
>> > > > > > >  
>> > > > > > > p = xdr_inline_decode(xdr, 4);
>> > > > > > > if (unlikely(!p))
>> > > > > > > goto out_overflow;
>> > > > > > > num = be32_to_cpup(p);
>> > > > > > >  
>> > > > > > > - /* pNFS is not supported by the underlying file system */
>> > > > > > > - if (num == 0) {
>> > > > > > > -  *layouttype = 0;
>> > > > > > > -  return 0;
>> > > > > > > - }
>> > > > > > > - if (num > 1)
>> > > > > > > -  printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
>> > > > > > > -  "drivers per filesystem not supported\n", __func__);
>> > > > > > > + *layouttypes = 0;
>> > > > > > >  
>> > > > > > > - /* Decode and set first layout type, move xdr->p past unused types */
>> > > > > > > - p = xdr_inline_decode(xdr, num * 4);
>> > > > > > > - if (unlikely(!p))
>> > > > > > > -  goto out_overflow;
>> > > > > > > - *layouttype = be32_to_cpup(p);
>> > > > > > > + for (; num; --num) {
>> > > > > > > +  p = xdr_inline_decode(xdr, 4);
>> > > > > > > +
>> > > > > > > +  if (unlikely(!p))
>> > > > > > > +  goto out_overflow;
>> > > > > > > +
>> > > > > > > +  type = be32_to_cpup(p);
>> > > > > > > +
>> > > > > > > +  /* Ignore any that we don't understand */
>> > > > > > > +  if (unlikely(type >= LAYOUT_TYPE_MAX))
>> > > > > >  
>> > > > > > This will in effect hard code the layouts that the client supports.
>> > > > > > LAYOUT_TYPE_MAX is something that applies to knfsd only for now.
>> > > > > > Let’s not leak it into the client. I suggest just making this
>> > > > > > 8*sizeof(*layouttypes).
>> > > > > >  
>> > > > >
>> > > > > Fair enough. I'll make that change.
>> > > > >
>> > > > > That said...LAYOUT_TYPE_MAX is a value in the pnfs_layouttype enum, and
>> > > > > that enum is used in both the client and the server code, AFAICT. If we
>> > > > > add a new LAYOUT_* value to that enum for the client, then we'll need
>> > > > > to increase that value anyway. So, I'm not sure I understand how this
>> > > > > limits the client in any way...
>> > > >
>> > > > No, the client doesn’t use enum pnfs_layouttype anywhere. If you look
>> > > > at set_pnfs_layoutdriver(), you’ll note that we currently support all
>> > > > values for the layout type.
>> > > >
>> > >
>> > > Ok, I see. So if someone were to (for instance) create a 3rd party
>> > > layout driver module that had used a value above LAYOUT_TYPE_MAX then
>> > > this would prevent it from working.
>> > >
>> > > Hmmm...so even if I make the change that you're suggesting, this will
>> > > still limit the client to working with layout types that are below a
>> > > value of 32. Is that also a problem? If so, then maybe I should respin
>> > > this to be more like the one Tigran had: make an array or something to
>> > > hold those values.
>> > >
>> > > Thoughts?
>> > >
>> >
>> > Yecchhhhh...ok after thinking about this, the whole out-of-tree layout
>> > driver possibility really throws a wrench into this plan...
>> >
>> > Suppose someone creates such a layout driver, drops the module onto the
>> > client and the core kernel knows nothing about it.  With the current
>> > patch, it'd be ignored. I don't think that's what we want though.
>> >
>> > Where should that driver fit in the selection order in
>> > set_pnfs_layoutdriver?
>> >
>> > Tigran's patch had the client start with the second element and only
>> > pick the first one in the list if nothing else worked. That's sort of
>> > icky though.
>> >
>> > Another idea might be to just attempt unrecognized ones as the driver
>> > of last resort, when no other driver has worked?
>> >
>> > Alternately, we could add a mount option or something that would affect
>> > the selection order? If so, how should such an option work?
>> >
>> > I'm really open to suggestions here -- I've no idea what the right
>> > thing to do is at this point...sigh.
>>
>>
>> There are two things in my patch what I don't like:
>>
>>   - an int array to store layouts, which mostly will be used by a single element
>>   only
>>   - server must know client implementation to achieve desired result
>>
>
> Meh, the array is not too big a deal. We only allocate a fsinfo struct
> to handle the call. Once we've selected the layout type, it gets
> discarded. The second problem is the bigger one, IMO.
>
>> In your approach other two problems:
>>
>>   - max layout type id 32
>>   - hard coded supported layout types and the order
>>
>
> Right, both are problems. For now, I'm not too worried about getting
> _official_ layout type values that are above 32, but the spec says:
>
>   Types within the range 0x00000001-0x7FFFFFFF are
>   globally unique and are assigned according to the description in
>   Section 22.4; they are maintained by IANA.  Types within the range
>   0x80000000-0xFFFFFFFF are site specific and for private use only.
>
> So both of the above problems in my RFC patch make it difficult to
> experiment with new layout types.
>
>> Any of them will help in adoption of flexfile layout, especially if we get it
>> into
>> RHEL7.
>>
>> In discussion with Christoph Hellwig back in March, I have proposed a mount
>> option:
>>
>>    mount -o preferred_layout=nfs4_file,vers=4.1
>>
>> or may be even an nfs kernel module option.
>>
>
>> This will allow server to send layout in any order, but let client to re-order
>> them by it's own rules.
>>
>
> Yeah, I was thinking something along the same lines.
>
> The problem with a mount option is that you can transit to different
> filesystems in multiple ways with NFS these days (referrals, etc...).
> Propagating and handling mount options in those cases can quickly
> become quite messy.
>
> A module option to set the selection order might be best. For instance:
>
>    nfs4.pnfs_layout_order=0x80000006:scsi:block:object:flexfile:file

Hi Jeff,

after some mental exercises around this topic, I came to a conclusion, that
module option is a wrong approach. The module configuration is a global
setting for kernel nfs client. Imagine a situation in which you want to use
flexfiles with one server and nfs4_files with another server, but both
support both layout types.

Looks like there is no way around mount option.

Tigran.


>
> Then the client could pick the best one based on that order. The
> default could be the order that I set up in the proposed patch (or
> something else, fwiw).
>
> Either way, I'd like Trond and/or Anna to weigh in on what they'd find
> acceptable before we go down either of those roads.
>
> --
> Jeff Layton <[email protected]>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2016-06-07 12:43:14

by Jeff Layton

[permalink] [raw]
Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers that hand out multiple layout types

On Tue, 2016-06-07 at 14:26 +0200, Mkrtchyan, Tigran wrote:
>
> ----- Original Message -----
> >
> > From: "Jeff Layton" <[email protected]>
> > To: "Mkrtchyan, Tigran" <[email protected]>
> > Cc: "Trond Myklebust" <[email protected]>, [email protected]
> > nel.org, "Anna Schumaker"
> > <[email protected]>, [email protected]
> > Sent: Thursday, June 2, 2016 1:04:19 PM
> > Subject: Re: [RFC PATCH] nfs: allow nfs client to handle servers
> > that hand out multiple layout types
> >
> > On Thu, 2016-06-02 at 09:12 +0200, Mkrtchyan, Tigran wrote:
> > >
> > >
> > > ----- Original Message -----
> > > >
> > > > From: "Jeff Layton" <[email protected]>
> > > > To: "Trond Myklebust" <[email protected]>, linux-nfs@vger
> > > > .kernel.org
> > > > Cc: "tigran mkrtchyan" <[email protected]>, "Anna
> > > > Schumaker"
> > > > <[email protected]>, [email protected]
> > > > Sent: Wednesday, June 1, 2016 11:53:03 PM
> > > > Subject: Re: [RFC PATCH] nfs: allow nfs client to handle
> > > > servers that hand out
> > > > multiple layout types
> > > >
> > > > On Tue, 2016-05-31 at 17:54 -0400, Jeff Layton wrote:
> > > > >
> > > > > On Tue, 2016-05-31 at 21:41 +0000, Trond Myklebust wrote:
> > > > > >
> > > > > >
> > > > > >
> > > > > > On 5/31/16, 17:09, "Jeff Layton" <[email protected]>
> > > > > > wrote:
> > > > > >
> > > > > > >
> > > > > > > On Tue, 2016-05-31 at 16:03 +0000, Trond Myklebust wrote:
> > > > > > > >
> > > > > > > >  
> > > > > > > > On 5/30/16, 12:35, "Jeff Layton" <[email protected]
> > > > > > > > et> wrote:
> > > > > > > >  
> > > > > > > > >
> > > > > > > > > Allow the client to deal with servers that hand out
> > > > > > > > > multiple layout
> > > > > > > > > types for the same filesystem. When this happens, we
> > > > > > > > > pick the "best" one,
> > > > > > > > > based on a hardcoded assumed order in the client
> > > > > > > > > code.
> > > > > > > > >  
> > > > > > > > > Signed-off-by: Jeff Layton <[email protected]
> > > > > > > > > om>
> > > > > > > > > ---
> > > > > > > > > fs/nfs/client.c | 2 +-
> > > > > > > > > fs/nfs/nfs4proc.c | 2 +-
> > > > > > > > > fs/nfs/nfs4xdr.c | 41 +++++++++++++-------------
> > > > > > > > > fs/nfs/pnfs.c | 76
> > > > > > > > > ++++++++++++++++++++++++++++++++++++++-----------
> > > > > > > > > include/linux/nfs_xdr.h | 2 +-
> > > > > > > > > 5 files changed, 85 insertions(+), 38 deletions(-)
> > > > > > > > >  
> > > > > > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > > > > > > > > index 0c96528db94a..53b41f4bd45a 100644
> > > > > > > > > --- a/fs/nfs/client.c
> > > > > > > > > +++ b/fs/nfs/client.c
> > > > > > > > > @@ -787,7 +787,7 @@ int nfs_probe_fsinfo(struct
> > > > > > > > > nfs_server *server, struct
> > > > > > > > > nfs_fh *mntfh, struct nfs
> > > > > > > > > }
> > > > > > > > >  
> > > > > > > > > fsinfo.fattr = fattr;
> > > > > > > > > - fsinfo.layouttype = 0;
> > > > > > > > > + fsinfo.layouttypes = 0;
> > > > > > > > > error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
> > > > > > > > > if (error < 0)
> > > > > > > > > goto out_error;
> > > > > > > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > > > > > > > index de97567795a5..9446aef89b48 100644
> > > > > > > > > --- a/fs/nfs/nfs4proc.c
> > > > > > > > > +++ b/fs/nfs/nfs4proc.c
> > > > > > > > > @@ -4252,7 +4252,7 @@ static int
> > > > > > > > > nfs4_proc_fsinfo(struct nfs_server *server,
> > > > > > > > > struct nfs_fh *fhandle, s
> > > > > > > > > if (error == 0) {
> > > > > > > > > /* block layout checks this! */
> > > > > > > > > server->pnfs_blksize = fsinfo->blksize;
> > > > > > > > > -  set_pnfs_layoutdriver(server, fhandle,
> > > > > > > > > fsinfo->layouttype);
> > > > > > > > > +  set_pnfs_layoutdriver(server, fhandle,
> > > > > > > > > fsinfo->layouttypes);
> > > > > > > > > }
> > > > > > > > >  
> > > > > > > > > return error;
> > > > > > > > > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> > > > > > > > > index 661e753fe1c9..876a80802c1d 100644
> > > > > > > > > --- a/fs/nfs/nfs4xdr.c
> > > > > > > > > +++ b/fs/nfs/nfs4xdr.c
> > > > > > > > > @@ -4723,33 +4723,36 @@ static int
> > > > > > > > > decode_getfattr(struct xdr_stream *xdr,
> > > > > > > > > struct nfs_fattr *fattr,
> > > > > > > > > * Decode potentially multiple layout types. Currently
> > > > > > > > > we only support
> > > > > > > > > * one layout driver per file system.
> > > > > > > > > */
> > > > > > > > > -static int decode_first_pnfs_layout_type(struct
> > > > > > > > > xdr_stream *xdr,
> > > > > > > > > -  uint32_t *layouttype)
> > > > > > > > > +static int decode_pnfs_layout_types(struct
> > > > > > > > > xdr_stream *xdr, u32 *layouttypes)
> > > > > > > > > {
> > > > > > > > > __be32 *p;
> > > > > > > > > int num;
> > > > > > > > > + u32 type;
> > > > > > > > >  
> > > > > > > > > p = xdr_inline_decode(xdr, 4);
> > > > > > > > > if (unlikely(!p))
> > > > > > > > > goto out_overflow;
> > > > > > > > > num = be32_to_cpup(p);
> > > > > > > > >  
> > > > > > > > > - /* pNFS is not supported by the underlying
> > > > > > > > > file system */
> > > > > > > > > - if (num == 0) {
> > > > > > > > > -  *layouttype = 0;
> > > > > > > > > -  return 0;
> > > > > > > > > - }
> > > > > > > > > - if (num > 1)
> > > > > > > > > -  printk(KERN_INFO "NFS: %s: Warning:
> > > > > > > > > Multiple pNFS layout "
> > > > > > > > > -  "drivers per filesystem not supported\n",
> > > > > > > > > __func__);
> > > > > > > > > + *layouttypes = 0;
> > > > > > > > >  
> > > > > > > > > - /* Decode and set first layout type, move
> > > > > > > > > xdr->p past unused types */
> > > > > > > > > - p = xdr_inline_decode(xdr, num * 4);
> > > > > > > > > - if (unlikely(!p))
> > > > > > > > > -  goto out_overflow;
> > > > > > > > > - *layouttype = be32_to_cpup(p);
> > > > > > > > > + for (; num; --num) {
> > > > > > > > > +  p = xdr_inline_decode(xdr, 4);
> > > > > > > > > +
> > > > > > > > > +  if (unlikely(!p))
> > > > > > > > > +  goto out_overflow;
> > > > > > > > > +
> > > > > > > > > +  type = be32_to_cpup(p);
> > > > > > > > > +
> > > > > > > > > +  /* Ignore any that we don't understand */
> > > > > > > > > +  if (unlikely(type >= LAYOUT_TYPE_MAX))
> > > > > > > >  
> > > > > > > > This will in effect hard code the layouts that the
> > > > > > > > client supports.
> > > > > > > > LAYOUT_TYPE_MAX is something that applies to knfsd only
> > > > > > > > for now.
> > > > > > > > Let’s not leak it into the client. I suggest just
> > > > > > > > making this
> > > > > > > > 8*sizeof(*layouttypes).
> > > > > > > >  
> > > > > > > Fair enough. I'll make that change.
> > > > > > >
> > > > > > > That said...LAYOUT_TYPE_MAX is a value in the
> > > > > > > pnfs_layouttype enum, and
> > > > > > > that enum is used in both the client and the server code,
> > > > > > > AFAICT. If we
> > > > > > > add a new LAYOUT_* value to that enum for the client,
> > > > > > > then we'll need
> > > > > > > to increase that value anyway. So, I'm not sure I
> > > > > > > understand how this
> > > > > > > limits the client in any way...
> > > > > > No, the client doesn’t use enum pnfs_layouttype anywhere.
> > > > > > If you look
> > > > > > at set_pnfs_layoutdriver(), you’ll note that we currently
> > > > > > support all
> > > > > > values for the layout type.
> > > > > >
> > > > > Ok, I see. So if someone were to (for instance) create a 3rd
> > > > > party
> > > > > layout driver module that had used a value above
> > > > > LAYOUT_TYPE_MAX then
> > > > > this would prevent it from working.
> > > > >
> > > > > Hmmm...so even if I make the change that you're suggesting,
> > > > > this will
> > > > > still limit the client to working with layout types that are
> > > > > below a
> > > > > value of 32. Is that also a problem? If so, then maybe I
> > > > > should respin
> > > > > this to be more like the one Tigran had: make an array or
> > > > > something to
> > > > > hold those values.
> > > > >
> > > > > Thoughts?
> > > > >
> > > > Yecchhhhh...ok after thinking about this, the whole out-of-tree
> > > > layout
> > > > driver possibility really throws a wrench into this plan...
> > > >
> > > > Suppose someone creates such a layout driver, drops the module
> > > > onto the
> > > > client and the core kernel knows nothing about it.  With the
> > > > current
> > > > patch, it'd be ignored. I don't think that's what we want
> > > > though.
> > > >
> > > > Where should that driver fit in the selection order in
> > > > set_pnfs_layoutdriver?
> > > >
> > > > Tigran's patch had the client start with the second element and
> > > > only
> > > > pick the first one in the list if nothing else worked. That's
> > > > sort of
> > > > icky though.
> > > >
> > > > Another idea might be to just attempt unrecognized ones as the
> > > > driver
> > > > of last resort, when no other driver has worked?
> > > >
> > > > Alternately, we could add a mount option or something that
> > > > would affect
> > > > the selection order? If so, how should such an option work?
> > > >
> > > > I'm really open to suggestions here -- I've no idea what the
> > > > right
> > > > thing to do is at this point...sigh.
> > >
> > > There are two things in my patch what I don't like:
> > >
> > >   - an int array to store layouts, which mostly will be used by a
> > > single element
> > >   only
> > >   - server must know client implementation to achieve desired
> > > result
> > >
> > Meh, the array is not too big a deal. We only allocate a fsinfo
> > struct
> > to handle the call. Once we've selected the layout type, it gets
> > discarded. The second problem is the bigger one, IMO.
> >
> > >
> > > In your approach other two problems:
> > >
> > >   - max layout type id 32
> > >   - hard coded supported layout types and the order
> > >
> > Right, both are problems. For now, I'm not too worried about
> > getting
> > _official_ layout type values that are above 32, but the spec says:
> >
> >    Types within the range 0x00000001-0x7FFFFFFF are
> >    globally unique and are assigned according to the description in
> >    Section 22.4; they are maintained by IANA.  Types within the
> > range
> >    0x80000000-0xFFFFFFFF are site specific and for private use
> > only.
> >
> > So both of the above problems in my RFC patch make it difficult to
> > experiment with new layout types.
> >
> > >
> > > Any of them will help in adoption of flexfile layout, especially
> > > if we get it
> > > into
> > > RHEL7.
> > >
> > > In discussion with Christoph Hellwig back in March, I have
> > > proposed a mount
> > > option:
> > >
> > >    mount -o preferred_layout=nfs4_file,vers=4.1
> > >
> > > or may be even an nfs kernel module option.
> > >
> > >
> > > This will allow server to send layout in any order, but let
> > > client to re-order
> > > them by it's own rules.
> > >
> > Yeah, I was thinking something along the same lines.
> >
> > The problem with a mount option is that you can transit to
> > different
> > filesystems in multiple ways with NFS these days (referrals,
> > etc...).
> > Propagating and handling mount options in those cases can quickly
> > become quite messy.
> >
> > A module option to set the selection order might be best. For
> > instance:
> >
> >    
> > nfs4.pnfs_layout_order=0x80000006:scsi:block:object:flexfile:file
> Hi Jeff,
>
> after some mental exercises around this topic, I came to a
> conclusion, that
> module option is a wrong approach. The module configuration is a
> global
> setting for kernel nfs client. Imagine a situation in which you want
> to use
> flexfiles with one server and nfs4_files with another server, but
> both
> support both layout types.
>
> Looks like there is no way around mount option.
>
> Tigran.
>
>

Sure, that sort of thing is possible. For now though most servers still
only send a list of 1 layout type, with a few sending a list of two or
three. I don't know that we really need to plumb in that level of
granularity just yet.

The reason I'm hesitant to add a mount option is that because of the
way that structures are aggressively shared, it can be difficult to set
this type of thing on a per-mount basis.

The set I sent this morning sidesteps the whole configuration issue,
but should make it possible to add that in later once the maintainers
express a preference on how they'd like that to work (hint, hint)...

--
Jeff Layton <[email protected]>