2016-07-10 19:56:04

by Jeff Layton

[permalink] [raw]
Subject: [PATCH v3 0/3][resend] pnfs/nfsd: have client and server support multiple layout types

From: Jeff Layton <[email protected]>

v3:
- use sort() from lib/sort.c to sort the list

v2:
- rework Tigran's patch to preserve existing selection behaviour
- simplify layout driver selection by sorting the list

This is v3 of the patchset. The main change is that this patch doesn't
use a hand-rolled bubble sort, but rather the heapsort library in
lib/sort.c. This is mostly for conciseness since the array sizes are
generally so small that it shouldn't make any perf difference.

Note that this does mean that we need to keep track on the number of
elements in the array that the server sends, so there are a few
ancillary changes to deal with that as well.

Only lightly tested by mounting a server that sends both flexfiles and
block layouts. The client successfully selected the block layout in most
cases, but if I blacklist blocklayoutdriver then it selects flexfiles
instead.

I'm sending these together, but I'd expect Bruce to pick up the nfsd
patches and Trond or Anna to pick up the client-side ones.

The nfsd patch is based on top of Tom's nfsd flexfile layout patches.

Jeff Layton (3):
nfsd: allow nfsd to advertise multiple layout types
pnfs: track multiple layout types in fsinfo structure
pnfs: add a new mechanism to select a layout driver according to an
ordered list

fs/nfs/client.c | 3 ++-
fs/nfs/nfs4proc.c | 3 ++-
fs/nfs/nfs4xdr.c | 40 +++++++++++++++--------------
fs/nfs/pnfs.c | 67 ++++++++++++++++++++++++++++++++++++++++---------
fs/nfs/pnfs.h | 5 ++--
fs/nfsd/export.c | 4 +--
fs/nfsd/export.h | 2 +-
fs/nfsd/nfs4layouts.c | 6 ++---
fs/nfsd/nfs4proc.c | 4 +--
fs/nfsd/nfs4xdr.c | 30 +++++++++++-----------
include/linux/nfs_xdr.h | 8 +++++-
11 files changed, 112 insertions(+), 60 deletions(-)

--
2.5.5



2016-07-10 19:56:04

by Jeff Layton

[permalink] [raw]
Subject: [PATCH v3 2/3][resend] pnfs: track multiple layout types in fsinfo structure

From: Jeff Layton <[email protected]>

Current NFSv4.1/pNFS client assumes that MDS supports only one layout
type. While it's true for most existing servers, nevertheless, this can
be change in the near future.

For now, this patch just plumbs in the ability to track a list of
layouts in the fsinfo structure. The existing behavior of the client
is preserved, by having it just select the first entry in the list.

Signed-off-by: Tigran Mkrtchyan <[email protected]>
Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfs/client.c | 2 +-
fs/nfs/nfs4xdr.c | 23 ++++++++++-------------
fs/nfs/pnfs.c | 27 ++++++++++++++++-----------
fs/nfs/pnfs.h | 4 ++--
include/linux/nfs_xdr.h | 7 ++++++-
5 files changed, 35 insertions(+), 28 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 0c96528db94a..067f489aab3f 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -787,7 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
}

fsinfo.fattr = fattr;
- fsinfo.layouttype = 0;
+ memset(fsinfo.layouttype, 0, sizeof(fsinfo.layouttype));
error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
if (error < 0)
goto out_error;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 661e753fe1c9..b2c698499ad9 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4720,14 +4720,13 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
}

/*
- * Decode potentially multiple layout types. Currently we only support
- * one layout driver per file system.
+ * Decode potentially multiple layout types.
*/
-static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
+static int decode_pnfs_layout_types(struct xdr_stream *xdr,
uint32_t *layouttype)
{
__be32 *p;
- int num;
+ uint32_t num, i;

p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
@@ -4736,18 +4735,17 @@ static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,

/* pNFS is not supported by the underlying file system */
if (num == 0) {
- *layouttype = 0;
return 0;
}
- if (num > 1)
- printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
- "drivers per filesystem not supported\n", __func__);
+ if (num > NFS_MAX_LAYOUT_TYPES)
+ printk(KERN_INFO "NFS: %s: Warning: Too many (%d) pNFS layout types\n", __func__, num);

/* Decode and set first layout type, move xdr->p past unused types */
p = xdr_inline_decode(xdr, num * 4);
if (unlikely(!p))
goto out_overflow;
- *layouttype = be32_to_cpup(p);
+ for(i = 0; i < num && i < NFS_MAX_LAYOUT_TYPES; i++)
+ layouttype[i] = be32_to_cpup(p++);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
@@ -4767,10 +4765,9 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
return -EIO;
if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {
- status = decode_first_pnfs_layout_type(xdr, layouttype);
+ status = decode_pnfs_layout_types(xdr, layouttype);
bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
- } else
- *layouttype = 0;
+ }
return status;
}

@@ -4851,7 +4848,7 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
if (status != 0)
goto xdr_error;
- status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
+ status = decode_attr_pnfstype(xdr, bitmap, fsinfo->layouttype);
if (status != 0)
goto xdr_error;

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0c7e0d45a4de..2fc1b9354345 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -102,32 +102,37 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
* Try to set the server's pnfs module to the pnfs layout type specified by id.
* Currently only one pNFS layout driver per filesystem is supported.
*
- * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
+ * @ids array of layout types supported by MDS.
*/
void
set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
- u32 id)
+ u32 *ids)
{
struct pnfs_layoutdriver_type *ld_type = NULL;
+ u32 id;

- if (id == 0)
- goto out_no_driver;
if (!(server->nfs_client->cl_exchange_flags &
(EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
- printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",
- __func__, id, server->nfs_client->cl_exchange_flags);
+ printk(KERN_ERR "NFS: %s: cl_exchange_flags 0x%x\n",
+ __func__, server->nfs_client->cl_exchange_flags);
goto out_no_driver;
}
+
+ id = ids[0];
+ if (!id)
+ goto out_no_driver;
+
ld_type = find_pnfs_driver(id);
if (!ld_type) {
request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
ld_type = find_pnfs_driver(id);
- if (!ld_type) {
- dprintk("%s: No pNFS module found for %u.\n",
- __func__, id);
- goto out_no_driver;
- }
}
+
+ if (!ld_type) {
+ dprintk("%s: No pNFS module found for %u.\n", __func__, id);
+ goto out_no_driver;
+ }
+
server->pnfs_curr_ld = ld_type;
if (ld_type->set_layoutdriver
&& ld_type->set_layoutdriver(server, mntfh)) {
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index b21bd0bee784..500473c87e82 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -236,7 +236,7 @@ void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg);

-void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
+void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32 *);
void unset_pnfs_layoutdriver(struct nfs_server *);
void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
@@ -656,7 +656,7 @@ pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
}

static inline void set_pnfs_layoutdriver(struct nfs_server *s,
- const struct nfs_fh *mntfh, u32 id)
+ const struct nfs_fh *mntfh, u32 *ids)
{
}

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index c304a11b5b1a..4eef590200c3 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -125,6 +125,11 @@ struct nfs_fattr {
| NFS_ATTR_FATTR_V4_SECURITY_LABEL)

/*
+ * Maximal number of supported layout drivers.
+ */
+#define NFS_MAX_LAYOUT_TYPES 8
+
+/*
* Info on the file system
*/
struct nfs_fsinfo {
@@ -139,7 +144,7 @@ struct nfs_fsinfo {
__u64 maxfilesize;
struct timespec time_delta; /* server time granularity */
__u32 lease_time; /* in seconds */
- __u32 layouttype; /* supported pnfs layout driver */
+ __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */
__u32 blksize; /* preferred pnfs io block size */
__u32 clone_blksize; /* granularity of a CLONE operation */
};
--
2.5.5


2016-07-10 19:56:05

by Jeff Layton

[permalink] [raw]
Subject: [PATCH v3 1/3][resend] nfsd: allow nfsd to advertise multiple layout types

From: Jeff Layton <[email protected]>

If the underlying filesystem supports multiple layout types, then there
is little reason not to advertise that fact to clients and let them
choose what type to use.

Turn the ex_layout_type field into a bitfield. For each supported
layout type, we set a bit in that field. When the client requests a
layout, ensure that the bit for that layout type is set. When the
client requests attributes, send back a list of supported types.

Signed-off-by: Jeff Layton <[email protected]>
Reviewed-by: Weston Andros Adamson <[email protected]>
---
fs/nfsd/export.c | 4 ++--
fs/nfsd/export.h | 2 +-
fs/nfsd/nfs4layouts.c | 6 +++---
fs/nfsd/nfs4proc.c | 4 ++--
fs/nfsd/nfs4xdr.c | 30 ++++++++++++++----------------
5 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index b4d84b579f20..f97ba49d5e66 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -706,7 +706,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
new->ex_fslocs.locations = NULL;
new->ex_fslocs.locations_count = 0;
new->ex_fslocs.migrated = 0;
- new->ex_layout_type = 0;
+ new->ex_layout_types = 0;
new->ex_uuid = NULL;
new->cd = item->cd;
}
@@ -731,7 +731,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
item->ex_fslocs.locations_count = 0;
new->ex_fslocs.migrated = item->ex_fslocs.migrated;
item->ex_fslocs.migrated = 0;
- new->ex_layout_type = item->ex_layout_type;
+ new->ex_layout_types = item->ex_layout_types;
new->ex_nflavors = item->ex_nflavors;
for (i = 0; i < MAX_SECINFO_LIST; i++) {
new->ex_flavors[i] = item->ex_flavors[i];
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index 2e315072bf3f..730f15eeb7ed 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -57,7 +57,7 @@ struct svc_export {
struct nfsd4_fs_locations ex_fslocs;
uint32_t ex_nflavors;
struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST];
- enum pnfs_layouttype ex_layout_type;
+ u32 ex_layout_types;
struct nfsd4_deviceid_map *ex_devid_map;
struct cache_detail *cd;
};
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 6d98d16b3354..2be9602b0221 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -139,21 +139,21 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
* otherwise advertise the block layout.
*/
#ifdef CONFIG_NFSD_FLEXFILELAYOUT
- exp->ex_layout_type = LAYOUT_FLEX_FILES;
+ exp->ex_layout_types |= 1 << LAYOUT_FLEX_FILES;
#endif
#ifdef CONFIG_NFSD_BLOCKLAYOUT
/* overwrite flex file layout selection if needed */
if (sb->s_export_op->get_uuid &&
sb->s_export_op->map_blocks &&
sb->s_export_op->commit_blocks)
- exp->ex_layout_type = LAYOUT_BLOCK_VOLUME;
+ exp->ex_layout_types |= 1 << LAYOUT_BLOCK_VOLUME;
#endif
#ifdef CONFIG_NFSD_SCSILAYOUT
/* overwrite block layout selection if needed */
if (sb->s_export_op->map_blocks &&
sb->s_export_op->commit_blocks &&
sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops)
- exp->ex_layout_type = LAYOUT_SCSI;
+ exp->ex_layout_types |= 1 << LAYOUT_SCSI;
#endif
}

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 2ee2dc170327..719c620753e2 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1219,12 +1219,12 @@ nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
static const struct nfsd4_layout_ops *
nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type)
{
- if (!exp->ex_layout_type) {
+ if (!exp->ex_layout_types) {
dprintk("%s: export does not support pNFS\n", __func__);
return NULL;
}

- if (exp->ex_layout_type != layout_type) {
+ if (!(exp->ex_layout_types & (1 << layout_type))) {
dprintk("%s: layout type %d not supported\n",
__func__, layout_type);
return NULL;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 9df898ba648f..4d3ae75ad4c8 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2164,22 +2164,20 @@ nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp,
}

static inline __be32
-nfsd4_encode_layout_type(struct xdr_stream *xdr, enum pnfs_layouttype layout_type)
+nfsd4_encode_layout_types(struct xdr_stream *xdr, u32 layout_types)
{
- __be32 *p;
+ __be32 *p;
+ unsigned long i = hweight_long(layout_types);

- if (layout_type) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(1);
- *p++ = cpu_to_be32(layout_type);
- } else {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(0);
- }
+ p = xdr_reserve_space(xdr, 4 + 4 * i);
+ if (!p)
+ return nfserr_resource;
+
+ *p++ = cpu_to_be32(i);
+
+ for (i = LAYOUT_NFSV4_1_FILES; i < LAYOUT_TYPE_MAX; ++i)
+ if (layout_types & (1 << i))
+ *p++ = cpu_to_be32(i);

return 0;
}
@@ -2754,13 +2752,13 @@ out_acl:
}
#ifdef CONFIG_NFSD_PNFS
if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) {
- status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type);
+ status = nfsd4_encode_layout_types(xdr, exp->ex_layout_types);
if (status)
goto out;
}

if (bmval2 & FATTR4_WORD2_LAYOUT_TYPES) {
- status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type);
+ status = nfsd4_encode_layout_types(xdr, exp->ex_layout_types);
if (status)
goto out;
}
--
2.5.5


2016-07-10 19:56:05

by Jeff Layton

[permalink] [raw]
Subject: [PATCH v3 3/3][resend] pnfs: add a new mechanism to select a layout driver according to an ordered list

From: Jeff Layton <[email protected]>

Currently, the layout driver selection code always chooses the first one
from the list. That's not really ideal however, as the server can send
the list of layout types in any order that it likes. It's up to the
client to select the best one for its needs.

This patch adds an ordered list of preferred driver types and has the
selection code sort the list of avaiable layout drivers according to it.
Any unrecognized layout type is sorted to the end of the list.

For now, the order of preference is hardcoded, but it should be possible
to make this configurable in the future.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfs/client.c | 1 +
fs/nfs/nfs4proc.c | 3 ++-
fs/nfs/nfs4xdr.c | 29 +++++++++++++++-----------
fs/nfs/pnfs.c | 54 +++++++++++++++++++++++++++++++++++++++++--------
fs/nfs/pnfs.h | 5 +++--
include/linux/nfs_xdr.h | 1 +
6 files changed, 70 insertions(+), 23 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 067f489aab3f..2525d8f770d2 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -787,6 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
}

fsinfo.fattr = fattr;
+ fsinfo.nlayouttypes = 0;
memset(fsinfo.layouttype, 0, sizeof(fsinfo.layouttype));
error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
if (error < 0)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index de97567795a5..a1d6d4bd0d50 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4252,7 +4252,8 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s
if (error == 0) {
/* block layout checks this! */
server->pnfs_blksize = fsinfo->blksize;
- set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
+ set_pnfs_layoutdriver(server, fhandle, fsinfo->nlayouttypes,
+ fsinfo->layouttype);
}

return error;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index b2c698499ad9..4eee3a32e070 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4723,28 +4723,32 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
* Decode potentially multiple layout types.
*/
static int decode_pnfs_layout_types(struct xdr_stream *xdr,
- uint32_t *layouttype)
+ uint32_t *nlayouttypes, uint32_t *layouttype)
{
__be32 *p;
- uint32_t num, i;
+ uint32_t i;

p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_overflow;
- num = be32_to_cpup(p);
+ *nlayouttypes = be32_to_cpup(p);

/* pNFS is not supported by the underlying file system */
- if (num == 0) {
+ if (*nlayouttypes == 0)
return 0;
- }
- if (num > NFS_MAX_LAYOUT_TYPES)
- printk(KERN_INFO "NFS: %s: Warning: Too many (%d) pNFS layout types\n", __func__, num);

/* Decode and set first layout type, move xdr->p past unused types */
- p = xdr_inline_decode(xdr, num * 4);
+ p = xdr_inline_decode(xdr, *nlayouttypes * 4);
if (unlikely(!p))
goto out_overflow;
- for(i = 0; i < num && i < NFS_MAX_LAYOUT_TYPES; i++)
+
+ /* If we get too many, then just cap it at the max */
+ if (*nlayouttypes > NFS_MAX_LAYOUT_TYPES) {
+ printk(KERN_INFO "NFS: %s: Warning: Too many (%u) pNFS layout types\n", __func__, *nlayouttypes);
+ *nlayouttypes = NFS_MAX_LAYOUT_TYPES;
+ }
+
+ for(i = 0; i < *nlayouttypes; ++i)
layouttype[i] = be32_to_cpup(p++);
return 0;
out_overflow:
@@ -4757,7 +4761,7 @@ out_overflow:
* Note we must ensure that layouttype is set in any non-error case.
*/
static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
- uint32_t *layouttype)
+ uint32_t *nlayouttypes, uint32_t *layouttype)
{
int status = 0;

@@ -4765,7 +4769,7 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
return -EIO;
if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {
- status = decode_pnfs_layout_types(xdr, layouttype);
+ status = decode_pnfs_layout_types(xdr, nlayouttypes, layouttype);
bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
}
return status;
@@ -4848,7 +4852,8 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
if (status != 0)
goto xdr_error;
- status = decode_attr_pnfstype(xdr, bitmap, fsinfo->layouttype);
+ status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->nlayouttypes,
+ fsinfo->layouttype);
if (status != 0)
goto xdr_error;

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 2fc1b9354345..11e56225e6d2 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -30,6 +30,7 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
+#include <linux/sort.h>
#include "internal.h"
#include "pnfs.h"
#include "iostat.h"
@@ -99,6 +100,38 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
}

/*
+ * When the server sends a list of layout types, we choose one in the order
+ * given in the list below.
+ *
+ * FIXME: should this list be configurable via module_param or mount option?
+ */
+static const u32 ld_prefs[] = {
+ LAYOUT_SCSI,
+ LAYOUT_BLOCK_VOLUME,
+ LAYOUT_OSD2_OBJECTS,
+ LAYOUT_FLEX_FILES,
+ LAYOUT_NFSV4_1_FILES,
+ 0
+};
+
+static int
+ld_cmp(const void *e1, const void *e2)
+{
+ u32 ld1 = *((u32 *)e1);
+ u32 ld2 = *((u32 *)e2);
+ int i;
+
+ for (i = 0; ld_prefs[i] != 0; i++) {
+ if (ld1 == ld_prefs[i])
+ return -1;
+
+ if (ld2 == ld_prefs[i])
+ return 1;
+ }
+ return 0;
+}
+
+/*
* Try to set the server's pnfs module to the pnfs layout type specified by id.
* Currently only one pNFS layout driver per filesystem is supported.
*
@@ -106,10 +139,11 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
*/
void
set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
- u32 *ids)
+ u32 nids, u32 *ids)
{
struct pnfs_layoutdriver_type *ld_type = NULL;
u32 id;
+ int i;

if (!(server->nfs_client->cl_exchange_flags &
(EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
@@ -118,18 +152,22 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
goto out_no_driver;
}

- id = ids[0];
- if (!id)
- goto out_no_driver;
+ sort(ids, nids, sizeof(*ids), ld_cmp, NULL);

- ld_type = find_pnfs_driver(id);
- if (!ld_type) {
- request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
+ for (i = 0; i < nids; i++) {
+ id = ids[i];
ld_type = find_pnfs_driver(id);
+ if (!ld_type) {
+ request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX,
+ id);
+ ld_type = find_pnfs_driver(id);
+ }
+ if (ld_type)
+ break;
}

if (!ld_type) {
- dprintk("%s: No pNFS module found for %u.\n", __func__, id);
+ dprintk("%s: No pNFS module found!\n", __func__);
goto out_no_driver;
}

diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 500473c87e82..a879e747f708 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -236,7 +236,7 @@ void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg);

-void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32 *);
+void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32, u32 *);
void unset_pnfs_layoutdriver(struct nfs_server *);
void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
@@ -656,7 +656,8 @@ pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
}

static inline void set_pnfs_layoutdriver(struct nfs_server *s,
- const struct nfs_fh *mntfh, u32 *ids)
+ const struct nfs_fh *mntfh,
+ u32 nids, u32 *ids)
{
}

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 4eef590200c3..a7e6739ac936 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -144,6 +144,7 @@ struct nfs_fsinfo {
__u64 maxfilesize;
struct timespec time_delta; /* server time granularity */
__u32 lease_time; /* in seconds */
+ __u32 nlayouttypes; /* number of layouttypes */
__u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */
__u32 blksize; /* preferred pnfs io block size */
__u32 clone_blksize; /* granularity of a CLONE operation */
--
2.5.5


2016-07-11 18:04:31

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v3 0/3][resend] pnfs/nfsd: have client and server support multiple layout types

On Sun, Jul 10, 2016 at 03:55:57PM -0400, Jeff Layton wrote:
> From: Jeff Layton <[email protected]>
>
> v3:
> - use sort() from lib/sort.c to sort the list
>
> v2:
> - rework Tigran's patch to preserve existing selection behaviour
> - simplify layout driver selection by sorting the list
>
> This is v3 of the patchset. The main change is that this patch doesn't
> use a hand-rolled bubble sort, but rather the heapsort library in
> lib/sort.c. This is mostly for conciseness since the array sizes are
> generally so small that it shouldn't make any perf difference.
>
> Note that this does mean that we need to keep track on the number of
> elements in the array that the server sends, so there are a few
> ancillary changes to deal with that as well.
>
> Only lightly tested by mounting a server that sends both flexfiles and
> block layouts. The client successfully selected the block layout in most
> cases, but if I blacklist blocklayoutdriver then it selects flexfiles
> instead.
>
> I'm sending these together, but I'd expect Bruce to pick up the nfsd
> patches and Trond or Anna to pick up the client-side ones.

Patches still look fine to me. Feel free to add a Reviewed-by: J. Bruce
Fields <[email protected]> for the client-side patches. I'll take the
server-side patch for 4.8 assuming no objections to this approach from
the client side.

--b.

>
> The nfsd patch is based on top of Tom's nfsd flexfile layout patches.
>
> Jeff Layton (3):
> nfsd: allow nfsd to advertise multiple layout types
> pnfs: track multiple layout types in fsinfo structure
> pnfs: add a new mechanism to select a layout driver according to an
> ordered list
>
> fs/nfs/client.c | 3 ++-
> fs/nfs/nfs4proc.c | 3 ++-
> fs/nfs/nfs4xdr.c | 40 +++++++++++++++--------------
> fs/nfs/pnfs.c | 67 ++++++++++++++++++++++++++++++++++++++++---------
> fs/nfs/pnfs.h | 5 ++--
> fs/nfsd/export.c | 4 +--
> fs/nfsd/export.h | 2 +-
> fs/nfsd/nfs4layouts.c | 6 ++---
> fs/nfsd/nfs4proc.c | 4 +--
> fs/nfsd/nfs4xdr.c | 30 +++++++++++-----------
> include/linux/nfs_xdr.h | 8 +++++-
> 11 files changed, 112 insertions(+), 60 deletions(-)
>
> --
> 2.5.5

2016-07-11 20:33:04

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH v3 0/3][resend] pnfs/nfsd: have client and server support multiple layout types

On Mon, 2016-07-11 at 14:04 -0400, J. Bruce Fields wrote:
> On Sun, Jul 10, 2016 at 03:55:57PM -0400, Jeff Layton wrote:
> > From: Jeff Layton <[email protected]>
> >
> > v3:
> > - use sort() from lib/sort.c to sort the list
> >
> > v2:
> > - rework Tigran's patch to preserve existing selection behaviour
> > - simplify layout driver selection by sorting the list
> >
> > This is v3 of the patchset. The main change is that this patch doesn't
> > use a hand-rolled bubble sort, but rather the heapsort library in
> > lib/sort.c. This is mostly for conciseness since the array sizes are
> > generally so small that it shouldn't make any perf difference.
> >
> > Note that this does mean that we need to keep track on the number of
> > elements in the array that the server sends, so there are a few
> > ancillary changes to deal with that as well.
> >
> > Only lightly tested by mounting a server that sends both flexfiles and
> > block layouts. The client successfully selected the block layout in most
> > cases, but if I blacklist blocklayoutdriver then it selects flexfiles
> > instead.
> >
> > I'm sending these together, but I'd expect Bruce to pick up the nfsd
> > patches and Trond or Anna to pick up the client-side ones.
>
> Patches still look fine to me.  Feel free to add a Reviewed-by: J. Bruce
> Fields <[email protected]> for the client-side patches.  I'll take the
> server-side patch for 4.8 assuming no objections to this approach from
> the client side.
>
> --b.
>

Sorry, I think you added a Reviewed-by tag on the earlier set, and I
forgot to add it. I'll put it in now. BTW, these patches are in the
nfsd-4.8 branch of my tree if that makes it easier to pick them:

https://git.samba.org/?p=jlayton/linux.git;a=shortlog;h=refs/heads/nfsd-4.8

I still haven't heard from Trond or Anna on the latest iteration of the
series. I think I've addressed Trond's concerns with the earlier set
though.

The only part that I'm not sure of with the server-side patch is that
it orders the list by numerical layouttype value. In principle, it
shouldn't matter since the client should pick the best one for its
needs, but older clients will just look at the first one on the list.

In the case where you have both SCSI and FlexFile layouts, older
clients will pick flexfiles over scsi. That may not be what we want.
Should we take more care to order the list by some measure of
"usefulness"?

> >
> > The nfsd patch is based on top of Tom's nfsd flexfile layout patches.
> >
> > Jeff Layton (3):
> >   nfsd: allow nfsd to advertise multiple layout types
> >   pnfs: track multiple layout types in fsinfo structure
> >   pnfs: add a new mechanism to select a layout driver according to an
> >     ordered list
> >
> >  fs/nfs/client.c         |  3 ++-
> >  fs/nfs/nfs4proc.c       |  3 ++-
> >  fs/nfs/nfs4xdr.c        | 40 +++++++++++++++--------------
> >  fs/nfs/pnfs.c           | 67 ++++++++++++++++++++++++++++++++++++++++---------
> >  fs/nfs/pnfs.h           |  5 ++--
> >  fs/nfsd/export.c        |  4 +--
> >  fs/nfsd/export.h        |  2 +-
> >  fs/nfsd/nfs4layouts.c   |  6 ++---
> >  fs/nfsd/nfs4proc.c      |  4 +--
> >  fs/nfsd/nfs4xdr.c       | 30 +++++++++++-----------
> >  include/linux/nfs_xdr.h |  8 +++++-
> >  11 files changed, 112 insertions(+), 60 deletions(-)
> >
> > -- 
> > 2.5.5
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--

Jeff Layton <[email protected]>

2016-07-12 12:40:56

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v3 0/3][resend] pnfs/nfsd: have client and server support multiple layout types

On Mon, Jul 11, 2016 at 04:32:58PM -0400, Jeff Layton wrote:
> On Mon, 2016-07-11 at 14:04 -0400, J. Bruce Fields wrote:
> > On Sun, Jul 10, 2016 at 03:55:57PM -0400, Jeff Layton wrote:
> > > From: Jeff Layton <[email protected]>
> > >
> > > v3:
> > > - use sort() from lib/sort.c to sort the list
> > >
> > > v2:
> > > - rework Tigran's patch to preserve existing selection behaviour
> > > - simplify layout driver selection by sorting the list
> > >
> > > This is v3 of the patchset. The main change is that this patch doesn't
> > > use a hand-rolled bubble sort, but rather the heapsort library in
> > > lib/sort.c. This is mostly for conciseness since the array sizes are
> > > generally so small that it shouldn't make any perf difference.
> > >
> > > Note that this does mean that we need to keep track on the number of
> > > elements in the array that the server sends, so there are a few
> > > ancillary changes to deal with that as well.
> > >
> > > Only lightly tested by mounting a server that sends both flexfiles and
> > > block layouts. The client successfully selected the block layout in most
> > > cases, but if I blacklist blocklayoutdriver then it selects flexfiles
> > > instead.
> > >
> > > I'm sending these together, but I'd expect Bruce to pick up the nfsd
> > > patches and Trond or Anna to pick up the client-side ones.
> >
> > Patches still look fine to me.  Feel free to add a Reviewed-by: J. Bruce
> > Fields <[email protected]> for the client-side patches.  I'll take the
> > server-side patch for 4.8 assuming no objections to this approach from
> > the client side.
> >
> > --b.
> >
>
> Sorry, I think you added a Reviewed-by tag on the earlier set, and I
> forgot to add it. I'll put it in now. BTW, these patches are in the
> nfsd-4.8 branch of my tree if that makes it easier to pick them:
>
> https://git.samba.org/?p=jlayton/linux.git;a=shortlog;h=refs/heads/nfsd-4.8
>
> I still haven't heard from Trond or Anna on the latest iteration of the
> series. I think I've addressed Trond's concerns with the earlier set
> though.
>
> The only part that I'm not sure of with the server-side patch is that
> it orders the list by numerical layouttype value. In principle, it
> shouldn't matter since the client should pick the best one for its
> needs, but older clients will just look at the first one on the list.
>
> In the case where you have both SCSI and FlexFile layouts, older
> clients will pick flexfiles over scsi. That may not be what we want.
> Should we take more care to order the list by some measure of
> "usefulness"?

That would be OK with me, but I don't think it's worth a lot of effort
at this point. Very few will want the current server flexfiles
implementation, and they can live with just the compile-time option.

Longer term this doesn't seem like something we can decide
algorithmically. The suggestion I've heard is to extend the export
syntax to allow "pnfs=scsi,flex" or whatever, and take the ordering as
the user's priority.

I agree that we want this sorted out eventually, and certainly before
flexfiles becomes useful in production.

--b.

2016-08-08 20:44:24

by Anna Schumaker

[permalink] [raw]
Subject: Re: [PATCH v3 3/3][resend] pnfs: add a new mechanism to select a layout driver according to an ordered list

Hi Jeff,

On 07/10/2016 03:56 PM, Jeff Layton wrote:
> From: Jeff Layton <[email protected]>
>
> Currently, the layout driver selection code always chooses the first one
> from the list. That's not really ideal however, as the server can send
> the list of layout types in any order that it likes. It's up to the
> client to select the best one for its needs.
>
> This patch adds an ordered list of preferred driver types and has the
> selection code sort the list of avaiable layout drivers according to it.
> Any unrecognized layout type is sorted to the end of the list.
>
> For now, the order of preference is hardcoded, but it should be possible
> to make this configurable in the future.
>
> Signed-off-by: Jeff Layton <[email protected]>
> ---
> fs/nfs/client.c | 1 +
> fs/nfs/nfs4proc.c | 3 ++-
> fs/nfs/nfs4xdr.c | 29 +++++++++++++++-----------
> fs/nfs/pnfs.c | 54 +++++++++++++++++++++++++++++++++++++++++--------
> fs/nfs/pnfs.h | 5 +++--
> include/linux/nfs_xdr.h | 1 +
> 6 files changed, 70 insertions(+), 23 deletions(-)
>
> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> index 067f489aab3f..2525d8f770d2 100644
> --- a/fs/nfs/client.c
> +++ b/fs/nfs/client.c
> @@ -787,6 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
> }
>
> fsinfo.fattr = fattr;
> + fsinfo.nlayouttypes = 0;
> memset(fsinfo.layouttype, 0, sizeof(fsinfo.layouttype));
> error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
> if (error < 0)
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index de97567795a5..a1d6d4bd0d50 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -4252,7 +4252,8 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s
> if (error == 0) {
> /* block layout checks this! */
> server->pnfs_blksize = fsinfo->blksize;
> - set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
> + set_pnfs_layoutdriver(server, fhandle, fsinfo->nlayouttypes,
> + fsinfo->layouttype);

I think it might be a little cleaner to pass the fsinfo structure here instead of adding more variables to the function.

> }
>
> return error;
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index b2c698499ad9..4eee3a32e070 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
> @@ -4723,28 +4723,32 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
> * Decode potentially multiple layout types.
> */
> static int decode_pnfs_layout_types(struct xdr_stream *xdr,
> - uint32_t *layouttype)
> + uint32_t *nlayouttypes, uint32_t *layouttype)

Same thing here with decoding.

> {
> __be32 *p;
> - uint32_t num, i;
> + uint32_t i;
>
> p = xdr_inline_decode(xdr, 4);
> if (unlikely(!p))
> goto out_overflow;
> - num = be32_to_cpup(p);
> + *nlayouttypes = be32_to_cpup(p);
>
> /* pNFS is not supported by the underlying file system */
> - if (num == 0) {
> + if (*nlayouttypes == 0)
> return 0;
> - }
> - if (num > NFS_MAX_LAYOUT_TYPES)
> - printk(KERN_INFO "NFS: %s: Warning: Too many (%d) pNFS layout types\n", __func__, num);
>
> /* Decode and set first layout type, move xdr->p past unused types */
> - p = xdr_inline_decode(xdr, num * 4);
> + p = xdr_inline_decode(xdr, *nlayouttypes * 4);
> if (unlikely(!p))
> goto out_overflow;
> - for(i = 0; i < num && i < NFS_MAX_LAYOUT_TYPES; i++)
> +
> + /* If we get too many, then just cap it at the max */
> + if (*nlayouttypes > NFS_MAX_LAYOUT_TYPES) {
> + printk(KERN_INFO "NFS: %s: Warning: Too many (%u) pNFS layout types\n", __func__, *nlayouttypes);
> + *nlayouttypes = NFS_MAX_LAYOUT_TYPES;
> + }
> +
> + for(i = 0; i < *nlayouttypes; ++i)
> layouttype[i] = be32_to_cpup(p++);
> return 0;
> out_overflow:
> @@ -4757,7 +4761,7 @@ out_overflow:
> * Note we must ensure that layouttype is set in any non-error case.
> */
> static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
> - uint32_t *layouttype)
> + uint32_t *nlayouttypes, uint32_t *layouttype)
> {
> int status = 0;
>
> @@ -4765,7 +4769,7 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
> if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
> return -EIO;
> if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {
> - status = decode_pnfs_layout_types(xdr, layouttype);
> + status = decode_pnfs_layout_types(xdr, nlayouttypes, layouttype);
> bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
> }
> return status;
> @@ -4848,7 +4852,8 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
> status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
> if (status != 0)
> goto xdr_error;
> - status = decode_attr_pnfstype(xdr, bitmap, fsinfo->layouttype);
> + status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->nlayouttypes,
> + fsinfo->layouttype);
> if (status != 0)
> goto xdr_error;
>
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 2fc1b9354345..11e56225e6d2 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -30,6 +30,7 @@
> #include <linux/nfs_fs.h>
> #include <linux/nfs_page.h>
> #include <linux/module.h>
> +#include <linux/sort.h>
> #include "internal.h"
> #include "pnfs.h"
> #include "iostat.h"
> @@ -99,6 +100,38 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
> }
>
> /*
> + * When the server sends a list of layout types, we choose one in the order
> + * given in the list below.
> + *
> + * FIXME: should this list be configurable via module_param or mount option?
> + */
> +static const u32 ld_prefs[] = {
> + LAYOUT_SCSI,
> + LAYOUT_BLOCK_VOLUME,
> + LAYOUT_OSD2_OBJECTS,
> + LAYOUT_FLEX_FILES,
> + LAYOUT_NFSV4_1_FILES,
> + 0
> +};
> +
> +static int
> +ld_cmp(const void *e1, const void *e2)
> +{
> + u32 ld1 = *((u32 *)e1);
> + u32 ld2 = *((u32 *)e2);

Looks like this conversion is used pretty frequently. I wish there was a "POINTER_TO_UINT" macro to make it clearer what's going on.

Thanks,
Anna

> + int i;
> +
> + for (i = 0; ld_prefs[i] != 0; i++) {
> + if (ld1 == ld_prefs[i])
> + return -1;
> +
> + if (ld2 == ld_prefs[i])
> + return 1;
> + }
> + return 0;
> +}
> +
> +/*
> * Try to set the server's pnfs module to the pnfs layout type specified by id.
> * Currently only one pNFS layout driver per filesystem is supported.
> *
> @@ -106,10 +139,11 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
> */
> void
> set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
> - u32 *ids)
> + u32 nids, u32 *ids)
> {
> struct pnfs_layoutdriver_type *ld_type = NULL;
> u32 id;
> + int i;
>
> if (!(server->nfs_client->cl_exchange_flags &
> (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
> @@ -118,18 +152,22 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
> goto out_no_driver;
> }
>
> - id = ids[0];
> - if (!id)
> - goto out_no_driver;
> + sort(ids, nids, sizeof(*ids), ld_cmp, NULL);
>
> - ld_type = find_pnfs_driver(id);
> - if (!ld_type) {
> - request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
> + for (i = 0; i < nids; i++) {
> + id = ids[i];
> ld_type = find_pnfs_driver(id);
> + if (!ld_type) {
> + request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX,
> + id);
> + ld_type = find_pnfs_driver(id);
> + }
> + if (ld_type)
> + break;
> }
>
> if (!ld_type) {
> - dprintk("%s: No pNFS module found for %u.\n", __func__, id);
> + dprintk("%s: No pNFS module found!\n", __func__);
> goto out_no_driver;
> }
>
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 500473c87e82..a879e747f708 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -236,7 +236,7 @@ void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
> void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
> void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg);
>
> -void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32 *);
> +void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32, u32 *);
> void unset_pnfs_layoutdriver(struct nfs_server *);
> void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
> int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
> @@ -656,7 +656,8 @@ pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
> }
>
> static inline void set_pnfs_layoutdriver(struct nfs_server *s,
> - const struct nfs_fh *mntfh, u32 *ids)
> + const struct nfs_fh *mntfh,
> + u32 nids, u32 *ids)
> {
> }
>
> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> index 4eef590200c3..a7e6739ac936 100644
> --- a/include/linux/nfs_xdr.h
> +++ b/include/linux/nfs_xdr.h
> @@ -144,6 +144,7 @@ struct nfs_fsinfo {
> __u64 maxfilesize;
> struct timespec time_delta; /* server time granularity */
> __u32 lease_time; /* in seconds */
> + __u32 nlayouttypes; /* number of layouttypes */
> __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */
> __u32 blksize; /* preferred pnfs io block size */
> __u32 clone_blksize; /* granularity of a CLONE operation */
>


2016-08-10 19:13:30

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH v3 3/3][resend] pnfs: add a new mechanism to select a layout driver according to an ordered list

On Mon, 2016-08-08 at 16:43 -0400, Anna Schumaker wrote:
> Hi Jeff,
>
> On 07/10/2016 03:56 PM, Jeff Layton wrote:
> >
> > > > From: Jeff Layton <[email protected]>
> >
> > Currently, the layout driver selection code always chooses the first one
> > from the list. That's not really ideal however, as the server can send
> > the list of layout types in any order that it likes. It's up to the
> > client to select the best one for its needs.
> >
> > This patch adds an ordered list of preferred driver types and has the
> > selection code sort the list of avaiable layout drivers according to it.
> > Any unrecognized layout type is sorted to the end of the list.
> >
> > For now, the order of preference is hardcoded, but it should be possible
> > to make this configurable in the future.
> >
> > > > Signed-off-by: Jeff Layton <[email protected]>
> > ---
> >  fs/nfs/client.c         |  1 +
> >  fs/nfs/nfs4proc.c       |  3 ++-
> >  fs/nfs/nfs4xdr.c        | 29 +++++++++++++++-----------
> >  fs/nfs/pnfs.c           | 54 +++++++++++++++++++++++++++++++++++++++++--------
> >  fs/nfs/pnfs.h           |  5 +++--
> >  include/linux/nfs_xdr.h |  1 +
> >  6 files changed, 70 insertions(+), 23 deletions(-)
> >
> > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > index 067f489aab3f..2525d8f770d2 100644
> > --- a/fs/nfs/client.c
> > +++ b/fs/nfs/client.c
> > @@ -787,6 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
> > > >   }
> >  
> > > >   fsinfo.fattr = fattr;
> > > > + fsinfo.nlayouttypes = 0;
> > > >   memset(fsinfo.layouttype, 0, sizeof(fsinfo.layouttype));
> > > >   error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
> > > >   if (error < 0)
> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > index de97567795a5..a1d6d4bd0d50 100644
> > --- a/fs/nfs/nfs4proc.c
> > +++ b/fs/nfs/nfs4proc.c
> > @@ -4252,7 +4252,8 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s
> > > >   if (error == 0) {
> > > >   /* block layout checks this! */
> > > >   server->pnfs_blksize = fsinfo->blksize;
> > > > - set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
> > > > + set_pnfs_layoutdriver(server, fhandle, fsinfo->nlayouttypes,
> > > > + fsinfo->layouttype);
>
> I think it might be a little cleaner to pass the fsinfo structure here instead of adding more variables to the function.
>
> >
> > > >   }
> >  
> > > >   return error;
> > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> > index b2c698499ad9..4eee3a32e070 100644
> > --- a/fs/nfs/nfs4xdr.c
> > +++ b/fs/nfs/nfs4xdr.c
> > @@ -4723,28 +4723,32 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
> >   * Decode potentially multiple layout types.
> >   */
> >  static int decode_pnfs_layout_types(struct xdr_stream *xdr,
> > > > -  uint32_t *layouttype)
> > > > + uint32_t *nlayouttypes, uint32_t *layouttype)
>
> Same thing here with decoding.
>

Ok, no problem. I'll send an updated patchset soon.

> >
> >  {
> > > >   __be32 *p;
> > > > - uint32_t num, i;
> > > > + uint32_t i;
> >  
> > > >   p = xdr_inline_decode(xdr, 4);
> > > >   if (unlikely(!p))
> > > >   goto out_overflow;
> > > > - num = be32_to_cpup(p);
> > > > + *nlayouttypes = be32_to_cpup(p);
> >  
> > > >   /* pNFS is not supported by the underlying file system */
> > > > - if (num == 0) {
> > > > + if (*nlayouttypes == 0)
> > > >   return 0;
> > > > - }
> > > > - if (num > NFS_MAX_LAYOUT_TYPES)
> > > > - printk(KERN_INFO "NFS: %s: Warning: Too many (%d) pNFS layout types\n", __func__, num);
> >  
> > > >   /* Decode and set first layout type, move xdr->p past unused types */
> > > > - p = xdr_inline_decode(xdr, num * 4);
> > > > + p = xdr_inline_decode(xdr, *nlayouttypes * 4);
> > > >   if (unlikely(!p))
> > > >   goto out_overflow;
> > > > - for(i = 0; i < num && i < NFS_MAX_LAYOUT_TYPES; i++)
> > +
> > > > + /* If we get too many, then just cap it at the max */
> > > > + if (*nlayouttypes > NFS_MAX_LAYOUT_TYPES) {
> > > > + printk(KERN_INFO "NFS: %s: Warning: Too many (%u) pNFS layout types\n", __func__, *nlayouttypes);
> > > > + *nlayouttypes = NFS_MAX_LAYOUT_TYPES;
> > > > + }
> > +
> > > > + for(i = 0; i < *nlayouttypes; ++i)
> > > >   layouttype[i] = be32_to_cpup(p++);
> > > >   return 0;
> >  out_overflow:
> > @@ -4757,7 +4761,7 @@ out_overflow:
> >   * Note we must ensure that layouttype is set in any non-error case.
> >   */
> >  static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
> > > > - uint32_t *layouttype)
> > > > + uint32_t *nlayouttypes, uint32_t *layouttype)
> >  {
> > > >   int status = 0;
> >  
> > @@ -4765,7 +4769,7 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
> > > >   if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
> > > >   return -EIO;
> > > >   if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {
> > > > - status = decode_pnfs_layout_types(xdr, layouttype);
> > > > + status = decode_pnfs_layout_types(xdr, nlayouttypes, layouttype);
> > > >   bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
> > > >   }
> > > >   return status;
> > @@ -4848,7 +4852,8 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
> > > >   status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
> > > >   if (status != 0)
> > > >   goto xdr_error;
> > > > - status = decode_attr_pnfstype(xdr, bitmap, fsinfo->layouttype);
> > > > + status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->nlayouttypes,
> > > > + fsinfo->layouttype);
> > > >   if (status != 0)
> > > >   goto xdr_error;
> >  
> > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> > index 2fc1b9354345..11e56225e6d2 100644
> > --- a/fs/nfs/pnfs.c
> > +++ b/fs/nfs/pnfs.c
> > @@ -30,6 +30,7 @@
> >  #include <linux/nfs_fs.h>
> >  #include <linux/nfs_page.h>
> >  #include <linux/module.h>
> > +#include <linux/sort.h>
> >  #include "internal.h"
> >  #include "pnfs.h"
> >  #include "iostat.h"
> > @@ -99,6 +100,38 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
> >  }
> >  
> >  /*
> > + * When the server sends a list of layout types, we choose one in the order
> > + * given in the list below.
> > + *
> > + * FIXME: should this list be configurable via module_param or mount option?
> > + */
> > +static const u32 ld_prefs[] = {
> > > > + LAYOUT_SCSI,
> > > > + LAYOUT_BLOCK_VOLUME,
> > > > + LAYOUT_OSD2_OBJECTS,
> > > > + LAYOUT_FLEX_FILES,
> > > > + LAYOUT_NFSV4_1_FILES,
> > > > + 0
> > +};
> > +
> > +static int
> > +ld_cmp(const void *e1, const void *e2)
> > +{
> > > > + u32 ld1 = *((u32 *)e1);
> > > > + u32 ld2 = *((u32 *)e2);
>
> Looks like this conversion is used pretty frequently.  I wish there was a "POINTER_TO_UINT" macro to make it clearer what's going on.
>
> Thanks,
> Anna
>

Yeah that is worth considering. I don't think we should add that in the
context of this patch though.

> >
> > > > + int i;
> > +
> > > > + for (i = 0; ld_prefs[i] != 0; i++) {
> > > > + if (ld1 == ld_prefs[i])
> > > > + return -1;
> > +
> > > > + if (ld2 == ld_prefs[i])
> > > > + return 1;
> > > > + }
> > > > + return 0;
> > +}
> > +
> > +/*
> >   * Try to set the server's pnfs module to the pnfs layout type specified by id.
> >   * Currently only one pNFS layout driver per filesystem is supported.
> >   *
> > @@ -106,10 +139,11 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
> >   */
> >  void
> >  set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
> > > > -       u32 *ids)
> > > > +       u32 nids, u32 *ids)
> >  {
> > > >   struct pnfs_layoutdriver_type *ld_type = NULL;
> > > >   u32 id;
> > > > + int i;
> >  
> > > >   if (!(server->nfs_client->cl_exchange_flags &
> > > >    (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
> > @@ -118,18 +152,22 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
> > > >   goto out_no_driver;
> > > >   }
> >  
> > > > - id = ids[0];
> > > > - if (!id)
> > > > - goto out_no_driver;
> > > > + sort(ids, nids, sizeof(*ids), ld_cmp, NULL);
> >  
> > > > - ld_type = find_pnfs_driver(id);
> > > > - if (!ld_type) {
> > > > - request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
> > > > + for (i = 0; i < nids; i++) {
> > > > + id = ids[i];
> > > >   ld_type = find_pnfs_driver(id);
> > > > + if (!ld_type) {
> > > > + request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX,
> > > > + id);
> > > > + ld_type = find_pnfs_driver(id);
> > > > + }
> > > > + if (ld_type)
> > > > + break;
> > > >   }
> >  
> > > >   if (!ld_type) {
> > > > - dprintk("%s: No pNFS module found for %u.\n", __func__, id);
> > > > + dprintk("%s: No pNFS module found!\n", __func__);
> > > >   goto out_no_driver;
> > > >   }
> >  
> > diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> > index 500473c87e82..a879e747f708 100644
> > --- a/fs/nfs/pnfs.h
> > +++ b/fs/nfs/pnfs.h
> > @@ -236,7 +236,7 @@ void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
> >  void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
> >  void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg);
> >  
> > -void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32 *);
> > +void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32, u32 *);
> >  void unset_pnfs_layoutdriver(struct nfs_server *);
> >  void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
> >  int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
> > @@ -656,7 +656,8 @@ pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
> >  }
> >  
> >  static inline void set_pnfs_layoutdriver(struct nfs_server *s,
> > > > -  const struct nfs_fh *mntfh, u32 *ids)
> > > > +  const struct nfs_fh *mntfh,
> > > > +  u32 nids, u32 *ids)
> >  {
> >  }
> >  
> > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> > index 4eef590200c3..a7e6739ac936 100644
> > --- a/include/linux/nfs_xdr.h
> > +++ b/include/linux/nfs_xdr.h
> > @@ -144,6 +144,7 @@ struct nfs_fsinfo {
> > > > > >   __u64 maxfilesize;
> > > > > >   struct timespec time_delta; /* server time granularity */
> > > > > >   __u32 lease_time; /* in seconds */
> > > > > > + __u32 nlayouttypes; /* number of layouttypes */
> > > > > >   __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */
> > > > > >   __u32 blksize; /* preferred pnfs io block size */
> > > > > >   __u32 clone_blksize; /* granularity of a CLONE operation */
> >
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Jeff Layton <[email protected]>

2016-08-10 19:26:38

by Anna Schumaker

[permalink] [raw]
Subject: Re: [PATCH v3 3/3][resend] pnfs: add a new mechanism to select a layout driver according to an ordered list

On 08/10/2016 03:13 PM, Jeff Layton wrote:
> On Mon, 2016-08-08 at 16:43 -0400, Anna Schumaker wrote:
>> Hi Jeff,
>>
>> On 07/10/2016 03:56 PM, Jeff Layton wrote:
>>>
>>>>> From: Jeff Layton <[email protected]>
>>>
>>> Currently, the layout driver selection code always chooses the first one
>>> from the list. That's not really ideal however, as the server can send
>>> the list of layout types in any order that it likes. It's up to the
>>> client to select the best one for its needs.
>>>
>>> This patch adds an ordered list of preferred driver types and has the
>>> selection code sort the list of avaiable layout drivers according to it.
>>> Any unrecognized layout type is sorted to the end of the list.
>>>
>>> For now, the order of preference is hardcoded, but it should be possible
>>> to make this configurable in the future.
>>>
>>>>> Signed-off-by: Jeff Layton <[email protected]>
>>> ---
>>> fs/nfs/client.c | 1 +
>>> fs/nfs/nfs4proc.c | 3 ++-
>>> fs/nfs/nfs4xdr.c | 29 +++++++++++++++-----------
>>> fs/nfs/pnfs.c | 54 +++++++++++++++++++++++++++++++++++++++++--------
>>> fs/nfs/pnfs.h | 5 +++--
>>> include/linux/nfs_xdr.h | 1 +
>>> 6 files changed, 70 insertions(+), 23 deletions(-)
>>>
>>> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
>>> index 067f489aab3f..2525d8f770d2 100644
>>> --- a/fs/nfs/client.c
>>> +++ b/fs/nfs/client.c
>>> @@ -787,6 +787,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
>>>>> }
>>>
>>>>> fsinfo.fattr = fattr;
>>>>> + fsinfo.nlayouttypes = 0;
>>>>> memset(fsinfo.layouttype, 0, sizeof(fsinfo.layouttype));
>>>>> error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
>>>>> if (error < 0)
>>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>>> index de97567795a5..a1d6d4bd0d50 100644
>>> --- a/fs/nfs/nfs4proc.c
>>> +++ b/fs/nfs/nfs4proc.c
>>> @@ -4252,7 +4252,8 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s
>>>>> if (error == 0) {
>>>>> /* block layout checks this! */
>>>>> server->pnfs_blksize = fsinfo->blksize;
>>>>> - set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
>>>>> + set_pnfs_layoutdriver(server, fhandle, fsinfo->nlayouttypes,
>>>>> + fsinfo->layouttype);
>>
>> I think it might be a little cleaner to pass the fsinfo structure here instead of adding more variables to the function.
>>
>>>
>>>>> }
>>>
>>>>> return error;
>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>>> index b2c698499ad9..4eee3a32e070 100644
>>> --- a/fs/nfs/nfs4xdr.c
>>> +++ b/fs/nfs/nfs4xdr.c
>>> @@ -4723,28 +4723,32 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
>>> * Decode potentially multiple layout types.
>>> */
>>> static int decode_pnfs_layout_types(struct xdr_stream *xdr,
>>>>> - uint32_t *layouttype)
>>>>> + uint32_t *nlayouttypes, uint32_t *layouttype)
>>
>> Same thing here with decoding.
>>
>
> Ok, no problem. I'll send an updated patchset soon.

Thanks!

>
>>>
>>> {
>>>>> __be32 *p;
>>>>> - uint32_t num, i;
>>>>> + uint32_t i;
>>>
>>>>> p = xdr_inline_decode(xdr, 4);
>>>>> if (unlikely(!p))
>>>>> goto out_overflow;
>>>>> - num = be32_to_cpup(p);
>>>>> + *nlayouttypes = be32_to_cpup(p);
>>>
>>>>> /* pNFS is not supported by the underlying file system */
>>>>> - if (num == 0) {
>>>>> + if (*nlayouttypes == 0)
>>>>> return 0;
>>>>> - }
>>>>> - if (num > NFS_MAX_LAYOUT_TYPES)
>>>>> - printk(KERN_INFO "NFS: %s: Warning: Too many (%d) pNFS layout types\n", __func__, num);
>>>
>>>>> /* Decode and set first layout type, move xdr->p past unused types */
>>>>> - p = xdr_inline_decode(xdr, num * 4);
>>>>> + p = xdr_inline_decode(xdr, *nlayouttypes * 4);
>>>>> if (unlikely(!p))
>>>>> goto out_overflow;
>>>>> - for(i = 0; i < num && i < NFS_MAX_LAYOUT_TYPES; i++)
>>> +
>>>>> + /* If we get too many, then just cap it at the max */
>>>>> + if (*nlayouttypes > NFS_MAX_LAYOUT_TYPES) {
>>>>> + printk(KERN_INFO "NFS: %s: Warning: Too many (%u) pNFS layout types\n", __func__, *nlayouttypes);
>>>>> + *nlayouttypes = NFS_MAX_LAYOUT_TYPES;
>>>>> + }
>>> +
>>>>> + for(i = 0; i < *nlayouttypes; ++i)
>>>>> layouttype[i] = be32_to_cpup(p++);
>>>>> return 0;
>>> out_overflow:
>>> @@ -4757,7 +4761,7 @@ out_overflow:
>>> * Note we must ensure that layouttype is set in any non-error case.
>>> */
>>> static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
>>>>> - uint32_t *layouttype)
>>>>> + uint32_t *nlayouttypes, uint32_t *layouttype)
>>> {
>>>>> int status = 0;
>>>
>>> @@ -4765,7 +4769,7 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
>>>>> if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
>>>>> return -EIO;
>>>>> if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {
>>>>> - status = decode_pnfs_layout_types(xdr, layouttype);
>>>>> + status = decode_pnfs_layout_types(xdr, nlayouttypes, layouttype);
>>>>> bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
>>>>> }
>>>>> return status;
>>> @@ -4848,7 +4852,8 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
>>>>> status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
>>>>> if (status != 0)
>>>>> goto xdr_error;
>>>>> - status = decode_attr_pnfstype(xdr, bitmap, fsinfo->layouttype);
>>>>> + status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->nlayouttypes,
>>>>> + fsinfo->layouttype);
>>>>> if (status != 0)
>>>>> goto xdr_error;
>>>
>>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>>> index 2fc1b9354345..11e56225e6d2 100644
>>> --- a/fs/nfs/pnfs.c
>>> +++ b/fs/nfs/pnfs.c
>>> @@ -30,6 +30,7 @@
>>> #include <linux/nfs_fs.h>
>>> #include <linux/nfs_page.h>
>>> #include <linux/module.h>
>>> +#include <linux/sort.h>
>>> #include "internal.h"
>>> #include "pnfs.h"
>>> #include "iostat.h"
>>> @@ -99,6 +100,38 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
>>> }
>>>
>>> /*
>>> + * When the server sends a list of layout types, we choose one in the order
>>> + * given in the list below.
>>> + *
>>> + * FIXME: should this list be configurable via module_param or mount option?
>>> + */
>>> +static const u32 ld_prefs[] = {
>>>>> + LAYOUT_SCSI,
>>>>> + LAYOUT_BLOCK_VOLUME,
>>>>> + LAYOUT_OSD2_OBJECTS,
>>>>> + LAYOUT_FLEX_FILES,
>>>>> + LAYOUT_NFSV4_1_FILES,
>>>>> + 0
>>> +};
>>> +
>>> +static int
>>> +ld_cmp(const void *e1, const void *e2)
>>> +{
>>>>> + u32 ld1 = *((u32 *)e1);
>>>>> + u32 ld2 = *((u32 *)e2);
>>
>> Looks like this conversion is used pretty frequently. I wish there was a "POINTER_TO_UINT" macro to make it clearer what's going on.
>>
>> Thanks,
>> Anna
>>
>
> Yeah that is worth considering. I don't think we should add that in the
> context of this patch though.

Agreed, since it would be more of a kernel wide change. I was just thinking out loud after seeing what glib/gtk do :) https://developer.gnome.org/glib/stable/glib-Type-Conversion-Macros.html

Anna

>
>>>
>>>>> + int i;
>>> +
>>>>> + for (i = 0; ld_prefs[i] != 0; i++) {
>>>>> + if (ld1 == ld_prefs[i])
>>>>> + return -1;
>>> +
>>>>> + if (ld2 == ld_prefs[i])
>>>>> + return 1;
>>>>> + }
>>>>> + return 0;
>>> +}
>>> +
>>> +/*
>>> * Try to set the server's pnfs module to the pnfs layout type specified by id.
>>> * Currently only one pNFS layout driver per filesystem is supported.
>>> *
>>> @@ -106,10 +139,11 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
>>> */
>>> void
>>> set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
>>>>> - u32 *ids)
>>>>> + u32 nids, u32 *ids)
>>> {
>>>>> struct pnfs_layoutdriver_type *ld_type = NULL;
>>>>> u32 id;
>>>>> + int i;
>>>
>>>>> if (!(server->nfs_client->cl_exchange_flags &
>>>>> (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
>>> @@ -118,18 +152,22 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
>>>>> goto out_no_driver;
>>>>> }
>>>
>>>>> - id = ids[0];
>>>>> - if (!id)
>>>>> - goto out_no_driver;
>>>>> + sort(ids, nids, sizeof(*ids), ld_cmp, NULL);
>>>
>>>>> - ld_type = find_pnfs_driver(id);
>>>>> - if (!ld_type) {
>>>>> - request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
>>>>> + for (i = 0; i < nids; i++) {
>>>>> + id = ids[i];
>>>>> ld_type = find_pnfs_driver(id);
>>>>> + if (!ld_type) {
>>>>> + request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX,
>>>>> + id);
>>>>> + ld_type = find_pnfs_driver(id);
>>>>> + }
>>>>> + if (ld_type)
>>>>> + break;
>>>>> }
>>>
>>>>> if (!ld_type) {
>>>>> - dprintk("%s: No pNFS module found for %u.\n", __func__, id);
>>>>> + dprintk("%s: No pNFS module found!\n", __func__);
>>>>> goto out_no_driver;
>>>>> }
>>>
>>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>>> index 500473c87e82..a879e747f708 100644
>>> --- a/fs/nfs/pnfs.h
>>> +++ b/fs/nfs/pnfs.h
>>> @@ -236,7 +236,7 @@ void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
>>> void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
>>> void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg);
>>>
>>> -void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32 *);
>>> +void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32, u32 *);
>>> void unset_pnfs_layoutdriver(struct nfs_server *);
>>> void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
>>> int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
>>> @@ -656,7 +656,8 @@ pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
>>> }
>>>
>>> static inline void set_pnfs_layoutdriver(struct nfs_server *s,
>>>>> - const struct nfs_fh *mntfh, u32 *ids)
>>>>> + const struct nfs_fh *mntfh,
>>>>> + u32 nids, u32 *ids)
>>> {
>>> }
>>>
>>> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
>>> index 4eef590200c3..a7e6739ac936 100644
>>> --- a/include/linux/nfs_xdr.h
>>> +++ b/include/linux/nfs_xdr.h
>>> @@ -144,6 +144,7 @@ struct nfs_fsinfo {
>>>>>>> __u64 maxfilesize;
>>>>>>> struct timespec time_delta; /* server time granularity */
>>>>>>> __u32 lease_time; /* in seconds */
>>>>>>> + __u32 nlayouttypes; /* number of layouttypes */
>>>>>>> __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */
>>>>>>> __u32 blksize; /* preferred pnfs io block size */
>>>>>>> __u32 clone_blksize; /* granularity of a CLONE operation */
>>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>