2009-12-07 09:25:46

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 0/35] Initial pnfsd file layout support

Bruce,

The following patches implement initial pnfsd server support for
the files layout and the dlm-based file systems, including GETDEVICELIST,
GETDEVICEINFO, and LAYOUTGET.
The generic LAYOUTCOMMIT and LAYOUTRETURN implementation provides the
complete implementation that was tested by Andy at the Austin Fall
2009 Bakeathon.

The patchset is based on your nfsd-next branch at
d1ecbbf Merge branch 'for-2.6.33-incoming' into HEAD
post Boaz' headers cleanup patchset.

v2 includes the fixes posted here:
http://linux-nfs.org/pipermail/pnfs/2009-December/009607.html
as well as some cosmetic cleanups and a cleanup of CONFIG_PNFSD usage
along the lines Christoph suggested, aimed at minimizing its use.
See diff -w at the bottom of this email for details.

[PATCH v2 01/35] pnfsd: Define CONFIG_PNFSD
[PATCH v2 02/35] pnfsd: define NFSDDBG_PNFS
[PATCH v2 03/35] pnfsd, pnfs: protocol level pnfs constants
[PATCH v2 04/35] pnfsd: return pnfs flags on exchange_id
[PATCH v2 05/35] pnfsd: don't set up back channel on create_session for ds
[PATCH v2 06/35] pnfsd: introduce pnfsd header files
[PATCH v2 07/35] pnfsd: define pnfs_export_operations
[PATCH v2 08/35] pnfsd: add pnfs export option
[PATCH v2 09/35] pnfsd: layout verify
[PATCH v2 10/35] pnfsd: introduce exp_xdr.h
[PATCH v2 11/35] pnfsd: get device list/info
[PATCH v2 12/35] pnfsd: filelayout: get device list/info
[PATCH v2 13/35] pnfsd: layout get
[PATCH v2 14/35] pnfsd: filelayout: layout encoding
[PATCH v2 15/35] pnfsd: Helper functions for layout stateid processing.
[PATCH v2 16/35] pnfsd: helper function for stateid checking
[PATCH v2 17/35] pnfsd: process the layout stateid
[PATCH v2 18/35] pnfsd: add helper functions for identifying DS stateids.
[PATCH v2 19/35] pnfsd: accept all ds stateids
[PATCH v2 20/35] pnfsd: LAYOUTGET layout stateid processing
[PATCH v2 21/35] pnfsd: destroy layout on expire_client
[PATCH v2 22/35] pnfsd: support layout_type attribute
[PATCH v2 23/35] pnfsd: per block device dlm data server list cache
[PATCH v2 24/35] pnfsd: new nfsd filesystem file: pnfs_dlm_device
[PATCH v2 25/35] pnfsd: nfsd4_pnfs_dlm_getdeviter
[PATCH v2 26/35] pnfsd: nfsd4_pnfs_dlm_getdevinfo
[PATCH v2 27/35] pnfsd: nfsd4_pnfs_dlm_layoutget
[PATCH v2 28/35] pnfsd: add dlm file layout layout-type
[PATCH v2 29/35] pnfsd: dlm pnfs_export_operations
[PATCH v2 30/35] pnfsd: gfs2: use generic file layout pnfs operations vector
[PATCH v2 31/35] posix_acl: resolve compile dependency in posix_acl.h
[PATCH v2 32/35] nfs: resolve compile dependency in nfs_xdr.h
[PATCH v2 33/35] pnfsd: layout commit
[PATCH v2 34/35] pnfsd: layout return
[PATCH v2 35/35] pnfsd: layoutreturn stateid processing

diff from v1 + posted fixes:

git diff --stat -p -M -w origin/pnfsd-files pnfsd-files
fs/exportfs/Makefile | 4 ++--
fs/exportfs/nfs4filelayoutxdr.c | 6 ------
fs/gfs2/export.c | 1 +
fs/nfsd/nfs4pnfsd.c | 17 +----------------
fs/nfsd/nfs4pnfsdlm.c | 4 ++--
fs/nfsd/nfs4state.c | 17 +++++++++++++----
fs/nfsd/nfsfh.c | 6 +-----
fs/nfsd/pnfsd.h | 4 ----
include/linux/nfs4.h | 2 --
include/linux/nfsd/nfsd.h | 8 --------
include/linux/nfsd/nfsd4_pnfs.h | 11 +++++++++++
include/linux/nfsd/state.h | 2 +-
include/linux/nfsd/xdr4.h | 8 --------
13 files changed, 32 insertions(+), 58 deletions(-)

diff --git a/fs/exportfs/Makefile b/fs/exportfs/Makefile
index f820d80..658207d 100644
--- a/fs/exportfs/Makefile
+++ b/fs/exportfs/Makefile
@@ -3,5 +3,5 @@

obj-$(CONFIG_EXPORTFS) += exportfs.o

-exportfs-objs := expfs.o
-exportfs-objs += nfs4filelayoutxdr.o
+exportfs-y := expfs.o
+exportfs-$(CONFIG_EXPORTFS_FILE_LAYOUT) += nfs4filelayoutxdr.o
diff --git a/fs/exportfs/nfs4filelayoutxdr.c b/fs/exportfs/nfs4filelayoutxdr.c
index f076908..782b673 100644
--- a/fs/exportfs/nfs4filelayoutxdr.c
+++ b/fs/exportfs/nfs4filelayoutxdr.c
@@ -1,6 +1,4 @@
/*
-* linux/fs/nfsd/nfs4filelayout_xdr.c
-*
* Copyright (c) 2006 The Regents of the University of Michigan.
* All rights reserved.
*
@@ -33,8 +31,6 @@
*
*
*/
-#if defined(CONFIG_EXPORTFS_FILE_LAYOUT)
-
#include <linux/module.h>
#include <linux/sunrpc/svc.h>
#include <linux/nfsd/nfsd.h>
@@ -226,5 +222,3 @@ out:
return error;
}
EXPORT_SYMBOL(filelayout_encode_layout);
-
-#endif /* CONFIG_EXPORTFS_FILE_LAYOUT */
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 9cea712..d15876e 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -254,3 +254,4 @@ const struct export_operations gfs2_export_ops = {
.get_name = gfs2_get_name,
.get_parent = gfs2_get_parent,
};
+
diff --git a/fs/nfsd/nfs4pnfsd.c b/fs/nfsd/nfs4pnfsd.c
index 4849463..aa7abad 100644
--- a/fs/nfsd/nfs4pnfsd.c
+++ b/fs/nfsd/nfs4pnfsd.c
@@ -21,8 +21,6 @@
*
*****************************************************************************/

-#if defined(CONFIG_PNFSD)
-
#include <linux/param.h>
#include <linux/slab.h>
#include <linux/sunrpc/svc.h>
@@ -66,18 +64,6 @@ nfsd4_init_pnfs_slabs(void)
return 0;
}

-static struct nfs4_file *
-find_alloc_file(struct inode *ino, struct svc_fh *current_fh)
-{
- struct nfs4_file *fp;
-
- fp = find_file(ino);
- if (fp)
- return fp;
-
- return alloc_init_file(ino, current_fh);
-}
-
static struct nfs4_layout_state *
alloc_init_layout_state(struct nfs4_client *clp, struct nfs4_file *fp,
stateid_t *stateid)
@@ -717,8 +703,8 @@ int nfs4_pnfs_return_layout(struct super_block *sb, struct svc_fh *current_fh,
if (!clp)
goto out;

- fp = find_file(ino);
if (lrp->args.lr_return_type == RETURN_FILE) {
+ fp = find_file(ino);
if (!fp) {
printk(KERN_ERR "%s: RETURN_FILE: no nfs4_file for "
"ino %p:%lu\n",
@@ -778,4 +764,3 @@ void pnfs_expire_client(struct nfs4_client *clp)
}
spin_unlock(&layout_lock);
}
-#endif /* CONFIG_PNFSD */
diff --git a/fs/nfsd/nfs4pnfsdlm.c b/fs/nfsd/nfs4pnfsdlm.c
index b3027fe..ed2e940 100644
--- a/fs/nfsd/nfs4pnfsdlm.c
+++ b/fs/nfsd/nfs4pnfsdlm.c
@@ -40,7 +40,7 @@ struct dlm_device_entry {
struct list_head dlm_dev_list;
char disk_name[DISK_NAME_LEN];
int num_ds;
- char ds_list[NFSD_PNFS_DS_LIST_MAX];
+ char ds_list[NFSD_DLM_DS_LIST_MAX];
};

static struct dlm_device_entry *
@@ -108,7 +108,7 @@ nfsd4_set_pnfs_dlm_device(char *pnfs_dlm_device, int len)
/* data server list */
/* FIXME: need to check for comma separated valid ip format */
len = strcspn(bufp, ":");
- if (len > NFSD_PNFS_DS_LIST_MAX)
+ if (len > NFSD_DLM_DS_LIST_MAX)
goto out_free;
memcpy(new->ds_list, bufp, len);

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d99caae..bc359ea 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1726,7 +1726,7 @@ out:
}

/* OPEN Share state helper functions */
-inline struct nfs4_file *
+static inline struct nfs4_file *
alloc_init_file(struct inode *ino, struct svc_fh *current_fh)
{
struct nfs4_file *fp;
@@ -1941,6 +1941,18 @@ find_file(struct inode *ino)
return NULL;
}

+struct nfs4_file *
+find_alloc_file(struct inode *ino, struct svc_fh *current_fh)
+{
+ struct nfs4_file *fp;
+
+ fp = find_file(ino);
+ if (fp)
+ return fp;
+
+ return alloc_init_file(ino, current_fh);
+}
+
static inline int access_valid(u32 x, u32 minorversion)
{
if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ)
@@ -2863,10 +2875,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
if (grace_disallows_io(ino))
return nfserr_grace;

-#if defined(CONFIG_PNFSD)
if (pnfs_fh_is_ds(&current_fh->fh_handle))
return 0;
-#endif /* CONFIG_PNFSD */

if (nfsd4_has_session(cstate))
flags |= HAS_SESSION;
@@ -4183,4 +4193,3 @@ nfs4_reset_lease(time_t leasetime)
{
user_lease_time = leasetime;
}
-
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 7d624f4..8300b2f 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -22,12 +22,8 @@
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/nfsd/nfsd.h>
-#include "auth.h"
-
-#if defined(CONFIG_PNFSD)
-#include <linux/nfsd/state.h>
#include <linux/nfsd/nfsd4_pnfs.h>
-#endif /* CONFIG_PNFSD */
+#include "auth.h"

#define NFSDDBG_FACILITY NFSDDBG_FH

diff --git a/fs/nfsd/pnfsd.h b/fs/nfsd/pnfsd.h
index 7d255f5..c3354e8 100644
--- a/fs/nfsd/pnfsd.h
+++ b/fs/nfsd/pnfsd.h
@@ -34,8 +34,6 @@
#ifndef LINUX_NFSD_PNFSD_H
#define LINUX_NFSD_PNFSD_H

-#if defined(CONFIG_PNFSD)
-
#include <linux/nfsd/state.h>
#include <linux/nfsd/nfsd4_pnfs.h>

@@ -64,6 +62,4 @@ int nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *, struct exp_xdr_stream *)
int nfs4_pnfs_return_layout(struct super_block *, struct svc_fh *,
struct nfsd4_pnfs_layoutreturn *);

-#endif /* CONFIG_PNFSD */
-
#endif /* LINUX_NFSD_PNFSD_H */
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 3c251f4..a899cff 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -548,7 +548,6 @@ enum state_protect_how4 {
SP4_SSV = 2
};

-#if defined(CONFIG_PNFS) || defined(CONFIG_PNFSD)
enum pnfs_layouttype {
LAYOUT_NFSV4_FILES = 1,
LAYOUT_OSD2_OBJECTS = 2,
@@ -591,7 +590,6 @@ enum filelayout_hint_care4 {
NFLH4_CARE_STRIPE_UNIT_SIZE = 0x00000040,
NFLH4_CARE_STRIPE_COUNT = 0x00000080
};
-#endif /* defined(CONFIG_PNFS) || defined(CONFIG_PNFSD) */

#endif
#endif
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 6eb5c6e..c61e220 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -181,14 +181,6 @@ static inline void nfs4_reset_lease(time_t leasetime) { }
static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
#endif

-#ifdef CONFIG_PNFSD
-/* Length of comma separated pnfs data server IPv4 addresses. Enough room for
- * 32 addresses.
- */
-#define NFSD_PNFS_DS_LIST_MAX 512
-
-#endif
-
/*
* lockd binding
*/
diff --git a/include/linux/nfsd/nfsd4_pnfs.h b/include/linux/nfsd/nfsd4_pnfs.h
index e96c2d4..dbed31a 100644
--- a/include/linux/nfsd/nfsd4_pnfs.h
+++ b/include/linux/nfsd/nfsd4_pnfs.h
@@ -169,6 +169,8 @@ struct pnfs_export_operations {
int (*can_merge_layouts) (u32 layout_type);
};

+#if defined(CONFIG_PNFSD)
+
/*
* fh_fsid_type is overloaded to indicate whether a filehandle was one supplied
* to a DS by LAYOUTGET. nfs4_preprocess_stateid_op() uses this to decide how
@@ -196,4 +198,13 @@ static inline int pnfs_fh_fsid_type(struct knfsd_fh *fh)
return fsid_type;
}

+#else /* CONFIG_PNFSD */
+
+static inline int pnfs_fh_is_ds(struct knfsd_fh *fh)
+{
+ return 0;
+}
+
+#endif /* CONFIG_PNFSD */
+
#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 8cf7e51..16ee98c 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -415,7 +415,7 @@ extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
extern void nfsd4_free_slab(struct kmem_cache **);
extern struct nfs4_file *find_file(struct inode *);
-extern struct nfs4_file *alloc_init_file(struct inode *, struct svc_fh *);
+extern struct nfs4_file *find_alloc_file(struct inode *, struct svc_fh *);
extern void put_nfs4_file(struct nfs4_file *);
extern void get_nfs4_file(struct nfs4_file *);
extern struct nfs4_client *find_confirmed_client(clientid_t *);
diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
index 8e36ac3..831151f 100644
--- a/include/linux/nfsd/xdr4.h
+++ b/include/linux/nfsd/xdr4.h
@@ -515,14 +515,6 @@ struct nfsd4_compoundres {
struct nfsd4_compound_state cstate;
};

-static inline __be32 *
-nfsd4_xdr_reserve_space(struct nfsd4_compoundres *resp, size_t nbytes)
-{
- __be32 *p = resp->p;
- BUG_ON(p + XDR_QUADLEN(nbytes) > resp->end);
- return p;
-}
-
static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
{
struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;


2009-12-09 10:24:49

by Benny Halevy

[permalink] [raw]
Subject: [PATCH 0/9] fixes to Initial pnfsd file layout support v2, comments {3,4,5}/35

Bruce,

The following SQUASHME patches address your review comments for:
[PATCH v2 03/35] pnfsd, pnfs: protocol level pnfs constants
[PATCH v2 04/35] pnfsd: return pnfs flags on exchange_id
[PATCH v2 05/35] pnfsd: don't set up back channel on create_session for ds

I'm also posting the respective patches to the rest of the
linux-pnfs tree:

pnfsd-files-next:
[PATCH 1/3] SQUASHME: pnfsd: unify enum pnfs_layout{return,recall}_type
[PATCH 2/3] SQUASHME: pnfsd: always set both MDS and DS exchangeid capability flags
[PATCH 3/3] SQUASHME: pnfsd: define a is_ds_only_session helper

pnfsd:
[PATCH 4/4] SQUASHME: pnfsd: use only RETURN_* constants

spnfs:
[PATCH 5/5] SQUASHME: spnfs: use only RETURN_* constants

spnfs-blocks:
[PATCH 6/6] SQUASHME: spnfs-block: use only RETURN_* constants

pnfsd-exofs:
[PATCH 7/7] SQUASHME: pnfsd-exofs: use only pnfs_layoutreturn_type

pnfs:
[PATCH 8/9] SQUASHME: pnfs: use only pnfs_layoutreturn_type
[PATCH 9/9] SQUASHME: pnfs: filelayout: mask out server's MDS capability flag for DSs

2009-12-09 22:21:35

by Benny Halevy

[permalink] [raw]
Subject: Re: [pnfs] [PATCH 9/9] SQUASHME: pnfs: filelayout: mask out server's MDS capability flag for DSs

On Dec. 09, 2009, 12:28 +0200, Benny Halevy <[email protected]> wrote:
> When establishing a session with a DS mask out its MDS
> exchange_id flag so we know we use it as DS-only.
>
> Signed-off-by: Benny Halevy <[email protected]>
> ---
> fs/nfs/nfs4filelayoutdev.c | 3 +++
> 1 files changed, 3 insertions(+), 0 deletions(-)
>
> diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
> index 128715a..cb8ae88 100644
> --- a/fs/nfs/nfs4filelayoutdev.c
> +++ b/fs/nfs/nfs4filelayoutdev.c
> @@ -272,6 +272,9 @@ nfs4_pnfs_ds_create(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
> if (err)
> goto out_put;
>
> + /* mask out the server's MDS capability flag */
> + clp->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_DS;
> +

Duh, that should be
clp->cl_exchange_flags &= ~EXCHGID4_FLAG_USE_PNFS_MDS;

Or I'm just utterly tired ;-)

Benny

> if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS)) {
> printk(KERN_INFO "ip:port %s is not a pNFS Data Server\n",
> ds->r_addr);

2009-12-07 19:25:24

by J.Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v2 03/35] pnfsd, pnfs: protocol level pnfs constants

On Mon, Dec 07, 2009 at 11:30:24AM +0200, Benny Halevy wrote:
> [extracted from pnfsd: Initial pNFS server implementation.]
> Signed-off-by: Benny Halevy <[email protected]>
> [pnfsd: update pNFS server ops to draft 13]
> Signed-off-by: Marc Eshel <[email protected]>
> Signed-off-by: Benny Halevy <[email protected]>
> [pnfs: Move getdeviceinfo to draft-19 on client] [for NOTIFY_DEVICEID4_* defs]
> Signed-off-by: Benny Halevy <[email protected]>
> [pnfs: pnfs_notify_deviceid_type4 should define bitmasks not offsets]
> The notify_deviceid_type4 values in the spec denote bit offsets, not
> bitmask values. Since we use these constants as bitmasks, just define them
> this way.
> [removed LAYOUT_PVFS2's definition]
> Signed-off-by: Benny Halevy <[email protected]>
> [compile fixes for pnfs branch]
> Signed-off-by: Fred Isaman <[email protected]>
> [removed #ifdef CONFIG_PNFS{,D}]
> Signed-off-by: Benny Halevy <[email protected]>
> ---
> include/linux/nfs4.h | 45 +++++++++++++++++++++++++++++++++++++++++++++
> 1 files changed, 45 insertions(+), 0 deletions(-)
>
> diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
> index c4c0602..a899cff 100644
> --- a/include/linux/nfs4.h
> +++ b/include/linux/nfs4.h
> @@ -469,6 +469,8 @@ enum lock_type4 {
> #define FATTR4_WORD1_TIME_MODIFY (1UL << 21)
> #define FATTR4_WORD1_TIME_MODIFY_SET (1UL << 22)
> #define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23)
> +#define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30)
> +#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1)
>
> #define NFSPROC4_NULL 0
> #define NFSPROC4_COMPOUND 1
> @@ -546,6 +548,49 @@ enum state_protect_how4 {
> SP4_SSV = 2
> };
>
> +enum pnfs_layouttype {
> + LAYOUT_NFSV4_FILES = 1,
> + LAYOUT_OSD2_OBJECTS = 2,
> + LAYOUT_BLOCK_VOLUME = 3,
> +};
> +
> +/* FIXME: should recall and return types be combined? */

Is there any reason not to?

--b.

> +enum pnfs_layoutrecall_type {
> + RECALL_FILE = 1,
> + RECALL_FSID = 2,
> + RECALL_ALL = 3
> +};
> +
> +enum pnfs_layoutreturn_type {
> + RETURN_FILE = 1,
> + RETURN_FSID = 2,
> + RETURN_ALL = 3
> +};
> +
> +enum pnfs_iomode {
> + IOMODE_READ = 1,
> + IOMODE_RW = 2,
> + IOMODE_ANY = 3,
> +};
> +
> +enum pnfs_notify_deviceid_type4 {
> + NOTIFY_DEVICEID4_CHANGE = 1 << 1,
> + NOTIFY_DEVICEID4_DELETE = 1 << 2,
> +};
> +
> +#define NFL4_UFLG_MASK 0x0000003F
> +#define NFL4_UFLG_DENSE 0x00000001
> +#define NFL4_UFLG_COMMIT_THRU_MDS 0x00000002
> +#define NFL4_UFLG_STRIPE_UNIT_SIZE_MASK 0xFFFFFFC0
> +
> +/* Encoded in the loh_body field of type layouthint4 */
> +enum filelayout_hint_care4 {
> + NFLH4_CARE_DENSE = NFL4_UFLG_DENSE,
> + NFLH4_CARE_COMMIT_THRU_MDS = NFL4_UFLG_COMMIT_THRU_MDS,
> + NFLH4_CARE_STRIPE_UNIT_SIZE = 0x00000040,
> + NFLH4_CARE_STRIPE_COUNT = 0x00000080
> +};
> +
> #endif
> #endif
>
> --
> 1.6.5.1
>

2009-12-07 19:51:32

by J.Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v2 04/35] pnfsd: return pnfs flags on exchange_id

On Mon, Dec 07, 2009 at 11:30:37AM +0200, Benny Halevy wrote:
> Set the cl_exchange_flags to be non_pnfs if we do not set
> either pnfs or ds (in the plain old nfs41 case).
>
> pnfsd: set EXCHGID4_FLAG_USE_NON_PNFS when !CONFIG_PNFSD:
> EXCHGID4_FLAG_USE_NON_PNFS should be set when the server does not support
> operations (e.g. LAYOUTGET) or attributes that pertain to pNFS.
>
> [extracted from pnfsd: Initial pNFS server implementation.]
> Signed-off-by: Benny Halevy <[email protected]>
> [pnfsd: Fixup nfsd4_set_ex_flags.]
> Signed-off-by: Dean Hildebrand <[email protected]>
> [pnfsd: set EXCHGID4_FLAG_USE_NON_PNFS when !CONFIG_PNFSD]
> [pnfsd: fix compiler warning in nfsd4_set_ex_flags when CONFIG_PNFSD is not defined]
> Signed-off-by: Benny Halevy <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 15 +++++++++++++++
> 1 files changed, 15 insertions(+), 0 deletions(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 2923e6c..9ecbc25 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1098,8 +1098,23 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
> static void
> nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
> {
> +#if defined(CONFIG_PNFSD)
> + int mds_and_ds = EXCHGID4_FLAG_USE_PNFS_MDS | EXCHGID4_FLAG_USE_PNFS_DS;
> + int mds_or_ds = 0;

initialization isn't used.

> +
> + /* Save the client's MDS or DS flags, or set them both.
> + * XXX We currently do not have a method of determining
> + * what a server supports prior to receiving a filehandle
> + * e.g. at exchange id time. */
> + mds_or_ds = clid->flags & mds_and_ds;
> + if (mds_or_ds)
> + new->cl_exchange_flags |= mds_or_ds;
> + else
> + new->cl_exchange_flags |= mds_and_ds;

Why do we need to do this? If the fact is that we're going to accept
either DS or MDS traffic over this session, let's just always set both
flags. That doesn't stop the client from dedicating sessions to one or
the other use if that's what it wants.

> +#else /* CONFIG_PNFSD */
> /* pNFS is not supported */
> new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
> +#endif /* CONFIG_PNFSD */

The ifdef should be easy enough to hide in the usual way, with a
conditionally defined

new->cl_exchange_flags |= get_pnfs_flags(clid->flags)

(or something to that effect).

--b.

>
> /* Referrals are supported, Migration is not. */
> new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
> --
> 1.6.5.1
>

2009-12-07 20:10:38

by J.Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v2 05/35] pnfsd: don't set up back channel on create_session for ds

On Mon, Dec 07, 2009 at 11:30:51AM +0200, Benny Halevy wrote:
> From: Dean Hildebrand <[email protected]>
>
> [was pnfsd: Add use of pnfs exchange flags]
> Should this code be surrounded by CONFIG_PNFSD?

Might be nice, but I suppose it's not important.

>
> Signed-off-by: Dean Hildebrand <[email protected]>
> Signed-off-by: Benny Halevy <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 4 ++++
> 1 files changed, 4 insertions(+), 0 deletions(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 9ecbc25..dc9d553 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1349,6 +1349,10 @@ nfsd4_create_session(struct svc_rqst *rqstp,
> cr_ses->flags &= ~SESSION4_PERSIST;
> cr_ses->flags &= ~SESSION4_RDMA;
>
> + if (!(unconf->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS) &&
> + (unconf->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS))
> + cr_ses->flags &= ~SESSION4_BACK_CHAN;
> +

Might be easier to read as:

if (is_ds_only_session(unconf))
cr_ses->flags &= ~SESSION4_BACK_CHAN;

with is_ds_only_session() defined in the obvious way.

--b.

> if (cr_ses->flags & SESSION4_BACK_CHAN) {
> unconf->cl_cb_xprt = rqstp->rq_xprt;
> svc_xprt_get(unconf->cl_cb_xprt);
> --
> 1.6.5.1
>

2009-12-08 14:31:45

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH v2 03/35] pnfsd, pnfs: protocol level pnfs constants

On Dec. 07, 2009, 21:25 +0200, " J. Bruce Fields" <[email protected]> wrote:
> On Mon, Dec 07, 2009 at 11:30:24AM +0200, Benny Halevy wrote:

<snip>

>> +
>> +/* FIXME: should recall and return types be combined? */
>
> Is there any reason not to?

Not really. In fact on the client side we use almost exclusively
the RECALL_* constants, whether they come from the layoutrecall path
or from a voluntary return path.

Benny

>
> --b.
>
>> +enum pnfs_layoutrecall_type {
>> + RECALL_FILE = 1,
>> + RECALL_FSID = 2,
>> + RECALL_ALL = 3
>> +};
>> +
>> +enum pnfs_layoutreturn_type {
>> + RETURN_FILE = 1,
>> + RETURN_FSID = 2,
>> + RETURN_ALL = 3
>> +};
>> +

2009-12-08 14:46:27

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH v2 04/35] pnfsd: return pnfs flags on exchange_id

On Dec. 07, 2009, 21:51 +0200, " J. Bruce Fields" <[email protected]> wrote:
> On Mon, Dec 07, 2009 at 11:30:37AM +0200, Benny Halevy wrote:
>> Set the cl_exchange_flags to be non_pnfs if we do not set
>> either pnfs or ds (in the plain old nfs41 case).
>>
>> pnfsd: set EXCHGID4_FLAG_USE_NON_PNFS when !CONFIG_PNFSD:
>> EXCHGID4_FLAG_USE_NON_PNFS should be set when the server does not support
>> operations (e.g. LAYOUTGET) or attributes that pertain to pNFS.
>>
>> [extracted from pnfsd: Initial pNFS server implementation.]
>> Signed-off-by: Benny Halevy <[email protected]>
>> [pnfsd: Fixup nfsd4_set_ex_flags.]
>> Signed-off-by: Dean Hildebrand <[email protected]>
>> [pnfsd: set EXCHGID4_FLAG_USE_NON_PNFS when !CONFIG_PNFSD]
>> [pnfsd: fix compiler warning in nfsd4_set_ex_flags when CONFIG_PNFSD is not defined]
>> Signed-off-by: Benny Halevy <[email protected]>
>> ---
>> fs/nfsd/nfs4state.c | 15 +++++++++++++++
>> 1 files changed, 15 insertions(+), 0 deletions(-)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 2923e6c..9ecbc25 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -1098,8 +1098,23 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
>> static void
>> nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
>> {
>> +#if defined(CONFIG_PNFSD)
>> + int mds_and_ds = EXCHGID4_FLAG_USE_PNFS_MDS | EXCHGID4_FLAG_USE_PNFS_DS;
>> + int mds_or_ds = 0;
>
> initialization isn't used.
>
>> +
>> + /* Save the client's MDS or DS flags, or set them both.
>> + * XXX We currently do not have a method of determining
>> + * what a server supports prior to receiving a filehandle
>> + * e.g. at exchange id time. */
>> + mds_or_ds = clid->flags & mds_and_ds;
>> + if (mds_or_ds)
>> + new->cl_exchange_flags |= mds_or_ds;
>> + else
>> + new->cl_exchange_flags |= mds_and_ds;
>
> Why do we need to do this? If the fact is that we're going to accept
> either DS or MDS traffic over this session, let's just always set both
> flags. That doesn't stop the client from dedicating sessions to one or
> the other use if that's what it wants.

The original motivation came from the client side, which treats DSs differently
from MDSs with regard to renewing the lease.
If we return both flags set unconditionally, then the client will need
to remember internally what it uses the server for - an MDS or a DS.

Something along these lines (needs testing):

diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 128715a..cb8ae88 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -272,6 +272,9 @@ nfs4_pnfs_ds_create(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
if (err)
goto out_put;

+ /* mask out the server's MDS capability flag */
+ clp->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_DS;
+
if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS)) {
printk(KERN_INFO "ip:port %s is not a pNFS Data Server\n",
ds->r_addr);


Benny

>
>> +#else /* CONFIG_PNFSD */
>> /* pNFS is not supported */
>> new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
>> +#endif /* CONFIG_PNFSD */
>
> The ifdef should be easy enough to hide in the usual way, with a
> conditionally defined
>
> new->cl_exchange_flags |= get_pnfs_flags(clid->flags)
>
> (or something to that effect).
>
> --b.
>
>>
>> /* Referrals are supported, Migration is not. */
>> new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
>> --
>> 1.6.5.1
>>

2009-12-08 14:50:46

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH v2 05/35] pnfsd: don't set up back channel on create_session for ds

On Dec. 07, 2009, 22:10 +0200, " J. Bruce Fields" <[email protected]> wrote:
> On Mon, Dec 07, 2009 at 11:30:51AM +0200, Benny Halevy wrote:
>> From: Dean Hildebrand <[email protected]>
>>
>> [was pnfsd: Add use of pnfs exchange flags]
>> Should this code be surrounded by CONFIG_PNFSD?
>
> Might be nice, but I suppose it's not important.
>
>> Signed-off-by: Dean Hildebrand <[email protected]>
>> Signed-off-by: Benny Halevy <[email protected]>
>> ---
>> fs/nfsd/nfs4state.c | 4 ++++
>> 1 files changed, 4 insertions(+), 0 deletions(-)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 9ecbc25..dc9d553 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -1349,6 +1349,10 @@ nfsd4_create_session(struct svc_rqst *rqstp,
>> cr_ses->flags &= ~SESSION4_PERSIST;
>> cr_ses->flags &= ~SESSION4_RDMA;
>>
>> + if (!(unconf->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS) &&
>> + (unconf->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS))
>> + cr_ses->flags &= ~SESSION4_BACK_CHAN;
>> +
>
> Might be easier to read as:
>
> if (is_ds_only_session(unconf))
> cr_ses->flags &= ~SESSION4_BACK_CHAN;
>
> with is_ds_only_session() defined in the obvious way.

Yeah. Actually we have exchgid_is_ds_only() defined for the client
as follows:

static inline bool exchgid_is_ds_only(struct nfs_client *clp)
{
u32 mask = EXCHGID4_FLAG_USE_PNFS_DS | EXCHGID4_FLAG_USE_PNFS_MDS;

return (clp->cl_exchange_flags & mask) == EXCHGID4_FLAG_USE_PNFS_DS;
}

we can define it as common code working on the exchange flags
rather than on the high level client structure...

Benny

>
> --b.
>
>> if (cr_ses->flags & SESSION4_BACK_CHAN) {
>> unconf->cl_cb_xprt = rqstp->rq_xprt;
>> svc_xprt_get(unconf->cl_cb_xprt);
>> --
>> 1.6.5.1
>>

2009-12-09 10:27:03

by Benny Halevy

[permalink] [raw]
Subject: [PATCH 1/3] SQUASHME: pnfsd: unify enum pnfs_layout{return,recall}_type

Use only the layoutreturn constants for both returns and recalls.
(return_* works better for recall_type rather than the other way around)

Signed-off-by: Benny Halevy <[email protected]>
---
include/linux/nfs4.h | 8 +-------
1 files changed, 1 insertions(+), 7 deletions(-)

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index a899cff..8a53213 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -554,13 +554,7 @@ enum pnfs_layouttype {
LAYOUT_BLOCK_VOLUME = 3,
};

-/* FIXME: should recall and return types be combined? */
-enum pnfs_layoutrecall_type {
- RECALL_FILE = 1,
- RECALL_FSID = 2,
- RECALL_ALL = 3
-};
-
+/* used for both layout return and recall */
enum pnfs_layoutreturn_type {
RETURN_FILE = 1,
RETURN_FSID = 2,
--
1.6.5.1


2009-12-09 10:27:15

by Benny Halevy

[permalink] [raw]
Subject: [PATCH 2/3] SQUASHME: pnfsd: always set both MDS and DS exchangeid capability flags

We always support both modes when CONFIG_PNFSD is enabled.
The client needs to remember what the session is used for
if it cares to distinguish between DSs and MDSs.

Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfs4state.c | 15 ++-------------
1 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 108cb3e..aa2e9c2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1103,20 +1103,9 @@ static void
nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
{
#if defined(CONFIG_PNFSD)
- int mds_and_ds = EXCHGID4_FLAG_USE_PNFS_MDS | EXCHGID4_FLAG_USE_PNFS_DS;
- int mds_or_ds = 0;
-
- /* Save the client's MDS or DS flags, or set them both.
- * XXX We currently do not have a method of determining
- * what a server supports prior to receiving a filehandle
- * e.g. at exchange id time. */
- mds_or_ds = clid->flags & mds_and_ds;
- if (mds_or_ds)
- new->cl_exchange_flags |= mds_or_ds;
- else
- new->cl_exchange_flags |= mds_and_ds;
+ new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS |
+ EXCHGID4_FLAG_USE_PNFS_DS;
#else /* CONFIG_PNFSD */
- /* pNFS is not supported */
new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
#endif /* CONFIG_PNFSD */

--
1.6.5.1


2009-12-09 10:27:29

by Benny Halevy

[permalink] [raw]
Subject: [PATCH 3/3] SQUASHME: pnfsd: define a is_ds_only_session helper

Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfs4state.c | 3 +--
include/linux/nfs4.h | 7 +++++++
2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index aa2e9c2..019b936 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1342,8 +1342,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
cr_ses->flags &= ~SESSION4_PERSIST;
cr_ses->flags &= ~SESSION4_RDMA;

- if (!(unconf->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS) &&
- (unconf->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS))
+ if (is_ds_only_session(unconf->cl_exchange_flags))
cr_ses->flags &= ~SESSION4_BACK_CHAN;

if (cr_ses->flags & SESSION4_BACK_CHAN) {
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 8a53213..e84b442 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -119,6 +119,13 @@
#define EXCHGID4_FLAG_MASK_A 0x40070003
#define EXCHGID4_FLAG_MASK_R 0x80070003

+static inline bool
+is_ds_only_session(u32 exchange_flags)
+{
+ u32 mask = EXCHGID4_FLAG_USE_PNFS_DS | EXCHGID4_FLAG_USE_PNFS_MDS;
+ return (exchange_flags & mask) == EXCHGID4_FLAG_USE_PNFS_DS;
+}
+
#define SEQ4_STATUS_CB_PATH_DOWN 0x00000001
#define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING 0x00000002
#define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED 0x00000004
--
1.6.5.1


2009-12-09 10:27:42

by Benny Halevy

[permalink] [raw]
Subject: [PATCH 4/4] SQUASHME: pnfsd: use only RETURN_* constants

Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfs4callback.c | 4 ++--
fs/nfsd/nfs4pnfsd.c | 32 ++++++++++++++++----------------
2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 0bc800d..c5d2a29 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -328,7 +328,7 @@ encode_cb_layout(struct xdr_stream *xdr, struct nfs4_layoutrecall *clr,
WRITE32(clr->cb.cbl_seg.iomode);
WRITE32(clr->cb.cbl_layoutchanged);
WRITE32(clr->cb.cbl_recall_type);
- if (unlikely(clr->cb.cbl_recall_type == RECALL_FSID)) {
+ if (unlikely(clr->cb.cbl_recall_type == RETURN_FSID)) {
struct nfs4_fsid fsid = clr->cb.cbl_fsid;

RESERVE_SPACE(16);
@@ -339,7 +339,7 @@ encode_cb_layout(struct xdr_stream *xdr, struct nfs4_layoutrecall *clr,
__func__, clr->cb.cbl_seg.layout_type,
clr->cb.cbl_seg.iomode, clr->cb.cbl_layoutchanged,
clr->cb.cbl_recall_type, fsid.major, fsid.minor);
- } else if (clr->cb.cbl_recall_type == RECALL_FILE) {
+ } else if (clr->cb.cbl_recall_type == RETURN_FILE) {
int len = clr->clr_file->fi_fhlen;

RESERVE_SPACE(20 + len);
diff --git a/fs/nfsd/nfs4pnfsd.c b/fs/nfsd/nfs4pnfsd.c
index e1e8062..a98d71b 100644
--- a/fs/nfsd/nfs4pnfsd.c
+++ b/fs/nfsd/nfs4pnfsd.c
@@ -586,15 +586,15 @@ is_layout_recalled(struct nfs4_client *clp,
list_for_each_entry (clr, &clp->cl_layoutrecalls, clr_perclnt) {
if (clr->cb.cbl_seg.layout_type != seg->layout_type)
continue;
- if (clr->cb.cbl_recall_type == RECALL_ALL)
+ if (clr->cb.cbl_recall_type == RETURN_ALL)
goto found;
- if (clr->cb.cbl_recall_type == RECALL_FSID) {
+ if (clr->cb.cbl_recall_type == RETURN_FSID) {
if (same_fsid(&clr->cb.cbl_fsid, current_fh))
goto found;
else
continue;
}
- BUG_ON(clr->cb.cbl_recall_type != RECALL_FILE);
+ BUG_ON(clr->cb.cbl_recall_type != RETURN_FILE);
if (clr->cb.cbl_seg.clientid == seg->clientid &&
lo_seg_overlapping(&clr->cb.cbl_seg, seg))
goto found;
@@ -889,15 +889,15 @@ recall_return_perfect_match(struct nfs4_layoutrecall *clr,
clr->cb.cbl_recall_type != lrp->args.lr_return_type)
return 0;

- return (clr->cb.cbl_recall_type == RECALL_FILE &&
+ return (clr->cb.cbl_recall_type == RETURN_FILE &&
clr->clr_file == fp &&
clr->cb.cbl_seg.offset == lrp->args.lr_seg.offset &&
clr->cb.cbl_seg.length == lrp->args.lr_seg.length) ||

- (clr->cb.cbl_recall_type == RECALL_FSID &&
+ (clr->cb.cbl_recall_type == RETURN_FSID &&
same_fsid(&clr->cb.cbl_fsid, current_fh)) ||

- clr->cb.cbl_recall_type == RECALL_ALL;
+ clr->cb.cbl_recall_type == RETURN_ALL;
}

static int
@@ -912,12 +912,12 @@ recall_return_partial_match(struct nfs4_layoutrecall *clr,
lrp->args.lr_seg.iomode != IOMODE_ANY)
return 0;

- if (clr->cb.cbl_recall_type == RECALL_ALL ||
+ if (clr->cb.cbl_recall_type == RETURN_ALL ||
lrp->args.lr_return_type == RETURN_ALL)
return 1;

/* fsid matches? */
- if (clr->cb.cbl_recall_type == RECALL_FSID ||
+ if (clr->cb.cbl_recall_type == RETURN_FSID ||
lrp->args.lr_return_type == RETURN_FSID)
return same_fsid(&clr->cb.cbl_fsid, current_fh);

@@ -1123,9 +1123,9 @@ cl_has_layout(struct nfs4_client *clp, struct nfsd4_pnfs_cb_layout *cbl,
struct nfs4_file *lrfile, stateid_t *lsid)
{
switch (cbl->cbl_recall_type) {
- case RECALL_FILE:
+ case RETURN_FILE:
return cl_has_file_layout(clp, lrfile, lsid);
- case RECALL_FSID:
+ case RETURN_FSID:
return cl_has_fsid_layout(clp, &cbl->cbl_fsid);
default:
return cl_has_any_layout(clp);
@@ -1156,7 +1156,7 @@ nomatching_layout(struct nfs4_layoutrecall *clr)
dprintk("%s: clp %p fp %p: simulating layout_return\n", __func__,
clr->clr_client, clr->clr_file);

- if (clr->cb.cbl_recall_type == RECALL_FILE)
+ if (clr->cb.cbl_recall_type == RETURN_FILE)
pnfs_return_file_layouts(clr->clr_client, clr->clr_file, &lr);
else
pnfs_return_client_layouts(clr->clr_client, &lr,
@@ -1398,10 +1398,10 @@ int nfsd_layout_recall_cb(struct super_block *sb, struct inode *inode,

dprintk("NFSD nfsd_layout_recall_cb: inode %p cbl %p\n", inode, cbl);
BUG_ON(!cbl);
- BUG_ON(cbl->cbl_recall_type != RECALL_FILE &&
- cbl->cbl_recall_type != RECALL_FSID &&
- cbl->cbl_recall_type != RECALL_ALL);
- BUG_ON(cbl->cbl_recall_type == RECALL_FILE && !inode);
+ BUG_ON(cbl->cbl_recall_type != RETURN_FILE &&
+ cbl->cbl_recall_type != RETURN_FSID &&
+ cbl->cbl_recall_type != RETURN_ALL);
+ BUG_ON(cbl->cbl_recall_type == RETURN_FILE && !inode);
BUG_ON(cbl->cbl_seg.iomode != IOMODE_READ &&
cbl->cbl_seg.iomode != IOMODE_RW &&
cbl->cbl_seg.iomode != IOMODE_ANY);
@@ -1420,7 +1420,7 @@ int nfsd_layout_recall_cb(struct super_block *sb, struct inode *inode,
"nfs4_file not found\n");
goto err;
}
- if (cbl->cbl_recall_type == RECALL_FSID)
+ if (cbl->cbl_recall_type == RETURN_FSID)
cbl->cbl_fsid = lrfile->fi_fsid;
}

--
1.6.5.1


2009-12-09 10:27:54

by Benny Halevy

[permalink] [raw]
Subject: [PATCH 5/5] SQUASHME: spnfs: use only RETURN_* constants

Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/spnfs_ops.c | 14 +++++++-------
1 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/nfsd/spnfs_ops.c b/fs/nfsd/spnfs_ops.c
index 322fc12..2838a85 100644
--- a/fs/nfsd/spnfs_ops.c
+++ b/fs/nfsd/spnfs_ops.c
@@ -199,17 +199,17 @@ spnfs_layoutrecall(struct inode *inode, int type, u64 offset, u64 len)
struct nfsd4_pnfs_cb_layout lr;

switch (type) {
- case RECALL_FILE:
+ case RETURN_FILE:
sb = inode->i_sb;
dprintk("%s: recalling layout for ino = %lu\n",
__func__, inode->i_ino);
break;
- case RECALL_FSID:
+ case RETURN_FSID:
sb = inode->i_sb;
dprintk("%s: recalling layout for fsid x (unimplemented)\n",
__func__);
return 0;
- case RECALL_ALL:
+ case RETURN_ALL:
/* XXX figure out how to get a sb since there's no inode ptr */
dprintk("%s: recalling all layouts (unimplemented)\n",
__func__);
@@ -244,19 +244,19 @@ spnfs_test_layoutrecall(char *path, u64 offset, u64 len)

if (strcmp(path, "all") == 0) {
inode = NULL;
- type = RECALL_ALL;
+ type = RETURN_ALL;
} else {
rc = path_lookup(path, 0, &nd);
if (rc != 0)
return -ENOENT;

/*
- * XXX todo: add a RECALL_FSID scenario here...maybe if
+ * XXX todo: add a RETURN_FSID scenario here...maybe if
* inode is a dir...
*/

inode = nd.path.dentry->d_inode;
- type = RECALL_FILE;
+ type = RETURN_FILE;
}

if (len == 0)
@@ -264,7 +264,7 @@ spnfs_test_layoutrecall(char *path, u64 offset, u64 len)

rc = spnfs_layoutrecall(inode, type, offset, len);

- if (type != RECALL_ALL)
+ if (type != RETURN_ALL)
path_put(&nd.path);
return rc;
}
--
1.6.5.1


2009-12-09 10:28:07

by Benny Halevy

[permalink] [raw]
Subject: [PATCH 6/6] SQUASHME: spnfs-block: use only RETURN_* constants

Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/bl_ops.c | 6 +++---
fs/nfsd/nfs4proc.c | 2 +-
fs/nfsd/vfs.c | 2 +-
3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/bl_ops.c b/fs/nfsd/bl_ops.c
index 86c292b..92dc27b 100644
--- a/fs/nfsd/bl_ops.c
+++ b/fs/nfsd/bl_ops.c
@@ -565,18 +565,18 @@ bl_layoutrecall(struct inode *inode, int type, u64 offset, u64 len)
dprintk("--> %s\n", __func__);
BUG_ON(!len);
switch (type) {
- case RECALL_FILE:
+ case RETURN_FILE:
sb = inode->i_sb;
dprintk(" recalling layout [0x%x:%lu], %Lu:%Lu\n",
inode->i_sb->s_dev, inode->i_ino,
_2SECTS(offset), _2SECTS(len));
break;
- case RECALL_FSID:
+ case RETURN_FSID:
sb = inode->i_sb;
dprintk("%s: recalling layout for fsid x (unimplemented)\n",
__func__);
return 0;
- case RECALL_ALL:
+ case RETURN_ALL:
/*
* XXX figure out how to get a sb since there's no
* inode ptr
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index dd8846a..07fb7a4 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -892,7 +892,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
#if defined(CONFIG_SPNFS_BLOCK)
if (pnfs_block_enabled(cstate->current_fh.fh_dentry->d_inode, 0)) {
status = bl_layoutrecall(cstate->current_fh.fh_dentry->d_inode,
- RECALL_FILE, write->wr_offset, write->wr_buflen);
+ RETURN_FILE, write->wr_offset, write->wr_buflen);
if (!status) {
status = nfsd_write(rqstp, &cstate->current_fh, filp,
write->wr_offset, rqstp->rq_vec, write->wr_vlen,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 97d6ffc..62358df 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -353,7 +353,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
goto out;
#if defined(CONFIG_SPNFS_BLOCK)
if (pnfs_block_enabled(inode, 0)) {
- err = bl_layoutrecall(inode, RECALL_FILE,
+ err = bl_layoutrecall(inode, RETURN_FILE,
iap->ia_size, inode->i_size - iap->ia_size);
}
#endif /* CONFIG_SPNFS_BLOCK */
--
1.6.5.1


2009-12-09 10:28:20

by Benny Halevy

[permalink] [raw]
Subject: [PATCH 7/7] SQUASHME: pnfsd-exofs: use only pnfs_layoutreturn_type

Signed-off-by: Benny Halevy <[email protected]>
---
fs/exofs/export.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/exofs/export.c b/fs/exofs/export.c
index 4c1c7e3..9a27673 100644
--- a/fs/exofs/export.c
+++ b/fs/exofs/export.c
@@ -343,7 +343,7 @@ static int is_layout_returned(struct exofs_i_info *oi)
}

memset(&cbl, 0, sizeof(cbl));
- cbl.cbl_recall_type = RECALL_FILE;
+ cbl.cbl_recall_type = RETURN_FILE;
cbl.cbl_seg.layout_type = LAYOUT_OSD2_OBJECTS;
cbl.cbl_seg.iomode = IOMODE_RW;
cbl.cbl_seg.length = NFS4_MAX_UINT64;
@@ -401,7 +401,7 @@ int exofs_inode_recall_layout(struct inode *inode, exofs_recall_fn todo)
goto err;

memset(&cbl, 0, sizeof(cbl));
- cbl.cbl_recall_type = RECALL_FILE;
+ cbl.cbl_recall_type = RETURN_FILE;
cbl.cbl_seg.layout_type = LAYOUT_OSD2_OBJECTS;
cbl.cbl_seg.iomode = IOMODE_ANY;
cbl.cbl_seg.length = NFS4_MAX_UINT64;
--
1.6.5.1


2009-12-09 10:28:32

by Benny Halevy

[permalink] [raw]
Subject: [PATCH 8/9] SQUASHME: pnfs: use only pnfs_layoutreturn_type

Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfs/callback_proc.c | 12 ++++++------
fs/nfs/callback_xdr.c | 4 ++--
fs/nfs/inode.c | 2 +-
fs/nfs/nfs4proc.c | 4 ++--
fs/nfs/nfs4state.c | 2 +-
fs/nfs/pnfs.c | 14 +++++++-------
fs/nfs/pnfs.h | 6 +++---
7 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 7c5e8ee..a2ea863 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -132,10 +132,10 @@ nfs_layoutrecall_find_inode(struct nfs_client *clp,
list_for_each_entry(nfsi, &clp->cl_lo_inodes, lo_inodes) {
dprintk("%s: Searching inode=%lu\n",
__func__, nfsi->vfs_inode.i_ino);
- if (args->cbl_recall_type == RECALL_FILE) {
+ if (args->cbl_recall_type == RETURN_FILE) {
if (nfs_compare_fh(&args->cbl_fh, &nfsi->fh))
continue;
- } else if (args->cbl_recall_type == RECALL_FSID) {
+ } else if (args->cbl_recall_type == RETURN_FSID) {
server = NFS_SERVER(&nfsi->vfs_inode);
if (server->fsid.major != args->cbl_fsid.major ||
server->fsid.minor != args->cbl_fsid.minor)
@@ -195,11 +195,11 @@ static int pnfs_recall_layout(void *data)
then return layouts, resume after layoutreturns complete
*/

- if (rl.cbl_recall_type == RECALL_FILE) {
+ if (rl.cbl_recall_type == RETURN_FILE) {
status = pnfs_return_layout(inode, &rl.cbl_seg, &rl.cbl_stateid,
- RECALL_FILE);
+ RETURN_FILE);
if (status)
- dprintk("%s RECALL_FILE error: %d\n", __func__, status);
+ dprintk("%s RETURN_FILE error: %d\n", __func__, status);
goto out;
}

@@ -209,7 +209,7 @@ static int pnfs_recall_layout(void *data)
/* FIXME: This loop is inefficient, running in O(|s_inodes|^2) */
while ((ino = nfs_layoutrecall_find_inode(clp, &rl)) != NULL) {
/* XXX need to check status on pnfs_return_layout */
- pnfs_return_layout(ino, &rl.cbl_seg, NULL, RECALL_FILE);
+ pnfs_return_layout(ino, &rl.cbl_seg, NULL, RETURN_FILE);
iput(ino);
}

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 1cbf81b..453c7c8 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -248,7 +248,7 @@ static unsigned decode_pnfs_layoutrecall_args(struct svc_rqst *rqstp,
args->cbl_layoutchanged = ntohl(*p++);
args->cbl_recall_type = ntohl(*p++);

- if (likely(args->cbl_recall_type == RECALL_FILE)) {
+ if (likely(args->cbl_recall_type == RETURN_FILE)) {
status = decode_fh(xdr, &args->cbl_fh);
if (unlikely(status != 0))
goto out;
@@ -259,7 +259,7 @@ static unsigned decode_pnfs_layoutrecall_args(struct svc_rqst *rqstp,
status = decode_stateid(xdr, &args->cbl_stateid);
if (unlikely(status != 0))
goto out;
- } else if (args->cbl_recall_type == RECALL_FSID) {
+ } else if (args->cbl_recall_type == RETURN_FSID) {
p = read_buf(xdr, 2 * sizeof(uint64_t));
p = xdr_decode_hyper(p, &args->cbl_fsid.major);
p = xdr_decode_hyper(p, &args->cbl_fsid.minor);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e71e842..cdaf9f1 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1386,7 +1386,7 @@ void nfs4_clear_inode(struct inode *inode)
/* First call standard NFS clear_inode() code */
nfs_clear_inode(inode);
#ifdef CONFIG_PNFS
- pnfs_return_layout(inode, NULL, NULL, RECALL_FILE);
+ pnfs_return_layout(inode, NULL, NULL, RETURN_FILE);
#endif /* CONFIG_PNFS */
}
#endif
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 87f58bd..c897a85 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2254,7 +2254,7 @@ pnfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,

if (pnfs_enabled_sb(server) && has_layout(nfsi) &&
pnfs_ld_layoutret_on_setattr(server->pnfs_curr_ld))
- pnfs_return_layout(inode, NULL, NULL, RECALL_FILE);
+ pnfs_return_layout(inode, NULL, NULL, RETURN_FILE);
return nfs4_proc_setattr(dentry, fattr, sattr);
}
#endif /* CONFIG_PNFS */
@@ -5438,7 +5438,7 @@ static void nfs4_pnfs_layoutreturn_release(void *calldata)
dprintk("--> %s return_type %d lo %p\n", __func__,
lrp->args.return_type, lrp->lo);

- if (lrp->lo && (lrp->args.return_type == RECALL_FILE)) {
+ if (lrp->lo && (lrp->args.return_type == RETURN_FILE)) {
if (!lrp->res.lrs_present)
pnfs_set_layout_stateid(lrp->lo, &zero_stateid);
pnfs_layout_release(lrp->lo, &lrp->lo->lretcount,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 63f7785..f519fe3 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -519,7 +519,7 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fm
range.iomode = nfsi->layout.roc_iomode;
range.offset = 0;
range.length = NFS4_MAX_UINT64;
- pnfs_return_layout(state->inode, &range, NULL, RECALL_FILE);
+ pnfs_return_layout(state->inode, &range, NULL, RETURN_FILE);
}
#endif /* CONFIG_PNFS */
nfs4_do_close(path, state, wait);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6b88c68..3a05422 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -680,7 +680,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
static int
return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
const nfs4_stateid *stateid, /* optional */
- enum pnfs_layoutrecall_type type, struct pnfs_layout_type *lo)
+ enum pnfs_layoutreturn_type type, struct pnfs_layout_type *lo)
{
struct nfs4_pnfs_layoutreturn *lrp;
struct nfs_server *server = NFS_SERVER(ino);
@@ -690,7 +690,7 @@ return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,

lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
if (lrp == NULL) {
- if (lo && (type == RECALL_FILE))
+ if (lo && (type == RETURN_FILE))
pnfs_layout_release(lo, &lo->lretcount, NULL);
goto out;
}
@@ -714,7 +714,7 @@ out:
int
_pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
const nfs4_stateid *stateid, /* optional */
- enum pnfs_layoutrecall_type type)
+ enum pnfs_layoutreturn_type type)
{
struct pnfs_layout_type *lo = NULL;
struct nfs_inode *nfsi = NFS_I(ino);
@@ -730,7 +730,7 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
arg.offset = 0;
arg.length = ~0;
}
- if (type == RECALL_FILE) {
+ if (type == RETURN_FILE) {
if (nfsi->layoutcommit_ctx) {
status = pnfs_layoutcommit_inode(ino, 1);
if (status) {
@@ -1697,7 +1697,7 @@ pnfs_writeback_done(struct nfs_write_data *data)
.length = data->args.count,
};
dprintk("%s: retrying\n", __func__);
- _pnfs_return_layout(data->inode, &range, NULL, RECALL_FILE);
+ _pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE);
pnfs_initiate_write(data, NFS_CLIENT(data->inode),
pdata->call_ops, pdata->how);
}
@@ -1828,7 +1828,7 @@ pnfs_read_done(struct nfs_read_data *data)
.length = data->args.count,
};
dprintk("%s: retrying\n", __func__);
- _pnfs_return_layout(data->inode, &range, NULL, RECALL_FILE);
+ _pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE);
pnfs_initiate_read(data, NFS_CLIENT(data->inode),
pdata->call_ops);
}
@@ -2054,7 +2054,7 @@ pnfs_commit_done(struct nfs_write_data *data)
.length = data->args.count,
};
dprintk("%s: retrying\n", __func__);
- _pnfs_return_layout(data->inode, &range, NULL, RECALL_FILE);
+ _pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE);
pnfs_initiate_commit(data, NFS_CLIENT(data->inode),
pdata->call_ops, pdata->how);
}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 9810ff3..a8933b4 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -39,7 +39,7 @@ int pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,

int _pnfs_return_layout(struct inode *, struct nfs4_pnfs_layout_segment *,
const nfs4_stateid *stateid, /* optional */
- enum pnfs_layoutrecall_type);
+ enum pnfs_layoutreturn_type);
void set_pnfs_layoutdriver(struct super_block *sb, struct nfs_fh *fh, u32 id);
void unmount_pnfs_layoutdriver(struct super_block *sb);
int pnfs_use_read(struct inode *inode, ssize_t count);
@@ -227,13 +227,13 @@ static inline void pnfs_modify_new_request(struct nfs_page *req,
static inline int pnfs_return_layout(struct inode *ino,
struct nfs4_pnfs_layout_segment *lseg,
const nfs4_stateid *stateid, /* optional */
- enum pnfs_layoutrecall_type type)
+ enum pnfs_layoutreturn_type type)
{
struct nfs_inode *nfsi = NFS_I(ino);
struct nfs_server *nfss = NFS_SERVER(ino);

if (pnfs_enabled_sb(nfss) &&
- (type != RECALL_FILE || has_layout(nfsi)))
+ (type != RETURN_FILE || has_layout(nfsi)))
return _pnfs_return_layout(ino, lseg, stateid, type);

return 0;
--
1.6.5.1


2009-12-09 10:28:50

by Benny Halevy

[permalink] [raw]
Subject: [PATCH 9/9] SQUASHME: pnfs: filelayout: mask out server's MDS capability flag for DSs

When establishing a session with a DS, mask out its MDS
exchange_id flag so that we know we are using it as DS-only.

Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfs/nfs4filelayoutdev.c | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 128715a..cb8ae88 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -272,6 +272,9 @@ nfs4_pnfs_ds_create(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
if (err)
goto out_put;

+ /* mask out the server's MDS capability flag */
+ clp->cl_exchange_flags &= ~EXCHGID4_FLAG_USE_PNFS_MDS;
+
if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS)) {
printk(KERN_INFO "ip:port %s is not a pNFS Data Server\n",
ds->r_addr);
--
1.6.5.1


2009-12-09 18:54:03

by J.Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH 2/3] SQUASHME: pnfsd: always set both MDS and DS exchangeid capability flags

On Wed, Dec 09, 2009 at 12:27:20PM +0200, Benny Halevy wrote:
> We always support both modes when CONFIG_PNFSD is enabled.
> The client needs to remember what the session is used for
> if it cares to distinguish between DSs and MDSs.

OK, thanks.

>
> Signed-off-by: Benny Halevy <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 15 ++-------------
> 1 files changed, 2 insertions(+), 13 deletions(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 108cb3e..aa2e9c2 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1103,20 +1103,9 @@ static void
> nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
> {
> #if defined(CONFIG_PNFSD)
> - int mds_and_ds = EXCHGID4_FLAG_USE_PNFS_MDS | EXCHGID4_FLAG_USE_PNFS_DS;
> - int mds_or_ds = 0;
> -
> - /* Save the client's MDS or DS flags, or set them both.
> - * XXX We currently do not have a method of determining
> - * what a server supports prior to receiving a filehandle
> - * e.g. at exchange id time. */
> - mds_or_ds = clid->flags & mds_and_ds;
> - if (mds_or_ds)
> - new->cl_exchange_flags |= mds_or_ds;
> - else
> - new->cl_exchange_flags |= mds_and_ds;
> + new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS |
> + EXCHGID4_FLAG_USE_PNFS_DS;
> #else /* CONFIG_PNFSD */
> - /* pNFS is not supported */
> new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
> #endif /* CONFIG_PNFSD */

As usual, let's also hide the ifdefs, e.g.:

new->cl_exchange_flags |= PNFS_EXCHGID_FLAGS

with the define conditional on CONFIG_PNFSD.

--b.

2009-12-10 17:29:46

by J.Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v2 11/35] pnfsd: get device list/info

On Mon, Dec 07, 2009 at 11:32:10AM +0200, Benny Halevy wrote:
> +static __be32
> +nfsd4_getdevinfo(struct svc_rqst *rqstp,
> + struct nfsd4_compound_state *cstate,
> + struct nfsd4_pnfs_getdevinfo *gdp)
> +{
> + struct super_block *sb;
> + struct svc_export *exp = NULL;
> + u32 fsidv = gdp->gd_devid.fsid;
> + int status;
> +
> + dprintk("%s: layout_type %u dev_id %llx:%llx maxcnt %u\n",
> + __func__, gdp->gd_layout_type, gdp->gd_devid.fsid,
> + gdp->gd_devid.devid, gdp->gd_maxcount);
> +
> + status = nfserr_inval;
> + exp = rqst_exp_find(rqstp, FSID_NUM, &fsidv);

As I said before, this seems to require an fsid= option on every pnfs
export. We shouldn't need that.

--b.


> + dprintk("%s: exp %p\n", __func__, exp);
> + if (IS_ERR(exp)) {
> + status = nfserrno(PTR_ERR(exp));
> + exp = NULL;
> + goto out;
> + }
> + sb = exp->ex_path.dentry->d_inode->i_sb;
> + dprintk("%s: sb %p\n", __func__, sb);
> + if (!sb)
> + goto out;
> +
> + /* Ensure underlying file system supports pNFS and,
> + * if so, the requested layout type
> + */
> + status = nfsd4_layout_verify(sb, exp, gdp->gd_layout_type);
> + if (status)
> + goto out;
> +
> + /* Set up arguments so device can be retrieved at encode time */
> + gdp->gd_sb = sb;
> +out:
> + if (exp)
> + exp_put(exp);
> + return status;
> +}
> #endif /* CONFIG_PNFSD */
>
> /*
> @@ -1330,6 +1420,17 @@ static struct nfsd4_operation nfsd4_ops[] = {
> .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
> .op_name = "OP_SEQUENCE",
> },
> +#if defined(CONFIG_PNFSD)
> + [OP_GETDEVICELIST] = {
> + .op_func = (nfsd4op_func)nfsd4_getdevlist,
> + .op_name = "OP_GETDEVICELIST",
> + },
> + [OP_GETDEVICEINFO] = {
> + .op_func = (nfsd4op_func)nfsd4_getdevinfo,
> + .op_flags = ALLOWED_WITHOUT_FH,
> + .op_name = "OP_GETDEVICEINFO",
> + },
> +#endif /* CONFIG_PNFSD */
> };
>
> static const char *nfsd4_op_name(unsigned opnum)
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index a8587e9..955f583 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -46,6 +46,7 @@
> #include <linux/nfsd_idmap.h>
> #include <linux/nfs4_acl.h>
> #include <linux/sunrpc/svcauth_gss.h>
> +#include <linux/exportfs.h>
>
> #include "xdr4.h"
> #include "vfs.h"
> @@ -1233,6 +1234,42 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
> DECODE_TAIL;
> }
>
> +#if defined(CONFIG_PNFSD)
> +static __be32
> +nfsd4_decode_getdevlist(struct nfsd4_compoundargs *argp,
> + struct nfsd4_pnfs_getdevlist *gdevl)
> +{
> + DECODE_HEAD;
> +
> + READ_BUF(16 + sizeof(nfs4_verifier));
> + READ32(gdevl->gd_layout_type);
> + READ32(gdevl->gd_maxdevices);
> + READ64(gdevl->gd_cookie);
> + COPYMEM(&gdevl->gd_verf, sizeof(nfs4_verifier));
> +
> + DECODE_TAIL;
> +}
> +
> +static __be32
> +nfsd4_decode_getdevinfo(struct nfsd4_compoundargs *argp,
> + struct nfsd4_pnfs_getdevinfo *gdev)
> +{
> + u32 num;
> + DECODE_HEAD;
> +
> + READ_BUF(12 + sizeof(struct nfsd4_pnfs_deviceid));
> + READ64(gdev->gd_devid.fsid);
> + READ64(gdev->gd_devid.devid);
> + READ32(gdev->gd_layout_type);
> + READ32(gdev->gd_maxcount);
> + READ32(num);
> + if (num)
> + READ_BUF(4); /* TODO: for now, just skip notify_types */
> +
> + DECODE_TAIL;
> +}
> +#endif /* CONFIG_PNFSD */
> +
> static __be32
> nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
> {
> @@ -1334,11 +1371,19 @@ static nfsd4_dec nfsd41_dec_ops[] = {
> [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session,
> [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp,
> [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
> +#if defined(CONFIG_PNFSD)
> + [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdevinfo,
> + [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_getdevlist,
> + [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp,
> + [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp,
> + [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
> +#else /* CONFIG_PNFSD */
> [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp,
> [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
> [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp,
> [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp,
> [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
> +#endif /* CONFIG_PNFSD */
> [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_notsupp,
> [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence,
> [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp,
> @@ -3062,6 +3107,207 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
> return 0;
> }
>
> +#if defined(CONFIG_PNFSD)
> +
> +/* Uses the export interface to iterate through the available devices
> + * and encodes them on the response stream.
> + */
> +static __be32
> +nfsd4_encode_devlist_iterator(struct nfsd4_compoundres *resp,
> + struct nfsd4_pnfs_getdevlist *gdevl,
> + unsigned int *dev_count)
> +{
> + struct super_block *sb = gdevl->gd_fhp->fh_dentry->d_inode->i_sb;
> + __be32 nfserr;
> + int status;
> + __be32 *p;
> + struct nfsd4_pnfs_dev_iter_res res = {
> + .gd_cookie = gdevl->gd_cookie,
> + .gd_verf = gdevl->gd_verf,
> + .gd_eof = 0
> + };
> +
> + dprintk("%s: Begin\n", __func__);
> +
> + *dev_count = 0;
> + do {
> + status = sb->s_pnfs_op->get_device_iter(sb,
> + gdevl->gd_layout_type,
> + &res);
> + if (status) {
> + if (status == -ENOENT) {
> + res.gd_eof = 1;
> + /* return success */
> + break;
> + }
> + nfserr = nfserrno(status);
> + goto out_err;
> + }
> +
> + /* Encode device id and layout type */
> + RESERVE_SPACE(sizeof(struct nfsd4_pnfs_deviceid));
> + WRITE64((__be64)gdevl->gd_fhp->fh_export->ex_fsid);
> + WRITE64(res.gd_devid); /* devid minor */
> + ADJUST_ARGS();
> + (*dev_count)++;
> + } while (*dev_count < gdevl->gd_maxdevices && !res.gd_eof);
> + gdevl->gd_cookie = res.gd_cookie;
> + gdevl->gd_verf = res.gd_verf;
> + gdevl->gd_eof = res.gd_eof;
> + nfserr = nfs_ok;
> +out_err:
> + dprintk("%s: Encoded %u devices\n", __func__, *dev_count);
> + return nfserr;
> +}
> +
> +/* Encodes the response of get device list.
> +*/
> +static __be32
> +nfsd4_encode_getdevlist(struct nfsd4_compoundres *resp, int nfserr,
> + struct nfsd4_pnfs_getdevlist *gdevl)
> +{
> + unsigned int dev_count = 0, lead_count;
> + u32 *p_in = resp->p;
> + __be32 *p;
> +
> + dprintk("%s: err %d\n", __func__, nfserr);
> + if (nfserr)
> + return nfserr;
> +
> + /* Ensure we have room for cookie, verifier, and devlist len,
> + * which we will backfill in after we encode as many devices as possible
> + */
> + lead_count = 8 + sizeof(nfs4_verifier) + 4;
> + RESERVE_SPACE(lead_count);
> + /* skip past these values */
> + p += XDR_QUADLEN(lead_count);
> + ADJUST_ARGS();
> +
> + /* Iterate over as many device ids as possible on the xdr stream */
> + nfserr = nfsd4_encode_devlist_iterator(resp, gdevl, &dev_count);
> + if (nfserr)
> + goto out_err;
> +
> + /* Backfill in cookie, verf and number of devices encoded */
> + p = p_in;
> + WRITE64(gdevl->gd_cookie);
> + WRITEMEM(&gdevl->gd_verf, sizeof(nfs4_verifier));
> + WRITE32(dev_count);
> +
> + /* Skip over devices */
> + p += XDR_QUADLEN(dev_count * sizeof(struct nfsd4_pnfs_deviceid));
> + ADJUST_ARGS();
> +
> + /* are we at the end of devices? */
> + RESERVE_SPACE(4);
> + WRITE32(gdevl->gd_eof);
> + ADJUST_ARGS();
> +
> + dprintk("%s: done.\n", __func__);
> +
> + nfserr = nfs_ok;
> +out:
> + return nfserr;
> +out_err:
> + p = p_in;
> + ADJUST_ARGS();
> + goto out;
> +}
> +
> +/* For a given device id, have the file system retrieve and encode the
> + * associated device. For file layout, the encoding function is
> + * passed down to the file system. The file system then has the option
> + * of using this encoding function or one of its own.
> + *
> + * Note: the file system must return the XDR size of struct device_addr4
> + * da_addr_body in pnfs_xdr_info.bytes_written on NFS4ERR_TOOSMALL for the
> + * gdir_mincount calculation.
> + */
> +static __be32
> +nfsd4_encode_getdevinfo(struct nfsd4_compoundres *resp, int nfserr,
> + struct nfsd4_pnfs_getdevinfo *gdev)
> +{
> + struct super_block *sb;
> + int maxcount = 0, type_notify_len = 12;
> + __be32 *p, *p_save = NULL, *p_in = resp->p;
> + struct exp_xdr_stream xdr;
> +
> + dprintk("%s: err %d\n", __func__, nfserr);
> + if (nfserr)
> + return nfserr;
> +
> + sb = gdev->gd_sb;
> +
> + if (gdev->gd_maxcount != 0) {
> + /* FIXME: this will be bound by the session max response */
> + maxcount = svc_max_payload(resp->rqstp);
> + if (maxcount > gdev->gd_maxcount)
> + maxcount = gdev->gd_maxcount;
> +
> + /* Ensure we have room for the type and notify fields */
> + maxcount -= type_notify_len;
> + if (maxcount < 0) {
> + nfserr = -ETOOSMALL;
> + goto toosmall;
> + }
> + }
> +
> + RESERVE_SPACE(4);
> + WRITE32(gdev->gd_layout_type);
> + ADJUST_ARGS();
> +
> + /* If maxcount is 0 then just update notifications */
> + if (gdev->gd_maxcount == 0)
> + goto handle_notifications;
> +
> + xdr.p = p_save = resp->p;
> + xdr.end = resp->end;
> + if (xdr.end - xdr.p > exp_xdr_qwords(maxcount & ~3))
> + xdr.end = xdr.p + exp_xdr_qwords(maxcount & ~3);
> +
> + nfserr = sb->s_pnfs_op->get_device_info(sb, &xdr, gdev->gd_layout_type,
> + &gdev->gd_devid);
> + if (nfserr) {
> + /* Rewind to the beginning */
> + p = p_in;
> + ADJUST_ARGS();
> + if (nfserr == -ETOOSMALL)
> + goto toosmall;
> + printk(KERN_ERR "%s: export ERROR %d\n", __func__, nfserr);
> + goto out;
> + }
> +
> + /* The file system should never write 0 bytes without
> + * returning an error
> + */
> + BUG_ON(xdr.p == p_save);
> + BUG_ON(xdr.p > xdr.end);
> +
> + /* Update the xdr stream with the number of bytes encoded
> + * by the file system.
> + */
> + p = xdr.p;
> + ADJUST_ARGS();
> +
> +handle_notifications:
> + /* Encode supported device notifications.
> + * Note: Currently none are supported.
> + */
> + RESERVE_SPACE(4);
> + WRITE32(0);
> + ADJUST_ARGS();
> +
> +out:
> + return nfserrno(nfserr);
> +toosmall:
> + dprintk("%s: maxcount too small\n", __func__);
> + RESERVE_SPACE(4);
> + WRITE32((p_save ? (xdr.p - p_save) * 4 : 0) + type_notify_len);
> + ADJUST_ARGS();
> + goto out;
> +}
> +#endif /* CONFIG_PNFSD */
> +
> static __be32
> nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
> {
> @@ -3122,11 +3368,19 @@ static nfsd4_enc nfsd4_enc_ops[] = {
> [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_destroy_session,
> [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop,
> [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
> +#if defined(CONFIG_PNFSD)
> + [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdevinfo,
> + [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_getdevlist,
> + [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop,
> + [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop,
> + [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
> +#else /* CONFIG_PNFSD */
> [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop,
> [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
> [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop,
> [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop,
> [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
> +#endif /* CONFIG_PNFSD */
> [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_noop,
> [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence,
> [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop,
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 83202a1..acb215a 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -39,6 +39,8 @@
> #ifndef _LINUX_NFSD_XDR4_H
> #define _LINUX_NFSD_XDR4_H
>
> +#include <linux/nfsd/nfsd4_pnfs.h>
> +
> #include "state.h"
> #include "nfsd.h"
>
> @@ -383,6 +385,22 @@ struct nfsd4_destroy_session {
> struct nfs4_sessionid sessionid;
> };
>
> +struct nfsd4_pnfs_getdevinfo {
> + struct nfsd4_pnfs_deviceid gd_devid; /* request */
> + u32 gd_layout_type; /* request */
> + u32 gd_maxcount; /* request */
> + struct super_block *gd_sb;
> +};
> +
> +struct nfsd4_pnfs_getdevlist {
> + u32 gd_layout_type; /* request */
> + u32 gd_maxdevices; /* request */
> + u64 gd_cookie; /* request - response */
> + u64 gd_verf; /* request - response */
> + struct svc_fh *gd_fhp; /* response */
> + u32 gd_eof; /* response */
> +};
> +
> struct nfsd4_op {
> int opnum;
> __be32 status;
> @@ -423,6 +441,10 @@ struct nfsd4_op {
> struct nfsd4_create_session create_session;
> struct nfsd4_destroy_session destroy_session;
> struct nfsd4_sequence sequence;
> +#if defined(CONFIG_PNFSD)
> + struct nfsd4_pnfs_getdevlist pnfs_getdevlist;
> + struct nfsd4_pnfs_getdevinfo pnfs_getdevinfo;
> +#endif /* CONFIG_PNFSD */
> } u;
> struct nfs4_replay * replay;
> };
> diff --git a/include/linux/nfsd/nfsd4_pnfs.h b/include/linux/nfsd/nfsd4_pnfs.h
> index c44e13d..d68fd14 100644
> --- a/include/linux/nfsd/nfsd4_pnfs.h
> +++ b/include/linux/nfsd/nfsd4_pnfs.h
> @@ -34,6 +34,21 @@
> #ifndef _LINUX_NFSD_NFSD4_PNFS_H
> #define _LINUX_NFSD_NFSD4_PNFS_H
>
> +#include <linux/exportfs.h>
> +#include <linux/exp_xdr.h>
> +
> +struct nfsd4_pnfs_deviceid {
> + u64 fsid; /* filesystem ID */
> + u64 devid; /* filesystem-wide unique device ID */
> +};
> +
> +struct nfsd4_pnfs_dev_iter_res {
> + u64 gd_cookie; /* request/repsonse */
> + u64 gd_verf; /* request/repsonse */
> + u64 gd_devid; /* response */
> + u32 gd_eof; /* response */
> +};
> +
> /*
> * pNFS export operations vector.
> *
> @@ -47,6 +62,25 @@
> struct pnfs_export_operations {
> /* Returns the supported pnfs_layouttype4. */
> int (*layout_type) (struct super_block *);
> +
> + /* Encode device info onto the xdr stream. */
> + int (*get_device_info) (struct super_block *,
> + struct exp_xdr_stream *,
> + u32 layout_type,
> + const struct nfsd4_pnfs_deviceid *);
> +
> + /* Retrieve all available devices via an iterator.
> + * arg->cookie == 0 indicates the beginning of the list,
> + * otherwise arg->verf is used to verify that the list hasn't changed
> + * while retrieved.
> + *
> + * On output, the filesystem sets the devid based on the current cookie
> + * and sets res->cookie and res->verf corresponding to the next entry.
> + * When the last entry in the list is retrieved, res->eof is set to 1.
> + */
> + int (*get_device_iter) (struct super_block *,
> + u32 layout_type,
> + struct nfsd4_pnfs_dev_iter_res *);
> };
>
> #endif /* _LINUX_NFSD_NFSD4_PNFS_H */
> --
> 1.6.5.1
>

2009-12-10 18:53:08

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH v2 11/35] pnfsd: get device list/info

On Dec. 10, 2009, 19:30 +0200, " J. Bruce Fields" <[email protected]> wrote:
> On Mon, Dec 07, 2009 at 11:32:10AM +0200, Benny Halevy wrote:
>> +static __be32
>> +nfsd4_getdevinfo(struct svc_rqst *rqstp,
>> + struct nfsd4_compound_state *cstate,
>> + struct nfsd4_pnfs_getdevinfo *gdp)
>> +{
>> + struct super_block *sb;
>> + struct svc_export *exp = NULL;
>> + u32 fsidv = gdp->gd_devid.fsid;
>> + int status;
>> +
>> + dprintk("%s: layout_type %u dev_id %llx:%llx maxcnt %u\n",
>> + __func__, gdp->gd_layout_type, gdp->gd_devid.fsid,
>> + gdp->gd_devid.devid, gdp->gd_maxcount);
>> +
>> + status = nfserr_inval;
>> + exp = rqst_exp_find(rqstp, FSID_NUM, &fsidv);
>
> As I said before, this seems to require an fsid= option on every pnfs
> export. We shouldn't need that.

Is there a better way to get to the sb given the fsid?
Since we don't have a current_fh for getdeviceinfo the alternative
might be to map the deviceids to sb's in the generic layer.

Benny

>
> --b.
>
>
>> + dprintk("%s: exp %p\n", __func__, exp);
>> + if (IS_ERR(exp)) {
>> + status = nfserrno(PTR_ERR(exp));
>> + exp = NULL;
>> + goto out;
>> + }
>> + sb = exp->ex_path.dentry->d_inode->i_sb;
>> + dprintk("%s: sb %p\n", __func__, sb);
>> + if (!sb)
>> + goto out;
>> +
>> + /* Ensure underlying file system supports pNFS and,
>> + * if so, the requested layout type
>> + */
>> + status = nfsd4_layout_verify(sb, exp, gdp->gd_layout_type);
>> + if (status)
>> + goto out;
>> +
>> + /* Set up arguments so device can be retrieved at encode time */
>> + gdp->gd_sb = sb;
>> +out:
>> + if (exp)
>> + exp_put(exp);
>> + return status;
>> +}
>> #endif /* CONFIG_PNFSD */
>>
>> /*
>> @@ -1330,6 +1420,17 @@ static struct nfsd4_operation nfsd4_ops[] = {
>> .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
>> .op_name = "OP_SEQUENCE",
>> },
>> +#if defined(CONFIG_PNFSD)
>> + [OP_GETDEVICELIST] = {
>> + .op_func = (nfsd4op_func)nfsd4_getdevlist,
>> + .op_name = "OP_GETDEVICELIST",
>> + },
>> + [OP_GETDEVICEINFO] = {
>> + .op_func = (nfsd4op_func)nfsd4_getdevinfo,
>> + .op_flags = ALLOWED_WITHOUT_FH,
>> + .op_name = "OP_GETDEVICEINFO",
>> + },
>> +#endif /* CONFIG_PNFSD */
>> };
>>
>> static const char *nfsd4_op_name(unsigned opnum)
>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>> index a8587e9..955f583 100644
>> --- a/fs/nfsd/nfs4xdr.c
>> +++ b/fs/nfsd/nfs4xdr.c
>> @@ -46,6 +46,7 @@
>> #include <linux/nfsd_idmap.h>
>> #include <linux/nfs4_acl.h>
>> #include <linux/sunrpc/svcauth_gss.h>
>> +#include <linux/exportfs.h>
>>
>> #include "xdr4.h"
>> #include "vfs.h"
>> @@ -1233,6 +1234,42 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
>> DECODE_TAIL;
>> }
>>
>> +#if defined(CONFIG_PNFSD)
>> +static __be32
>> +nfsd4_decode_getdevlist(struct nfsd4_compoundargs *argp,
>> + struct nfsd4_pnfs_getdevlist *gdevl)
>> +{
>> + DECODE_HEAD;
>> +
>> + READ_BUF(16 + sizeof(nfs4_verifier));
>> + READ32(gdevl->gd_layout_type);
>> + READ32(gdevl->gd_maxdevices);
>> + READ64(gdevl->gd_cookie);
>> + COPYMEM(&gdevl->gd_verf, sizeof(nfs4_verifier));
>> +
>> + DECODE_TAIL;
>> +}
>> +
>> +static __be32
>> +nfsd4_decode_getdevinfo(struct nfsd4_compoundargs *argp,
>> + struct nfsd4_pnfs_getdevinfo *gdev)
>> +{
>> + u32 num;
>> + DECODE_HEAD;
>> +
>> + READ_BUF(12 + sizeof(struct nfsd4_pnfs_deviceid));
>> + READ64(gdev->gd_devid.fsid);
>> + READ64(gdev->gd_devid.devid);
>> + READ32(gdev->gd_layout_type);
>> + READ32(gdev->gd_maxcount);
>> + READ32(num);
>> + if (num)
>> + READ_BUF(4); /* TODO: for now, just skip notify_types */
>> +
>> + DECODE_TAIL;
>> +}
>> +#endif /* CONFIG_PNFSD */
>> +
>> static __be32
>> nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
>> {
>> @@ -1334,11 +1371,19 @@ static nfsd4_dec nfsd41_dec_ops[] = {
>> [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session,
>> [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp,
>> [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
>> +#if defined(CONFIG_PNFSD)
>> + [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdevinfo,
>> + [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_getdevlist,
>> + [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp,
>> + [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp,
>> + [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
>> +#else /* CONFIG_PNFSD */
>> [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp,
>> [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
>> [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp,
>> [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp,
>> [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
>> +#endif /* CONFIG_PNFSD */
>> [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_notsupp,
>> [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence,
>> [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp,
>> @@ -3062,6 +3107,207 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
>> return 0;
>> }
>>
>> +#if defined(CONFIG_PNFSD)
>> +
>> +/* Uses the export interface to iterate through the available devices
>> + * and encodes them on the response stream.
>> + */
>> +static __be32
>> +nfsd4_encode_devlist_iterator(struct nfsd4_compoundres *resp,
>> + struct nfsd4_pnfs_getdevlist *gdevl,
>> + unsigned int *dev_count)
>> +{
>> + struct super_block *sb = gdevl->gd_fhp->fh_dentry->d_inode->i_sb;
>> + __be32 nfserr;
>> + int status;
>> + __be32 *p;
>> + struct nfsd4_pnfs_dev_iter_res res = {
>> + .gd_cookie = gdevl->gd_cookie,
>> + .gd_verf = gdevl->gd_verf,
>> + .gd_eof = 0
>> + };
>> +
>> + dprintk("%s: Begin\n", __func__);
>> +
>> + *dev_count = 0;
>> + do {
>> + status = sb->s_pnfs_op->get_device_iter(sb,
>> + gdevl->gd_layout_type,
>> + &res);
>> + if (status) {
>> + if (status == -ENOENT) {
>> + res.gd_eof = 1;
>> + /* return success */
>> + break;
>> + }
>> + nfserr = nfserrno(status);
>> + goto out_err;
>> + }
>> +
>> + /* Encode device id and layout type */
>> + RESERVE_SPACE(sizeof(struct nfsd4_pnfs_deviceid));
>> + WRITE64((__be64)gdevl->gd_fhp->fh_export->ex_fsid);
>> + WRITE64(res.gd_devid); /* devid minor */
>> + ADJUST_ARGS();
>> + (*dev_count)++;
>> + } while (*dev_count < gdevl->gd_maxdevices && !res.gd_eof);
>> + gdevl->gd_cookie = res.gd_cookie;
>> + gdevl->gd_verf = res.gd_verf;
>> + gdevl->gd_eof = res.gd_eof;
>> + nfserr = nfs_ok;
>> +out_err:
>> + dprintk("%s: Encoded %u devices\n", __func__, *dev_count);
>> + return nfserr;
>> +}
>> +
>> +/* Encodes the response of get device list.
>> +*/
>> +static __be32
>> +nfsd4_encode_getdevlist(struct nfsd4_compoundres *resp, int nfserr,
>> + struct nfsd4_pnfs_getdevlist *gdevl)
>> +{
>> + unsigned int dev_count = 0, lead_count;
>> + u32 *p_in = resp->p;
>> + __be32 *p;
>> +
>> + dprintk("%s: err %d\n", __func__, nfserr);
>> + if (nfserr)
>> + return nfserr;
>> +
>> + /* Ensure we have room for cookie, verifier, and devlist len,
>> + * which we will backfill in after we encode as many devices as possible
>> + */
>> + lead_count = 8 + sizeof(nfs4_verifier) + 4;
>> + RESERVE_SPACE(lead_count);
>> + /* skip past these values */
>> + p += XDR_QUADLEN(lead_count);
>> + ADJUST_ARGS();
>> +
>> + /* Iterate over as many device ids as possible on the xdr stream */
>> + nfserr = nfsd4_encode_devlist_iterator(resp, gdevl, &dev_count);
>> + if (nfserr)
>> + goto out_err;
>> +
>> + /* Backfill in cookie, verf and number of devices encoded */
>> + p = p_in;
>> + WRITE64(gdevl->gd_cookie);
>> + WRITEMEM(&gdevl->gd_verf, sizeof(nfs4_verifier));
>> + WRITE32(dev_count);
>> +
>> + /* Skip over devices */
>> + p += XDR_QUADLEN(dev_count * sizeof(struct nfsd4_pnfs_deviceid));
>> + ADJUST_ARGS();
>> +
>> + /* are we at the end of devices? */
>> + RESERVE_SPACE(4);
>> + WRITE32(gdevl->gd_eof);
>> + ADJUST_ARGS();
>> +
>> + dprintk("%s: done.\n", __func__);
>> +
>> + nfserr = nfs_ok;
>> +out:
>> + return nfserr;
>> +out_err:
>> + p = p_in;
>> + ADJUST_ARGS();
>> + goto out;
>> +}
>> +
>> +/* For a given device id, have the file system retrieve and encode the
>> + * associated device. For file layout, the encoding function is
>> + * passed down to the file system. The file system then has the option
>> + * of using this encoding function or one of its own.
>> + *
>> + * Note: the file system must return the XDR size of struct device_addr4
>> + * da_addr_body in pnfs_xdr_info.bytes_written on NFS4ERR_TOOSMALL for the
>> + * gdir_mincount calculation.
>> + */
>> +static __be32
>> +nfsd4_encode_getdevinfo(struct nfsd4_compoundres *resp, int nfserr,
>> + struct nfsd4_pnfs_getdevinfo *gdev)
>> +{
>> + struct super_block *sb;
>> + int maxcount = 0, type_notify_len = 12;
>> + __be32 *p, *p_save = NULL, *p_in = resp->p;
>> + struct exp_xdr_stream xdr;
>> +
>> + dprintk("%s: err %d\n", __func__, nfserr);
>> + if (nfserr)
>> + return nfserr;
>> +
>> + sb = gdev->gd_sb;
>> +
>> + if (gdev->gd_maxcount != 0) {
>> + /* FIXME: this will be bound by the session max response */
>> + maxcount = svc_max_payload(resp->rqstp);
>> + if (maxcount > gdev->gd_maxcount)
>> + maxcount = gdev->gd_maxcount;
>> +
>> + /* Ensure have room for type and notify field */
>> + maxcount -= type_notify_len;
>> + if (maxcount < 0) {
>> + nfserr = -ETOOSMALL;
>> + goto toosmall;
>> + }
>> + }
>> +
>> + RESERVE_SPACE(4);
>> + WRITE32(gdev->gd_layout_type);
>> + ADJUST_ARGS();
>> +
>> + /* If maxcount is 0 then just update notifications */
>> + if (gdev->gd_maxcount == 0)
>> + goto handle_notifications;
>> +
>> + xdr.p = p_save = resp->p;
>> + xdr.end = resp->end;
>> + if (xdr.end - xdr.p > exp_xdr_qwords(maxcount & ~3))
>> + xdr.end = xdr.p + exp_xdr_qwords(maxcount & ~3);
>> +
>> + nfserr = sb->s_pnfs_op->get_device_info(sb, &xdr, gdev->gd_layout_type,
>> + &gdev->gd_devid);
>> + if (nfserr) {
>> + /* Rewind to the beginning */
>> + p = p_in;
>> + ADJUST_ARGS();
>> + if (nfserr == -ETOOSMALL)
>> + goto toosmall;
>> + printk(KERN_ERR "%s: export ERROR %d\n", __func__, nfserr);
>> + goto out;
>> + }
>> +
>> + /* The file system should never write 0 bytes without
>> + * returning an error
>> + */
>> + BUG_ON(xdr.p == p_save);
>> + BUG_ON(xdr.p > xdr.end);
>> +
>> + /* Update the xdr stream with the number of bytes encoded
>> + * by the file system.
>> + */
>> + p = xdr.p;
>> + ADJUST_ARGS();
>> +
>> +handle_notifications:
>> + /* Encode supported device notifications.
>> + * Note: Currently none are supported.
>> + */
>> + RESERVE_SPACE(4);
>> + WRITE32(0);
>> + ADJUST_ARGS();
>> +
>> +out:
>> + return nfserrno(nfserr);
>> +toosmall:
>> + dprintk("%s: maxcount too small\n", __func__);
>> + RESERVE_SPACE(4);
>> + WRITE32((p_save ? (xdr.p - p_save) * 4 : 0) + type_notify_len);
>> + ADJUST_ARGS();
>> + goto out;
>> +}
>> +#endif /* CONFIG_PNFSD */
>> +
>> static __be32
>> nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
>> {
>> @@ -3122,11 +3368,19 @@ static nfsd4_enc nfsd4_enc_ops[] = {
>> [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_destroy_session,
>> [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop,
>> [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
>> +#if defined(CONFIG_PNFSD)
>> + [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdevinfo,
>> + [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_getdevlist,
>> + [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop,
>> + [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop,
>> + [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
>> +#else /* CONFIG_PNFSD */
>> [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop,
>> [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
>> [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop,
>> [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop,
>> [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
>> +#endif /* CONFIG_PNFSD */
>> [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_noop,
>> [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence,
>> [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop,
>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
>> index 83202a1..acb215a 100644
>> --- a/fs/nfsd/xdr4.h
>> +++ b/fs/nfsd/xdr4.h
>> @@ -39,6 +39,8 @@
>> #ifndef _LINUX_NFSD_XDR4_H
>> #define _LINUX_NFSD_XDR4_H
>>
>> +#include <linux/nfsd/nfsd4_pnfs.h>
>> +
>> #include "state.h"
>> #include "nfsd.h"
>>
>> @@ -383,6 +385,22 @@ struct nfsd4_destroy_session {
>> struct nfs4_sessionid sessionid;
>> };
>>
>> +struct nfsd4_pnfs_getdevinfo {
>> + struct nfsd4_pnfs_deviceid gd_devid; /* request */
>> + u32 gd_layout_type; /* request */
>> + u32 gd_maxcount; /* request */
>> + struct super_block *gd_sb;
>> +};
>> +
>> +struct nfsd4_pnfs_getdevlist {
>> + u32 gd_layout_type; /* request */
>> + u32 gd_maxdevices; /* request */
>> + u64 gd_cookie; /* request - response */
>> + u64 gd_verf; /* request - response */
>> + struct svc_fh *gd_fhp; /* response */
>> + u32 gd_eof; /* response */
>> +};
>> +
>> struct nfsd4_op {
>> int opnum;
>> __be32 status;
>> @@ -423,6 +441,10 @@ struct nfsd4_op {
>> struct nfsd4_create_session create_session;
>> struct nfsd4_destroy_session destroy_session;
>> struct nfsd4_sequence sequence;
>> +#if defined(CONFIG_PNFSD)
>> + struct nfsd4_pnfs_getdevlist pnfs_getdevlist;
>> + struct nfsd4_pnfs_getdevinfo pnfs_getdevinfo;
>> +#endif /* CONFIG_PNFSD */
>> } u;
>> struct nfs4_replay * replay;
>> };
>> diff --git a/include/linux/nfsd/nfsd4_pnfs.h b/include/linux/nfsd/nfsd4_pnfs.h
>> index c44e13d..d68fd14 100644
>> --- a/include/linux/nfsd/nfsd4_pnfs.h
>> +++ b/include/linux/nfsd/nfsd4_pnfs.h
>> @@ -34,6 +34,21 @@
>> #ifndef _LINUX_NFSD_NFSD4_PNFS_H
>> #define _LINUX_NFSD_NFSD4_PNFS_H
>>
>> +#include <linux/exportfs.h>
>> +#include <linux/exp_xdr.h>
>> +
>> +struct nfsd4_pnfs_deviceid {
>> + u64 fsid; /* filesystem ID */
>> + u64 devid; /* filesystem-wide unique device ID */
>> +};
>> +
>> +struct nfsd4_pnfs_dev_iter_res {
>> + u64 gd_cookie; /* request/repsonse */
>> + u64 gd_verf; /* request/repsonse */
>> + u64 gd_devid; /* response */
>> + u32 gd_eof; /* response */
>> +};
>> +
>> /*
>> * pNFS export operations vector.
>> *
>> @@ -47,6 +62,25 @@
>> struct pnfs_export_operations {
>> /* Returns the supported pnfs_layouttype4. */
>> int (*layout_type) (struct super_block *);
>> +
>> + /* Encode device info onto the xdr stream. */
>> + int (*get_device_info) (struct super_block *,
>> + struct exp_xdr_stream *,
>> + u32 layout_type,
>> + const struct nfsd4_pnfs_deviceid *);
>> +
>> + /* Retrieve all available devices via an iterator.
>> + * arg->cookie == 0 indicates the beginning of the list,
>> + * otherwise arg->verf is used to verify that the list hasn't changed
>> + * while retrieved.
>> + *
>> + * On output, the filesystem sets the devid based on the current cookie
>> + * and sets res->cookie and res->verf corresponding to the next entry.
>> + * When the last entry in the list is retrieved, res->eof is set to 1.
>> + */
>> + int (*get_device_iter) (struct super_block *,
>> + u32 layout_type,
>> + struct nfsd4_pnfs_dev_iter_res *);
>> };
>>
>> #endif /* _LINUX_NFSD_NFSD4_PNFS_H */
>> --
>> 1.6.5.1
>>

2009-12-10 19:03:45

by J.Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v2 11/35] pnfsd: get device list/info

On Thu, Dec 10, 2009 at 08:53:08PM +0200, Benny Halevy wrote:
> On Dec. 10, 2009, 19:30 +0200, " J. Bruce Fields" <[email protected]> wrote:
> > On Mon, Dec 07, 2009 at 11:32:10AM +0200, Benny Halevy wrote:
> >> +static __be32
> >> +nfsd4_getdevinfo(struct svc_rqst *rqstp,
> >> + struct nfsd4_compound_state *cstate,
> >> + struct nfsd4_pnfs_getdevinfo *gdp)
> >> +{
> >> + struct super_block *sb;
> >> + struct svc_export *exp = NULL;
> >> + u32 fsidv = gdp->gd_devid.fsid;
> >> + int status;
> >> +
> >> + dprintk("%s: layout_type %u dev_id %llx:%llx maxcnt %u\n",
> >> + __func__, gdp->gd_layout_type, gdp->gd_devid.fsid,
> >> + gdp->gd_devid.devid, gdp->gd_maxcount);
> >> +
> >> + status = nfserr_inval;
> >> + exp = rqst_exp_find(rqstp, FSID_NUM, &fsidv);
> >
> > As I said before, this seems to require an fsid= option on every pnfs
> > export. We shouldn't need that.
>
> Is there a better way to get to the sb given the fsid?
> Since we don't have a current_fh for getdeviceinfo the alternative
> might be to map the deviceids to sb's in the generic layer.

Does the deviceid even need to persist over reboots? If not, then just
an index into an in-memory table might be good enough. If it does need
to persist, then I guess we should look at the filehandle-decoding code
for inspiration.

--b.

2009-12-10 19:21:27

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH v2 11/35] pnfsd: get device list/info

On Dec. 10, 2009, 21:04 +0200, "J. Bruce Fields" <[email protected]> wrote:
> On Thu, Dec 10, 2009 at 08:53:08PM +0200, Benny Halevy wrote:
>> On Dec. 10, 2009, 19:30 +0200, " J. Bruce Fields" <[email protected]> wrote:
>>> On Mon, Dec 07, 2009 at 11:32:10AM +0200, Benny Halevy wrote:
>>>> +static __be32
>>>> +nfsd4_getdevinfo(struct svc_rqst *rqstp,
>>>> + struct nfsd4_compound_state *cstate,
>>>> + struct nfsd4_pnfs_getdevinfo *gdp)
>>>> +{
>>>> + struct super_block *sb;
>>>> + struct svc_export *exp = NULL;
>>>> + u32 fsidv = gdp->gd_devid.fsid;
>>>> + int status;
>>>> +
>>>> + dprintk("%s: layout_type %u dev_id %llx:%llx maxcnt %u\n",
>>>> + __func__, gdp->gd_layout_type, gdp->gd_devid.fsid,
>>>> + gdp->gd_devid.devid, gdp->gd_maxcount);
>>>> +
>>>> + status = nfserr_inval;
>>>> + exp = rqst_exp_find(rqstp, FSID_NUM, &fsidv);
>>> As I said before, this seems to require an fsid= option on every pnfs
>>> export. We shouldn't need that.
>> Is there a better way to get to the sb given the fsid?
>> Since we don't have a current_fh for getdeviceinfo the alternative
>> might be to map the deviceids to sb's in the generic layer.
>
> Does the deviceid even need to persist over reboots? If not, then just
> an index into an in-memory table might be good enough. If it does need
> to persist, then I guess we should look at the filehandle-decoding code
> for inspiration.

No, the deviceids need not persist across reboots and we're free to put
anything we want in the major part of the deviceid, leaving the minor
for the filesystem-unique ID.

Benny

>
> --b.

2009-12-10 19:42:54

by J.Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v2 11/35] pnfsd: get device list/info

On Thu, Dec 10, 2009 at 09:21:30PM +0200, Benny Halevy wrote:
> On Dec. 10, 2009, 21:04 +0200, "J. Bruce Fields" <[email protected]> wrote:
> > On Thu, Dec 10, 2009 at 08:53:08PM +0200, Benny Halevy wrote:
> >> On Dec. 10, 2009, 19:30 +0200, " J. Bruce Fields" <[email protected]> wrote:
> >>> On Mon, Dec 07, 2009 at 11:32:10AM +0200, Benny Halevy wrote:
> >>>> +static __be32
> >>>> +nfsd4_getdevinfo(struct svc_rqst *rqstp,
> >>>> + struct nfsd4_compound_state *cstate,
> >>>> + struct nfsd4_pnfs_getdevinfo *gdp)
> >>>> +{
> >>>> + struct super_block *sb;
> >>>> + struct svc_export *exp = NULL;
> >>>> + u32 fsidv = gdp->gd_devid.fsid;
> >>>> + int status;
> >>>> +
> >>>> + dprintk("%s: layout_type %u dev_id %llx:%llx maxcnt %u\n",
> >>>> + __func__, gdp->gd_layout_type, gdp->gd_devid.fsid,
> >>>> + gdp->gd_devid.devid, gdp->gd_maxcount);
> >>>> +
> >>>> + status = nfserr_inval;
> >>>> + exp = rqst_exp_find(rqstp, FSID_NUM, &fsidv);
> >>> As I said before, this seems to require an fsid= option on every pnfs
> >>> export. We shouldn't need that.
> >> Is there a better way to get to the sb given the fsid?
> >> Since we don't have a current_fh for getdeviceinfo the alternative
> >> might be to map the deviceids to sb's in the generic layer.
> >
> > Does the deviceid even need to persist over reboots? If not, then just
> > an index into an in-memory table might be good enough. If it does need
> > to persist, then I guess we should look at the filehandle-decoding code
> > for inspiration.
>
> No, the deviceids need not persist across reboots and we're free to put
> anything we want in the major part of the deviceid, leaving the minor
> for the filesystem-unique ID.

In that case, managing deviceid's isn't any harder than stateid's or the
rest--just make up something and remember it in some in-memory data
structure.

--b.

2009-12-07 09:29:01

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 01/35] pnfsd: Define CONFIG_PNFSD

Signed-off-by: Ricardo Labiaga <[email protected]>
[rephrased text and moved down to fs/nfsd/Kconfig]
[remove CONFIG_PNFSD's dependency on NFSD_V4_1]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/Kconfig | 10 ++++++++++
1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 503b9da..05f72e4 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -79,3 +79,13 @@ config NFSD_V4
available from http://linux-nfs.org/.

If unsure, say N.
+
+config PNFSD
+ bool "NFSv4.1 server support for Parallel NFS (pNFS) (DEVELOPER ONLY)"
+ depends on NFSD_V4 && EXPERIMENTAL
+ help
+ This option enables support for the parallel NFS features of the
+ minor version 1 of the NFSv4 protocol (draft-ietf-nfsv4-minorversion1)
+ in the kernel's NFS server.
+
+ Unless you're an NFS developer, say N.
--
1.6.5.1


2009-12-07 09:29:14

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 02/35] pnfsd: define NFSDDBG_PNFS

[extracted from pnfsd: Initial pNFS server implementation.]
Signed-off-by: Benny Halevy <[email protected]>
---
include/linux/nfsd/debug.h | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/include/linux/nfsd/debug.h b/include/linux/nfsd/debug.h
index ee4aa91..9926c24 100644
--- a/include/linux/nfsd/debug.h
+++ b/include/linux/nfsd/debug.h
@@ -32,6 +32,7 @@
#define NFSDDBG_REPCACHE 0x0080
#define NFSDDBG_XDR 0x0100
#define NFSDDBG_LOCKD 0x0200
+#define NFSDDBG_PNFS 0x0400
#define NFSDDBG_ALL 0x7FFF
#define NFSDDBG_NOCHANGE 0xFFFF

--
1.6.5.1


2009-12-07 09:29:28

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 03/35] pnfsd, pnfs: protocol level pnfs constants

[extracted from pnfsd: Initial pNFS server implementation.]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: update pNFS server ops to draft 13]
Signed-off-by: Marc Eshel <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
[pnfs: Move getdeviceinfo to draft-19 on client] [for NOTIFY_DEVICEID4_* defs]
Signed-off-by: Benny Halevy <[email protected]>
[pnfs: pnfs_notify_deviceid_type4 should define bitmasks not offsets]
The notify_deviceid_type4 values in the spec denote bit offsets, not
bitmask values. Since we use these constants as bitmasks, just define them
this way.
[removed LAYOUT_PVFS2's definition]
Signed-off-by: Benny Halevy <[email protected]>
[compile fixes for pnfs branch]
Signed-off-by: Fred Isaman <[email protected]>
[removed #ifdef CONFIG_PNFS{,D}]
Signed-off-by: Benny Halevy <[email protected]>
---
include/linux/nfs4.h | 45 +++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 45 insertions(+), 0 deletions(-)

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index c4c0602..a899cff 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -469,6 +469,8 @@ enum lock_type4 {
#define FATTR4_WORD1_TIME_MODIFY (1UL << 21)
#define FATTR4_WORD1_TIME_MODIFY_SET (1UL << 22)
#define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23)
+#define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30)
+#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1)

#define NFSPROC4_NULL 0
#define NFSPROC4_COMPOUND 1
@@ -546,6 +548,49 @@ enum state_protect_how4 {
SP4_SSV = 2
};

+enum pnfs_layouttype {
+ LAYOUT_NFSV4_FILES = 1,
+ LAYOUT_OSD2_OBJECTS = 2,
+ LAYOUT_BLOCK_VOLUME = 3,
+};
+
+/* FIXME: should recall and return types be combined? */
+enum pnfs_layoutrecall_type {
+ RECALL_FILE = 1,
+ RECALL_FSID = 2,
+ RECALL_ALL = 3
+};
+
+enum pnfs_layoutreturn_type {
+ RETURN_FILE = 1,
+ RETURN_FSID = 2,
+ RETURN_ALL = 3
+};
+
+enum pnfs_iomode {
+ IOMODE_READ = 1,
+ IOMODE_RW = 2,
+ IOMODE_ANY = 3,
+};
+
+enum pnfs_notify_deviceid_type4 {
+ NOTIFY_DEVICEID4_CHANGE = 1 << 1,
+ NOTIFY_DEVICEID4_DELETE = 1 << 2,
+};
+
+#define NFL4_UFLG_MASK 0x0000003F
+#define NFL4_UFLG_DENSE 0x00000001
+#define NFL4_UFLG_COMMIT_THRU_MDS 0x00000002
+#define NFL4_UFLG_STRIPE_UNIT_SIZE_MASK 0xFFFFFFC0
+
+/* Encoded in the loh_body field of type layouthint4 */
+enum filelayout_hint_care4 {
+ NFLH4_CARE_DENSE = NFL4_UFLG_DENSE,
+ NFLH4_CARE_COMMIT_THRU_MDS = NFL4_UFLG_COMMIT_THRU_MDS,
+ NFLH4_CARE_STRIPE_UNIT_SIZE = 0x00000040,
+ NFLH4_CARE_STRIPE_COUNT = 0x00000080
+};
+
#endif
#endif

--
1.6.5.1


2009-12-07 09:30:37

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 04/35] pnfsd: return pnfs flags on exchange_id

Set the cl_exchange_flags to be non_pnfs if we do not set
either pnfs or ds (in the plain old nfs41 case).

pnfsd: set EXCHGID4_FLAG_USE_NON_PNFS when !CONFIG_PNFSD:
EXCHGID4_FLAG_USE_NON_PNFS should be set when the server does not support
operations (e.g. LAYOUTGET) or attributes that pertain to pNFS.

[extracted from pnfsd: Initial pNFS server implementation.]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: Fixup nfsd4_set_ex_flags.]
Signed-off-by: Dean Hildebrand <[email protected]>
[pnfsd: set EXCHGID4_FLAG_USE_NON_PNFS when !CONFIG_PNFSD]
[pnfsd: fix compiler warning in nfsd4_set_ex_flags when CONFIG_PNFSD is not defined]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfs4state.c | 15 +++++++++++++++
1 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2923e6c..9ecbc25 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1098,8 +1098,23 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
static void
nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
{
+#if defined(CONFIG_PNFSD)
+ int mds_and_ds = EXCHGID4_FLAG_USE_PNFS_MDS | EXCHGID4_FLAG_USE_PNFS_DS;
+ int mds_or_ds = 0;
+
+ /* Save the client's MDS or DS flags, or set them both.
+ * XXX We currently do not have a method of determining
+ * what a server supports prior to receiving a filehandle
+ * e.g. at exchange id time. */
+ mds_or_ds = clid->flags & mds_and_ds;
+ if (mds_or_ds)
+ new->cl_exchange_flags |= mds_or_ds;
+ else
+ new->cl_exchange_flags |= mds_and_ds;
+#else /* CONFIG_PNFSD */
/* pNFS is not supported */
new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
+#endif /* CONFIG_PNFSD */

/* Referrals are supported, Migration is not. */
new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
--
1.6.5.1


2009-12-07 09:29:54

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 05/35] pnfsd: don't set up back channel on create_session for ds

From: Dean Hildebrand <[email protected]>

[was pnfsd: Add use of pnfs exchange flags]
Should this code be surrounded by CONFIG_PNFSD?

Signed-off-by: Dean Hildebrand <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfs4state.c | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9ecbc25..dc9d553 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1349,6 +1349,10 @@ nfsd4_create_session(struct svc_rqst *rqstp,
cr_ses->flags &= ~SESSION4_PERSIST;
cr_ses->flags &= ~SESSION4_RDMA;

+ if (!(unconf->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS) &&
+ (unconf->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS))
+ cr_ses->flags &= ~SESSION4_BACK_CHAN;
+
if (cr_ses->flags & SESSION4_BACK_CHAN) {
unconf->cl_cb_xprt = rqstp->rq_xprt;
svc_xprt_get(unconf->cl_cb_xprt);
--
1.6.5.1


2009-12-07 09:30:08

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 06/35] pnfsd: introduce pnfsd header files

From: Andy Adamson <[email protected]>

pnfsd data structures used internally and over the export API.

[extracted from pnfsd: Initial pNFS server implementation.]
[pnfsd: remove CONFIG_PNFSD from nfsd4_pnfs.h]
Signed-off-by: Andy Adamson <[email protected]>
[moved {include/linux,fs}/nfsd/pnfsd.h]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/pnfsd.h | 37 +++++++++++++++++++++++++++++++++++++
include/linux/nfsd/nfsd4_pnfs.h | 37 +++++++++++++++++++++++++++++++++++++
2 files changed, 74 insertions(+), 0 deletions(-)
create mode 100644 fs/nfsd/pnfsd.h
create mode 100644 include/linux/nfsd/nfsd4_pnfs.h

diff --git a/fs/nfsd/pnfsd.h b/fs/nfsd/pnfsd.h
new file mode 100644
index 0000000..65fb57e
--- /dev/null
+++ b/fs/nfsd/pnfsd.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2005 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Andy Adamson <[email protected]>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef LINUX_NFSD_PNFSD_H
+#define LINUX_NFSD_PNFSD_H
+
+#endif /* LINUX_NFSD_PNFSD_H */
diff --git a/include/linux/nfsd/nfsd4_pnfs.h b/include/linux/nfsd/nfsd4_pnfs.h
new file mode 100644
index 0000000..9e7d95e
--- /dev/null
+++ b/include/linux/nfsd/nfsd4_pnfs.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2006 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Andy Adamson <[email protected]>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _LINUX_NFSD_NFSD4_PNFS_H
+#define _LINUX_NFSD_NFSD4_PNFS_H
+
+#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
--
1.6.5.1


2009-12-07 09:31:17

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 07/35] pnfsd: define pnfs_export_operations

struct pnfs_export_operations defines the VFS level API for pNFS,
not including callbacks. A pnfs-exportable filesystem sets
a pointer to its pnfs export vector in its struct super_block.s_pnfs_op.

The file system provides the per-superblock layout_type method that
determines if it supports pnfs for the filesystem identified by
the superblock, and if so, with which layout type (only one per-sb is
supported).

Device ops:
get_device_iter is used to fill-in the device list for GETDEVICELIST
and get_device_info is used to encode the device info for GETDEVICEINFO.

Layout ops:
layout_get, layout_commit, and layout_return implement the file system- and
layout type- specific parts of their respective protocol operations: LAYOUTGET,
LAYOUTCOMMIT, and LAYOUTRETURN.

Note: define pnfs export operations in a stub form in this patch.
Actual operations are defined along with their usage.

Note: the following methods are mandatory to be implemented:
layout_type, get_device_info, and layout_get.

[pnfsd: provide default no-op operations]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: compile fixes for pnfsd branch]
Signed-off-by: Fred Isaman <[email protected]>
[gfs2: set pnfs_dlm_export_ops only for CONFIG_PNFSD]
[pnfsd: handle s_pnfs_op==NULL]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/export.c | 1 +
include/linux/fs.h | 2 ++
include/linux/nfsd/nfsd4_pnfs.h | 14 ++++++++++++++
3 files changed, 17 insertions(+), 0 deletions(-)

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index cd377a7..f82ed90 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -18,6 +18,7 @@
#include <linux/module.h>
#include <linux/exportfs.h>

+#include <linux/nfsd/nfsd4_pnfs.h>
#include <linux/nfsd/syscall.h>
#include <net/ipv6.h>

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2620a8c..9a9beb8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -382,6 +382,7 @@ struct inodes_stat_t {
#include <asm/byteorder.h>

struct export_operations;
+struct pnfs_export_operations;
struct hd_geometry;
struct iovec;
struct nameidata;
@@ -1325,6 +1326,7 @@ struct super_block {
const struct dquot_operations *dq_op;
const struct quotactl_ops *s_qcop;
const struct export_operations *s_export_op;
+ const struct pnfs_export_operations *s_pnfs_op;
unsigned long s_flags;
unsigned long s_magic;
struct dentry *s_root;
diff --git a/include/linux/nfsd/nfsd4_pnfs.h b/include/linux/nfsd/nfsd4_pnfs.h
index 9e7d95e..ff6613e 100644
--- a/include/linux/nfsd/nfsd4_pnfs.h
+++ b/include/linux/nfsd/nfsd4_pnfs.h
@@ -34,4 +34,18 @@
#ifndef _LINUX_NFSD_NFSD4_PNFS_H
#define _LINUX_NFSD_NFSD4_PNFS_H

+/*
+ * pNFS export operations vector.
+ *
+ * The filesystem must implement the following methods:
+ * layout_type
+ * get_device_info
+ * layout_get
+ *
+ * All other methods are optional and can be set to NULL if not implemented.
+ */
+struct pnfs_export_operations {
+ /* stub */
+};
+
#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
--
1.6.5.1


2009-12-07 09:30:34

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 08/35] pnfsd: add pnfs export option

From: Andy Adamson <[email protected]>

This is a boolean for now. When more layouttypes are supported, this can
change to "pnfs=", similar to "sec=".

The ctl interface is not enhanced.

Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: fix cosmetic checkpatch warnings]
[pnfsd: test pnfs export option in check_export]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/export.c | 21 ++++++++++++++++++---
include/linux/nfsd/export.h | 1 +
2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index f82ed90..ca745cc 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -356,7 +356,8 @@ static struct svc_export *svc_export_update(struct svc_export *new,
struct svc_export *old);
static struct svc_export *svc_export_lookup(struct svc_export *);

-static int check_export(struct inode *inode, int flags, unsigned char *uuid)
+static int check_export(struct inode *inode, int flags, unsigned char *uuid,
+ bool ex_pnfs)
{

/*
@@ -388,6 +389,14 @@ static int check_export(struct inode *inode, int flags, unsigned char *uuid)
return -EINVAL;
}

+ dprintk("%s: s_pnfs_op %p ex_pnfs %d\n", __func__,
+ inode->i_sb->s_pnfs_op, ex_pnfs);
+
+ if (!ex_pnfs) {
+ inode->i_sb->s_pnfs_op = NULL;
+ return 0;
+ }
+
return 0;

}
@@ -579,6 +588,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
if (exp.ex_uuid == NULL)
err = -ENOMEM;
}
+ } else if (strcmp(buf, "pnfs") == 0) {
+ exp.ex_pnfs = 1;
} else if (strcmp(buf, "secinfo") == 0)
err = secinfo_parse(&mesg, buf, &exp);
else
@@ -592,7 +603,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
}

err = check_export(exp.ex_path.dentry->d_inode, exp.ex_flags,
- exp.ex_uuid);
+ exp.ex_uuid, exp.ex_pnfs);
if (err)
goto out4;
}
@@ -653,6 +664,8 @@ static int svc_export_show(struct seq_file *m,
seq_printf(m, "%02x", exp->ex_uuid[i]);
}
}
+ if (exp->ex_pnfs)
+ seq_puts(m, ",pnfs");
show_secinfo(m, exp);
}
seq_puts(m, ")\n");
@@ -680,6 +693,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
new->ex_fslocs.locations = NULL;
new->ex_fslocs.locations_count = 0;
new->ex_fslocs.migrated = 0;
+ new->ex_pnfs = 0;
}

static void export_update(struct cache_head *cnew, struct cache_head *citem)
@@ -692,6 +706,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
new->ex_anon_uid = item->ex_anon_uid;
new->ex_anon_gid = item->ex_anon_gid;
new->ex_fsid = item->ex_fsid;
+ new->ex_pnfs = item->ex_pnfs;
new->ex_uuid = item->ex_uuid;
item->ex_uuid = NULL;
new->ex_pathname = item->ex_pathname;
@@ -1030,7 +1045,7 @@ exp_export(struct nfsctl_export *nxp)
goto finish;
}

- err = check_export(path.dentry->d_inode, nxp->ex_flags, NULL);
+ err = check_export(path.dentry->d_inode, nxp->ex_flags, NULL, false);
if (err) goto finish;

err = -ENOMEM;
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 3f17228..268391e 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -98,6 +98,7 @@ struct svc_export {
uid_t ex_anon_uid;
gid_t ex_anon_gid;
int ex_fsid;
+ int ex_pnfs;
unsigned char * ex_uuid; /* 16 byte fsid */
struct nfsd4_fs_locations ex_fslocs;
int ex_nflavors;
--
1.6.5.1


2009-12-07 09:31:43

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 09/35] pnfsd: layout verify

Verify whether the server and file system support the given layout type.

[was pnfsd: Streamline error code checking for non-pnfs filesystems]
Signed-off-by: Dean Hildebrand <[email protected]>
[pnfsd: Add super block to layout_type()]
Signed-off-by: Marc Eshel <[email protected]>
[pnfsd: Fix order of ops in nfsd4_layout_verify]
Signed-off-by: Dean Hildebrand <[email protected]>
[pnfsd: convert generic code to use new pnfs api]
[pnfsd: define pnfs_export_operations]
[pnfsd: obliterate old vfs api]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: layout verify all layout types]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: tone nfsd4_layout_verify printk down to dprintk]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: check ex_pnfs in nfsd4_verify_layout]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: handle s_pnfs_op==NULL]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/export.c | 6 ++++++
fs/nfsd/nfs4proc.c | 34 ++++++++++++++++++++++++++++++++++
fs/nfsd/pnfsd.h | 2 ++
include/linux/nfsd/nfsd4_pnfs.h | 3 ++-
4 files changed, 44 insertions(+), 1 deletions(-)

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index ca745cc..7c3fa87 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -397,6 +397,12 @@ static int check_export(struct inode *inode, int flags, unsigned char *uuid,
return 0;
}

+ if (inode->i_sb->s_pnfs_op &&
+ !inode->i_sb->s_pnfs_op->layout_type) {
+ dprintk("exp_export: export of invalid fs pnfs export ops.\n");
+ return -EINVAL;
+ }
+
return 0;

}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e2b5666..4c78642 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -39,6 +39,7 @@
#include "cache.h"
#include "xdr4.h"
#include "vfs.h"
+#include "pnfsd.h"

#define NFSDDBG_FACILITY NFSDDBG_PROC

@@ -936,6 +937,39 @@ nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status == nfserr_same ? nfs_ok : status;
}

+#if defined(CONFIG_PNFSD)
+static __be32
+nfsd4_layout_verify(struct super_block *sb, struct svc_export *exp,
+ unsigned int layout_type)
+{
+ int status, type;
+
+ /* check to see if pNFS is supported. */
+ status = nfserr_layoutunavailable;
+ if (exp->ex_pnfs == 0 || !sb->s_pnfs_op || !sb->s_pnfs_op->layout_type) {
+ dprintk("%s: Underlying file system "
+ "does not support pNFS\n", __func__);
+ goto out;
+ }
+
+ type = sb->s_pnfs_op->layout_type(sb);
+
+ /* check to see if requested layout type is supported. */
+ status = nfserr_unknown_layouttype;
+ if (!type)
+ dprintk("BUG: %s: layout_type 0 is reserved and must not be "
+ "used by filesystem\n", __func__);
+ else if (type != layout_type)
+ dprintk("%s: requested layout type %d "
+ "does not match supported type %d\n",
+ __func__, layout_type, type);
+ else
+ status = nfs_ok;
+out:
+ return status;
+}
+#endif /* CONFIG_PNFSD */
+
/*
* NULL call.
*/
diff --git a/fs/nfsd/pnfsd.h b/fs/nfsd/pnfsd.h
index 65fb57e..7c46791 100644
--- a/fs/nfsd/pnfsd.h
+++ b/fs/nfsd/pnfsd.h
@@ -34,4 +34,6 @@
#ifndef LINUX_NFSD_PNFSD_H
#define LINUX_NFSD_PNFSD_H

+#include <linux/nfsd/nfsd4_pnfs.h>
+
#endif /* LINUX_NFSD_PNFSD_H */
diff --git a/include/linux/nfsd/nfsd4_pnfs.h b/include/linux/nfsd/nfsd4_pnfs.h
index ff6613e..c44e13d 100644
--- a/include/linux/nfsd/nfsd4_pnfs.h
+++ b/include/linux/nfsd/nfsd4_pnfs.h
@@ -45,7 +45,8 @@
* All other methods are optional and can be set to NULL if not implemented.
*/
struct pnfs_export_operations {
- /* stub */
+ /* Returns the supported pnfs_layouttype4. */
+ int (*layout_type) (struct super_block *);
};

#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
--
1.6.5.1


2009-12-07 09:31:00

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 10/35] pnfsd: introduce exp_xdr.h

Containing xdr encoding helpers.

[nfsd: fix exp_xdr_encode_u64 parameter type]
Reported-by: J. Bruce Fields <[email protected]>
[exportfs: exp_xdr.h: Use #include <linux/string.h> instead of <asm/string.h>]
Signed-off-by: Benny Halevy <[email protected]>
---
include/linux/exp_xdr.h | 141 +++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 141 insertions(+), 0 deletions(-)
create mode 100644 include/linux/exp_xdr.h

diff --git a/include/linux/exp_xdr.h b/include/linux/exp_xdr.h
new file mode 100644
index 0000000..b69c309
--- /dev/null
+++ b/include/linux/exp_xdr.h
@@ -0,0 +1,141 @@
+#ifndef _LINUX_EXP_XDR_H
+#define _LINUX_EXP_XDR_H
+
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+#include <linux/string.h>
+
+struct exp_xdr_stream {
+ __be32 *p;
+ __be32 *end;
+};
+
+/**
+ * exp_xdr_qwords - Calculate the number of quad-words holding nbytes
+ * @nbytes: number of bytes to encode
+ */
+static inline size_t
+exp_xdr_qwords(__u32 nbytes)
+{
+ return DIV_ROUND_UP(nbytes, 4);
+}
+
+/**
+ * exp_xdr_qbytes - Calculate the number of bytes holding qwords
+ * @qwords: number of quad-words to encode
+ */
+static inline size_t
+exp_xdr_qbytes(size_t qwords)
+{
+ return qwords << 2;
+}
+
+/**
+ * exp_xdr_reserve_space - Reserve buffer space for sending
+ * @xdr: pointer to exp_xdr_stream
+ * @nbytes: number of bytes to reserve
+ *
+ * Checks that we have enough buffer space to encode 'nbytes' more
+ * bytes of data. If so, update the xdr stream.
+ */
+static inline __be32 *
+exp_xdr_reserve_space(struct exp_xdr_stream *xdr, size_t nbytes)
+{
+ __be32 *p = xdr->p;
+ __be32 *q;
+
+ /* align nbytes on the next 32-bit boundary */
+ q = p + exp_xdr_qwords(nbytes);
+ if (unlikely(q > xdr->end || q < p))
+ return NULL;
+ xdr->p = q;
+ return p;
+}
+
+/**
+ * exp_xdr_reserve_qwords - Reserve buffer space for sending
+ * @xdr: pointer to exp_xdr_stream
+ * @qwords: number of quad words (u32's) to reserve
+ */
+static inline __be32 *
+exp_xdr_reserve_qwords(struct exp_xdr_stream *xdr, size_t qwords)
+{
+ return exp_xdr_reserve_space(xdr, exp_xdr_qbytes(qwords));
+}
+
+/**
+ * exp_xdr_encode_u32 - Encode an unsigned 32-bit value onto a xdr stream
+ * @p: pointer to encoding destination
+ * @val: value to encode
+ */
+static inline __be32 *
+exp_xdr_encode_u32(__be32 *p, __u32 val)
+{
+ *p = cpu_to_be32(val);
+ return p + 1;
+}
+
+/**
+ * exp_xdr_encode_u64 - Encode an unsigned 64-bit value onto a xdr stream
+ * @p: pointer to encoding destination
+ * @val: value to encode
+ */
+static inline __be32 *
+exp_xdr_encode_u64(__be32 *p, __u64 val)
+{
+ put_unaligned_be64(val, p);
+ return p + 2;
+}
+
+/**
+ * exp_xdr_encode_bytes - Encode an array of bytes onto a xdr stream
+ * @p: pointer to encoding destination
+ * @ptr: pointer to the array of bytes
+ * @nbytes: number of bytes to encode
+ */
+static inline __be32 *
+exp_xdr_encode_bytes(__be32 *p, const void *ptr, __u32 nbytes)
+{
+ if (likely(nbytes != 0)) {
+ unsigned int qwords = exp_xdr_qwords(nbytes);
+ unsigned int padding = exp_xdr_qbytes(qwords) - nbytes;
+
+ memcpy(p, ptr, nbytes);
+ if (padding != 0)
+ memset((char *)p + nbytes, 0, padding);
+ p += qwords;
+ }
+ return p;
+}
+
+/**
+ * exp_xdr_encode_opaque - Encode an opaque type onto a xdr stream
+ * @p: pointer to encoding destination
+ * @ptr: pointer to the opaque array
+ * @nbytes: number of bytes to encode
+ *
+ * Encodes the 32-bit opaque size in bytes followed by the opaque value.
+ */
+static inline __be32 *
+exp_xdr_encode_opaque(__be32 *p, const void *ptr, __u32 nbytes)
+{
+ p = exp_xdr_encode_u32(p, nbytes);
+ return exp_xdr_encode_bytes(p, ptr, nbytes);
+}
+
+/**
+ * exp_xdr_encode_opaque_len - Encode the opaque length onto a xdr stream
+ * @lenp: pointer to the opaque length destination
+ * @endp: pointer to the end of the opaque array
+ *
+ * Encodes the 32-bit opaque size in bytes given the start and end pointers
+ */
+static inline __be32 *
+exp_xdr_encode_opaque_len(__be32 *lenp, const void *endp)
+{
+ size_t nbytes = (char *)endp - (char *)(lenp + 1);
+
+ exp_xdr_encode_u32(lenp, nbytes);
+ return lenp + 1 + exp_xdr_qwords(nbytes);
+}
+#endif /* _LINUX_EXP_XDR_H */
--
1.6.5.1


2009-12-07 09:31:15

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 11/35] pnfsd: get device list/info

Implement the generic handling of GETDEVICELIST and GETDEVICEINFO.

After verifying that the requested layout type is supported,
getdevlist uses the get_device_iter pnfs export method
to encode the list of deviceids and get the cookie, verifier,
and eof flag to be used by the client to iterate through
the whole device list.

Getdevinfo uses the get_device_info pnfs export method
to encode the device info for the given deviceid.

The filesystem can choose to return valid cookie and cookieverf
on eof, pointing at the end of the device list so that subsequent
calls to GETDEVICELIST will return an empty list.

Note that with the file layout, lots of devices are sent under a
single device id, so the client will need to send a relatively
large value of maxcount.

If maxcount is 0 then just update notifications.
The nfsv4.1 spec forbids returning ETOOSMALL in this case.
It is up to the implementor of the get_device_info method
to verify the deviceid in this case and return no
info for it.

If no notifications are given represent gdir_notification as an empty
bitmap array rather than one consisting of a single zeroed entry.
Thanks to Dean Hildebrand for suggesting this optimization
and to Peter Staubach for convincing that it's worth it.

[extracted from pnfsd: Initial pNFS server implementation.]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: update pNFS server ops to draft 13]
Signed-off-by: Marc Eshel <[email protected]>
[pnfsd: Fix server getdevicelist update to draft 13]
Signed-off-by: Andy Adamson<[email protected]>
[pnfsd: update pNFS server ops to draft 13]
Signed-off-by: Marc Eshel <[email protected]>
[pnfsd: Fix server GETDEVICELIST to comply with NFSv4.1 Draft 13]
Signed-off-by: Ricardo Labiaga <[email protected]>
[pnfsd: Streamline error code checking for non-pnfs filesystems]
Signed-off-by: Dean Hildebrand <[email protected]>
[pnfsd: Simplify device export ops.]
Signed-off-by: Dean Hildebrand <[email protected]>
[pnfs: fix compile problems if CONFIG_PNFS turned off - exportfs.h]
Signed-off-by: Fred Isaman <[email protected]>
[pnfsd: Implement getdevlist maxcount checking.]
[pnfsd: use nfs error codes]
[pnfsd: Use 128 bit deviceid on server]
Signed-off-by: Dean Hildebrand <[email protected]>
[pnfsd: fix warning in nfsd4_encode_devlist_iterator()]
Signed-off-by: Mike Sager <[email protected]>
[pnfsd: Update getdeviceinfo for draft-19]
Signed-off-by: Dean Hildebrand <[email protected]>
[pnfsd: encode empty getdeviceinfo notify bitmap rather than zeroed]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: do not depend on the current file handle in getdeviceinfo]
[pnfsd: update export hold count]
Signed-off-by: Marc Eshel <[email protected]>
[pnfsd: Update getdevlist for draft 19]
Signed-off-by: Dean Hildebrand <[email protected]>
[pnfsd: fix GETDEVICELIST encoding]
Signed-off-by: Mike Sager <[email protected]>
[pnfsd: use nfsd4_compoundres pointer in pnfs_xdr_info]
[pnfsd: fix NFS4ERR_TOOSMALL for getdeviceinfo]
[pnfsd: enable multipage getdeviceinfo da_addr_body]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: move vfs api structures to nfsd4_pnfs.h]
[pnfsd: convert generic code to use new pnfs api]
[pnfsd: define pnfs_export_operations]
[pnfsd: obliterate old vfs api]
[pnfsd: fixup ENCODE_HEAD for getdevicelist/info]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: get device list/info all layout types]
[pnfsd: check ex_pnfs in nfsd4_verify_layout]
Signed-off-by: Andy Adamson <[email protected]>
[removed nfsd4_pnfs_fl_getdev{info,iter} stubs]
[pnfsd: filelayout: convert to using exp_xdr]
[pnfsd: get rid of getdevinfo notify_types]
[pnfsd: copy getdevinfo deviceid in one piece]
[pnfsd: rename deviceid_t struct pnfs_deviceid]
[pnfsd: fix cosmetic checkpatch warnings]
[pnfsd: handle s_pnfs_op==NULL]
[pnfsd: move getdevinfo xdr structure to private header]
[pnfsd: clean up getdeviceinfo export op API]
[pnfsd: getdeviceinfo deviceid needs to be const.]
[pnfsd: allow returning empty device list.]
[pnfsd: return NFS4ERR_INVAL when maxdevices is zero.]
[pnfsd: move getdevlist xdr structure to private header]
[pnfsd: dev_iter: clean up export API]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/export.c | 3 +-
fs/nfsd/nfs4proc.c | 101 ++++++++++++++++
fs/nfsd/nfs4xdr.c | 254 +++++++++++++++++++++++++++++++++++++++
fs/nfsd/xdr4.h | 22 ++++
include/linux/nfsd/nfsd4_pnfs.h | 34 +++++
5 files changed, 413 insertions(+), 1 deletions(-)

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 7c3fa87..d847dd2 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -398,7 +398,8 @@ static int check_export(struct inode *inode, int flags, unsigned char *uuid,
}

if (inode->i_sb->s_pnfs_op &&
- !inode->i_sb->s_pnfs_op->layout_type) {
+ (!inode->i_sb->s_pnfs_op->layout_type ||
+ !inode->i_sb->s_pnfs_op->get_device_info)) {
dprintk("exp_export: export of invalid fs pnfs export ops.\n");
return -EINVAL;
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 4c78642..8747ddf 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -968,6 +968,96 @@ nfsd4_layout_verify(struct super_block *sb, struct svc_export *exp,
out:
return status;
}
+
+static __be32
+nfsd4_getdevlist(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_pnfs_getdevlist *gdlp)
+{
+ struct super_block *sb;
+ struct svc_fh *current_fh = &cstate->current_fh;
+ int status;
+
+ dprintk("%s: type %u maxdevices %u cookie %llu verf %llu\n",
+ __func__, gdlp->gd_layout_type, gdlp->gd_maxdevices,
+ gdlp->gd_cookie, gdlp->gd_verf);
+
+
+ status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
+ if (status)
+ goto out;
+
+ status = nfserr_inval;
+ sb = current_fh->fh_dentry->d_inode->i_sb;
+ if (!sb)
+ goto out;
+
+ /* We must be able to encode at least one device */
+ if (!gdlp->gd_maxdevices)
+ goto out;
+
+ /* Ensure underlying file system supports pNFS and,
+ * if so, the requested layout type
+ */
+ status = nfsd4_layout_verify(sb, current_fh->fh_export,
+ gdlp->gd_layout_type);
+ if (status)
+ goto out;
+
+ /* Do nothing if underlying file system does not support
+ * getdevicelist */
+ if (!sb->s_pnfs_op->get_device_iter) {
+ status = nfserr_notsupp;
+ goto out;
+ }
+
+ /* Set up arguments so device can be retrieved at encode time */
+ gdlp->gd_fhp = &cstate->current_fh;
+out:
+ return status;
+}
+
+static __be32
+nfsd4_getdevinfo(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_pnfs_getdevinfo *gdp)
+{
+ struct super_block *sb;
+ struct svc_export *exp = NULL;
+ u32 fsidv = gdp->gd_devid.fsid;
+ int status;
+
+ dprintk("%s: layout_type %u dev_id %llx:%llx maxcnt %u\n",
+ __func__, gdp->gd_layout_type, gdp->gd_devid.fsid,
+ gdp->gd_devid.devid, gdp->gd_maxcount);
+
+ status = nfserr_inval;
+ exp = rqst_exp_find(rqstp, FSID_NUM, &fsidv);
+ dprintk("%s: exp %p\n", __func__, exp);
+ if (IS_ERR(exp)) {
+ status = nfserrno(PTR_ERR(exp));
+ exp = NULL;
+ goto out;
+ }
+ sb = exp->ex_path.dentry->d_inode->i_sb;
+ dprintk("%s: sb %p\n", __func__, sb);
+ if (!sb)
+ goto out;
+
+ /* Ensure underlying file system supports pNFS and,
+ * if so, the requested layout type
+ */
+ status = nfsd4_layout_verify(sb, exp, gdp->gd_layout_type);
+ if (status)
+ goto out;
+
+ /* Set up arguments so device can be retrieved at encode time */
+ gdp->gd_sb = sb;
+out:
+ if (exp)
+ exp_put(exp);
+ return status;
+}
#endif /* CONFIG_PNFSD */

/*
@@ -1330,6 +1420,17 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
.op_name = "OP_SEQUENCE",
},
+#if defined(CONFIG_PNFSD)
+ [OP_GETDEVICELIST] = {
+ .op_func = (nfsd4op_func)nfsd4_getdevlist,
+ .op_name = "OP_GETDEVICELIST",
+ },
+ [OP_GETDEVICEINFO] = {
+ .op_func = (nfsd4op_func)nfsd4_getdevinfo,
+ .op_flags = ALLOWED_WITHOUT_FH,
+ .op_name = "OP_GETDEVICEINFO",
+ },
+#endif /* CONFIG_PNFSD */
};

static const char *nfsd4_op_name(unsigned opnum)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index a8587e9..955f583 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -46,6 +46,7 @@
#include <linux/nfsd_idmap.h>
#include <linux/nfs4_acl.h>
#include <linux/sunrpc/svcauth_gss.h>
+#include <linux/exportfs.h>

#include "xdr4.h"
#include "vfs.h"
@@ -1233,6 +1234,42 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
DECODE_TAIL;
}

+#if defined(CONFIG_PNFSD)
+static __be32
+nfsd4_decode_getdevlist(struct nfsd4_compoundargs *argp,
+ struct nfsd4_pnfs_getdevlist *gdevl)
+{
+ DECODE_HEAD;
+
+ READ_BUF(16 + sizeof(nfs4_verifier));
+ READ32(gdevl->gd_layout_type);
+ READ32(gdevl->gd_maxdevices);
+ READ64(gdevl->gd_cookie);
+ COPYMEM(&gdevl->gd_verf, sizeof(nfs4_verifier));
+
+ DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_getdevinfo(struct nfsd4_compoundargs *argp,
+ struct nfsd4_pnfs_getdevinfo *gdev)
+{
+ u32 num;
+ DECODE_HEAD;
+
+ READ_BUF(12 + sizeof(struct nfsd4_pnfs_deviceid));
+ READ64(gdev->gd_devid.fsid);
+ READ64(gdev->gd_devid.devid);
+ READ32(gdev->gd_layout_type);
+ READ32(gdev->gd_maxcount);
+ READ32(num);
+ if (num)
+ READ_BUF(4); /* TODO: for now, just skip notify_types */
+
+ DECODE_TAIL;
+}
+#endif /* CONFIG_PNFSD */
+
static __be32
nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
{
@@ -1334,11 +1371,19 @@ static nfsd4_dec nfsd41_dec_ops[] = {
[OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session,
[OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
+#if defined(CONFIG_PNFSD)
+ [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdevinfo,
+ [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_getdevlist,
+ [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
+#else /* CONFIG_PNFSD */
[OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
+#endif /* CONFIG_PNFSD */
[OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence,
[OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp,
@@ -3062,6 +3107,207 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
return 0;
}

+#if defined(CONFIG_PNFSD)
+
+/* Uses the export interface to iterate through the available devices
+ * and encodes them on the response stream.
+ */
+static __be32
+nfsd4_encode_devlist_iterator(struct nfsd4_compoundres *resp,
+ struct nfsd4_pnfs_getdevlist *gdevl,
+ unsigned int *dev_count)
+{
+ struct super_block *sb = gdevl->gd_fhp->fh_dentry->d_inode->i_sb;
+ __be32 nfserr;
+ int status;
+ __be32 *p;
+ struct nfsd4_pnfs_dev_iter_res res = {
+ .gd_cookie = gdevl->gd_cookie,
+ .gd_verf = gdevl->gd_verf,
+ .gd_eof = 0
+ };
+
+ dprintk("%s: Begin\n", __func__);
+
+ *dev_count = 0;
+ do {
+ status = sb->s_pnfs_op->get_device_iter(sb,
+ gdevl->gd_layout_type,
+ &res);
+ if (status) {
+ if (status == -ENOENT) {
+ res.gd_eof = 1;
+ /* return success */
+ break;
+ }
+ nfserr = nfserrno(status);
+ goto out_err;
+ }
+
+ /* Encode device id and layout type */
+ RESERVE_SPACE(sizeof(struct nfsd4_pnfs_deviceid));
+ WRITE64((__be64)gdevl->gd_fhp->fh_export->ex_fsid);
+ WRITE64(res.gd_devid); /* devid minor */
+ ADJUST_ARGS();
+ (*dev_count)++;
+ } while (*dev_count < gdevl->gd_maxdevices && !res.gd_eof);
+ gdevl->gd_cookie = res.gd_cookie;
+ gdevl->gd_verf = res.gd_verf;
+ gdevl->gd_eof = res.gd_eof;
+ nfserr = nfs_ok;
+out_err:
+ dprintk("%s: Encoded %u devices\n", __func__, *dev_count);
+ return nfserr;
+}
+
+/* Encodes the response of get device list.
+*/
+static __be32
+nfsd4_encode_getdevlist(struct nfsd4_compoundres *resp, int nfserr,
+ struct nfsd4_pnfs_getdevlist *gdevl)
+{
+ unsigned int dev_count = 0, lead_count;
+ u32 *p_in = resp->p;
+ __be32 *p;
+
+ dprintk("%s: err %d\n", __func__, nfserr);
+ if (nfserr)
+ return nfserr;
+
+ /* Ensure we have room for cookie, verifier, and devlist len,
+ * which we will backfill in after we encode as many devices as possible
+ */
+ lead_count = 8 + sizeof(nfs4_verifier) + 4;
+ RESERVE_SPACE(lead_count);
+ /* skip past these values */
+ p += XDR_QUADLEN(lead_count);
+ ADJUST_ARGS();
+
+ /* Iterate over as many device ids as possible on the xdr stream */
+ nfserr = nfsd4_encode_devlist_iterator(resp, gdevl, &dev_count);
+ if (nfserr)
+ goto out_err;
+
+ /* Backfill in cookie, verf and number of devices encoded */
+ p = p_in;
+ WRITE64(gdevl->gd_cookie);
+ WRITEMEM(&gdevl->gd_verf, sizeof(nfs4_verifier));
+ WRITE32(dev_count);
+
+ /* Skip over devices */
+ p += XDR_QUADLEN(dev_count * sizeof(struct nfsd4_pnfs_deviceid));
+ ADJUST_ARGS();
+
+ /* are we at the end of devices? */
+ RESERVE_SPACE(4);
+ WRITE32(gdevl->gd_eof);
+ ADJUST_ARGS();
+
+ dprintk("%s: done.\n", __func__);
+
+ nfserr = nfs_ok;
+out:
+ return nfserr;
+out_err:
+ p = p_in;
+ ADJUST_ARGS();
+ goto out;
+}
+
+/* For a given device id, have the file system retrieve and encode the
+ * associated device. The file system's get_device_info method encodes
+ * directly onto the exp_xdr_stream it is handed; the end of that stream
+ * is clamped to the client's maxcount, less the room needed for the
+ * layout type and notification fields.
+ *
+ * Note: on -ETOOSMALL, gdir_mincount is computed from how far xdr.p has
+ * advanced, so the file system is presumably expected to leave xdr.p
+ * reflecting the XDR size of the device_addr4 da_addr_body -- TODO confirm.
+ */
+static __be32
+nfsd4_encode_getdevinfo(struct nfsd4_compoundres *resp, int nfserr,
+ struct nfsd4_pnfs_getdevinfo *gdev)
+{
+ struct super_block *sb;
+ int maxcount = 0, type_notify_len = 12;
+ __be32 *p, *p_save = NULL, *p_in = resp->p;
+ struct exp_xdr_stream xdr;
+
+ dprintk("%s: err %d\n", __func__, nfserr);
+ if (nfserr)
+ return nfserr;
+
+ sb = gdev->gd_sb;
+
+ if (gdev->gd_maxcount != 0) {
+ /* FIXME: this will be bound by the session max response */
+ maxcount = svc_max_payload(resp->rqstp);
+ if (maxcount > gdev->gd_maxcount)
+ maxcount = gdev->gd_maxcount;
+
+ /* Ensure we have room for the type and notify fields */
+ maxcount -= type_notify_len;
+ if (maxcount < 0) {
+ nfserr = -ETOOSMALL;
+ goto toosmall;
+ }
+ }
+
+ RESERVE_SPACE(4);
+ WRITE32(gdev->gd_layout_type);
+ ADJUST_ARGS();
+
+ /* If maxcount is 0 then just update notifications */
+ if (gdev->gd_maxcount == 0)
+ goto handle_notifications;
+
+ xdr.p = p_save = resp->p;
+ xdr.end = resp->end;
+ if (xdr.end - xdr.p > exp_xdr_qwords(maxcount & ~3))
+ xdr.end = xdr.p + exp_xdr_qwords(maxcount & ~3);
+
+ nfserr = sb->s_pnfs_op->get_device_info(sb, &xdr, gdev->gd_layout_type,
+ &gdev->gd_devid);
+ if (nfserr) {
+ /* Rewind to the beginning */
+ p = p_in;
+ ADJUST_ARGS();
+ if (nfserr == -ETOOSMALL)
+ goto toosmall;
+ printk(KERN_ERR "%s: export ERROR %d\n", __func__, nfserr);
+ goto out;
+ }
+
+ /* The file system should never write 0 bytes without
+ * returning an error
+ */
+ BUG_ON(xdr.p == p_save);
+ BUG_ON(xdr.p > xdr.end);
+
+ /* Update the xdr stream with the number of bytes encoded
+ * by the file system.
+ */
+ p = xdr.p;
+ ADJUST_ARGS();
+
+handle_notifications:
+ /* Encode supported device notifications.
+ * Note: Currently none are supported.
+ */
+ RESERVE_SPACE(4);
+ WRITE32(0);
+ ADJUST_ARGS();
+
+out:
+ return nfserrno(nfserr);
+toosmall:
+ dprintk("%s: maxcount too small\n", __func__);
+ RESERVE_SPACE(4);
+ WRITE32((p_save ? (xdr.p - p_save) * 4 : 0) + type_notify_len);
+ ADJUST_ARGS();
+ goto out;
+}
+#endif /* CONFIG_PNFSD */
+
static __be32
nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
{
@@ -3122,11 +3368,19 @@ static nfsd4_enc nfsd4_enc_ops[] = {
[OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_destroy_session,
[OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop,
[OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
+#if defined(CONFIG_PNFSD)
+ [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdevinfo,
+ [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_getdevlist,
+ [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
+#else /* CONFIG_PNFSD */
[OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop,
[OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
[OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop,
[OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop,
[OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
+#endif /* CONFIG_PNFSD */
[OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_noop,
[OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence,
[OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 83202a1..acb215a 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -39,6 +39,8 @@
#ifndef _LINUX_NFSD_XDR4_H
#define _LINUX_NFSD_XDR4_H

+#include <linux/nfsd/nfsd4_pnfs.h>
+
#include "state.h"
#include "nfsd.h"

@@ -383,6 +385,22 @@ struct nfsd4_destroy_session {
struct nfs4_sessionid sessionid;
};

+struct nfsd4_pnfs_getdevinfo {
+ struct nfsd4_pnfs_deviceid gd_devid; /* request: device id passed to get_device_info */
+ u32 gd_layout_type; /* request: layout type, also encoded in the reply */
+ u32 gd_maxcount; /* request: reply size limit; 0 = notifications only */
+ struct super_block *gd_sb; /* sb whose s_pnfs_op->get_device_info is called */
+};
+
+struct nfsd4_pnfs_getdevlist {
+ u32 gd_layout_type; /* request */
+ u32 gd_maxdevices; /* request */
+ u64 gd_cookie; /* request and response */
+ u64 gd_verf; /* request and response */
+ struct svc_fh *gd_fhp; /* response */
+ u32 gd_eof; /* response */
+};
+
struct nfsd4_op {
int opnum;
__be32 status;
@@ -423,6 +441,10 @@ struct nfsd4_op {
struct nfsd4_create_session create_session;
struct nfsd4_destroy_session destroy_session;
struct nfsd4_sequence sequence;
+#if defined(CONFIG_PNFSD)
+ struct nfsd4_pnfs_getdevlist pnfs_getdevlist;
+ struct nfsd4_pnfs_getdevinfo pnfs_getdevinfo;
+#endif /* CONFIG_PNFSD */
} u;
struct nfs4_replay * replay;
};
diff --git a/include/linux/nfsd/nfsd4_pnfs.h b/include/linux/nfsd/nfsd4_pnfs.h
index c44e13d..d68fd14 100644
--- a/include/linux/nfsd/nfsd4_pnfs.h
+++ b/include/linux/nfsd/nfsd4_pnfs.h
@@ -34,6 +34,21 @@
#ifndef _LINUX_NFSD_NFSD4_PNFS_H
#define _LINUX_NFSD_NFSD4_PNFS_H

+#include <linux/exportfs.h>
+#include <linux/exp_xdr.h>
+
+struct nfsd4_pnfs_deviceid {
+ u64 fsid; /* filesystem ID */
+ u64 devid; /* device ID, unique within the filesystem */
+};
+
+struct nfsd4_pnfs_dev_iter_res {
+ u64 gd_cookie; /* request/response */
+ u64 gd_verf; /* request/response */
+ u64 gd_devid; /* response */
+ u32 gd_eof; /* response */
+};
+
/*
* pNFS export operations vector.
*
@@ -47,6 +62,25 @@
struct pnfs_export_operations {
/* Returns the supported pnfs_layouttype4. */
int (*layout_type) (struct super_block *);
+
+ /* Encode device info onto the xdr stream; returns 0 or a negative errno. */
+ int (*get_device_info) (struct super_block *,
+ struct exp_xdr_stream *,
+ u32 layout_type,
+ const struct nfsd4_pnfs_deviceid *);
+
+ /* Retrieve all available devices via an iterator.
+ * res->gd_cookie == 0 indicates the beginning of the list,
+ * otherwise res->gd_verf is used to verify that the list hasn't
+ * changed while being retrieved.
+ *
+ * On output, the filesystem sets res->gd_devid based on the current
+ * cookie and sets res->gd_cookie and res->gd_verf for the next entry.
+ * When the last entry in the list is retrieved, res->gd_eof is set to 1.
+ */
+ int (*get_device_iter) (struct super_block *,
+ u32 layout_type,
+ struct nfsd4_pnfs_dev_iter_res *);
};

#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
--
1.6.5.1


2009-12-07 09:32:25

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 12/35] pnfsd: filelayout: get device list/info

Calculate the size of the opaque device_addr4 da_addr_body. Use this size to
compare to the client's gdia_maxcount, and if it's not too small, to reserve
the xdr space once.

Require the file system get_device_info call to return the XDR size of the
device_addr4 da_addr_body in pnfs_xdr_info.bytes_written on NFS4ERR_TOOSMALL
for the gdir_mincount calculation.

Declare a callback into the file system for encoding multipage stripe
indices.

[extracted from pnfsd: Initial pNFS server implementation.]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: update pNFS server ops to draft 13]
Signed-off-by: Marc Eshel <[email protected]>
[pnfsd: Fix server GETDEVICELIST to comply with NFSv4.1 Draft 13]
Signed-off-by: Ricardo Labiaga <[email protected]>
[pnfsd: Simplify device export ops.]
[pnfsd: Remove device enc/free export ops]
[pnfsd: Use 128 bit deviceid on server]
Signed-off-by: Dean Hildebrand <[email protected]>
[pnfsd: filelayout: use nfsd4_compoundres pointer in pnfs_xdr_info]
[pnfsd: filelayout: fix NFS4ERR_TOOSMALL for getdeviceinfo]
[pnfsd: fix filelayout getdeviceinfo devaddr4 length encoding]
[pnfsd: file layout multipage getdeviceinfo encode callback]
[Used gfs2_get_device_info from
pnfs-gfs2: initial GETDEVICE* work for pNFS/GFS2 integration]
Signed-off-by: David M. Richter <[email protected]>
Signed-off-by: Frank Filz <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
[pnfs-gfs2: return correct error value in GETDEVICEINFO]
Signed-off-by: David M. Richter <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: filelayout: get rid of xdr encoding macros for file layout xdr]
[pnfsd: filelayout: move xdr declarations to nfs4layoutxdr.h]
[pnfsd: get rid of devinfo encoding function vector]
[pnfsd: filelayout: strictly define filelayout_encode_devinfo]
[pnfsd: mv nfs4filelayoutxdr to fs/exportfs]
[pnfsd: filelayout: convert to using exp_xdr]
[exportfs: filelayout: disable dprintk]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: exportfs: fix build warning]
Signed-off-by: Boaz Harrosh <[email protected]>
[pnfsd: rename deviceid_t struct pnfs_deviceid]
[pnfsd: fix cosmetic checkpatch warnings]
[conditionally build nfs4filelayoutxdr using config option]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/Kconfig | 9 +++
fs/exportfs/Makefile | 3 +-
fs/exportfs/nfs4filelayoutxdr.c | 132 ++++++++++++++++++++++++++++++++++++
fs/nfsd/Kconfig | 1 +
fs/nfsd/nfs4proc.c | 1 +
fs/nfsd/nfs4xdr.c | 1 +
include/linux/exportfs.h | 9 +++
include/linux/nfsd/debug.h | 1 +
include/linux/nfsd/nfs4layoutxdr.h | 58 ++++++++++++++++
9 files changed, 214 insertions(+), 1 deletions(-)
create mode 100644 fs/exportfs/nfs4filelayoutxdr.c
create mode 100644 include/linux/nfsd/nfs4layoutxdr.h

diff --git a/fs/Kconfig b/fs/Kconfig
index 64d44ef..c894391 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -223,6 +223,15 @@ config LOCKD_V4
config EXPORTFS
tristate

+config EXPORTFS_FILE_LAYOUT
+ bool "Exportfs support for the NFSv4.1 files layout type"
+ depends on PNFSD && EXPORTFS
+ help
+ Say Y here if you want exportfs support for the NFSv4.1
+ files layout type.
+
+ If unsure, say N.
+
config NFS_ACL_SUPPORT
tristate
select FS_POSIX_ACL
diff --git a/fs/exportfs/Makefile b/fs/exportfs/Makefile
index d7c5d4d..658207d 100644
--- a/fs/exportfs/Makefile
+++ b/fs/exportfs/Makefile
@@ -3,4 +3,5 @@

obj-$(CONFIG_EXPORTFS) += exportfs.o

-exportfs-objs := expfs.o
+exportfs-y := expfs.o
+exportfs-$(CONFIG_EXPORTFS_FILE_LAYOUT) += nfs4filelayoutxdr.o
diff --git a/fs/exportfs/nfs4filelayoutxdr.c b/fs/exportfs/nfs4filelayoutxdr.c
new file mode 100644
index 0000000..b9c24d2
--- /dev/null
+++ b/fs/exportfs/nfs4filelayoutxdr.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2006 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Andy Adamson <[email protected]>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <linux/exp_xdr.h>
+#include <linux/nfsd/nfs4layoutxdr.h>
+
+/* Define our own no-op dprintk so that filesystems do not depend on sunrpc */
+#ifdef dprintk
+#undef dprintk
+#endif
+#define dprintk(fmt, args, ...) do { } while (0)
+
+/*
+ * Calculate the XDR length, in 32-bit words, of the GETDEVICEINFO4resok
+ * structure for @fdev, excluding the gdir_notification and the
+ * gdir_device_addr da_layout_type.  The count includes the leading
+ * da_addr_body length word, so it is the amount of space
+ * filelayout_encode_devinfo() has to reserve.
+ *
+ * Returns the number of XDR words required.
+ */
+static int fl_devinfo_xdr_words(const struct pnfs_filelayout_device *fdev)
+{
+	const struct pnfs_filelayout_devaddr *fl_addr;
+	const struct pnfs_filelayout_multipath *mp;
+	unsigned int i, j;
+	int nwords;
+
+	/* da_addr_body length, indice length, indices,
+	 * multipath_list4 length */
+	nwords = 1 + 1 + fdev->fl_stripeindices_length + 1;
+	for (i = 0; i < fdev->fl_device_length; i++) {
+		mp = &fdev->fl_device_list[i];
+		nwords++; /* multipath list length */
+		for (j = 0; j < mp->fl_multipath_length; j++) {
+			/*
+			 * Size each address on its own: entries of one
+			 * multipath list may differ in length, and the
+			 * encoder writes every one of them.
+			 */
+			fl_addr = &mp->fl_multipath_list[j];
+			/* each opaque is a length word plus padded data */
+			nwords += 1 + exp_xdr_qwords(fl_addr->r_netid.len);
+			nwords += 1 + exp_xdr_qwords(fl_addr->r_addr.len);
+		}
+	}
+	dprintk("<-- %s nwords %d\n", __func__, nwords);
+	return nwords;
+}
+
+/*
+ * Encode the nfsv4_1_file_layout_ds_addr4 structure from draft 13 for
+ * @fdev on the response stream @xdr: a da_addr_body length word followed
+ * by the stripe indices and the list of multipath device addresses.
+ *
+ * Use linux error codes (not nfs) since these values are being
+ * returned to the file system.
+ *
+ * Returns 0 on success, or -ETOOSMALL when the required space cannot be
+ * reserved on @xdr.
+ */
+int
+filelayout_encode_devinfo(struct exp_xdr_stream *xdr,
+			  const struct pnfs_filelayout_device *fdev)
+{
+	unsigned int i, j, len = 0, opaque_words;
+	__be32 *p_in;	/* start of the body; the length is backfilled here */
+	u32 index_count = fdev->fl_stripeindices_length;
+	u32 dev_count = fdev->fl_device_length;
+	int error = 0;
+	__be32 *p;
+
+	opaque_words = fl_devinfo_xdr_words(fdev);
+	dprintk("%s: Begin indx_cnt: %u dev_cnt: %u total size %u\n",
+		__func__,
+		index_count,
+		dev_count,
+		opaque_words*4);
+
+	/* reserve room for the whole opaque body, length word included */
+	p = p_in = exp_xdr_reserve_qwords(xdr, opaque_words);
+	if (!p) {
+		error = -ETOOSMALL;
+		goto out;
+	}
+
+	/* Fill in length later */
+	p++;
+
+	/* encode device list indices */
+	p = exp_xdr_encode_u32(p, index_count);
+	for (i = 0; i < index_count; i++)
+		p = exp_xdr_encode_u32(p, fdev->fl_stripeindices_list[i]);
+
+	/* encode device list */
+	p = exp_xdr_encode_u32(p, dev_count);
+	for (i = 0; i < dev_count; i++) {
+		struct pnfs_filelayout_multipath *mp = &fdev->fl_device_list[i];
+
+		p = exp_xdr_encode_u32(p, mp->fl_multipath_length);
+		for (j = 0; j < mp->fl_multipath_length; j++) {
+			struct pnfs_filelayout_devaddr *da =
+						&mp->fl_multipath_list[j];
+
+			/* Encode device info */
+			p = exp_xdr_encode_opaque(p, da->r_netid.data,
+						     da->r_netid.len);
+			p = exp_xdr_encode_opaque(p, da->r_addr.data,
+						     da->r_addr.len);
+		}
+	}
+
+	/* backfill in length. Subtract 4 for da_addr_body size */
+	len = (char *)p - (char *)p_in;
+	exp_xdr_encode_u32(p_in, len - 4);
+
+out:
+	dprintk("%s: End err %d xdrlen %d\n",
+		__func__, error, len);
+	return error;
+}
+EXPORT_SYMBOL(filelayout_encode_devinfo);
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 05f72e4..448d14f 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -83,6 +83,7 @@ config NFSD_V4
config PNFSD
bool "NFSv4.1 server support for Parallel NFS (pNFS) (DEVELOPER ONLY)"
depends on NFSD_V4 && EXPERIMENTAL
+ select EXPORTFS_FILE_LAYOUT
help
This option enables support for the parallel NFS features of the
minor version 1 of the NFSv4 protocol (draft-ietf-nfsv4-minorversion1)
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 8747ddf..8f274bf 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -35,6 +35,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/file.h>
+#include <linux/nfsd/nfs4layoutxdr.h>

#include "cache.h"
#include "xdr4.h"
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 955f583..a374b1c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -47,6 +47,7 @@
#include <linux/nfs4_acl.h>
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/exportfs.h>
+#include <linux/nfsd/nfs4layoutxdr.h>

#include "xdr4.h"
#include "vfs.h"
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index dc12f41..4a763a1 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -2,6 +2,7 @@
#define LINUX_EXPORTFS_H 1

#include <linux/types.h>
+#include <linux/exp_xdr.h>

struct dentry;
struct inode;
@@ -170,4 +171,12 @@ extern struct dentry *generic_fh_to_parent(struct super_block *sb,
struct fid *fid, int fh_len, int fh_type,
struct inode *(*get_inode) (struct super_block *sb, u64 ino, u32 gen));

+#if defined(CONFIG_EXPORTFS_FILE_LAYOUT)
+struct pnfs_filelayout_device;
+struct pnfs_filelayout_layout;
+
+extern int filelayout_encode_devinfo(struct exp_xdr_stream *xdr,
+ const struct pnfs_filelayout_device *fdev);
+
+#endif /* defined(CONFIG_EXPORTFS_FILE_LAYOUT) */
#endif /* LINUX_EXPORTFS_H */
diff --git a/include/linux/nfsd/debug.h b/include/linux/nfsd/debug.h
index 9926c24..aad7013 100644
--- a/include/linux/nfsd/debug.h
+++ b/include/linux/nfsd/debug.h
@@ -33,6 +33,7 @@
#define NFSDDBG_XDR 0x0100
#define NFSDDBG_LOCKD 0x0200
#define NFSDDBG_PNFS 0x0400
+#define NFSDDBG_FILELAYOUT 0x0800
#define NFSDDBG_ALL 0x7FFF
#define NFSDDBG_NOCHANGE 0xFFFF

diff --git a/include/linux/nfsd/nfs4layoutxdr.h b/include/linux/nfsd/nfs4layoutxdr.h
new file mode 100644
index 0000000..5da0c74
--- /dev/null
+++ b/include/linux/nfsd/nfs4layoutxdr.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2006 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Andy Adamson <[email protected]>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef NFSD_NFS4LAYOUTXDR_H
+#define NFSD_NFS4LAYOUTXDR_H
+
+#include <linux/sunrpc/xdr.h>
+
+/* the nfsd4_pnfs_devlist dev_addr for the file layout type */
+struct pnfs_filelayout_devaddr {
+ struct xdr_netobj r_netid; /* encoded as an XDR opaque (data, len) */
+ struct xdr_netobj r_addr; /* encoded as an XDR opaque (data, len) */
+};
+
+/* list of multipath servers */
+struct pnfs_filelayout_multipath {
+ u32 fl_multipath_length; /* number of entries in fl_multipath_list */
+ struct pnfs_filelayout_devaddr *fl_multipath_list; /* array of addresses */
+};
+
+struct pnfs_filelayout_device {
+ u32 fl_stripeindices_length; /* number of entries in fl_stripeindices_list */
+ u32 *fl_stripeindices_list; /* stripe indices, encoded in order */
+ u32 fl_device_length; /* number of entries in fl_device_list */
+ struct pnfs_filelayout_multipath *fl_device_list; /* one multipath list per device */
+};
+
+#endif /* NFSD_NFS4LAYOUTXDR_H */
--
1.6.5.1


2009-12-07 09:31:43

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 13/35] pnfsd: layout get

Currently, always return a single record in the log_layout array.

[extracted from pnfsd: Initial pNFS server implementation.]
[pnfsd: nfsd layout cache: layout return changes]
[pnfsd: add debug printouts in return_layout path]
[pnfsd: refactor return_layout]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: Streamline error code checking for non-pnfs filesystems]
[pnfsd: Use nfsd4_layout_seg instead of wrapper struct.]
[pnfsd: Move nfsd4_layout_seg to exportfs.h]
[pnfsd: Fix file layout layoutget export op for d13]
[pnfsd: Simplify layout get export interface.]
Signed-off-by: Dean Hildebrand <[email protected]>
[pnfsd: improve nfs4_pnfs_get_layout dprintks]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: initialize layoutget return_on_close]
Signed-off-by: Andy Adamson<[email protected]>
[pnfsd: update server layout xdr for draft 19.]
Signed-off-by: Dean Hildebrand <[email protected]>
[pnfsd: use stateid_t for layout stateid xdr data structs]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: Update getdeviceinfo for draft-19]
Signed-off-by: Dean Hildebrand <[email protected]>
[pnfsd: xdr encode layoutget response logr_layout array count as per draft-19]
[pnfsd: use stateid xdr {en,de}code functions for layoutget]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: use nfsd4_compoundres pointer in pnfs_xdr_info]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: move vfs api structures to nfsd4_pnfs.h]
[pnfsd: convert generic code to use new pnfs api]
[pnfsd: define pnfs_export_operations]
[pnfsd: obliterate old vfs api]
Signed-off-by: Benny Halevy <[email protected]>
[Split this patch into filelayout only (this patch) and all layout types]
(patch pnfsd: layout get all layout types).
Remove use of pnfs_export_operations.
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: fixup ENCODE_HEAD for layoutget]
[pnfsd: rewind xdr response pointer on nfsd4_encode_layoutget error]
Signed-off-by: Benny Halevy <[email protected]>
[Move pnfsd code from nfs4state.c to nfs4pnfsd.c]
[Move common state code from linux/nfsd/state.h to fs/nfsd/internal.h]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: Release lock during layout export ops.]
Signed-off-by: Dean Hildebrand <[email protected]>
[cosmetic changes from pnfsd: Helper functions for layout stateid processing.]
[pnfsd: layout get all layout types]
[pnfsd: check ex_pnfs in nfsd4_verify_layout]
Signed-off-by: Andy Adamson <[email protected]>
[removed the nfsd4_pnfs_fl_layoutget stub]
[pnfsd: get rid of layout encoding function vector]
[pnfsd: filelayout: convert to using exp_xdr]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: Move pnfsd code out of nfs4state.c/h]
Signed-off-by: Boaz Harrosh <[email protected]>
[fixed !CONFIG_PNFSD and clean up for pnfsd-files]
[gfs2: set pnfs_dlm_export_ops only for CONFIG_PNFSD]
[moved pnfsd defs back into state.h]
[pnfsd: rename deviceid_t struct pnfs_deviceid]
[pnfsd: fix cosmetic checkpatch warnings]
[pnfsd: handle s_pnfs_op==NULL]
[pnfsd: move layoutget xdr structure to xdr4.h]
[pnfsd: clean up layoutget export API]
[pnfsd: moved find_alloc_file to nfs4state.c]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/Makefile | 1 +
fs/nfsd/export.c | 3 +-
fs/nfsd/nfs4pnfsd.c | 272 +++++++++++++++++++++++++++++++++++++++
fs/nfsd/nfs4proc.c | 53 ++++++++
fs/nfsd/nfs4state.c | 61 +++++----
fs/nfsd/nfs4xdr.c | 109 +++++++++++++++-
fs/nfsd/pnfsd.h | 15 ++
fs/nfsd/state.h | 50 +++++++
fs/nfsd/xdr4.h | 11 ++
include/linux/exportfs.h | 3 +-
include/linux/nfsd/nfsd4_pnfs.h | 50 +++++++
11 files changed, 598 insertions(+), 30 deletions(-)
create mode 100644 fs/nfsd/nfs4pnfsd.c

diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 9b118ee..4b4214c 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -11,3 +11,4 @@ nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
nfs4acl.o nfs4callback.o nfs4recover.o
+nfsd-$(CONFIG_PNFSD) += nfs4pnfsd.o
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index d847dd2..217b226 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -399,7 +399,8 @@ static int check_export(struct inode *inode, int flags, unsigned char *uuid,

if (inode->i_sb->s_pnfs_op &&
(!inode->i_sb->s_pnfs_op->layout_type ||
- !inode->i_sb->s_pnfs_op->get_device_info)) {
+ !inode->i_sb->s_pnfs_op->get_device_info ||
+ !inode->i_sb->s_pnfs_op->layout_get)) {
dprintk("exp_export: export of invalid fs pnfs export ops.\n");
return -EINVAL;
}
diff --git a/fs/nfsd/nfs4pnfsd.c b/fs/nfsd/nfs4pnfsd.c
new file mode 100644
index 0000000..b0794e3
--- /dev/null
+++ b/fs/nfsd/nfs4pnfsd.c
@@ -0,0 +1,272 @@
+/******************************************************************************
+ *
+ * (c) 2007 Network Appliance, Inc. All Rights Reserved.
+ * (c) 2009 NetApp. All Rights Reserved.
+ *
+ * NetApp provides this source code under the GPL v2 License.
+ * The GPL v2 license is available at
+ * http://opensource.org/licenses/gpl-license.php.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#include "pnfsd.h"
+
+#define NFSDDBG_FACILITY NFSDDBG_PROC
+
+/*
+ * Layout state - NFSv4.1 pNFS
+ */
+static struct kmem_cache *pnfs_layout_slab;
+
+/* Hand the pNFS layout slab cache to nfsd4_free_slab(). */
+void nfsd4_free_pnfs_slabs(void)
+{
+	nfsd4_free_slab(&pnfs_layout_slab);
+}
+
+/*
+ * Create the "pnfs_layouts" slab cache, sized for struct nfs4_layout.
+ * Returns 0 on success or -ENOMEM if the cache could not be created.
+ */
+int nfsd4_init_pnfs_slabs(void)
+{
+	pnfs_layout_slab = kmem_cache_create("pnfs_layouts",
+					     sizeof(struct nfs4_layout),
+					     0, 0, NULL);
+
+	return pnfs_layout_slab ? 0 : -ENOMEM;
+}
+
+/* Allocate a struct nfs4_layout from pnfs_layout_slab with GFP_KERNEL. */
+static inline struct nfs4_layout *alloc_layout(void)
+{
+	struct nfs4_layout *new_layout;
+
+	new_layout = kmem_cache_alloc(pnfs_layout_slab, GFP_KERNEL);
+	return new_layout;
+}
+
+/* Return @lp to pnfs_layout_slab. */
+static inline void free_layout(struct nfs4_layout *lp)
+{
+	kmem_cache_free(pnfs_layout_slab, lp);
+}
+
+/*
+ * Fill in the newly allocated layout @lp for file @fp and client @clp:
+ * take a reference on @fp, copy the segment @seg into the layout and
+ * link it at the tail of both the file's and the client's layout lists.
+ * @current_fh is not used here.
+ */
+static void
+init_layout(struct nfs4_layout *lp,
+	    struct nfs4_file *fp,
+	    struct nfs4_client *clp,
+	    struct svc_fh *current_fh,
+	    struct nfsd4_layout_seg *seg)
+{
+	dprintk("pNFS %s: lp %p clp %p fp %p ino %p\n", __func__,
+		lp, clp, fp, fp->fi_inode);
+
+	/* the layout pins its file for as long as it points at it */
+	get_nfs4_file(fp);
+	lp->lo_file = fp;
+	lp->lo_client = clp;
+	memcpy(&lp->lo_seg, seg, sizeof(lp->lo_seg));
+
+	list_add_tail(&lp->lo_perfile, &fp->fi_layouts);
+	list_add_tail(&lp->lo_perclnt, &clp->cl_layouts);
+	dprintk("pNFS %s end\n", __func__);
+}
+
+/*
+ * Do two octet ranges share at least one byte?
+ *	start1            last1
+ *	|-----------------|
+ *	            start2           last2
+ *	            |----------------|
+ * Returns 1 if they overlap, 0 otherwise.
+ */
+static inline int
+lo_seg_overlapping(struct nfsd4_layout_seg *l1, struct nfsd4_layout_seg *l2)
+{
+	u64 first_a = l1->offset;
+	u64 final_a = last_byte_offset(first_a, l1->length);
+	u64 first_b = l2->offset;
+	u64 final_b = last_byte_offset(first_b, l2->length);
+	int overlap = 0;
+
+	/* each range must begin no later than the other one's last byte;
+	 * equality means a single byte is shared */
+	if (first_a <= final_b && first_b <= final_a)
+		overlap = 1;
+
+	dprintk("%s: l1 %llu:%lld l2 %llu:%lld ret=%d\n", __func__,
+		l1->offset, l1->length, l2->offset, l2->length, overlap);
+	return overlap;
+}
+
+/* Returns non-zero when the major part of @fsid equals @major. */
+static inline int same_fsid_major(struct nfs4_fsid *fsid, u64 major)
+{
+	return major == fsid->major;
+}
+
+/*
+ * Returns non-zero when the major part of @fsid equals the ex_fsid of
+ * the export that @current_fh belongs to.
+ */
+static inline int same_fsid(struct nfs4_fsid *fsid, struct svc_fh *current_fh)
+{
+	u64 export_major = current_fh->fh_export->ex_fsid;
+
+	return same_fsid_major(fsid, export_major);
+}
+
+/*
+ * Are two octet ranges overlapping or adjacent, i.e. can they be
+ * represented as one contiguous range?
+ */
+static inline int
+lo_seg_mergeable(struct nfsd4_layout_seg *l1, struct nfsd4_layout_seg *l2)
+{
+	u64 begin_a = l1->offset;
+	u64 finish_a = end_offset(begin_a, l1->length);
+	u64 begin_b = l2->offset;
+	u64 finish_b = end_offset(begin_b, l2->length);
+
+	/* a gap exists only if one range ends before the other begins;
+	 * if one end equals the other start the ranges are adjacent */
+	if (finish_b < begin_a)
+		return 0;
+	return finish_a >= begin_b;
+}
+
+/*
+ * Grow the segment @lo so that it also covers the segment @lg.
+ * @lo is left untouched when it already covers @lg.  An end offset of
+ * NFS4_MAX_UINT64 is stored as a length of NFS4_MAX_UINT64.
+ */
+static void
+extend_layout(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *lg)
+{
+	u64 cur_start = lo->offset;
+	u64 cur_end = end_offset(cur_start, lo->length);
+	u64 new_start = lg->offset;
+	u64 new_end = end_offset(new_start, lg->length);
+
+	/* nothing to do when lo already covers lg */
+	if (new_start >= cur_start && new_end <= cur_end)
+		return;
+
+	/* widen to the union of both ranges */
+	cur_start = (new_start < cur_start) ? new_start : cur_start;
+	cur_end = (new_end > cur_end) ? new_end : cur_end;
+
+	lo->offset = cur_start;
+	if (cur_end == NFS4_MAX_UINT64)
+		lo->length = cur_end;
+	else
+		lo->length = cur_end - cur_start;
+}
+
+/*
+ * Try to merge the segment @seg into a layout already recorded on @fp.
+ *
+ * A layout qualifies when its layout type, clientid and iomode equal
+ * those of @seg and its range overlaps or is adjacent to @seg's range;
+ * the first such layout is extended to cover @seg.  @clp is not used.
+ *
+ * Returns the layout that absorbed @seg, or NULL when none qualifies.
+ */
+static struct nfs4_layout *
+merge_layout(struct nfs4_file *fp,
+	     struct nfs4_client *clp,
+	     struct nfsd4_layout_seg *seg)
+{
+	struct nfs4_layout *lp;
+
+	list_for_each_entry(lp, &fp->fi_layouts, lo_perfile)
+		if (lp->lo_seg.layout_type == seg->layout_type &&
+		    lp->lo_seg.clientid == seg->clientid &&
+		    lp->lo_seg.iomode == seg->iomode &&
+		    lo_seg_mergeable(&lp->lo_seg, seg)) {
+			extend_layout(&lp->lo_seg, seg);
+			return lp;
+		}
+
+	/*
+	 * The cursor must not be returned here: once the loop runs to
+	 * completion it refers to the list head's container rather than
+	 * to a layout, and it is never NULL.
+	 */
+	return NULL;
+}
+
+/*
+ * Ask the exported file system for a layout for the LAYOUTGET request
+ * @lgp, letting it encode the layout onto @xdr, and record the granted
+ * segment in the nfsd state.
+ *
+ * A layout structure is allocated before calling into the file system.
+ * The state lock is dropped around the layout_get call and retaken
+ * afterwards.  On success the segment is either merged into an existing
+ * layout (when the file system allows merging) or stored in the new one.
+ *
+ * Returns 0 on success or an nfserr_* status.
+ */
+int
+nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *lgp,
+		     struct exp_xdr_stream *xdr)
+{
+	struct inode *ino = lgp->lg_fhp->fh_dentry->d_inode;
+	struct super_block *sb = ino->i_sb;
+	struct nfsd4_pnfs_layoutget_arg args = {
+		.lg_minlength = lgp->lg_minlength,
+		.lg_fsid = lgp->lg_fhp->fh_export->ex_fsid,
+		.lg_fh = &lgp->lg_fhp->fh_handle,
+	};
+	struct nfsd4_pnfs_layoutget_res res = {
+		.lg_seg = lgp->lg_seg,
+	};
+	struct nfs4_layout *lp = NULL;
+	struct nfs4_client *clp;
+	struct nfs4_file *fp;
+	int status = nfserr_layouttrylater;
+	int can_merge = 0;
+
+	dprintk("NFSD: %s Begin\n", __func__);
+
+	/* merging is optional: only when the file system says so */
+	if (sb->s_pnfs_op->can_merge_layouts != NULL)
+		can_merge = sb->s_pnfs_op->can_merge_layouts(
+					lgp->lg_seg.layout_type) != 0;
+
+	nfs4_lock_state();
+	fp = find_alloc_file(ino, lgp->lg_fhp);
+	clp = find_confirmed_client((clientid_t *)&lgp->lg_seg.clientid);
+	dprintk("pNFS %s: fp %p clp %p \n", __func__, fp, clp);
+	if (!fp || !clp)
+		goto out;
+
+	/* pre-alloc layout in case we can't merge after we call
+	 * the file system
+	 */
+	lp = alloc_layout();
+	if (!lp)
+		goto out;
+
+	dprintk("pNFS %s: pre-export type 0x%x maxcount %Zd "
+		"iomode %u offset %llu length %llu\n",
+		__func__, lgp->lg_seg.layout_type,
+		exp_xdr_qbytes(xdr->end - xdr->p),
+		lgp->lg_seg.iomode, lgp->lg_seg.offset, lgp->lg_seg.length);
+
+	/* FIXME: need to eliminate the use of the state lock */
+	nfs4_unlock_state();
+	status = sb->s_pnfs_op->layout_get(ino, xdr, &args, &res);
+	nfs4_lock_state();
+
+	dprintk("pNFS %s: post-export status %d "
+		"iomode %u offset %llu length %llu\n",
+		__func__, status, res.lg_seg.iomode,
+		res.lg_seg.offset, res.lg_seg.length);
+
+	/* translate the file system's errno into an nfs status */
+	switch (status) {
+	case 0:
+		break;
+	case -ETOOSMALL:
+	case -E2BIG:
+		status = nfserr_toosmall;
+		goto out_freelayout;
+	case -ENOMEM:
+	case -EAGAIN:
+	case -EINTR:
+		status = nfserr_layouttrylater;
+		goto out_freelayout;
+	case -ENOENT:
+		status = nfserr_badlayout;
+		goto out_freelayout;
+	default:
+		status = nfserr_layoutunavailable;
+		goto out_freelayout;
+	}
+
+	lgp->lg_seg = res.lg_seg;
+	lgp->lg_roc = res.lg_return_on_close;
+
+	/* SUCCESS!
+	 * Can the new layout be merged into an existing one?
+	 * If so, free unused layout struct
+	 */
+	if (can_merge && merge_layout(fp, clp, &res.lg_seg))
+		goto out_freelayout;
+
+	/* Can't merge, so let's initialize this new layout */
+	init_layout(lp, fp, clp, lgp->lg_fhp, &res.lg_seg);
+out:
+	if (fp)
+		put_nfs4_file(fp);
+	nfs4_unlock_state();
+	dprintk("pNFS %s: lp %p exit status %d\n", __func__, lp, status);
+	return status;
+out_freelayout:
+	free_layout(lp);
+	goto out;
+}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 8f274bf..b7e910f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1059,6 +1059,55 @@ out:
exp_put(exp);
return status;
}
+
+/*
+ * LAYOUTGET operation handler: validate the request only.
+ *
+ * Verifies the current file handle, checks through nfsd4_layout_verify()
+ * that the file system and export support pNFS and the requested layout
+ * type, and validates the iomode.  On success the file handle and the
+ * session's clientid are stored in @lgp so that the layout can be
+ * retrieved at encode time.
+ *
+ * Returns nfs_ok, or an nfserr_* status: nfserr_inval for an unknown
+ * iomode, nfserr_badiomode for IOMODE_ANY, or whatever fh_verify() or
+ * nfsd4_layout_verify() returned.
+ */
+static __be32
+nfsd4_layoutget(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate,
+		struct nfsd4_pnfs_layoutget *lgp)
+{
+	/* __be32, like the return type: every value held is an nfs status */
+	__be32 status;
+	struct super_block *sb;
+	struct svc_fh *current_fh = &cstate->current_fh;
+
+	status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
+	if (status)
+		goto out;
+
+	status = nfserr_inval;
+	sb = current_fh->fh_dentry->d_inode->i_sb;
+	if (!sb)
+		goto out;
+
+	/* Ensure underlying file system supports pNFS and,
+	 * if so, the requested layout type
+	 */
+	status = nfsd4_layout_verify(sb, current_fh->fh_export,
+				     lgp->lg_seg.layout_type);
+	if (status)
+		goto out;
+
+	status = nfserr_inval;
+	if (lgp->lg_seg.iomode != IOMODE_READ &&
+	    lgp->lg_seg.iomode != IOMODE_RW &&
+	    lgp->lg_seg.iomode != IOMODE_ANY) {
+		dprintk("pNFS %s: invalid iomode %d\n", __func__,
+			lgp->lg_seg.iomode);
+		goto out;
+	}
+
+	/* IOMODE_ANY is a known iomode but not a valid one to request */
+	status = nfserr_badiomode;
+	if (lgp->lg_seg.iomode == IOMODE_ANY) {
+		dprintk("pNFS %s: IOMODE_ANY is not allowed\n", __func__);
+		goto out;
+	}
+
+	/* Set up arguments so layout can be retrieved at encode time */
+	lgp->lg_fhp = current_fh;
+	copy_clientid((clientid_t *)&lgp->lg_seg.clientid, cstate->session);
+	status = nfs_ok;
+out:
+	return status;
+}
#endif /* CONFIG_PNFSD */

/*
@@ -1431,6 +1480,10 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_flags = ALLOWED_WITHOUT_FH,
.op_name = "OP_GETDEVICEINFO",
},
+ [OP_LAYOUTGET] = {
+ .op_func = (nfsd4op_func)nfsd4_layoutget,
+ .op_name = "OP_LAYOUTGET",
+ },
#endif /* CONFIG_PNFSD */
};

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index dc9d553..cea0edc 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -110,7 +110,7 @@ opaque_hashval(const void *ptr, int nbytes)

static struct list_head del_recall_lru;

-static inline void
+inline void
put_nfs4_file(struct nfs4_file *fi)
{
if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) {
@@ -121,7 +121,7 @@ put_nfs4_file(struct nfs4_file *fi)
}
}

-static inline void
+inline void
get_nfs4_file(struct nfs4_file *fi)
{
atomic_inc(&fi->fi_ref);
@@ -846,6 +846,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
INIT_LIST_HEAD(&clp->cl_strhash);
INIT_LIST_HEAD(&clp->cl_openowners);
INIT_LIST_HEAD(&clp->cl_delegations);
+#if defined(CONFIG_PNFSD)
+ INIT_LIST_HEAD(&clp->cl_layouts);
+#endif /* CONFIG_PNFSD */
INIT_LIST_HEAD(&clp->cl_sessions);
INIT_LIST_HEAD(&clp->cl_lru);
clear_bit(0, &clp->cl_cb_slot_busy);
@@ -896,7 +899,7 @@ move_to_confirmed(struct nfs4_client *clp)
renew_client(clp);
}

-static struct nfs4_client *
+struct nfs4_client *
find_confirmed_client(clientid_t *clid)
{
struct nfs4_client *clp;
@@ -1709,7 +1712,7 @@ out:

/* OPEN Share state helper functions */
static inline struct nfs4_file *
-alloc_init_file(struct inode *ino)
+alloc_init_file(struct inode *ino, struct svc_fh *current_fh)
{
struct nfs4_file *fp;
unsigned int hashval = file_hashval(ino);
@@ -1720,18 +1723,29 @@ alloc_init_file(struct inode *ino)
INIT_LIST_HEAD(&fp->fi_hash);
INIT_LIST_HEAD(&fp->fi_stateids);
INIT_LIST_HEAD(&fp->fi_delegations);
+#if defined(CONFIG_PNFSD)
+ INIT_LIST_HEAD(&fp->fi_layouts);
+#endif /* CONFIG_PNFSD */
spin_lock(&recall_lock);
list_add(&fp->fi_hash, &file_hashtbl[hashval]);
spin_unlock(&recall_lock);
fp->fi_inode = igrab(ino);
fp->fi_id = current_fileid++;
fp->fi_had_conflict = false;
+#if defined(CONFIG_PNFSD)
+ fp->fi_fsid.major = current_fh->fh_export->ex_fsid;
+ fp->fi_fsid.minor = 0;
+ fp->fi_fhlen = current_fh->fh_handle.fh_size;
+ BUG_ON(fp->fi_fhlen > sizeof(fp->fi_fhval));
+ memcpy(fp->fi_fhval, &current_fh->fh_handle.fh_base,
+ fp->fi_fhlen);
+#endif /* CONFIG_PNFSD */
return fp;
}
return NULL;
}

-static void
+void
nfsd4_free_slab(struct kmem_cache **slab)
{
if (*slab == NULL)
@@ -1747,6 +1761,7 @@ nfsd4_free_slabs(void)
nfsd4_free_slab(&file_slab);
nfsd4_free_slab(&stateid_slab);
nfsd4_free_slab(&deleg_slab);
+ nfsd4_free_pnfs_slabs();
}

static int
@@ -1768,6 +1783,8 @@ nfsd4_init_slabs(void)
sizeof(struct nfs4_delegation), 0, 0, NULL);
if (deleg_slab == NULL)
goto out_nomem;
+ if (nfsd4_init_pnfs_slabs())
+ goto out_nomem;
return 0;
out_nomem:
nfsd4_free_slabs();
@@ -1908,6 +1925,18 @@ find_file(struct inode *ino)
return NULL;
}

+struct nfs4_file *
+find_alloc_file(struct inode *ino, struct svc_fh *current_fh)
+{
+ struct nfs4_file *fp;
+
+ fp = find_file(ino);
+ if (fp)
+ return fp;
+
+ return alloc_init_file(ino, current_fh);
+}
+
static inline int access_valid(u32 x, u32 minorversion)
{
if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ)
@@ -2465,7 +2494,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
goto out;
status = nfserr_resource;
- fp = alloc_init_file(ino);
+ fp = alloc_init_file(ino, current_fh);
if (fp == NULL)
goto out;
}
@@ -3216,26 +3245,6 @@ out:
#define LOCK_HASH_SIZE (1 << LOCK_HASH_BITS)
#define LOCK_HASH_MASK (LOCK_HASH_SIZE - 1)

-static inline u64
-end_offset(u64 start, u64 len)
-{
- u64 end;
-
- end = start + len;
- return end >= start ? end: NFS4_MAX_UINT64;
-}
-
-/* last octet in a range */
-static inline u64
-last_byte_offset(u64 start, u64 len)
-{
- u64 end;
-
- BUG_ON(!len);
- end = start + len;
- return end > start ? end - 1: NFS4_MAX_UINT64;
-}
-
#define lockownerid_hashval(id) \
((id) & LOCK_HASH_MASK)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index a374b1c..949e92d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -51,6 +51,7 @@

#include "xdr4.h"
#include "vfs.h"
+#include "pnfsd.h"

#define NFSDDBG_FACILITY NFSDDBG_XDR

@@ -1269,6 +1270,26 @@ nfsd4_decode_getdevinfo(struct nfsd4_compoundargs *argp,

DECODE_TAIL;
}
+
+static __be32
+nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
+ struct nfsd4_pnfs_layoutget *lgp)
+{
+ DECODE_HEAD;
+
+ READ_BUF(36);
+ READ32(lgp->lg_signal);
+ READ32(lgp->lg_seg.layout_type);
+ READ32(lgp->lg_seg.iomode);
+ READ64(lgp->lg_seg.offset);
+ READ64(lgp->lg_seg.length);
+ READ64(lgp->lg_minlength);
+ nfsd4_decode_stateid(argp, &lgp->lg_sid);
+ READ_BUF(4);
+ READ32(lgp->lg_maxcount);
+
+ DECODE_TAIL;
+}
#endif /* CONFIG_PNFSD */

static __be32
@@ -1376,7 +1397,7 @@ static nfsd4_dec nfsd41_dec_ops[] = {
[OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdevinfo,
[OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_getdevlist,
[OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget,
[OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
#else /* CONFIG_PNFSD */
[OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp,
@@ -3307,6 +3328,90 @@ toosmall:
ADJUST_ARGS();
goto out;
}
+
+static __be32
+nfsd4_encode_layoutget(struct nfsd4_compoundres *resp,
+ int nfserr,
+ struct nfsd4_pnfs_layoutget *lgp)
+{
+ int maxcount, leadcount;
+ struct super_block *sb;
+ struct exp_xdr_stream xdr;
+ __be32 *p, *p_save, *p_start = resp->p;
+
+ dprintk("%s: err %d\n", __func__, nfserr);
+ if (nfserr)
+ return nfserr;
+
+ sb = lgp->lg_fhp->fh_dentry->d_inode->i_sb;
+ maxcount = PAGE_SIZE;
+ if (maxcount > lgp->lg_maxcount)
+ maxcount = lgp->lg_maxcount;
+
+ /* Check for space on xdr stream */
+ leadcount = 36 + sizeof(stateid_opaque_t);
+ RESERVE_SPACE(leadcount);
+ /* encode layout metadata after file system encodes layout */
+ p += XDR_QUADLEN(leadcount);
+ ADJUST_ARGS();
+
+ /* Ensure have room for ret_on_close, off, len, iomode, type */
+ maxcount -= leadcount;
+ if (maxcount < 0) {
+ printk(KERN_ERR "%s: buffer too small\n", __func__);
+ nfserr = nfserr_toosmall;
+ goto err;
+ }
+
+ /* Set xdr info so file system can encode layout */
+ xdr.p = p_save = resp->p;
+ xdr.end = resp->end;
+ if (xdr.end - xdr.p > exp_xdr_qwords(maxcount & ~3))
+ xdr.end = xdr.p + exp_xdr_qwords(maxcount & ~3);
+
+ /* Retrieve, encode, and merge layout */
+ nfserr = nfs4_pnfs_get_layout(lgp, &xdr);
+ if (nfserr)
+ goto err;
+
+ /* Ensure file system returned enough bytes for the client
+ * to access.
+ */
+ if (lgp->lg_seg.length < lgp->lg_minlength) {
+ nfserr = nfserr_badlayout;
+ goto err;
+ }
+
+ /* The file system should never write 0 bytes without
+ * returning an error
+ */
+ BUG_ON(xdr.p == p_save);
+
+ /* Rewind to beginning and encode attrs */
+ resp->p = p_start;
+ RESERVE_SPACE(4);
+ WRITE32(lgp->lg_roc); /* return on close */
+ ADJUST_ARGS();
+ nfsd4_encode_stateid(resp, &lgp->lg_sid);
+ RESERVE_SPACE(28);
+ /* Note: response logr_layout array count, always one for now */
+ WRITE32(1);
+ WRITE64(lgp->lg_seg.offset);
+ WRITE64(lgp->lg_seg.length);
+ WRITE32(lgp->lg_seg.iomode);
+ WRITE32(lgp->lg_seg.layout_type);
+
+ /* Update the xdr stream with the number of bytes written
+ * by the file system
+ */
+ p = xdr.p;
+ ADJUST_ARGS();
+
+ return nfs_ok;
+err:
+ resp->p = p_start;
+ return nfserr;
+}
#endif /* CONFIG_PNFSD */

static __be32
@@ -3373,7 +3478,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
[OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdevinfo,
[OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_getdevlist,
[OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget,
[OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
#else /* CONFIG_PNFSD */
[OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/pnfsd.h b/fs/nfsd/pnfsd.h
index 7c46791..04d713f 100644
--- a/fs/nfsd/pnfsd.h
+++ b/fs/nfsd/pnfsd.h
@@ -34,6 +34,21 @@
#ifndef LINUX_NFSD_PNFSD_H
#define LINUX_NFSD_PNFSD_H

+#include <linux/list.h>
#include <linux/nfsd/nfsd4_pnfs.h>

+#include <state.h>
+#include <xdr4.h>
+
+/* outstanding layout */
+struct nfs4_layout {
+ struct list_head lo_perfile; /* hash by f_id */
+ struct list_head lo_perclnt; /* hash by clientid */
+ struct nfs4_file *lo_file; /* backpointer */
+ struct nfs4_client *lo_client;
+ struct nfsd4_layout_seg lo_seg;
+};
+
+int nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *, struct exp_xdr_stream *);
+
#endif /* LINUX_NFSD_PNFSD_H */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 2af7568..44b25d2 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -230,6 +230,14 @@ struct nfs4_client {
struct svc_xprt *cl_cb_xprt; /* 4.1 callback transport */
struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
/* wait here for slots */
+#if defined(CONFIG_PNFSD)
+ struct list_head cl_layouts; /* outstanding layouts */
+#endif /* CONFIG_PNFSD */
+};
+
+struct nfs4_fsid {
+ u64 major;
+ u64 minor;
};

/* struct nfs4_client_reset
@@ -318,10 +326,19 @@ struct nfs4_file {
struct list_head fi_hash; /* hash by "struct inode *" */
struct list_head fi_stateids;
struct list_head fi_delegations;
+#if defined(CONFIG_PNFSD)
+ struct list_head fi_layouts;
+#endif /* CONFIG_PNFSD */
struct inode *fi_inode;
u32 fi_id; /* used with stateowner->so_id
* for stateid_hashtbl hash */
bool fi_had_conflict;
+#if defined(CONFIG_PNFSD)
+ /* used by layoutget / layoutrecall */
+ struct nfs4_fsid fi_fsid;
+ u32 fi_fhlen;
+ u8 fi_fhval[NFS4_FHSIZE];
+#endif /* CONFIG_PNFSD */
};

/*
@@ -393,6 +410,19 @@ extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
extern void nfsd4_recdir_purge_old(void);
extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
+extern void nfsd4_free_slab(struct kmem_cache **);
+extern struct nfs4_file *find_alloc_file(struct inode *, struct svc_fh *);
+extern void put_nfs4_file(struct nfs4_file *);
+extern void get_nfs4_file(struct nfs4_file *);
+extern struct nfs4_client *find_confirmed_client(clientid_t *);
+
+#if defined(CONFIG_PNFSD)
+extern int nfsd4_init_pnfs_slabs(void);
+extern void nfsd4_free_pnfs_slabs(void);
+#else /* CONFIG_PNFSD */
+static inline void nfsd4_free_pnfs_slabs(void) {}
+static inline int nfsd4_init_pnfs_slabs(void) { return 0; }
+#endif /* CONFIG_PNFSD */

static inline void
nfs4_put_stateowner(struct nfs4_stateowner *so)
@@ -406,4 +436,24 @@ nfs4_get_stateowner(struct nfs4_stateowner *so)
kref_get(&so->so_ref);
}

+static inline u64
+end_offset(u64 start, u64 len)
+{
+ u64 end;
+
+ end = start + len;
+ return end >= start ? end : NFS4_MAX_UINT64;
+}
+
+/* last octet in a range */
+static inline u64
+last_byte_offset(u64 start, u64 len)
+{
+ u64 end;
+
+ BUG_ON(!len);
+ end = start + len;
+ return end > start ? end - 1 : NFS4_MAX_UINT64;
+}
+
#endif /* NFSD4_STATE_H */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index acb215a..891f3d2 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -401,6 +401,16 @@ struct nfsd4_pnfs_getdevlist {
u32 gd_eof; /* response */
};

+struct nfsd4_pnfs_layoutget {
+ u64 lg_minlength; /* request */
+ u32 lg_signal; /* request */
+ u32 lg_maxcount; /* request */
+ struct svc_fh *lg_fhp; /* request */
+ stateid_t lg_sid; /* request/response */
+ struct nfsd4_layout_seg lg_seg; /* request/response */
+ u32 lg_roc; /* response */
+};
+
struct nfsd4_op {
int opnum;
__be32 status;
@@ -444,6 +454,7 @@ struct nfsd4_op {
#if defined(CONFIG_PNFSD)
struct nfsd4_pnfs_getdevlist pnfs_getdevlist;
struct nfsd4_pnfs_getdevinfo pnfs_getdevinfo;
+ struct nfsd4_pnfs_layoutget pnfs_layoutget;
#endif /* CONFIG_PNFSD */
} u;
struct nfs4_replay * replay;
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 4a763a1..97d99e1 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -177,6 +177,7 @@ struct pnfs_filelayout_layout;

extern int filelayout_encode_devinfo(struct exp_xdr_stream *xdr,
const struct pnfs_filelayout_device *fdev);
-
+extern int filelayout_encode_layout(struct exp_xdr_stream *xdr,
+ const struct pnfs_filelayout_layout *flp);
#endif /* defined(CONFIG_EXPORTFS_FILE_LAYOUT) */
#endif /* LINUX_EXPORTFS_H */
diff --git a/include/linux/nfsd/nfsd4_pnfs.h b/include/linux/nfsd/nfsd4_pnfs.h
index d68fd14..be15b7f 100644
--- a/include/linux/nfsd/nfsd4_pnfs.h
+++ b/include/linux/nfsd/nfsd4_pnfs.h
@@ -49,6 +49,36 @@ struct nfsd4_pnfs_dev_iter_res {
u32 gd_eof; /* response */
};

+struct nfsd4_layout_seg {
+ u64 clientid;
+ u32 layout_type;
+ u32 iomode;
+ u64 offset;
+ u64 length;
+};
+
+/* Used by layout_get to encode layout (loc_body var in spec)
+ * Args:
+ * minlength - min number of accessible bytes given by layout
+ * fsid - Major part of struct pnfs_deviceid. File system uses this
+ * to build the deviceid returned in the layout.
+ * fh - fs can modify the file handle for use on data servers
+ * seg - layout info requested and layout info returned
+ * xdr - xdr info
+ * return_on_close - true if layout to be returned on file close
+ */
+
+struct nfsd4_pnfs_layoutget_arg {
+ u64 lg_minlength;
+ u64 lg_fsid;
+ const struct knfsd_fh *lg_fh;
+};
+
+struct nfsd4_pnfs_layoutget_res {
+ struct nfsd4_layout_seg lg_seg; /* request/response */
+ u32 lg_return_on_close; /* true if layout is to be returned on file close */
+};
+
/*
* pNFS export operations vector.
*
@@ -81,6 +111,26 @@ struct pnfs_export_operations {
int (*get_device_iter) (struct super_block *,
u32 layout_type,
struct nfsd4_pnfs_dev_iter_res *);
+
+ /* Retrieve and encode a layout for inode onto the xdr stream.
+ * arg->minlength is the minimum number of accessible bytes required
+ * by the client.
+ * The maximum number of bytes to encode the layout is given by
+ * the xdr stream end pointer.
+ * arg->fsid contains the major part of struct pnfs_deviceid.
+ * The file system uses this to build the deviceid returned
+ * in the layout.
+ * res->seg - layout segment requested and layout info returned.
+ * res->fh can be modified the file handle for use on data servers
+ * res->return_on_close - true if layout to be returned on file close
+ */
+ int (*layout_get) (struct inode *,
+ struct exp_xdr_stream *xdr,
+ const struct nfsd4_pnfs_layoutget_arg *,
+ struct nfsd4_pnfs_layoutget_res *);
+
+ /* Can layout segments be merged for this layout type? */
+ int (*can_merge_layouts) (u32 layout_type);
};

#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
--
1.6.5.1


2009-12-07 09:32:53

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 14/35] pnfsd: filelayout: layout encoding

[extracted from: pnfsd: Initial pNFS server implementation.]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: update pNFS server ops to draft 13]
Signed-off-by: Marc Eshel <[email protected]>
[pnfsd: Check for dense layout in layout encode.]
Signed-off-by: Dean Hildebrand <[email protected]>
[pnfsd: Fix server GETDEVICELIST to comply with NFSv4.1 Draft 13]
Signed-off-by: Ricardo Labiaga <[email protected]>
[pnfsd: Fix file layout layoutget export op for d13]
[pnfsd: Simplify layout get export interface.]
Signed-off-by: Dean Hildebrand <[email protected]>
[pnfsd: improve nfs4_pnfs_get_layout dprintks]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: initialize layoutget return_on_close]
Signed-off-by: Andy Adamson<[email protected]>
[pnfsd: Use 128 bit deviceid on server]
[pnfsd: update server layout xdr for draft 19.]
Signed-off-by: Dean Hildebrand <[email protected]>
[pnfsd: filelayout: use nfsd4_compoundres pointer in pnfs_xdr_info]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: filelayout: get rid of xdr encoding macros for file layout xdr]
[pnfsd: get rid of layout encoding function vector]
[pnfsd: filelayout: strictly define filelayout_encode_layout]
[pnfsd: filelayout: convert to using exp_xdr]
[include nfsd4_pnfs.h from nfs4layoutxdr.h for deviceid_t]
[pnfsd: rename deviceid_t struct pnfs_deviceid]
[pnfsd: fix cosmetic checkpatch warnings]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/exportfs/nfs4filelayoutxdr.c | 86 ++++++++++++++++++++++++++++++++++++
include/linux/nfsd/nfs4layoutxdr.h | 18 ++++++++
2 files changed, 104 insertions(+), 0 deletions(-)

diff --git a/fs/exportfs/nfs4filelayoutxdr.c b/fs/exportfs/nfs4filelayoutxdr.c
index b9c24d2..d7ceb72 100644
--- a/fs/exportfs/nfs4filelayoutxdr.c
+++ b/fs/exportfs/nfs4filelayoutxdr.c
@@ -30,6 +30,9 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/exp_xdr.h>
+#include <linux/module.h>
+#include <linux/nfs4.h>
+#include <linux/nfsd/nfsfh.h>
#include <linux/nfsd/nfs4layoutxdr.h>

/* We do our-own dprintk so filesystems are not dependent on sunrpc */
@@ -130,3 +133,86 @@ out:
return error;
}
EXPORT_SYMBOL(filelayout_encode_devinfo);
+
+/* Encodes the loc_body structure from draft 13
+ * on the response stream.
+ * Use linux error codes (not nfs) since these values are being
+ * returned to the file system.
+ */
+int
+filelayout_encode_layout(struct exp_xdr_stream *xdr,
+ const struct pnfs_filelayout_layout *flp)
+{
+ u32 len = 0, nfl_util, fhlen, i;
+ u32 *layoutlen_p;
+ int error;
+ __be32 *p;
+
+ dprintk("%s: device_id %llx:%llx fsi %u, numfh %u\n",
+ __func__,
+ flp->device_id.pnfs_fsid,
+ flp->device_id.pnfs_devid,
+ flp->lg_first_stripe_index,
+ flp->lg_fh_length);
+
+ /* Ensure file system added at least one file handle */
+ if (flp->lg_fh_length <= 0) {
+ dprintk("%s: File Layout has no file handles!!\n", __func__);
+ error = -NFS4ERR_LAYOUTUNAVAILABLE;
+ goto out;
+ }
+
+ /* Ensure room for len, devid, util, first_stripe_index,
+ * pattern_offset, number of filehandles */
+ p = layoutlen_p = exp_xdr_reserve_qwords(xdr, 1+2+2+1+1+2+1);
+ if (!p) {
+ error = -ETOOSMALL;
+ goto out;
+ }
+
+ /* save spot for opaque file layout length, fill-in later*/
+ p++;
+
+ /* encode device id */
+ p = exp_xdr_encode_u64(p, flp->device_id.fsid);
+ p = exp_xdr_encode_u64(p, flp->device_id.devid);
+
+ /* set and encode flags */
+ nfl_util = flp->lg_stripe_unit;
+ if (flp->lg_commit_through_mds)
+ nfl_util |= NFL4_UFLG_COMMIT_THRU_MDS;
+ if (flp->lg_stripe_type == STRIPE_DENSE)
+ nfl_util |= NFL4_UFLG_DENSE;
+ p = exp_xdr_encode_u32(p, nfl_util);
+
+ /* encode first stripe index */
+ p = exp_xdr_encode_u32(p, flp->lg_first_stripe_index);
+
+ /* encode striping pattern start */
+ p = exp_xdr_encode_u64(p, flp->lg_pattern_offset);
+
+ /* encode number of file handles */
+ p = exp_xdr_encode_u32(p, flp->lg_fh_length);
+
+ /* encode file handles */
+ for (i = 0; i < flp->lg_fh_length; i++) {
+ fhlen = flp->lg_fh_list[i].fh_size;
+ p = exp_xdr_reserve_space(xdr, 4 + fhlen);
+ if (!p) {
+ error = -ETOOSMALL;
+ goto out;
+ }
+ p = exp_xdr_encode_opaque(p, &flp->lg_fh_list[i].fh_base, fhlen);
+ }
+
+ /* Set number of bytes encoded = total_bytes_encoded - length var */
+ len = (char *)p - (char *)layoutlen_p;
+ exp_xdr_encode_u32(layoutlen_p, len - 4);
+
+ error = 0;
+out:
+ dprintk("%s: End err %d xdrlen %d\n",
+ __func__, error, len);
+ return error;
+}
+EXPORT_SYMBOL(filelayout_encode_layout);
diff --git a/include/linux/nfsd/nfs4layoutxdr.h b/include/linux/nfsd/nfs4layoutxdr.h
index 5da0c74..26fddd5 100644
--- a/include/linux/nfsd/nfs4layoutxdr.h
+++ b/include/linux/nfsd/nfs4layoutxdr.h
@@ -35,6 +35,7 @@
#define NFSD_NFS4LAYOUTXDR_H

#include <linux/sunrpc/xdr.h>
+#include <linux/nfsd/nfsd4_pnfs.h>

/* the nfsd4_pnfs_devlist dev_addr for the file layout type */
struct pnfs_filelayout_devaddr {
@@ -55,4 +56,21 @@ struct pnfs_filelayout_device {
struct pnfs_filelayout_multipath *fl_device_list;
};

+struct pnfs_filelayout_layout {
+ u32 lg_layout_type; /* response */
+ u32 lg_stripe_type; /* response */
+ u32 lg_commit_through_mds; /* response */
+ u64 lg_stripe_unit; /* response */
+ u64 lg_pattern_offset; /* response */
+ u32 lg_first_stripe_index; /* response */
+ struct nfsd4_pnfs_deviceid device_id; /* response */
+ u32 lg_fh_length; /* response */
+ struct knfsd_fh *lg_fh_list; /* response */
+};
+
+enum stripetype4 {
+ STRIPE_SPARSE = 1,
+ STRIPE_DENSE = 2
+};
+
#endif /* NFSD_NFS4LAYOUTXDR_H */
--
1.6.5.1


2009-12-07 09:32:10

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 15/35] pnfsd: Helper functions for layout stateid processing.

From: Andy Adamson <[email protected]>

Add a list of per clientid reference counted layout state structures (struct
nfs4_layout_state) to struct nfs4_file to track the layout stateid usage.

A struct nfs4_layout_state is created upon first LAYOUTGET operation from a
clientid on the file. Each successful LAYOUTGET from a clientid on the file
that adds a nfsd4_layout_seg bumps the struct nfs4_layout_state reference
count, and the nfsd4_layout_seg is added to the nfs4_layout_state
ls_state list.

LAYOUTRETURNs that remove nfsd4_layout_segs decrement the nfs4_layout_state
reference count. A struct nfs4_layout_state is reaped when its reference
count goes to zero and the ls_state list is empty.

A delegation stateid is identified by a zero si_fileid field, an open/lock
stateid has non-zero si_stateownerid and si_fileid field, a layout stateid
is identified by a zero si_stateownerid field. The layout stateid
si_fileid field is used (reset) as a uniqifier.

NOTE: A design point of this architecture was to leave the nfs4_file fi_layouts
list alone so as not to disturb server implementations just prior to
Connectathon. We might want to get rid of the fi_layouts list and just
use the fi_layout_state list.

Use a spin_lock rather than the state_lock mutex for protecting
the layout and layoutrecall state.

[Moved pnfsd code from nfs4state.c to nfs4pnfsd.c]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: use a spinlock for layout state]
Signed-off-by: Benny Halevy <[email protected]>
[removed nfs4_layout_lock and nfs4_layout_unlock]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: Move pnfsd code out of nfs4state.c/h]
Signed-off-by: Boaz Harrosh <[email protected]>
[pnfsd: clean up layoutget export API]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfs4pnfsd.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++--
fs/nfsd/nfs4state.c | 1 +
fs/nfsd/pnfsd.h | 12 ++++++
fs/nfsd/state.h | 1 +
4 files changed, 109 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfs4pnfsd.c b/fs/nfsd/nfs4pnfsd.c
index b0794e3..b02daad 100644
--- a/fs/nfsd/nfs4pnfsd.c
+++ b/fs/nfsd/nfs4pnfsd.c
@@ -25,11 +25,19 @@

#define NFSDDBG_FACILITY NFSDDBG_PROC

+/* Globals */
+static u32 current_layoutid = 1;
+
/*
* Layout state - NFSv4.1 pNFS
*/
static struct kmem_cache *pnfs_layout_slab;

+/*
+ * Currently used for manipulating the layout state.
+ */
+static DEFINE_SPINLOCK(layout_lock);
+
void
nfsd4_free_pnfs_slabs(void)
{
@@ -46,6 +54,80 @@ nfsd4_init_pnfs_slabs(void)
return 0;
}

+static struct nfs4_layout_state *
+alloc_init_layout_state(struct nfs4_client *clp, struct nfs4_file *fp,
+ stateid_t *stateid)
+{
+ struct nfs4_layout_state *new;
+
+ /* FIXME: use a kmem_cache */
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return new;
+ get_nfs4_file(fp);
+ INIT_LIST_HEAD(&new->ls_perfile);
+ INIT_LIST_HEAD(&new->ls_layouts);
+ kref_init(&new->ls_ref);
+ new->ls_client = clp;
+ new->ls_file = fp;
+ new->ls_stateid.si_boot = stateid->si_boot;
+ new->ls_stateid.si_stateownerid = 0; /* identifies layout stateid */
+ new->ls_stateid.si_generation = 1;
+ spin_lock(&layout_lock);
+ new->ls_stateid.si_fileid = current_layoutid++;
+ list_add(&new->ls_perfile, &fp->fi_layout_states);
+ spin_unlock(&layout_lock);
+ return new;
+}
+
+static inline void
+get_layout_state(struct nfs4_layout_state *ls)
+{
+ kref_get(&ls->ls_ref);
+}
+
+static void
+destroy_layout_state_common(struct nfs4_layout_state *ls)
+{
+ struct nfs4_file *fp = ls->ls_file;
+
+ dprintk("pNFS %s: ls %p fp %p clp %p\n", __func__, ls, fp,
+ ls->ls_client);
+ BUG_ON(!list_empty(&ls->ls_layouts));
+ kfree(ls);
+ put_nfs4_file(fp);
+}
+
+static void
+destroy_layout_state(struct kref *kref)
+{
+ struct nfs4_layout_state *ls =
+ container_of(kref, struct nfs4_layout_state, ls_ref);
+
+ spin_lock(&layout_lock);
+ list_del(&ls->ls_perfile);
+ spin_unlock(&layout_lock);
+ destroy_layout_state_common(ls);
+}
+
+static void
+destroy_layout_state_locked(struct kref *kref)
+{
+ struct nfs4_layout_state *ls =
+ container_of(kref, struct nfs4_layout_state, ls_ref);
+
+ list_del(&ls->ls_perfile);
+ destroy_layout_state_common(ls);
+}
+
+static inline void
+put_layout_state(struct nfs4_layout_state *ls)
+{
+ dprintk("pNFS %s: ls %p ls_ref %d\n", __func__, ls,
+ atomic_read(&ls->ls_ref.refcount));
+ kref_put(&ls->ls_ref, destroy_layout_state);
+}
+
static inline struct nfs4_layout *
alloc_layout(void)
{
@@ -59,21 +141,27 @@ free_layout(struct nfs4_layout *lp)
}

static void
-init_layout(struct nfs4_layout *lp,
+init_layout(struct nfs4_layout_state *ls,
+ struct nfs4_layout *lp,
struct nfs4_file *fp,
struct nfs4_client *clp,
struct svc_fh *current_fh,
struct nfsd4_layout_seg *seg)
{
- dprintk("pNFS %s: lp %p clp %p fp %p ino %p\n", __func__,
- lp, clp, fp, fp->fi_inode);
+ dprintk("pNFS %s: ls %p lp %p clp %p fp %p ino %p\n", __func__,
+ ls, lp, clp, fp, fp->fi_inode);

get_nfs4_file(fp);
lp->lo_client = clp;
lp->lo_file = fp;
+ get_layout_state(ls);
+ lp->lo_state = ls;
memcpy(&lp->lo_seg, seg, sizeof(lp->lo_seg));
+ spin_lock(&layout_lock);
+ list_add_tail(&lp->lo_perstate, &ls->ls_layouts);
list_add_tail(&lp->lo_perclnt, &clp->cl_layouts);
list_add_tail(&lp->lo_perfile, &fp->fi_layouts);
+ spin_unlock(&layout_lock);
dprintk("pNFS %s end\n", __func__);
}

@@ -159,6 +247,7 @@ merge_layout(struct nfs4_file *fp,
{
struct nfs4_layout *lp = NULL;

+ spin_lock(&layout_lock);
list_for_each_entry (lp, &fp->fi_layouts, lo_perfile)
if (lp->lo_seg.layout_type == seg->layout_type &&
lp->lo_seg.clientid == seg->clientid &&
@@ -167,6 +256,7 @@ merge_layout(struct nfs4_file *fp,
extend_layout(&lp->lo_seg, seg);
break;
}
+ spin_unlock(&layout_lock);

return lp;
}
@@ -182,6 +272,7 @@ nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *lgp,
struct nfs4_file *fp;
struct nfs4_client *clp;
struct nfs4_layout *lp = NULL;
+ struct nfs4_layout_state *ls = NULL;
struct nfsd4_pnfs_layoutget_arg args = {
.lg_minlength = lgp->lg_minlength,
.lg_fsid = lgp->lg_fhp->fh_export->ex_fsid,
@@ -259,7 +350,7 @@ nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *lgp,
goto out_freelayout;

/* Can't merge, so let's initialize this new layout */
- init_layout(lp, fp, clp, lgp->lg_fhp, &res.lg_seg);
+ init_layout(ls, lp, fp, clp, lgp->lg_fhp, &res.lg_seg);
out:
if (fp)
put_nfs4_file(fp);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index cea0edc..9646240 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1725,6 +1725,7 @@ alloc_init_file(struct inode *ino, struct svc_fh *current_fh)
INIT_LIST_HEAD(&fp->fi_delegations);
#if defined(CONFIG_PNFSD)
INIT_LIST_HEAD(&fp->fi_layouts);
+ INIT_LIST_HEAD(&fp->fi_layout_states);
#endif /* CONFIG_PNFSD */
spin_lock(&recall_lock);
list_add(&fp->fi_hash, &file_hashtbl[hashval]);
diff --git a/fs/nfsd/pnfsd.h b/fs/nfsd/pnfsd.h
index 04d713f..523b149 100644
--- a/fs/nfsd/pnfsd.h
+++ b/fs/nfsd/pnfsd.h
@@ -40,12 +40,24 @@
#include <state.h>
#include <xdr4.h>

+/* outstanding layout stateid */
+struct nfs4_layout_state {
+ struct list_head ls_perfile;
+ struct list_head ls_layouts; /* list of nfs4_layouts */
+ struct kref ls_ref;
+ struct nfs4_client *ls_client;
+ struct nfs4_file *ls_file;
+ stateid_t ls_stateid;
+};
+
/* outstanding layout */
struct nfs4_layout {
struct list_head lo_perfile; /* hash by f_id */
struct list_head lo_perclnt; /* hash by clientid */
+ struct list_head lo_perstate;
struct nfs4_file *lo_file; /* backpointer */
struct nfs4_client *lo_client;
+ struct nfs4_layout_state *lo_state;
struct nfsd4_layout_seg lo_seg;
};

diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 44b25d2..23d62f4 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -328,6 +328,7 @@ struct nfs4_file {
struct list_head fi_delegations;
#if defined(CONFIG_PNFSD)
struct list_head fi_layouts;
+ struct list_head fi_layout_states;
#endif /* CONFIG_PNFSD */
struct inode *fi_inode;
u32 fi_id; /* used with stateowner->so_id
--
1.6.5.1


2009-12-07 09:33:20

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 16/35] pnfsd: helper function for stateid checking

From: Andy Adamson <[email protected]>

Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfs4state.c | 27 +++++++++++++++------------
1 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9646240..3e30f91 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2716,6 +2716,16 @@ STALE_STATEID(stateid_t *stateid)
return 0;
}

+static __be32
+nfs4_check_stateid(stateid_t *stateid)
+{
+ if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
+ return nfserr_bad_stateid;
+ if (STALE_STATEID(stateid))
+ return nfserr_stale_stateid;
+ return 0;
+}
+
static int
EXPIRED_STATEID(stateid_t *stateid)
{
@@ -2930,13 +2940,9 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
*stpp = NULL;
*sopp = NULL;

- if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
- dprintk("NFSD: preprocess_seqid_op: magic stateid!\n");
- return nfserr_bad_stateid;
- }
-
- if (STALE_STATEID(stateid))
- return nfserr_stale_stateid;
+ status = nfs4_check_stateid(stateid);
+ if (status)
+ return status;

if (nfsd4_has_session(cstate))
flags |= HAS_SESSION;
@@ -3211,11 +3217,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (nfsd4_has_session(cstate))
flags |= HAS_SESSION;
nfs4_lock_state();
- status = nfserr_bad_stateid;
- if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
- goto out;
- status = nfserr_stale_stateid;
- if (STALE_STATEID(stateid))
+ status = nfs4_check_stateid(stateid);
+ if (status)
goto out;
status = nfserr_bad_stateid;
if (!is_delegation_stateid(stateid))
--
1.6.5.1


2009-12-07 09:33:33

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 17/35] pnfsd: process the layout stateid

From: Andy Adamson <[email protected]>

Common function for LAYOUTGET and LAYOUTRETURN layout stateid processing.

The 'first open, delegation, or lock stateid' presented by the client is
looked up for verification.

Both initial and non-initial parallel LAYOUTGET operations and parallel
LAYOUTRETURN operations are supported.

Note: layout stateid seqid checking is more lax than that specified in
draft-ietf-nfsv4-minorversion1-22 for Connectathon.

Take a reference count whenever the pointer to the layout state
is kept, in particular when the layout structure is listed on the
state's ls_layouts. On dequeue_layout the layout state is being put
and its reference count will drop to zero if the list empties
unless someone's holding a reference transiently within the scope
of the calling function, in which case the layout state is dereferenced
before the function exits.

Signed-off-by: Andy Adamson<[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: nfs4_process_layout_stateid print result stateid conditionally]
[pnfsd: use STATEID_FMT and STATEID_VAL for printing stateids]
[pnfsd: debug print layout stateid before putting the layout_state]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: fix layout state reference count]
Signed-off-by: Benny Halevy <[email protected]>
[used nfs4_check_stateid in nfs4_process_layout_stateid]
[Moved pnfsd code from nfs4state.c to nfs4pnfsd.c]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: use a spinlock for layout state]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: Move pnfsd code out of nfs4state.c/h]
Signed-off-by: Boaz Harrosh <[email protected]>
[moved defs back into state.h]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfs4pnfsd.c | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/nfs4state.c | 10 ++--
fs/nfsd/state.h | 3 +
3 files changed, 158 insertions(+), 6 deletions(-)

diff --git a/fs/nfsd/nfs4pnfsd.c b/fs/nfsd/nfs4pnfsd.c
index b02daad..a72fe19 100644
--- a/fs/nfsd/nfs4pnfsd.c
+++ b/fs/nfsd/nfs4pnfsd.c
@@ -128,6 +128,155 @@ put_layout_state(struct nfs4_layout_state *ls)
kref_put(&ls->ls_ref, destroy_layout_state);
}

+/*
+ * Search the fp->fi_layout_state list for a layout state with the clientid.
+ * If not found, then this is a 'first open/delegation/lock stateid' from
+ * the client for this file.
+ * Called under the layout_lock.
+ */
+static struct nfs4_layout_state *
+find_get_layout_state(struct nfs4_client *clp, struct nfs4_file *fp)
+{
+ struct nfs4_layout_state *ls;
+
+ list_for_each_entry(ls, &fp->fi_layout_states, ls_perfile) {
+ if (ls->ls_client == clp) {
+ dprintk("pNFS %s: before GET ls %p ls_ref %d\n",
+ __func__, ls,
+ atomic_read(&ls->ls_ref.refcount));
+ get_layout_state(ls);
+ return ls;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Returns 0 if stateid is found as an open/lock stateid or as a delegation
+ * stateid on fp's inode, nfserr_bad_stateid (a __be32 status) otherwise.
+ */
+static __be32
+verify_stateid(struct nfs4_file *fp, stateid_t *stateid)
+{
+ /* check if open or lock stateid */
+ if (find_stateid(stateid, RD_STATE))
+ return 0;
+ /* otherwise check for a delegation stateid on this file's inode */
+ if (find_delegation_stateid(fp->fi_inode, stateid))
+ return 0;
+ return nfserr_bad_stateid;
+}
+
+/*
+ * nfs4_process_layout_stateid ()
+ *
+ * We have looked up the nfs4_file corresponding to the current_fh, and
+ * confirmed the clientid. Pull the few tests from nfs4_preprocess_stateid_op()
+ * that make sense with a layout stateid.
+ *
+ * Called with the state_lock held
+ * Returns zero and stateid is updated, or error.
+ *
+ * Note: the struct nfs4_layout_state pointer is only set by layoutget.
+ */
+static __be32
+nfs4_process_layout_stateid(struct nfs4_client *clp, struct nfs4_file *fp,
+ stateid_t *stateid, struct nfs4_layout_state **lsp)
+{
+ struct nfs4_layout_state *ls = NULL;
+ __be32 status = 0;
+
+ dprintk("--> %s clp %p fp %p \n", __func__, clp, fp);
+
+ dprintk("%s: operation stateid=" STATEID_FMT "\n", __func__,
+ STATEID_VAL(stateid));
+
+ status = nfs4_check_stateid(stateid);
+ if (status)
+ goto out;
+
+ /* Is this the first use of this layout ? */
+ spin_lock(&layout_lock);
+ ls = find_get_layout_state(clp, fp);
+ spin_unlock(&layout_lock);
+ if (!ls) {
+ /* Only alloc layout state on layoutget (which sets lsp). */
+ if (!lsp) {
+ dprintk("%s ERROR: Not layoutget & no layout stateid\n",
+ __func__);
+ status = nfserr_bad_stateid;
+ goto out;
+ }
+ dprintk("%s Initial stateid for layout: file %p client %p\n",
+ __func__, fp, clp);
+
+ /* verify input stateid; status is __be32, so test for non-zero */
+ status = verify_stateid(fp, stateid);
+ if (status) {
+ dprintk("%s ERROR: invalid open/deleg/lock stateid\n",
+ __func__);
+ goto out;
+ }
+ ls = alloc_init_layout_state(clp, fp, stateid);
+ if (!ls) {
+ dprintk("%s pNFS ERROR: no memory for layout state\n",
+ __func__);
+ status = nfserr_resource;
+ goto out;
+ }
+ } else {
+ dprintk("%s Not initial stateid. Layout state %p file %p\n",
+ __func__, ls, fp);
+
+ /* BAD STATEID */
+ status = nfserr_bad_stateid;
+ if (memcmp(&ls->ls_stateid.si_opaque, &stateid->si_opaque,
+ sizeof(stateid_opaque_t)) != 0) {
+
+ /* if a LAYOUTGET operation and stateid is a valid
+ * open/deleg/lock stateid, accept it as a parallel
+ * initial layout stateid
+ */
+ if (lsp && ((verify_stateid(fp, stateid)) == 0)) {
+ dprintk("%s parallel initial layout state\n",
+ __func__);
+ goto update;
+ }
+
+ dprintk("%s ERROR bad opaque in stateid 1\n", __func__);
+ goto out_put;
+ }
+
+ /* stateid is a valid layout stateid for this file. */
+ if (stateid->si_generation > ls->ls_stateid.si_generation) {
+ dprintk("%s bad stateid 1\n", __func__);
+ goto out_put;
+ }
+update:
+ update_stateid(&ls->ls_stateid);
+ dprintk("%s Updated ls_stateid to %d on layoutstate %p\n",
+ __func__, ls->ls_stateid.si_generation, ls);
+ }
+ status = 0;
+ /* Set the stateid to be encoded */
+ memcpy(stateid, &ls->ls_stateid, sizeof(stateid_t));
+
+ /* Return the layout state if requested */
+ if (lsp) {
+ get_layout_state(ls);
+ *lsp = ls;
+ }
+ dprintk("%s: layout stateid=" STATEID_FMT "\n", __func__,
+ STATEID_VAL(&ls->ls_stateid));
+out_put:
+ dprintk("%s PUT LO STATE:\n", __func__);
+ put_layout_state(ls);
+out:
+ dprintk("<-- %s status %d\n", __func__, ntohl(status));
+
+ return status;
+}
+
static inline struct nfs4_layout *
alloc_layout(void)
{
@@ -352,6 +501,8 @@ nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *lgp,
/* Can't merge, so let's initialize this new layout */
init_layout(ls, lp, fp, clp, lgp->lg_fhp, &res.lg_seg);
out:
+ if (ls)
+ put_layout_state(ls);
if (fp)
put_nfs4_file(fp);
nfs4_unlock_state();
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3e30f91..1731b35 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -61,8 +61,6 @@ static u64 current_sessionid = 1;
#define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t)))

/* forward declarations */
-static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
-static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
static void nfs4_set_recdir(char *recdir);

@@ -2704,7 +2702,7 @@ nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp)
return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_path.dentry->d_inode;
}

-static int
+int
STALE_STATEID(stateid_t *stateid)
{
if (time_after((unsigned long)boot_time,
@@ -2716,7 +2714,7 @@ STALE_STATEID(stateid_t *stateid)
return 0;
}

-static __be32
+__be32
nfs4_check_stateid(stateid_t *stateid)
{
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
@@ -3265,7 +3263,7 @@ static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE];
static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];

-static struct nfs4_stateid *
+struct nfs4_stateid *
find_stateid(stateid_t *stid, int flags)
{
struct nfs4_stateid *local;
@@ -3294,7 +3292,7 @@ find_stateid(stateid_t *stid, int flags)
return NULL;
}

-static struct nfs4_delegation *
+struct nfs4_delegation *
find_delegation_stateid(struct inode *ino, stateid_t *stid)
{
struct nfs4_file *fp;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 23d62f4..cde091a 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -416,6 +416,9 @@ extern struct nfs4_file *find_alloc_file(struct inode *, struct svc_fh *);
extern void put_nfs4_file(struct nfs4_file *);
extern void get_nfs4_file(struct nfs4_file *);
extern struct nfs4_client *find_confirmed_client(clientid_t *);
+extern struct nfs4_stateid *find_stateid(stateid_t *, int flags);
+extern struct nfs4_delegation *find_delegation_stateid(struct inode *, stateid_t *);
+extern __be32 nfs4_check_stateid(stateid_t *);

#if defined(CONFIG_PNFSD)
extern int nfsd4_init_pnfs_slabs(void);
--
1.6.5.1


2009-12-07 09:32:50

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 18/35] pnfsd: add helper functions for identifying DS stateids.

Benny's suggestion for more clearly spelling out how a filehandle's
fh_fsid_type is overloaded to indicate that a stateid is from a DS and needs
to be validated by the MDS.

[pnfsd: define and use FSID_MAX in enum nfsd_fsid]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: add helper functions for identifying DS stateids.]
Signed-off-by: David M. Richter <[email protected]>
[define a no-op version of pnfs_fh_is_ds for !CONFIG_PNFSD]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfsfh.c | 7 +++++--
include/linux/nfsd/nfsd4_pnfs.h | 39 +++++++++++++++++++++++++++++++++++++++
include/linux/nfsd/nfsfh.h | 1 +
3 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 5693f68..faa79d5 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -12,6 +12,7 @@
#include <linux/exportfs.h>

#include <linux/sunrpc/svcauth_gss.h>
+#include <linux/nfsd/nfsd4_pnfs.h>
#include "nfsd.h"
#include "vfs.h"
#include "auth.h"
@@ -139,6 +140,7 @@ static inline __be32 check_pseudo_root(struct svc_rqst *rqstp,
static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
{
struct knfsd_fh *fh = &fhp->fh_handle;
+ int fsid_type;
struct fid *fid = NULL, sfid;
struct svc_export *exp;
struct dentry *dentry;
@@ -159,7 +161,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
return error;
if (fh->fh_auth_type != 0)
return error;
- len = key_len(fh->fh_fsid_type) / 4;
+ fsid_type = pnfs_fh_fsid_type(fh);
+ len = key_len(fsid_type) / 4;
if (len == 0)
return error;
if (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
@@ -172,7 +175,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
data_left -= len;
if (data_left < 0)
return error;
- exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth);
+ exp = rqst_exp_find(rqstp, fsid_type, fh->fh_auth);
fid = (struct fid *)(fh->fh_auth + len);
} else {
__u32 tfh[2];
diff --git a/include/linux/nfsd/nfsd4_pnfs.h b/include/linux/nfsd/nfsd4_pnfs.h
index be15b7f..b80ff01 100644
--- a/include/linux/nfsd/nfsd4_pnfs.h
+++ b/include/linux/nfsd/nfsd4_pnfs.h
@@ -36,6 +36,7 @@

#include <linux/exportfs.h>
#include <linux/exp_xdr.h>
+#include <linux/nfsd/nfsfh.h>

struct nfsd4_pnfs_deviceid {
u64 fsid; /* filesystem ID */
@@ -133,4 +134,42 @@ struct pnfs_export_operations {
int (*can_merge_layouts) (u32 layout_type);
};

+#if defined(CONFIG_PNFSD)
+
+/*
+ * fh_fsid_type is overloaded to indicate whether a filehandle was one supplied
+ * to a DS by LAYOUTGET. nfs4_preprocess_stateid_op() uses this to decide how
+ * to handle a given stateid.
+ */
+static inline int pnfs_fh_is_ds(struct knfsd_fh *fh)
+{
+ return fh->fh_fsid_type >= FSID_MAX;
+}
+
+static inline void pnfs_fh_mark_ds(struct knfsd_fh *fh)
+{
+ BUG_ON(fh->fh_version != 1);
+ BUG_ON(pnfs_fh_is_ds(fh));
+ fh->fh_fsid_type += FSID_MAX;
+}
+
+#else /* CONFIG_PNFSD */
+
+static inline int pnfs_fh_is_ds(struct knfsd_fh *fh)
+{
+ return 0;
+}
+
+#endif /* CONFIG_PNFSD */
+
+/* allows fh_verify() to check the real fsid_type (i.e., not overloaded). */
+static inline int pnfs_fh_fsid_type(struct knfsd_fh *fh)
+{
+ int fsid_type = fh->fh_fsid_type;
+
+ if (pnfs_fh_is_ds(fh))
+ return fsid_type - FSID_MAX;
+ return fsid_type;
+}
+
#endif /* _LINUX_NFSD_NFSD4_PNFS_H */
diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index 49523ed..8ffa986 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -171,6 +171,7 @@ enum nfsd_fsid {
FSID_UUID8,
FSID_UUID16,
FSID_UUID16_INUM,
+ FSID_MAX
};

enum fsid_source {
--
1.6.5.1


2009-12-07 09:33:03

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 19/35] pnfsd: accept all ds stateids

From: Andy Adamson <[email protected]>

Until a stateid protocol is implemented, remove all checking on
file layout data server stateids

Signed-off-by: Andy Adamson <[email protected]>
[remove #ifdef around pnfs_fh_is_ds]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfs4state.c | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1731b35..0cd563e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2858,6 +2858,9 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
if (grace_disallows_io(ino))
return nfserr_grace;

+ if (pnfs_fh_is_ds(&current_fh->fh_handle))
+ return 0;
+
if (nfsd4_has_session(cstate))
flags |= HAS_SESSION;

--
1.6.5.1


2009-12-07 09:33:16

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 20/35] pnfsd: LAYOUTGET layout stateid processing

From: Andy Adamson <[email protected]>

[Moved pnfsd code from nfs4state.c to nfs4pnfsd.c]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: clean up layoutget export API]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfs4pnfsd.c | 5 +++++
fs/nfsd/nfs4xdr.c | 2 +-
2 files changed, 6 insertions(+), 1 deletions(-)

diff --git a/fs/nfsd/nfs4pnfsd.c b/fs/nfsd/nfs4pnfsd.c
index a72fe19..9543e96 100644
--- a/fs/nfsd/nfs4pnfsd.c
+++ b/fs/nfsd/nfs4pnfsd.c
@@ -443,6 +443,11 @@ nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *lgp,
if (!fp || !clp)
goto out;

+ /* Check decoded layout stateid */
+ status = nfs4_process_layout_stateid(clp, fp, &lgp->lg_sid, &ls);
+ if (status)
+ goto out;
+
/* pre-alloc layout in case we can't merge after we call
* the file system
*/
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 949e92d..2760564 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3369,7 +3369,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp,
if (xdr.end - xdr.p > exp_xdr_qwords(maxcount & ~3))
xdr.end = xdr.p + exp_xdr_qwords(maxcount & ~3);

- /* Retrieve, encode, and merge layout */
+ /* Retrieve, encode, and merge layout; process stateid */
nfserr = nfs4_pnfs_get_layout(lgp, &xdr);
if (nfserr)
goto err;
--
1.6.5.1


2009-12-07 09:34:27

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 21/35] pnfsd: destroy layout on expire_client

[extracted from pnfsd: Initial pNFS server implementation.]
Signed-off-by: Benny Halevy <[email protected]>
[Moved pnfsd code from nfs4state.c to nfs4pnfsd.c]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: use a spinlock for layout state]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: expire_client code cleanup]
[pnfsd: expire_client code cleanup]
Signed-off-by: Boaz Harrosh <[email protected]>
[Moved pnfsd code from nfs4state.c to nfs4pnfsd.c]
[removed nfs4_layout_lock and nfs4_layout_unlock]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: Move pnfsd code out of nfs4state.c/h]
Signed-off-by: Boaz Harrosh <[email protected]>
[moved defs back into state.h]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfs4pnfsd.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/nfs4state.c | 3 ++
fs/nfsd/state.h | 2 +
3 files changed, 58 insertions(+), 0 deletions(-)

diff --git a/fs/nfsd/nfs4pnfsd.c b/fs/nfsd/nfs4pnfsd.c
index 9543e96..f475b3c 100644
--- a/fs/nfsd/nfs4pnfsd.c
+++ b/fs/nfsd/nfs4pnfsd.c
@@ -128,6 +128,14 @@ put_layout_state(struct nfs4_layout_state *ls)
kref_put(&ls->ls_ref, destroy_layout_state);
}

+static inline void
+put_layout_state_locked(struct nfs4_layout_state *ls)
+{
+ dprintk("pNFS %s: ls %p ls_ref %d\n", __func__, ls,
+ atomic_read(&ls->ls_ref.refcount));
+ kref_put(&ls->ls_ref, destroy_layout_state_locked);
+}
+
/*
* Search the fp->fi_layout_state list for a layout state with the clientid.
* If not found, then this is a 'first open/delegation/lock stateid' from
@@ -314,6 +322,35 @@ init_layout(struct nfs4_layout_state *ls,
dprintk("pNFS %s end\n", __func__);
}

+static void
+dequeue_layout(struct nfs4_layout *lp)
+{
+ list_del(&lp->lo_perclnt);
+ list_del(&lp->lo_perfile);
+ list_del(&lp->lo_perstate);
+}
+
+static void
+destroy_layout(struct nfs4_layout *lp)
+{
+ struct nfs4_client *clp;
+ struct nfs4_file *fp;
+ struct nfs4_layout_state *ls;
+
+ dequeue_layout(lp);
+ clp = lp->lo_client;
+ fp = lp->lo_file;
+ ls = lp->lo_state;
+ dprintk("pNFS %s: lp %p clp %p fp %p ino %p ls_layouts empty %d\n",
+ __func__, lp, clp, fp, fp->fi_inode,
+ list_empty(&ls->ls_layouts));
+
+ kmem_cache_free(pnfs_layout_slab, lp);
+ /* release references taken by init_layout */
+ put_layout_state_locked(ls);
+ put_nfs4_file(fp);
+}
+
/*
* are two octet ranges overlapping?
* start1 last1
@@ -517,3 +554,19 @@ out_freelayout:
free_layout(lp);
goto out;
}
+
+void pnfs_expire_client(struct nfs4_client *clp)
+{
+ struct nfs4_layout *lp;
+
+ spin_lock(&layout_lock);
+ while (!list_empty(&clp->cl_layouts)) {
+ lp = list_entry(clp->cl_layouts.next, struct nfs4_layout,
+ lo_perclnt);
+ dprintk("NFSD: expire client. lp %p, fp %p\n", lp,
+ lp->lo_file);
+ BUG_ON(lp->lo_client != clp);
+ destroy_layout(lp);
+ }
+ spin_unlock(&layout_lock);
+}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0cd563e..1833ddf 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -742,6 +742,9 @@ expire_client(struct nfs4_client *clp)
list_del(&clp->cl_idhash);
list_del(&clp->cl_strhash);
list_del(&clp->cl_lru);
+
+ pnfs_expire_client(clp);
+
while (!list_empty(&clp->cl_openowners)) {
sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
release_openowner(sop);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index cde091a..9f68fd2 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -423,9 +423,11 @@ extern __be32 nfs4_check_stateid(stateid_t *);
#if defined(CONFIG_PNFSD)
extern int nfsd4_init_pnfs_slabs(void);
extern void nfsd4_free_pnfs_slabs(void);
+extern void pnfs_expire_client(struct nfs4_client *);
#else /* CONFIG_PNFSD */
static inline void nfsd4_free_pnfs_slabs(void) {}
static inline int nfsd4_init_pnfs_slabs(void) { return 0; }
+static inline void pnfs_expire_client(struct nfs4_client *clp) {}
#endif /* CONFIG_PNFSD */

static inline void
--
1.6.5.1


2009-12-07 09:34:41

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 22/35] pnfsd: support layout_type attribute

Provide for getting the (read-only) layout_type attribute

[extracted from pnfsd: Initial pNFS server implementation.]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: Add super block to layout_type()]
Signed-off-by: Marc Eshel <[email protected]>
[pnfsd: convert generic code to use new pnfs api]
Signed-off-by: Benny Halevy <[email protected]>
[Remove the use of struct pnfs_export_operations.]
[pnfsd: support layout_type attribute all layout types]
[pnfsd: check ex_pnfs in nfsd4_verify_layout]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: handle s_pnfs_op==NULL]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfs4xdr.c | 24 ++++++++++++++++++++++++
fs/nfsd/nfsd.h | 5 +++++
2 files changed, 29 insertions(+), 0 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2760564..40c794a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2196,6 +2196,30 @@ out_acl:
}
WRITE64(stat.ino);
}
+#if defined(CONFIG_PNFSD)
+ if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) {
+ struct super_block *sb = dentry->d_inode->i_sb;
+ int type = 0;
+
+ /* Query the filesystem for supported pNFS layout types.
+ * Currently, we only support one layout type per file system.
+ * The export_ops->layout_type() returns the pnfs_layouttype4.
+ */
+ buflen -= 4;
+ if (buflen < 0) /* length */
+ goto out_resource;
+
+ if (sb && sb->s_pnfs_op)
+ type = sb->s_pnfs_op->layout_type(sb);
+ if (type) {
+ if ((buflen -= 4) < 0) /* type */
+ goto out_resource;
+ WRITE32(1); /* length */
+ WRITE32(type); /* type */
+ } else
+ WRITE32(0); /* length */
+ }
+#endif /* CONFIG_PNFSD */
if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
WRITE32(3);
WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index ac121ad..a402854 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -287,8 +287,13 @@ extern struct timeval nfssvc_boot;
#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
NFSD4_SUPPORTED_ATTRS_WORD0

+#if defined(CONFIG_PNFSD)
+#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
+ (NFSD4_SUPPORTED_ATTRS_WORD1 | FATTR4_WORD1_FS_LAYOUT_TYPES)
+#else /* CONFIG_PNFSD */
#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
NFSD4_SUPPORTED_ATTRS_WORD1
+#endif /* CONFIG_PNFSD */

#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
--
1.6.5.1


2009-12-07 09:33:57

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 23/35] pnfsd: per block device dlm data server list cache

From: Andy Adamson <[email protected]>

Simple linked list cache of per block device dlm pnfs data servers.

[pnfsd: define dlm export ops for the !CONFIG_PNFSD case]
[pnfsd: fix pnfs_dlm_device string parsing]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: more fixes for pnfs_dlm_device string parsing]
Signed-off-by: Benny Halevy <[email protected]>
[restricted use of CONFIG_PNFSD]
[use NFSD_DLM_DS_LIST_MAX defined in include/linux/nfsd/nfs4pnfsdlm.h]
Acked-by: Steven Whitehouse <[email protected]>
---
fs/nfsd/Makefile | 2 +-
fs/nfsd/nfs4pnfsdlm.c | 162 ++++++++++++++++++++++++++++++++++++++
fs/nfsd/nfsctl.c | 2 +
include/linux/nfsd/nfs4pnfsdlm.h | 49 ++++++++++++
4 files changed, 214 insertions(+), 1 deletions(-)
create mode 100644 fs/nfsd/nfs4pnfsdlm.c
create mode 100644 include/linux/nfsd/nfs4pnfsdlm.h

diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 4b4214c..ff5b54d 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -11,4 +11,4 @@ nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
nfs4acl.o nfs4callback.o nfs4recover.o
-nfsd-$(CONFIG_PNFSD) += nfs4pnfsd.o
+nfsd-$(CONFIG_PNFSD) += nfs4pnfsd.o nfs4pnfsdlm.o
diff --git a/fs/nfsd/nfs4pnfsdlm.c b/fs/nfsd/nfs4pnfsdlm.c
new file mode 100644
index 0000000..9d91721
--- /dev/null
+++ b/fs/nfsd/nfs4pnfsdlm.c
@@ -0,0 +1,162 @@
+/******************************************************************************
+ *
+ * (c) 2007 Network Appliance, Inc. All Rights Reserved.
+ * (c) 2009 NetApp. All Rights Reserved.
+ *
+ * NetApp provides this source code under the GPL v2 License.
+ * The GPL v2 license is available at
+ * http://opensource.org/licenses/gpl-license.php.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ******************************************************************************/
+
+#include <linux/nfsd/debug.h>
+#include <linux/nfsd/nfs4pnfsdlm.h>
+
+#define NFSDDBG_FACILITY NFSDDBG_PROC
+
+/* Just use a linked list. Do not expect more than 32 dlm_device_entries
+ * the first implementation will just use one device per cluster file system
+ */
+
+static LIST_HEAD(dlm_device_list);
+static DEFINE_SPINLOCK(dlm_device_list_lock);
+
+struct dlm_device_entry {
+ struct list_head dlm_dev_list;
+ char disk_name[DISK_NAME_LEN];
+ int num_ds;
+ char ds_list[NFSD_DLM_DS_LIST_MAX];
+};
+
+/* Return the cached entry whose disk_name equals disk_name, or NULL. */
+static struct dlm_device_entry *nfsd4_find_pnfs_dlm_device(char *disk_name)
+{
+ struct dlm_device_entry *dlm_pdev;
+
+ spin_lock(&dlm_device_list_lock);
+ list_for_each_entry(dlm_pdev, &dlm_device_list, dlm_dev_list) {
+ if (!strncmp(dlm_pdev->disk_name, disk_name, DISK_NAME_LEN)) {
+ spin_unlock(&dlm_device_list_lock);
+ return dlm_pdev;
+ }
+ }
+ spin_unlock(&dlm_device_list_lock);
+ return NULL;
+}
+
+/*
+ * pnfs_dlm_device string format:
+ * block-device-path:<ds1 ipv4 address>,<ds2 ipv4 address>
+ *
+ * Examples
+ * '/dev/sda:192.168.1.96,192.168.1.97' creates a data server list with
+ * two data servers for the dlm cluster file system mounted on /dev/sda.
+ *
+ * '/dev/sda:192.168.1.96,192.168.1.100'
+ * replaces the data server list for /dev/sda
+ *
+ * Only the deviceid == 1 is supported. Can add device id to
+ * pnfs_dlm_device string when needed.
+ *
+ * Only the round robin each data server once stripe index is supported.
+ */
+int
+nfsd4_set_pnfs_dlm_device(char *pnfs_dlm_device, int len)
+{
+ struct dlm_device_entry *new, *found;
+ char *bufp = pnfs_dlm_device;
+ char *endp = bufp + strlen(bufp);
+ int err = -ENOMEM;
+
+ dprintk("--> %s len %d\n", __func__, len);
+
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return err;
+
+ err = -EINVAL;
+ /* disk_name; must leave room for the NUL left by kzalloc */
+ /* FIXME: need to check for valid disk_name. search superblocks?
+ * check for slash dev slash ?
+ */
+ len = strcspn(bufp, ":");
+ if (len >= DISK_NAME_LEN)
+ goto out_free;
+ memcpy(new->disk_name, bufp, len);
+
+ err = -EINVAL;
+ bufp += len + 1;
+ if (bufp >= endp)
+ goto out_free;
+
+ /* data server list; must leave room for the NUL left by kzalloc */
+ /* FIXME: need to check for comma separated valid ip format */
+ len = strcspn(bufp, ":");
+ if (len >= NFSD_DLM_DS_LIST_MAX)
+ goto out_free;
+ memcpy(new->ds_list, bufp, len);
+
+ /* count the number of comma-delimited DS IPs */
+ new->num_ds = 1;
+ while ((bufp = strchr(bufp, ',')) != NULL) {
+ new->num_ds++;
+ bufp++;
+ }
+
+ dprintk("%s disk_name %s num_ds %d ds_list %s\n", __func__,
+ new->disk_name, new->num_ds, new->ds_list);
+
+ found = nfsd4_find_pnfs_dlm_device(new->disk_name);
+ if (found) {
+ /* FIXME: should compare found->ds_list with new->ds_list
+ * and if it is different, kick off a CB_NOTIFY change
+ * deviceid.
+ */
+ dprintk("%s pnfs_dlm_device %s:%s already in cache "
+ " replace ds_list with new ds_list %s\n", __func__,
+ found->disk_name, found->ds_list, new->ds_list);
+ memset(found->ds_list, 0, sizeof(found->ds_list));
+ memcpy(found->ds_list, new->ds_list, strlen(new->ds_list));
+ found->num_ds = new->num_ds;
+ kfree(new);
+ } else {
+ dprintk("%s Adding pnfs_dlm_device %s:%s\n", __func__,
+ new->disk_name, new->ds_list);
+ spin_lock(&dlm_device_list_lock);
+ list_add(&new->dlm_dev_list, &dlm_device_list);
+ spin_unlock(&dlm_device_list_lock);
+ }
+ dprintk("<-- %s Success\n", __func__);
+ return 0;
+
+out_free:
+ kfree(new);
+ dprintk("<-- %s returns %d\n", __func__, err);
+ return err;
+}
+
+void nfsd4_pnfs_dlm_shutdown(void)
+{
+ struct dlm_device_entry *dlm_pdev, *n;
+
+ dprintk("--> %s\n", __func__);
+ /* _safe walk: each entry is unlinked and freed during the traversal */
+ spin_lock(&dlm_device_list_lock);
+ list_for_each_entry_safe(dlm_pdev, n, &dlm_device_list, dlm_dev_list) {
+ list_del(&dlm_pdev->dlm_dev_list);
+ kfree(dlm_pdev);
+ }
+ spin_unlock(&dlm_device_list_lock);
+}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 0415680..a44c1c2 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -14,6 +14,7 @@
#include <linux/nfsd/syscall.h>
#include <linux/lockd/lockd.h>
#include <linux/sunrpc/clnt.h>
+#include <linux/nfsd/nfs4pnfsdlm.h>

#include "nfsd.h"
#include "cache.h"
@@ -1404,6 +1405,7 @@ out_free_stat:
static void __exit exit_nfsd(void)
{
nfsd_export_shutdown();
+ nfsd4_pnfs_dlm_shutdown();
nfsd_reply_cache_shutdown();
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
diff --git a/include/linux/nfsd/nfs4pnfsdlm.h b/include/linux/nfsd/nfs4pnfsdlm.h
new file mode 100644
index 0000000..63248aa
--- /dev/null
+++ b/include/linux/nfsd/nfs4pnfsdlm.h
@@ -0,0 +1,49 @@
+/******************************************************************************
+ *
+ * (c) 2007 Network Appliance, Inc. All Rights Reserved.
+ * (c) 2009 NetApp. All Rights Reserved.
+ *
+ * NetApp provides this source code under the GPL v2 License.
+ * The GPL v2 license is available at
+ * http://opensource.org/licenses/gpl-license.php.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ******************************************************************************/
+#include <linux/genhd.h>
+
+/*
+ * Length of comma separated pnfs data server IPv4 addresses. Enough room for
+ * 32 addresses.
+ */
+#define NFSD_DLM_DS_LIST_MAX 512
+/*
+ * Length of colon separated pnfs dlm device of the form
+ * disk_name:comma separated data server IPv4 address
+ */
+#define NFSD_PNFS_DLM_DEVICE_MAX (NFSD_DLM_DS_LIST_MAX + DISK_NAME_LEN + 1)
+
+#ifdef CONFIG_PNFSD
+
+int nfsd4_set_pnfs_dlm_device(char *pnfs_dlm_device, int len);
+
+void nfsd4_pnfs_dlm_shutdown(void);
+
+#else /* CONFIG_PNFSD */
+
+static inline void nfsd4_pnfs_dlm_shutdown(void)
+{
+ return;
+}
+
+#endif /* CONFIG_PNFSD */
--
1.6.5.1


2009-12-07 09:34:10

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 24/35] pnfsd: new nfsd filesystem file: pnfs_dlm_device

From: Andy Adamson <[email protected]>

Change nfsd filesystem name from pnfs_ds_list to pnfs_dlm_device
write the per block device dlm data server cache

Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
Acked-by: Steven Whitehouse <[email protected]>
---
fs/nfsd/nfsctl.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 73 insertions(+), 0 deletions(-)

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index a44c1c2..14b9f37 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -49,6 +49,9 @@ enum {
NFSD_Leasetime,
NFSD_RecoveryDir,
#endif
+#ifdef CONFIG_PNFSD
+ NFSD_pnfs_dlm_device,
+#endif
};

/*
@@ -73,6 +76,9 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
#endif
+#ifdef CONFIG_PNFSD
+static ssize_t write_pnfs_dlm_device(struct file *file, char *buf, size_t size);
+#endif

static ssize_t (*write_op[])(struct file *, char *, size_t) = {
[NFSD_Svc] = write_svc,
@@ -94,6 +100,9 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
[NFSD_Leasetime] = write_leasetime,
[NFSD_RecoveryDir] = write_recoverydir,
#endif
+#ifdef CONFIG_PNFSD
+ [NFSD_pnfs_dlm_device] = write_pnfs_dlm_device,
+#endif
};

static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
@@ -1291,6 +1300,66 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)

#endif

+#ifdef CONFIG_PNFSD
+
+static ssize_t __write_pnfs_dlm_device(struct file *file, char *buf,
+ size_t size)
+{
+ char *mesg = buf;
+ char *pnfs_dlm_device;
+ int max_size = NFSD_PNFS_DLM_DEVICE_MAX;
+ int len, ret = 0;
+
+ if (size > 0) {
+ ret = -EINVAL;
+ if (size > max_size || buf[size-1] != '\n')
+ return ret;
+ buf[size-1] = 0;
+
+ pnfs_dlm_device = mesg;
+ len = qword_get(&mesg, pnfs_dlm_device, size);
+ if (len <= 0)
+ return ret;
+
+ ret = nfsd4_set_pnfs_dlm_device(pnfs_dlm_device, len);
+ }
+ return ret <= 0 ? ret : strlen(buf);
+}
+
+/**
+ * write_pnfs_dlm_device - Set or report the current pNFS data server list
+ *
+ * Input:
+ * buf: ignored
+ * size: zero
+ *
+ * OR
+ *
+ * Input:
+ * buf: C string containing a block device name,
+ * a colon, and then a comma separated
+ * list of pNFS data server IPv4 addresses
+ * size: non-zero length of C string in @buf
+ * Output:
+ * On success: passed-in buffer filled with '\n'-terminated C
+ * string containing a block device name, a colon, and
+ * then a comma separated list of pNFS
+ * data server IPv4 addresses.
+ * return code is the size in bytes of the string
+ * On error: return code is a negative errno value
+ */
+static ssize_t write_pnfs_dlm_device(struct file *file, char *buf, size_t size)
+{
+ ssize_t rv;
+
+ mutex_lock(&nfsd_mutex);
+ rv = __write_pnfs_dlm_device(file, buf, size);
+ mutex_unlock(&nfsd_mutex);
+ return rv;
+}
+
+#endif /* CONFIG_PNFSD */
+
/*----------------------------------------------------------------------------*/
/*
* populating the filesystem.
@@ -1322,6 +1391,10 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
#endif
+#ifdef CONFIG_PNFSD
+ [NFSD_pnfs_dlm_device] = {"pnfs_dlm_device", &transaction_ops,
+ S_IWUSR|S_IRUSR},
+#endif
/* last one */ {""}
};
return simple_fill_super(sb, 0x6e667364, nfsd_files);
--
1.6.5.1


2009-12-07 09:35:20

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 25/35] pnfsd: nfsd4_pnfs_dlm_getdeviter

From: Andy Adamson <[email protected]>

Export nfsd4_pnfs_dlm_getdeviter for dlm cluster file system use.

[was pnfsd: hardwire DLM cluster file layout get device iterator]
Signed-off-by: David M. Richter <[email protected]>
Signed-off-by: Frank Filz <[email protected]>
[pnfs-gfs2: return correct error value in GETDEVICEINFO]
Signed-off-by: David M. Richter <[email protected]>
[Use the GFS2 iterator as the default file layout iterator.]
Signed-off-by: Andy Adamson <[email protected]>
[Add the pnfsd default file layout getdevice info]
Signed-off-by: David M. Richter <[email protected]>
Signed-off-by: Frank Filz <[email protected]>
[pnfs-gfs2: return correct error value in GETDEVICEINFO]
Signed-off-by: David M. Richter <[email protected]>
[pnfsd: move and rename nfsd4_pnfs_fl_getdeviter]
Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
Acked-by: Steven Whitehouse <[email protected]>
[pnfsd: dev_iter: clean up export API]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfs4pnfsdlm.c | 21 +++++++++++++++++++++
1 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/fs/nfsd/nfs4pnfsdlm.c b/fs/nfsd/nfs4pnfsdlm.c
index 9d91721..617a2f4 100644
--- a/fs/nfsd/nfs4pnfsdlm.c
+++ b/fs/nfsd/nfs4pnfsdlm.c
@@ -23,6 +23,7 @@

#include <linux/nfsd/debug.h>
#include <linux/nfsd/nfs4pnfsdlm.h>
+#include <linux/nfsd/nfs4layoutxdr.h>

#define NFSDDBG_FACILITY NFSDDBG_PROC

@@ -160,3 +161,23 @@ void nfsd4_pnfs_dlm_shutdown(void)
}
spin_unlock(&dlm_device_list_lock);
}
+
+static int nfsd4_pnfs_dlm_getdeviter(struct super_block *sb,
+ u32 layout_type,
+ struct nfsd4_pnfs_dev_iter_res *res)
+{
+ if (layout_type != LAYOUT_NFSV4_FILES) {
+ printk(KERN_ERR "%s: ERROR: layout type isn't 'file' "
+ "(type: %x)\n", __func__, layout_type);
+ return -ENOTSUPP;
+ }
+
+ res->gd_eof = 1;
+ if (res->gd_cookie)
+ return -ENOENT;
+
+ res->gd_cookie = 1;
+ res->gd_verf = 1;
+ res->gd_devid = 1;
+ return 0;
+}
--
1.6.5.1


2009-12-07 09:34:37

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 26/35] pnfsd: nfsd4_pnfs_dlm_getdevinfo

From: Andy Adamson <[email protected]>

Export nfsd4_pnfs_dlm_getdevinfo for dlm cluster file system use.

[was pnfsd: hardwire DLM cluster file layout get device info]
[pnfsd: move and rename nfsd4_pnfs_fl_getdevinfo]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: get rid of devinfo encoding function vector]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: fix pnfs_dlm_device string parsing]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: more fixes for pnfs_dlm_device string parsing]
[pnfsd: filelayout: get rid of getdevinfo notify_types]
Signed-off-by: Benny Halevy <[email protected]>
Acked-by: Steven Whitehouse <[email protected]>
[pnfsd: rename deviceid_t struct pnfs_deviceid]
[pnfsd: clean up getdeviceinfo export op API]
[pnfsd: getdeviceinfo deviceid needs to be const.]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfs4pnfsdlm.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/nfs4proc.c | 1 +
2 files changed, 109 insertions(+), 0 deletions(-)

diff --git a/fs/nfsd/nfs4pnfsdlm.c b/fs/nfsd/nfs4pnfsdlm.c
index 617a2f4..5ed542f 100644
--- a/fs/nfsd/nfs4pnfsdlm.c
+++ b/fs/nfsd/nfs4pnfsdlm.c
@@ -181,3 +181,111 @@ static int nfsd4_pnfs_dlm_getdeviter(struct super_block *sb,
res->gd_devid = 1;
return 0;
}
+
+static int nfsd4_pnfs_dlm_getdevinfo(struct super_block *sb,
+ struct exp_xdr_stream *xdr,
+ u32 layout_type,
+ const struct nfsd4_pnfs_deviceid *devid)
+{
+ int err, len, i = 0;
+ struct pnfs_filelayout_device fdev;
+ struct pnfs_filelayout_devaddr *daddr;
+ struct dlm_device_entry *dlm_pdev;
+ char *bufp;
+
+ err = -ENOTSUPP;
+ if (layout_type != LAYOUT_NFSV4_FILES) {
+ dprintk("%s: ERROR: layout type isn't 'file' "
+ "(type: %x)\n", __func__, layout_type);
+ return err;
+ }
+
+ /* We only hand out a deviceid of 1 in LAYOUTGET, so a GETDEVICEINFO
+ * with a gdia_device_id != 1 is invalid.
+ */
+ err = -EINVAL;
+ if (devid->devid != 1) {
+ dprintk("%s: WARNING: didn't receive a deviceid of "
+ "1 (got: 0x%llx)\n", __func__, devid->devid);
+ return err;
+ }
+
+ /*
+ * If the DS list has not been established, return -EINVAL
+ */
+ dlm_pdev = nfsd4_find_pnfs_dlm_device(sb->s_bdev->bd_disk->disk_name);
+ if (!dlm_pdev) {
+ dprintk("%s: DEBUG: disk %s Not Found\n", __func__,
+ sb->s_bdev->bd_disk->disk_name);
+ return err;
+ }
+
+ dprintk("%s: Found disk %s with DS list |%s|\n",
+ __func__, dlm_pdev->disk_name, dlm_pdev->ds_list);
+
+ memset(&fdev, '\0', sizeof(fdev));
+ fdev.fl_device_length = dlm_pdev->num_ds;
+
+ err = -ENOMEM;
+ len = sizeof(*fdev.fl_device_list) * fdev.fl_device_length;
+ fdev.fl_device_list = kzalloc(len, GFP_KERNEL);
+ if (!fdev.fl_device_list) {
+ printk(KERN_ERR "%s: ERROR: unable to kmalloc a device list "
+ "buffer for %d DSes.\n", __func__, i);
+ goto out;
+ }
+
+ /* Set simple stripe indices */
+ fdev.fl_stripeindices_length = fdev.fl_device_length;
+ fdev.fl_stripeindices_list = kzalloc(sizeof(u32) *
+ fdev.fl_stripeindices_length, GFP_KERNEL);
+
+ if (!fdev.fl_stripeindices_list) {
+ printk(KERN_ERR "%s: ERROR: unable to kmalloc a stripeindices "
+ "list buffer for %d DSes.\n", __func__, i);
+ goto out;
+ }
+ for (i = 0; i < fdev.fl_stripeindices_length; i++)
+ fdev.fl_stripeindices_list[i] = i;
+
+ /* Transfer the data server list with a single multipath entry */
+ bufp = dlm_pdev->ds_list;
+ for (i = 0; i < fdev.fl_device_length; i++) {
+ daddr = kmalloc(sizeof(*daddr), GFP_KERNEL);
+ if (!daddr) {
+ printk(KERN_ERR "%s: ERROR: unable to kmalloc a device "
+ "addr buffer.\n", __func__);
+ goto out;
+ }
+
+ daddr->r_netid.data = "tcp";
+ daddr->r_netid.len = 3;
+
+ len = strcspn(bufp, ",");
+ daddr->r_addr.data = kmalloc(len + 4, GFP_KERNEL);
+ memcpy(daddr->r_addr.data, bufp, len);
+ /*
+ * append the port number. interpreted as two more bytes
+ * beyond the quad: ".8.1" -> 0x08.0x01 -> 0x0801 = port 2049.
+ */
+ memcpy(daddr->r_addr.data + len, ".8.1", 4);
+ daddr->r_addr.len = len + 4;
+
+ fdev.fl_device_list[i].fl_multipath_length = 1;
+ fdev.fl_device_list[i].fl_multipath_list = daddr;
+
+ dprintk("%s: encoding DS |%s|\n", __func__, bufp);
+
+ bufp += len + 1;
+ }
+
+ /* have nfsd encode the device info */
+ err = filelayout_encode_devinfo(xdr, &fdev);
+out:
+ for (i = 0; i < fdev.fl_device_length; i++)
+ kfree(fdev.fl_device_list[i].fl_multipath_list);
+ kfree(fdev.fl_device_list);
+ kfree(fdev.fl_stripeindices_list);
+ dprintk("<-- %s returns %d\n", __func__, err);
+ return err;
+}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index b7e910f..0336037 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -939,6 +939,7 @@ nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}

#if defined(CONFIG_PNFSD)
+
static __be32
nfsd4_layout_verify(struct super_block *sb, struct svc_export *exp,
unsigned int layout_type)
--
1.6.5.1


2009-12-07 09:34:51

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 27/35] pnfsd: nfsd4_pnfs_dlm_layoutget

From: Andy Adamson <[email protected]>

Export nfsd4_pnfs_dlm_layoutget for dlm cluster file system use.

Use the number of data servers as a hash mask and hash inode i_ino
to choose the layout's first_stripe_index.

Always give out whole file layouts.

Always give out IOMODE_READ layouts. DLM locking semantics want to stripe
only READs with WRITEs going through the MDS.

[was pnfsd: hardwire DLM file layout layoutget]
[was pnfs-gfs2: initial LAYOUT* work for pNFS/GFS2 integration]
Frank Filz's work on the layout_type() and layout_get() export operations,
with stubs for layout_commit() and layout_return(). Tested at Connectathon.
Signed-off-by: Frank Filz <[email protected]>
Signed-off-by: David M. Richter <[email protected]>
[pnfs-gfs2: convert to using new pnfs export api]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: gfs2 layout_type interface]
Signed-off-by: Marc Eshel <[email protected]>
[Since GFS2 only uses a stripe of one, changed lg_commit_through_mds from
true to false.]
[pnfsd: move and rename nfsd4_pnfs_fl_layoutget]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: get rid of layout encoding function vector]
Signed-off-by: Benny Halevy <[email protected]>
Acked-by: Steven Whitehouse <[email protected]>
[pnfsd: rename deviceid_t struct pnfs_deviceid]
[pnfsd: clean up layoutget export API]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfs4pnfsdlm.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 89 insertions(+), 0 deletions(-)

diff --git a/fs/nfsd/nfs4pnfsdlm.c b/fs/nfsd/nfs4pnfsdlm.c
index 5ed542f..db3ae1d 100644
--- a/fs/nfsd/nfs4pnfsdlm.c
+++ b/fs/nfsd/nfs4pnfsdlm.c
@@ -289,3 +289,92 @@ out:
dprintk("<-- %s returns %d\n", __func__, err);
return err;
}
+
+static int get_stripe_unit(int blocksize)
+{
+ if (blocksize >= NFSSVC_MAXBLKSIZE)
+ return blocksize;
+ return NFSSVC_MAXBLKSIZE - (NFSSVC_MAXBLKSIZE % blocksize);
+}
+
+/*
+ * Look up inode block device in pnfs_dlm_device list.
+ * Hash on the inode->i_ino and number of data servers.
+ */
+static int dlm_ino_hash(struct inode *ino)
+{
+ struct dlm_device_entry *de;
+ u32 hash_mask = 0;
+
+ /* If can't find the inode block device in the pnfs_dlm_device list
+ * then don't hand out a layout
+ */
+ de = nfsd4_find_pnfs_dlm_device(ino->i_sb->s_bdev->bd_disk->disk_name);
+ if (!de)
+ return -EINVAL;
+ hash_mask = de->num_ds - 1;
+ return ino->i_ino & hash_mask;
+}
+
+static int nfsd4_pnfs_dlm_layoutget(struct inode *inode,
+ struct exp_xdr_stream *xdr,
+ const struct nfsd4_pnfs_layoutget_arg *args,
+ struct nfsd4_pnfs_layoutget_res *res)
+{
+ struct pnfs_filelayout_layout *layout = NULL;
+ struct knfsd_fh *fhp = NULL;
+ int rc = 0, index;
+
+ dprintk("%s: LAYOUT_GET\n", __func__);
+
+ index = dlm_ino_hash(inode);
+ dprintk("%s first stripe index %d i_ino %lu\n", __func__, index,
+ inode->i_ino);
+ if (index < 0)
+ return index;
+
+ res->lg_seg.layout_type = LAYOUT_NFSV4_FILES;
+ /* Always give out whole file layouts */
+ res->lg_seg.offset = 0;
+ res->lg_seg.length = NFS4_MAX_UINT64;
+ /* Always give out READ ONLY layouts */
+ res->lg_seg.iomode = IOMODE_READ;
+
+ layout = kzalloc(sizeof(*layout), GFP_KERNEL);
+ if (layout == NULL) {
+ rc = -ENOMEM;
+ goto error;
+ }
+
+ /* Set file layout response args */
+ layout->lg_layout_type = LAYOUT_NFSV4_FILES;
+ layout->lg_stripe_type = STRIPE_SPARSE;
+ layout->lg_commit_through_mds = false;
+ layout->lg_stripe_unit = get_stripe_unit(inode->i_sb->s_blocksize);
+ layout->lg_fh_length = 1;
+ layout->device_id.fsid = args->lg_fsid;
+ layout->device_id.devid = 1; /*FSFTEMP*/
+ layout->lg_first_stripe_index = index; /*FSFTEMP*/
+ layout->lg_pattern_offset = 0;
+
+ fhp = kmalloc(sizeof(*fhp), GFP_KERNEL);
+ if (fhp == NULL) {
+ rc = -ENOMEM;
+ goto error;
+ }
+
+ memcpy(fhp, args->lg_fh, sizeof(*fhp));
+ pnfs_fh_mark_ds(fhp);
+ layout->lg_fh_list = fhp;
+
+ /* Call nfsd to encode layout */
+ rc = filelayout_encode_layout(xdr, layout);
+exit:
+ kfree(layout);
+ kfree(fhp);
+ return rc;
+
+error:
+ res->lg_seg.length = 0;
+ goto exit;
+}
--
1.6.5.1


2009-12-07 09:36:00

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 28/35] pnfsd: add dlm file layout layout-type

From: Andy Adamson <[email protected]>

Export nfsd4_pnfs_dlm_layouttype for use by dlm cluster file systems.

Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
Acked-by: Steven Whitehouse <[email protected]>
---
fs/nfsd/nfs4pnfsdlm.c | 6 ++++++
1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/fs/nfsd/nfs4pnfsdlm.c b/fs/nfsd/nfs4pnfsdlm.c
index db3ae1d..162afaf 100644
--- a/fs/nfsd/nfs4pnfsdlm.c
+++ b/fs/nfsd/nfs4pnfsdlm.c
@@ -378,3 +378,9 @@ error:
res->lg_seg.length = 0;
goto exit;
}
+
+static int
+nfsd4_pnfs_dlm_layouttype(struct super_block *sb)
+{
+ return LAYOUT_NFSV4_FILES;
+}
--
1.6.5.1


2009-12-07 09:36:13

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 29/35] pnfsd: dlm pnfs_export_operations

From: Andy Adamson <[email protected]>

Declare a global pnfs_export_operations struct for use by DLM cluster
file systems that wish to be exported over pNFS.

Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: define dlm export ops for the !CONFIG_PNFSD case]
[gfs2: set pnfs_dlm_export_ops only for CONFIG_PNFSD]
Signed-off-by: Benny Halevy <[email protected]>
Acked-by: Steven Whitehouse <[email protected]>
---
fs/nfsd/nfs4pnfsdlm.c | 9 +++++++++
include/linux/nfsd/nfs4pnfsdlm.h | 3 +++
2 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/fs/nfsd/nfs4pnfsdlm.c b/fs/nfsd/nfs4pnfsdlm.c
index 162afaf..ccab22f 100644
--- a/fs/nfsd/nfs4pnfsdlm.c
+++ b/fs/nfsd/nfs4pnfsdlm.c
@@ -384,3 +384,12 @@ nfsd4_pnfs_dlm_layouttype(struct super_block *sb)
{
return LAYOUT_NFSV4_FILES;
}
+
+/* For use by DLM cluster file systems exported by pNFSD */
+const struct pnfs_export_operations pnfs_dlm_export_ops = {
+ .layout_type = nfsd4_pnfs_dlm_layouttype,
+ .get_device_info = nfsd4_pnfs_dlm_getdevinfo,
+ .get_device_iter = nfsd4_pnfs_dlm_getdeviter,
+ .layout_get = nfsd4_pnfs_dlm_layoutget,
+};
+EXPORT_SYMBOL(pnfs_dlm_export_ops);
diff --git a/include/linux/nfsd/nfs4pnfsdlm.h b/include/linux/nfsd/nfs4pnfsdlm.h
index 63248aa..a961c1e 100644
--- a/include/linux/nfsd/nfs4pnfsdlm.h
+++ b/include/linux/nfsd/nfs4pnfsdlm.h
@@ -35,6 +35,9 @@

#ifdef CONFIG_PNFSD

+/* For use by DLM cluster file systems exported by pNFSD */
+extern const struct pnfs_export_operations pnfs_dlm_export_ops;
+
int nfsd4_set_pnfs_dlm_device(char *pnfs_dlm_device, int len);

void nfsd4_pnfs_dlm_shutdown(void);
--
1.6.5.1


2009-12-07 09:35:30

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 30/35] pnfsd: gfs2: use generic file layout pnfs operations vector

From: Andy Adamson <[email protected]>

Signed-off-by: Andy Adamson <[email protected]>
[gfs2: set pnfs_dlm_export_ops only for CONFIG_PNFSD]
Signed-off-by: Benny Halevy <[email protected]>
Acked-by: Steven Whitehouse <[email protected]>
---
fs/gfs2/ops_fstype.c | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 52fb6c0..f5abd93 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -18,6 +18,7 @@
#include <linux/mount.h>
#include <linux/gfs2_ondisk.h>
#include <linux/slow-work.h>
+#include <linux/nfsd/nfs4pnfsdlm.h>

#include "gfs2.h"
#include "incore.h"
@@ -1147,6 +1148,9 @@ static int fill_super(struct super_block *sb, void *data, int silent)
sb->s_magic = GFS2_MAGIC;
sb->s_op = &gfs2_super_ops;
sb->s_export_op = &gfs2_export_ops;
+#if defined(CONFIG_PNFSD)
+ sb->s_pnfs_op = &pnfs_dlm_export_ops;
+#endif /* CONFIG_PNFSD */
sb->s_xattr = gfs2_xattr_handlers;
sb->s_time_gran = 1;
sb->s_maxbytes = MAX_LFS_FILESIZE;
--
1.6.5.1


2009-12-07 09:35:43

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 31/35] posix_acl: resolve compile dependency in posix_acl.h

get_cached_acl is defined as an inline function in posix_acl.h and
dereferences its struct inode * parameter, so it requires the full
definition of struct inode. Include <linux/fs.h> so that posix_acl.h
compiles on its own.

Cc: Alexander Viro <[email protected]>
Cc: [email protected]
Cc: J. Bruce Fields <[email protected]>
Cc: Trond Myklebust <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
---
include/linux/posix_acl.h | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 065a365..f422bbe 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -8,6 +8,7 @@
#ifndef __LINUX_POSIX_ACL_H
#define __LINUX_POSIX_ACL_H

+#include <linux/fs.h>
#include <linux/slab.h>

#define ACL_UNDEFINED_ID (-1)
--
1.6.5.1


2009-12-07 09:35:56

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 32/35] nfs: resolve compile dependency in nfs_xdr.h

Include headers in nfs_xdr.h required for
struct rpc_task, nfs4_verifier, nfs4_stateid

Cc: Trond Myklebust <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
---
include/linux/nfs_xdr.h | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 00a0c81..a2a45d1 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -3,6 +3,8 @@

#include <linux/nfsacl.h>
#include <linux/nfs3.h>
+#include <linux/nfs4.h>
+#include <linux/sunrpc/sched.h>

/*
* To change the maximum rsize and wsize supported by the NFS client, adjust
--
1.6.5.1


2009-12-07 09:36:10

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 33/35] pnfsd: layout commit

[extracted from pnfsd: Initial pNFS server implementation.]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: Streamline error code checking for non-pnfs filesystems]
Signed-off-by: Dean Hildebrand <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: update server layout xdr for draft 19.]
Signed-off-by: Dean Hildebrand <[email protected]>
[pnfsd: use stateid_t for layout stateid xdr data structs]
[pnfsd: use stateid xdr decode function for layoutcommit]
[pnfsd: fix copy_clientid for layoutcommit]
[pnfsd: convert generic code to use new pnfs api]
[pnfsd: define pnfs_export_operations]
[pnfsd: obliterate old vfs api]
[pnfsd: fixup ENCODE_HEAD for layoutcommit]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: layout commit all layout types]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: do not take the i_mutex when filesystem provides layout_commit]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: check ex_pnfs in nfsd4_verify_layout]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: fix cosmetic checkpatch warnings]
[pnfsd: clean up layoutcommit export api]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfs4proc.c | 83 +++++++++++++++++++++++++++++++++++++++
fs/nfsd/nfs4xdr.c | 70 ++++++++++++++++++++++++++++++++-
fs/nfsd/xdr4.h | 7 +++
include/linux/nfsd/nfsd4_pnfs.h | 21 ++++++++++
4 files changed, 179 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 0336037..d05e260 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1109,6 +1109,85 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
out:
return status;
}
+
+static __be32
+nfsd4_layoutcommit(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_pnfs_layoutcommit *lcp)
+{
+ int status;
+ struct inode *ino = NULL;
+ struct iattr ia;
+ struct super_block *sb;
+ struct svc_fh *current_fh = &cstate->current_fh;
+
+ dprintk("NFSD: nfsd4_layoutcommit \n");
+ status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
+ if (status)
+ goto out;
+
+ status = nfserr_inval;
+ ino = current_fh->fh_dentry->d_inode;
+ if (!ino)
+ goto out;
+
+ status = nfserr_inval;
+ sb = ino->i_sb;
+ if (!sb)
+ goto out;
+
+ /* Ensure underlying file system supports pNFS and,
+ * if so, the requested layout type
+ */
+ status = nfsd4_layout_verify(sb, current_fh->fh_export,
+ lcp->args.lc_seg.layout_type);
+ if (status)
+ goto out;
+
+ /* This will only extend the file length. Do a quick
+ * check to see if there is any point in waiting for the update
+ * locks.
+ * TODO: Is this correct for all back ends?
+ */
+ dprintk("%s:new offset: %d new size: %llu old size: %lld\n",
+ __func__, lcp->args.lc_newoffset, lcp->args.lc_last_wr + 1,
+ ino->i_size);
+
+ /* Set clientid from sessionid */
+ copy_clientid((clientid_t *)&lcp->args.lc_seg.clientid, cstate->session);
+ lcp->res.lc_size_chg = 0;
+ if (sb->s_pnfs_op->layout_commit) {
+ status = sb->s_pnfs_op->layout_commit(ino, &lcp->args, &lcp->res);
+ dprintk("%s:layout_commit result %d\n", __func__, status);
+ } else {
+ fh_lock(current_fh);
+ if ((lcp->args.lc_newoffset == 0) ||
+ ((lcp->args.lc_last_wr + 1) <= ino->i_size)) {
+ status = 0;
+ lcp->res.lc_size_chg = 0;
+ fh_unlock(current_fh);
+ goto out;
+ }
+
+ /* Try our best to update the file size */
+ dprintk("%s: Modifying file size\n", __func__);
+ ia.ia_valid = ATTR_SIZE;
+ ia.ia_size = lcp->args.lc_last_wr + 1;
+ status = notify_change(current_fh->fh_dentry, &ia);
+ fh_unlock(current_fh);
+ dprintk("%s:notify_change result %d\n", __func__, status);
+ }
+
+ if (!status && lcp->res.lc_size_chg &&
+ EX_ISSYNC(current_fh->fh_export)) {
+ dprintk("%s: Synchronously writing inode size %llu\n",
+ __func__, ino->i_size);
+ write_inode_now(ino, 1);
+ lcp->res.lc_newsize = i_size_read(ino);
+ }
+out:
+ return status;
+}
#endif /* CONFIG_PNFSD */

/*
@@ -1485,6 +1564,10 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_layoutget,
.op_name = "OP_LAYOUTGET",
},
+ [OP_LAYOUTCOMMIT] = {
+ .op_func = (nfsd4op_func)nfsd4_layoutcommit,
+ .op_name = "OP_LAYOUTCOMMIT",
+ },
#endif /* CONFIG_PNFSD */
};

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 40c794a..96f6567 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1290,6 +1290,51 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,

DECODE_TAIL;
}
+
+static __be32
+nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
+ struct nfsd4_pnfs_layoutcommit *lcp)
+{
+ DECODE_HEAD;
+ u32 timechange;
+
+ READ_BUF(20);
+ READ64(lcp->args.lc_seg.offset);
+ READ64(lcp->args.lc_seg.length);
+ READ32(lcp->args.lc_reclaim);
+ nfsd4_decode_stateid(argp, &lcp->lc_sid);
+ READ_BUF(4);
+ READ32(lcp->args.lc_newoffset);
+ if (lcp->args.lc_newoffset) {
+ READ_BUF(8);
+ READ64(lcp->args.lc_last_wr);
+ } else
+ lcp->args.lc_last_wr = 0;
+ READ_BUF(4);
+ READ32(timechange);
+ if (timechange) {
+ READ_BUF(12);
+ READ64(lcp->args.lc_mtime.seconds);
+ READ32(lcp->args.lc_mtime.nseconds);
+ } else {
+ lcp->args.lc_mtime.seconds = 0;
+ lcp->args.lc_mtime.nseconds = 0;
+ }
+ READ_BUF(8);
+ READ32(lcp->args.lc_seg.layout_type);
+ /* XXX: saving XDR'ed layout update. Since we don't have the
+ * current_fh yet, and therefore no export_ops, we can't call
+ * the layout specific decode routines. File and pVFS2
+ * do not use the layout update....
+ */
+ READ32(lcp->args.lc_up_len);
+ if (lcp->args.lc_up_len > 0) {
+ READ_BUF(lcp->args.lc_up_len);
+ READMEM(lcp->args.lc_up_layout, lcp->args.lc_up_len);
+ }
+
+ DECODE_TAIL;
+}
#endif /* CONFIG_PNFSD */

static __be32
@@ -1396,7 +1441,7 @@ static nfsd4_dec nfsd41_dec_ops[] = {
#if defined(CONFIG_PNFSD)
[OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdevinfo,
[OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_getdevlist,
- [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_layoutcommit,
[OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget,
[OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
#else /* CONFIG_PNFSD */
@@ -3436,6 +3481,27 @@ err:
resp->p = p_start;
return nfserr;
}
+
+static __be32
+nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, int nfserr,
+ struct nfsd4_pnfs_layoutcommit *lcp)
+{
+ __be32 *p;
+
+ if (nfserr)
+ goto out;
+
+ RESERVE_SPACE(4);
+ WRITE32(lcp->res.lc_size_chg);
+ ADJUST_ARGS();
+ if (lcp->res.lc_size_chg) {
+ RESERVE_SPACE(8);
+ WRITE64(lcp->res.lc_newsize);
+ ADJUST_ARGS();
+ }
+out:
+ return nfserr;
+}
#endif /* CONFIG_PNFSD */

static __be32
@@ -3501,7 +3567,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
#if defined(CONFIG_PNFSD)
[OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdevinfo,
[OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_getdevlist,
- [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_layoutcommit,
[OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget,
[OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
#else /* CONFIG_PNFSD */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 891f3d2..19a94e2 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -411,6 +411,12 @@ struct nfsd4_pnfs_layoutget {
u32 lg_roc; /* response */
};

+struct nfsd4_pnfs_layoutcommit {
+ struct nfsd4_pnfs_layoutcommit_arg args;
+ stateid_t lc_sid; /* request */
+ struct nfsd4_pnfs_layoutcommit_res res;
+};
+
struct nfsd4_op {
int opnum;
__be32 status;
@@ -455,6 +461,7 @@ struct nfsd4_op {
struct nfsd4_pnfs_getdevlist pnfs_getdevlist;
struct nfsd4_pnfs_getdevinfo pnfs_getdevinfo;
struct nfsd4_pnfs_layoutget pnfs_layoutget;
+ struct nfsd4_pnfs_layoutcommit pnfs_layoutcommit;
#endif /* CONFIG_PNFSD */
} u;
struct nfs4_replay * replay;
diff --git a/include/linux/nfsd/nfsd4_pnfs.h b/include/linux/nfsd/nfsd4_pnfs.h
index b80ff01..69c43f6 100644
--- a/include/linux/nfsd/nfsd4_pnfs.h
+++ b/include/linux/nfsd/nfsd4_pnfs.h
@@ -36,6 +36,7 @@

#include <linux/exportfs.h>
#include <linux/exp_xdr.h>
+#include <linux/nfs_xdr.h>
#include <linux/nfsd/nfsfh.h>

struct nfsd4_pnfs_deviceid {
@@ -80,6 +81,21 @@ struct nfsd4_pnfs_layoutget_res {
u32 lg_return_on_close;
};

+struct nfsd4_pnfs_layoutcommit_arg {
+ struct nfsd4_layout_seg lc_seg; /* request */
+ u32 lc_reclaim; /* request */
+ u32 lc_newoffset; /* request */
+ u64 lc_last_wr; /* request */
+ struct nfstime4 lc_mtime; /* request */
+ u32 lc_up_len; /* layout length */
+ void *lc_up_layout; /* decoded by callback */
+};
+
+struct nfsd4_pnfs_layoutcommit_res {
+ u32 lc_size_chg; /* boolean for response */
+ u64 lc_newsize; /* response */
+};
+
/*
* pNFS export operations vector.
*
@@ -130,6 +146,11 @@ struct pnfs_export_operations {
const struct nfsd4_pnfs_layoutget_arg *,
struct nfsd4_pnfs_layoutget_res *);

+ /* Commit changes to layout */
+ int (*layout_commit) (struct inode *,
+ const struct nfsd4_pnfs_layoutcommit_arg *,
+ struct nfsd4_pnfs_layoutcommit_res *);
+
/* Can layout segments be merged for this layout type? */
int (*can_merge_layouts) (u32 layout_type);
};
--
1.6.5.1


2009-12-07 09:36:24

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 34/35] pnfsd: layout return

Split pnfs_return_layout return file/{fsid,all} loops
into sub-functions in preparation for NFS4ERR_NOMATCHING_LAYOUT
error handling.

[extracted from pnfsd: Initial pNFS server implementation.]
[pnfsd: nfsd layout cache: layout return changes]
Signed-off-by: Benny Halevy <bhalevy at panasas.com>
Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Mike Sager <[email protected]>
[pnfsd: fix bug in return_layout for RETURN_FILE]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: exit from nfs4_pnfs_return_layout without unlocking]
Signed-off-by: Marc Eshel <[email protected]>
[pnfsd: add debug printouts in return_layout path]
[pnfsd: refactor return_layout]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: Streamline error code checking for non-pnfs filesystems]
[pnfsd: update server layout xdr for draft 19.]
Signed-off-by: Dean Hildebrand <[email protected]>
[pnfsd: fix bug nfsd4_encode_layoutreturn]
[pnfsd: nfsd4_encode_layoutreturn needs ADJUST_ARGS when encoding response stateid]
[pnfsd: use stateid_t for layout stateid xdr data structs]
[pnfsd: layoutreturn optional stateid in response only for RETURN_FILE]
[pnfsd: decode opaque lrf_body in layoutreturn draft-19]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: reset recall flags]
Signed-off-by: Marc Eshel <[email protected]>
[pnfsd: handle RETURN_{FSID,ALL} with no nfs4_file]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: test and fix layout return]
Signed-off-by: Marc Eshel <[email protected]>
[pnfsd: Fixes in nfs4_pnfs_return_layout]
Signed-off-by: Dean Hildebrand <[email protected]>
[pnfsd: use stateid xdr {en,de}code functions for layoutreturn]
[pnfsd: fix copy_clientid for layoutreturn]
[pnfsd: convert generic code to use new pnfs api]
[pnfsd: define pnfs_export_operations]
[pnfsd: obliterate old vfs api]
Signed-off-by: Benny Halevy <[email protected]>
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: fixup ENCODE_HEAD for layoutreturn]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: set lrs_present to false on final layout return]
[Moved pnfsd code from nfs4state.c to nfs4pnfsd.c]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: use a spinlock for layout state]
Signed-off-by: Benny Halevy <[email protected]>
[pnfsd: layout return all layout types]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: layout_return hint PART 01]
Signed-off-by: Boaz Harrosh <[email protected]>
[pnfsd: check ex_pnfs in nfsd4_verify_layout]
Signed-off-by: Andy Adamson <[email protected]>
[pnfsd: fix cosmetic checkpatch warnings]
[pnfsd: clean up layoutreturn export API]
[moved find_file into RETURN_FILE condition]
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfs4pnfsd.c | 176 +++++++++++++++++++++++++++++++++++++++
fs/nfsd/nfs4proc.c | 58 +++++++++++++
fs/nfsd/nfs4state.c | 2 +-
fs/nfsd/nfs4xdr.c | 49 ++++++++++-
fs/nfsd/pnfsd.h | 2 +
fs/nfsd/state.h | 1 +
fs/nfsd/xdr4.h | 12 +++
include/linux/nfsd/nfsd4_pnfs.h | 15 ++++
8 files changed, 312 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/nfs4pnfsd.c b/fs/nfsd/nfs4pnfsd.c
index f475b3c..ba3d96f 100644
--- a/fs/nfsd/nfs4pnfsd.c
+++ b/fs/nfsd/nfs4pnfsd.c
@@ -351,6 +351,29 @@ destroy_layout(struct nfs4_layout *lp)
put_nfs4_file(fp);
}

+void fs_layout_return(struct super_block *sb, struct inode *ino,
+ struct nfsd4_pnfs_layoutreturn *lrp, int flags,
+ void *recall_cookie)
+{
+ int ret;
+
+ if (unlikely(!sb->s_pnfs_op->layout_return))
+ return;
+
+ lrp->lr_flags = flags;
+ lrp->args.lr_cookie = recall_cookie;
+
+ if (!ino) /* FSID or ALL */
+ ino = sb->s_root->d_inode;
+
+ ret = sb->s_pnfs_op->layout_return(ino, &lrp->args);
+ dprintk("%s: inode %lu iomode=%d offset=0x%llx length=0x%llx "
+ "cookie = %p flags 0x%x status=%d\n",
+ __func__, ino->i_ino, lrp->args.lr_seg.iomode,
+ lrp->args.lr_seg.offset, lrp->args.lr_seg.length,
+ recall_cookie, flags, ret);
+}
+
/*
* are two octet ranges overlapping?
* start1 last1
@@ -555,6 +578,159 @@ out_freelayout:
goto out;
}

+static void
+trim_layout(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *lr)
+{
+ u64 lo_start = lo->offset;
+ u64 lo_end = end_offset(lo_start, lo->length);
+ u64 lr_start = lr->offset;
+ u64 lr_end = end_offset(lr_start, lr->length);
+
+ dprintk("%s:Begin lo %llu:%lld lr %llu:%lld\n", __func__,
+ lo->offset, lo->length, lr->offset, lr->length);
+
+ /* lr fully covers lo? */
+ if (lr_start <= lo_start && lo_end <= lr_end) {
+ lo->length = 0;
+ goto out;
+ }
+
+ /*
+ * split not supported yet. retain layout segment.
+ * remains must be returned by the client
+ * on the final layout return.
+ */
+ if (lo_start < lr_start && lr_end < lo_end) {
+ dprintk("%s: split not supported\n", __func__);
+ goto out;
+ }
+
+ if (lo_start < lr_start)
+ lo_end = lr_start - 1;
+ else /* lr_end < lo_end */
+ lo_start = lr_end + 1;
+
+ lo->offset = lo_start;
+ lo->length = (lo_end == NFS4_MAX_UINT64) ? lo_end : lo_end - lo_start;
+out:
+ dprintk("%s:End lo %llu:%lld\n", __func__, lo->offset, lo->length);
+}
+
+static int
+pnfs_return_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp,
+ struct nfsd4_pnfs_layoutreturn *lrp)
+{
+ int layouts_found = 0;
+ struct nfs4_layout *lp, *nextlp;
+
+ dprintk("%s: clp %p fp %p\n", __func__, clp, fp);
+ spin_lock(&layout_lock);
+ list_for_each_entry_safe (lp, nextlp, &fp->fi_layouts, lo_perfile) {
+ dprintk("%s: lp %p client %p,%p lo_type %x,%x iomode %d,%d\n",
+ __func__, lp,
+ lp->lo_client, clp,
+ lp->lo_seg.layout_type, lrp->args.lr_seg.layout_type,
+ lp->lo_seg.iomode, lrp->args.lr_seg.iomode);
+ if (lp->lo_client != clp ||
+ lp->lo_seg.layout_type != lrp->args.lr_seg.layout_type ||
+ (lp->lo_seg.iomode != lrp->args.lr_seg.iomode &&
+ lrp->args.lr_seg.iomode != IOMODE_ANY) ||
+ !lo_seg_overlapping(&lp->lo_seg, &lrp->args.lr_seg))
+ continue;
+ layouts_found++;
+ trim_layout(&lp->lo_seg, &lrp->args.lr_seg);
+ if (!lp->lo_seg.length) {
+ lrp->lrs_present = 0;
+ destroy_layout(lp);
+ }
+ }
+ spin_unlock(&layout_lock);
+
+ return layouts_found;
+}
+
+static int
+pnfs_return_client_layouts(struct nfs4_client *clp,
+ struct nfsd4_pnfs_layoutreturn *lrp, u64 ex_fsid)
+{
+ int layouts_found = 0;
+ struct nfs4_layout *lp, *nextlp;
+
+ spin_lock(&layout_lock);
+ list_for_each_entry_safe (lp, nextlp, &clp->cl_layouts, lo_perclnt) {
+ if (lrp->args.lr_seg.layout_type != lp->lo_seg.layout_type ||
+ (lrp->args.lr_seg.iomode != lp->lo_seg.iomode &&
+ lrp->args.lr_seg.iomode != IOMODE_ANY))
+ continue;
+
+ if (lrp->args.lr_return_type == RETURN_FSID &&
+ !same_fsid_major(&lp->lo_file->fi_fsid, ex_fsid))
+ continue;
+
+ layouts_found++;
+ destroy_layout(lp);
+ }
+ spin_unlock(&layout_lock);
+
+ return layouts_found;
+}
+
+int nfs4_pnfs_return_layout(struct super_block *sb, struct svc_fh *current_fh,
+ struct nfsd4_pnfs_layoutreturn *lrp)
+{
+ int status = 0;
+ int layouts_found = 0;
+ struct inode *ino = current_fh->fh_dentry->d_inode;
+ struct nfs4_file *fp = NULL;
+ struct nfs4_client *clp;
+ u64 ex_fsid = current_fh->fh_export->ex_fsid;
+ void *recall_cookie = NULL;
+
+ dprintk("NFSD: %s\n", __func__);
+
+ nfs4_lock_state();
+ clp = find_confirmed_client((clientid_t *)&lrp->args.lr_seg.clientid);
+ if (!clp)
+ goto out;
+
+ if (lrp->args.lr_return_type == RETURN_FILE) {
+ fp = find_file(ino);
+ if (!fp) {
+ printk(KERN_ERR "%s: RETURN_FILE: no nfs4_file for "
+ "ino %p:%lu\n",
+ __func__, ino, ino ? ino->i_ino : 0L);
+ goto out;
+ }
+
+ /* update layouts */
+ layouts_found = pnfs_return_file_layouts(clp, fp, lrp);
+ /* optimize for the all-empty case */
+ if (list_empty(&fp->fi_layouts))
+ recall_cookie = PNFS_LAST_LAYOUT_NO_RECALLS;
+ } else {
+ layouts_found = pnfs_return_client_layouts(clp, lrp, ex_fsid);
+ }
+
+ dprintk("pNFS %s: clp %p fp %p layout_type 0x%x iomode %d "
+ "return_type %d fsid 0x%llx offset %llu length %llu: "
+ "layouts_found %d\n",
+ __func__, clp, fp, lrp->args.lr_seg.layout_type,
+ lrp->args.lr_seg.iomode, lrp->args.lr_return_type,
+ ex_fsid,
+ lrp->args.lr_seg.offset, lrp->args.lr_seg.length, layouts_found);
+
+ if (fp)
+ put_nfs4_file(fp);
+out:
+ nfs4_unlock_state();
+
+ /* call exported filesystem layout_return (ignore return-code) */
+ fs_layout_return(sb, ino, lrp, 0, recall_cookie);
+
+ dprintk("pNFS %s: exit status %d \n", __func__, status);
+ return status;
+}
+
void pnfs_expire_client(struct nfs4_client *clp)
{
struct nfs4_layout *lp;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d05e260..b4c3ff2 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1188,6 +1188,60 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
out:
return status;
}
+
+static __be32
+nfsd4_layoutreturn(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_pnfs_layoutreturn *lrp)
+{
+ int status;
+ struct super_block *sb;
+ struct svc_fh *current_fh = &cstate->current_fh;
+
+ status = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
+ if (status)
+ goto out;
+
+ status = nfserr_inval;
+ sb = current_fh->fh_dentry->d_inode->i_sb;
+ if (!sb)
+ goto out;
+
+ /* Ensure underlying file system supports pNFS and,
+ * if so, the requested layout type
+ */
+ status = nfsd4_layout_verify(sb, current_fh->fh_export,
+ lrp->args.lr_seg.layout_type);
+ if (status)
+ goto out;
+
+ status = nfserr_inval;
+ if (lrp->args.lr_return_type != RETURN_FILE &&
+ lrp->args.lr_return_type != RETURN_FSID &&
+ lrp->args.lr_return_type != RETURN_ALL) {
+ dprintk("pNFS %s: invalid return_type %d\n", __func__,
+ lrp->args.lr_return_type);
+ goto out;
+ }
+
+ status = nfserr_inval;
+ if (lrp->args.lr_seg.iomode != IOMODE_READ &&
+ lrp->args.lr_seg.iomode != IOMODE_RW &&
+ lrp->args.lr_seg.iomode != IOMODE_ANY) {
+ dprintk("pNFS %s: invalid iomode %d\n", __func__,
+ lrp->args.lr_seg.iomode);
+ goto out;
+ }
+
+ /* Set clientid from sessionid */
+ copy_clientid((clientid_t *)&lrp->args.lr_seg.clientid, cstate->session);
+ lrp->lrs_present = (lrp->args.lr_return_type == RETURN_FILE);
+ status = nfs4_pnfs_return_layout(sb, current_fh, lrp);
+out:
+ dprintk("pNFS %s: status %d return_type 0x%x lrs_present %d\n",
+ __func__, status, lrp->args.lr_return_type, lrp->lrs_present);
+ return status;
+}
#endif /* CONFIG_PNFSD */

/*
@@ -1568,6 +1622,10 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_layoutcommit,
.op_name = "OP_LAYOUTCOMMIT",
},
+ [OP_LAYOUTRETURN] = {
+ .op_func = (nfsd4op_func)nfsd4_layoutreturn,
+ .op_name = "OP_LAYOUTRETURN",
+ },
#endif /* CONFIG_PNFSD */
};

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1833ddf..108cb3e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1909,7 +1909,7 @@ find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open)
}

/* search file_hashtbl[] for file */
-static struct nfs4_file *
+struct nfs4_file *
find_file(struct inode *ino)
{
unsigned int hashval = file_hashval(ino);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 96f6567..238ff6a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1335,6 +1335,33 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,

DECODE_TAIL;
}
+
+static __be32
+nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
+ struct nfsd4_pnfs_layoutreturn *lrp)
+{
+ DECODE_HEAD;
+
+ READ_BUF(16);
+ READ32(lrp->args.lr_reclaim);
+ READ32(lrp->args.lr_seg.layout_type);
+ READ32(lrp->args.lr_seg.iomode);
+ READ32(lrp->args.lr_return_type);
+ if (lrp->args.lr_return_type == RETURN_FILE) {
+ READ_BUF(16);
+ READ64(lrp->args.lr_seg.offset);
+ READ64(lrp->args.lr_seg.length);
+ nfsd4_decode_stateid(argp, &lrp->lr_sid);
+ READ_BUF(4);
+ READ32(lrp->args.lrf_body_len);
+ if (lrp->args.lrf_body_len > 0) {
+ READ_BUF(lrp->args.lrf_body_len);
+ READMEM(lrp->args.lrf_body, lrp->args.lrf_body_len);
+ }
+ }
+
+ DECODE_TAIL;
+}
#endif /* CONFIG_PNFSD */

static __be32
@@ -1443,7 +1470,7 @@ static nfsd4_dec nfsd41_dec_ops[] = {
[OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_getdevlist,
[OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_layoutcommit,
[OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget,
- [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_layoutreturn,
#else /* CONFIG_PNFSD */
[OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
@@ -3502,6 +3529,24 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, int nfserr,
out:
return nfserr;
}
+
+static __be32
+nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, int nfserr,
+ struct nfsd4_pnfs_layoutreturn *lrp)
+{
+ __be32 *p;
+
+ if (nfserr)
+ goto out;
+
+ RESERVE_SPACE(4);
+ WRITE32(lrp->lrs_present != 0); /* got stateid? */
+ ADJUST_ARGS();
+ if (lrp->lrs_present)
+ nfsd4_encode_stateid(resp, &lrp->lr_sid);
+out:
+ return nfserr;
+}
#endif /* CONFIG_PNFSD */

static __be32
@@ -3569,7 +3614,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
[OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_getdevlist,
[OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_layoutcommit,
[OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget,
- [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_layoutreturn,
#else /* CONFIG_PNFSD */
[OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop,
[OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/pnfsd.h b/fs/nfsd/pnfsd.h
index 523b149..96000f1 100644
--- a/fs/nfsd/pnfsd.h
+++ b/fs/nfsd/pnfsd.h
@@ -62,5 +62,7 @@ struct nfs4_layout {
};

int nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *, struct exp_xdr_stream *);
+int nfs4_pnfs_return_layout(struct super_block *, struct svc_fh *,
+ struct nfsd4_pnfs_layoutreturn *);

#endif /* LINUX_NFSD_PNFSD_H */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 9f68fd2..3da4be4 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -412,6 +412,7 @@ extern void nfsd4_recdir_purge_old(void);
extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
extern void nfsd4_free_slab(struct kmem_cache **);
+extern struct nfs4_file *find_file(struct inode *);
extern struct nfs4_file *find_alloc_file(struct inode *, struct svc_fh *);
extern void put_nfs4_file(struct nfs4_file *);
extern void get_nfs4_file(struct nfs4_file *);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 19a94e2..b72bfd4 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -417,6 +417,17 @@ struct nfsd4_pnfs_layoutcommit {
struct nfsd4_pnfs_layoutcommit_res res;
};

+enum layoutreturn_flags {
+ LR_FLAG_INTERN = 1 << 0, /* internal return */
+};
+
+struct nfsd4_pnfs_layoutreturn {
+ struct nfsd4_pnfs_layoutreturn_arg args;
+ u32 lr_flags;
+ stateid_t lr_sid; /* request/response */
+ u32 lrs_present; /* response */
+};
+
struct nfsd4_op {
int opnum;
__be32 status;
@@ -462,6 +473,7 @@ struct nfsd4_op {
struct nfsd4_pnfs_getdevinfo pnfs_getdevinfo;
struct nfsd4_pnfs_layoutget pnfs_layoutget;
struct nfsd4_pnfs_layoutcommit pnfs_layoutcommit;
+ struct nfsd4_pnfs_layoutreturn pnfs_layoutreturn;
#endif /* CONFIG_PNFSD */
} u;
struct nfs4_replay * replay;
diff --git a/include/linux/nfsd/nfsd4_pnfs.h b/include/linux/nfsd/nfsd4_pnfs.h
index 69c43f6..be17aa6 100644
--- a/include/linux/nfsd/nfsd4_pnfs.h
+++ b/include/linux/nfsd/nfsd4_pnfs.h
@@ -96,6 +96,17 @@ struct nfsd4_pnfs_layoutcommit_res {
u64 lc_newsize; /* response */
};

+#define PNFS_LAST_LAYOUT_NO_RECALLS ((void *)-1) /* used with lr_cookie below */
+
+struct nfsd4_pnfs_layoutreturn_arg {
+ u32 lr_return_type; /* request */
+ struct nfsd4_layout_seg lr_seg; /* request */
+ u32 lr_reclaim; /* request */
+ u32 lrf_body_len; /* request */
+ void *lrf_body; /* request */
+ void *lr_cookie; /* fs private */
+};
+
/*
* pNFS export operations vector.
*
@@ -151,6 +162,10 @@ struct pnfs_export_operations {
const struct nfsd4_pnfs_layoutcommit_arg *,
struct nfsd4_pnfs_layoutcommit_res *);

+ /* Returns the layout */
+ int (*layout_return) (struct inode *,
+ const struct nfsd4_pnfs_layoutreturn_arg *);
+
/* Can layout segments be merged for this layout type? */
int (*can_merge_layouts) (u32 layout_type);
};
--
1.6.5.1


2009-12-07 09:37:34

by Benny Halevy

[permalink] [raw]
Subject: [PATCH v2 35/35] pnfsd: layoutreturn stateid processing

From: Andy Adamson <[email protected]>

[Moved pnfsd code from nfs4state.c to nfs4pnfsd.c]
Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/nfs4pnfsd.c | 8 ++++++++
1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/fs/nfsd/nfs4pnfsd.c b/fs/nfsd/nfs4pnfsd.c
index ba3d96f..9023038 100644
--- a/fs/nfsd/nfs4pnfsd.c
+++ b/fs/nfsd/nfs4pnfsd.c
@@ -702,6 +702,13 @@ int nfs4_pnfs_return_layout(struct super_block *sb, struct svc_fh *current_fh,
goto out;
}

+ /* Check the stateid */
+ dprintk("%s PROCESS LO_STATEID inode %p\n", __func__, ino);
+ status = nfs4_process_layout_stateid(clp, fp, &lrp->lr_sid,
+ NULL);
+ if (status)
+ goto out_put_file;
+
/* update layouts */
layouts_found = pnfs_return_file_layouts(clp, fp, lrp);
/* optimize for the all-empty case */
@@ -719,6 +726,7 @@ int nfs4_pnfs_return_layout(struct super_block *sb, struct svc_fh *current_fh,
ex_fsid,
lrp->args.lr_seg.offset, lrp->args.lr_seg.length, layouts_found);

+out_put_file:
if (fp)
put_nfs4_file(fp);
out:
--
1.6.5.1


2009-12-07 09:53:56

by Benny Halevy

[permalink] [raw]
Subject: Re: [pnfs] [PATCH v2 0/35] Initial pnfsd file layout support

The patchset is also available here:
git://linux-nfs.org/~bhalevy/linux-pnfs.git pnfsd-files-next

Benny

On Dec. 07, 2009, 11:26 +0200, Benny Halevy <[email protected]> wrote:
> Bruce,
>
> The following patches implement initial pnfsd server support for
> the files layout and the dlm-based file systems, including GETDEVICELIST,
> GETDEVICEINFO, and LAYOUTGET.
> LAYOUTCOMMIT and LAYOUTRETURN generic implementation provides the
> complete implementation that was tested by Andy in the Austin Fall
> 2009 Bakeathon.
>
> The patchset is based on your nfsd-next branch at
> d1ecbbf Merge branch 'for-2.6.33-incoming' into HEAD
> post Boaz' headers cleanup patchset.
>
> v2 includes the fixes posted here:
> http://linux-nfs.org/pipermail/pnfs/2009-December/009607.html
> as well as some cosmetic cleanups and a cleanup of CONFIG_PNFSD usage
> along the lines Christoph suggested, aimed at minimizing its use.
> See diff -w at the bottom of this email for details.
>
> [PATCH v2 01/35] pnfsd: Define CONFIG_PNFSD
> [PATCH v2 02/35] pnfsd: define NFSDDBG_PNFS
> [PATCH v2 03/35] pnfsd, pnfs: protocol level pnfs constants
> [PATCH v2 04/35] pnfsd: return pnfs flags on exchange_id
> [PATCH v2 05/35] pnfsd: don't set up back channel on create_session for ds
> [PATCH v2 06/35] pnfsd: introduce pnfsd header files
> [PATCH v2 07/35] pnfsd: define pnfs_export_operations
> [PATCH v2 08/35] pnfsd: add pnfs export option
> [PATCH v2 09/35] pnfsd: layout verify
> [PATCH v2 10/35] pnfsd: introduce exp_xdr.h
> [PATCH v2 11/35] pnfsd: get device list/info
> [PATCH v2 12/35] pnfsd: filelayout: get device list/info
> [PATCH v2 13/35] pnfsd: layout get
> [PATCH v2 14/35] pnfsd: filelayout: layout encoding
> [PATCH v2 15/35] pnfsd: Helper functions for layout stateid processing.
> [PATCH v2 16/35] pnfsd: helper function for stateid checking
> [PATCH v2 17/35] pnfsd: process the layout stateid
> [PATCH v2 18/35] pnfsd: add helper functions for identifying DS stateids.
> [PATCH v2 19/35] pnfsd: accept all ds stateids
> [PATCH v2 20/35] pnfsd: LAYOUTGET layout stateid processing
> [PATCH v2 21/35] pnfsd: destroy layout on expire_client
> [PATCH v2 22/35] pnfsd: support layout_type attribute
> [PATCH v2 23/35] pnfsd: per block device dlm data server list cache
> [PATCH v2 24/35] pnfsd: new nfsd filesystem file: pnfs_dlm_device
> [PATCH v2 25/35] pnfsd: nfsd4_pnfs_dlm_getdeviter
> [PATCH v2 26/35] pnfsd: nfsd4_pnfs_dlm_getdevinfo
> [PATCH v2 27/35] pnfsd: nfsd4_pnfs_dlm_layoutget
> [PATCH v2 28/35] pnfsd: add dlm file layout layout-type
> [PATCH v2 29/35] pnfsd: dlm pnfs_export_operations
> [PATCH v2 30/35] pnfsd: gfs2: use generic file layout pnfs operations vector
> [PATCH v2 31/35] posix_acl: resolve compile dependency in posix_acl.h
> [PATCH v2 32/35] nfs: resolve compile dependency in nfs_xdr.h
> [PATCH v2 33/35] pnfsd: layout commit
> [PATCH v2 34/35] pnfsd: layout return
> [PATCH v2 35/35] pnfsd: layoutreturn stateid processing
>
> diff from v1 + posted fixes:
>
> git diff --stat -p -M -w origin/pnfsd-files pnfsd-files
> fs/exportfs/Makefile | 4 ++--
> fs/exportfs/nfs4filelayoutxdr.c | 6 ------
> fs/gfs2/export.c | 1 +
> fs/nfsd/nfs4pnfsd.c | 17 +----------------
> fs/nfsd/nfs4pnfsdlm.c | 4 ++--
> fs/nfsd/nfs4state.c | 17 +++++++++++++----
> fs/nfsd/nfsfh.c | 6 +-----
> fs/nfsd/pnfsd.h | 4 ----
> include/linux/nfs4.h | 2 --
> include/linux/nfsd/nfsd.h | 8 --------
> include/linux/nfsd/nfsd4_pnfs.h | 11 +++++++++++
> include/linux/nfsd/state.h | 2 +-
> include/linux/nfsd/xdr4.h | 8 --------
> 13 files changed, 32 insertions(+), 58 deletions(-)
>
> diff --git a/fs/exportfs/Makefile b/fs/exportfs/Makefile
> index f820d80..658207d 100644
> --- a/fs/exportfs/Makefile
> +++ b/fs/exportfs/Makefile
> @@ -3,5 +3,5 @@
>
> obj-$(CONFIG_EXPORTFS) += exportfs.o
>
> -exportfs-objs := expfs.o
> -exportfs-objs += nfs4filelayoutxdr.o
> +exportfs-y := expfs.o
> +exportfs-$(CONFIG_EXPORTFS_FILE_LAYOUT) += nfs4filelayoutxdr.o
> diff --git a/fs/exportfs/nfs4filelayoutxdr.c b/fs/exportfs/nfs4filelayoutxdr.c
> index f076908..782b673 100644
> --- a/fs/exportfs/nfs4filelayoutxdr.c
> +++ b/fs/exportfs/nfs4filelayoutxdr.c
> @@ -1,6 +1,4 @@
> /*
> -* linux/fs/nfsd/nfs4filelayout_xdr.c
> -*
> * Copyright (c) 2006 The Regents of the University of Michigan.
> * All rights reserved.
> *
> @@ -33,8 +31,6 @@
> *
> *
> */
> -#if defined(CONFIG_EXPORTFS_FILE_LAYOUT)
> -
> #include <linux/module.h>
> #include <linux/sunrpc/svc.h>
> #include <linux/nfsd/nfsd.h>
> @@ -226,5 +222,3 @@ out:
> return error;
> }
> EXPORT_SYMBOL(filelayout_encode_layout);
> -
> -#endif /* CONFIG_EXPORTFS_FILE_LAYOUT */
> diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
> index 9cea712..d15876e 100644
> --- a/fs/gfs2/export.c
> +++ b/fs/gfs2/export.c
> @@ -254,3 +254,4 @@ const struct export_operations gfs2_export_ops = {
> .get_name = gfs2_get_name,
> .get_parent = gfs2_get_parent,
> };
> +
> diff --git a/fs/nfsd/nfs4pnfsd.c b/fs/nfsd/nfs4pnfsd.c
> index 4849463..aa7abad 100644
> --- a/fs/nfsd/nfs4pnfsd.c
> +++ b/fs/nfsd/nfs4pnfsd.c
> @@ -21,8 +21,6 @@
> *
> *****************************************************************************/
>
> -#if defined(CONFIG_PNFSD)
> -
> #include <linux/param.h>
> #include <linux/slab.h>
> #include <linux/sunrpc/svc.h>
> @@ -66,18 +64,6 @@ nfsd4_init_pnfs_slabs(void)
> return 0;
> }
>
> -static struct nfs4_file *
> -find_alloc_file(struct inode *ino, struct svc_fh *current_fh)
> -{
> - struct nfs4_file *fp;
> -
> - fp = find_file(ino);
> - if (fp)
> - return fp;
> -
> - return alloc_init_file(ino, current_fh);
> -}
> -
> static struct nfs4_layout_state *
> alloc_init_layout_state(struct nfs4_client *clp, struct nfs4_file *fp,
> stateid_t *stateid)
> @@ -717,8 +703,8 @@ int nfs4_pnfs_return_layout(struct super_block *sb, struct svc_fh *current_fh,
> if (!clp)
> goto out;
>
> - fp = find_file(ino);
> if (lrp->args.lr_return_type == RETURN_FILE) {
> + fp = find_file(ino);
> if (!fp) {
> printk(KERN_ERR "%s: RETURN_FILE: no nfs4_file for "
> "ino %p:%lu\n",
> @@ -778,4 +764,3 @@ void pnfs_expire_client(struct nfs4_client *clp)
> }
> spin_unlock(&layout_lock);
> }
> -#endif /* CONFIG_PNFSD */
> diff --git a/fs/nfsd/nfs4pnfsdlm.c b/fs/nfsd/nfs4pnfsdlm.c
> index b3027fe..ed2e940 100644
> --- a/fs/nfsd/nfs4pnfsdlm.c
> +++ b/fs/nfsd/nfs4pnfsdlm.c
> @@ -40,7 +40,7 @@ struct dlm_device_entry {
> struct list_head dlm_dev_list;
> char disk_name[DISK_NAME_LEN];
> int num_ds;
> - char ds_list[NFSD_PNFS_DS_LIST_MAX];
> + char ds_list[NFSD_DLM_DS_LIST_MAX];
> };
>
> static struct dlm_device_entry *
> @@ -108,7 +108,7 @@ nfsd4_set_pnfs_dlm_device(char *pnfs_dlm_device, int len)
> /* data server list */
> /* FIXME: need to check for comma separated valid ip format */
> len = strcspn(bufp, ":");
> - if (len > NFSD_PNFS_DS_LIST_MAX)
> + if (len > NFSD_DLM_DS_LIST_MAX)
> goto out_free;
> memcpy(new->ds_list, bufp, len);
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index d99caae..bc359ea 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1726,7 +1726,7 @@ out:
> }
>
> /* OPEN Share state helper functions */
> -inline struct nfs4_file *
> +static inline struct nfs4_file *
> alloc_init_file(struct inode *ino, struct svc_fh *current_fh)
> {
> struct nfs4_file *fp;
> @@ -1941,6 +1941,18 @@ find_file(struct inode *ino)
> return NULL;
> }
>
> +struct nfs4_file *
> +find_alloc_file(struct inode *ino, struct svc_fh *current_fh)
> +{
> + struct nfs4_file *fp;
> +
> + fp = find_file(ino);
> + if (fp)
> + return fp;
> +
> + return alloc_init_file(ino, current_fh);
> +}
> +
> static inline int access_valid(u32 x, u32 minorversion)
> {
> if ((x & NFS4_SHARE_ACCESS_MASK) < NFS4_SHARE_ACCESS_READ)
> @@ -2863,10 +2875,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
> if (grace_disallows_io(ino))
> return nfserr_grace;
>
> -#if defined(CONFIG_PNFSD)
> if (pnfs_fh_is_ds(&current_fh->fh_handle))
> return 0;
> -#endif /* CONFIG_PNFSD */
>
> if (nfsd4_has_session(cstate))
> flags |= HAS_SESSION;
> @@ -4183,4 +4193,3 @@ nfs4_reset_lease(time_t leasetime)
> {
> user_lease_time = leasetime;
> }
> -
> diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
> index 7d624f4..8300b2f 100644
> --- a/fs/nfsd/nfsfh.c
> +++ b/fs/nfsd/nfsfh.c
> @@ -22,12 +22,8 @@
> #include <linux/sunrpc/svc.h>
> #include <linux/sunrpc/svcauth_gss.h>
> #include <linux/nfsd/nfsd.h>
> -#include "auth.h"
> -
> -#if defined(CONFIG_PNFSD)
> -#include <linux/nfsd/state.h>
> #include <linux/nfsd/nfsd4_pnfs.h>
> -#endif /* CONFIG_PNFSD */
> +#include "auth.h"
>
> #define NFSDDBG_FACILITY NFSDDBG_FH
>
> diff --git a/fs/nfsd/pnfsd.h b/fs/nfsd/pnfsd.h
> index 7d255f5..c3354e8 100644
> --- a/fs/nfsd/pnfsd.h
> +++ b/fs/nfsd/pnfsd.h
> @@ -34,8 +34,6 @@
> #ifndef LINUX_NFSD_PNFSD_H
> #define LINUX_NFSD_PNFSD_H
>
> -#if defined(CONFIG_PNFSD)
> -
> #include <linux/nfsd/state.h>
> #include <linux/nfsd/nfsd4_pnfs.h>
>
> @@ -64,6 +62,4 @@ int nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *, struct exp_xdr_stream *)
> int nfs4_pnfs_return_layout(struct super_block *, struct svc_fh *,
> struct nfsd4_pnfs_layoutreturn *);
>
> -#endif /* CONFIG_PNFSD */
> -
> #endif /* LINUX_NFSD_PNFSD_H */
> diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
> index 3c251f4..a899cff 100644
> --- a/include/linux/nfs4.h
> +++ b/include/linux/nfs4.h
> @@ -548,7 +548,6 @@ enum state_protect_how4 {
> SP4_SSV = 2
> };
>
> -#if defined(CONFIG_PNFS) || defined(CONFIG_PNFSD)
> enum pnfs_layouttype {
> LAYOUT_NFSV4_FILES = 1,
> LAYOUT_OSD2_OBJECTS = 2,
> @@ -591,7 +590,6 @@ enum filelayout_hint_care4 {
> NFLH4_CARE_STRIPE_UNIT_SIZE = 0x00000040,
> NFLH4_CARE_STRIPE_COUNT = 0x00000080
> };
> -#endif /* defined(CONFIG_PNFS) || defined(CONFIG_PNFSD) */
>
> #endif
> #endif
> diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
> index 6eb5c6e..c61e220 100644
> --- a/include/linux/nfsd/nfsd.h
> +++ b/include/linux/nfsd/nfsd.h
> @@ -181,14 +181,6 @@ static inline void nfs4_reset_lease(time_t leasetime) { }
> static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
> #endif
>
> -#ifdef CONFIG_PNFSD
> -/* Length of comma separated pnfs data server IPv4 addresses. Enough room for
> - * 32 addresses.
> - */
> -#define NFSD_PNFS_DS_LIST_MAX 512
> -
> -#endif
> -
> /*
> * lockd binding
> */
> diff --git a/include/linux/nfsd/nfsd4_pnfs.h b/include/linux/nfsd/nfsd4_pnfs.h
> index e96c2d4..dbed31a 100644
> --- a/include/linux/nfsd/nfsd4_pnfs.h
> +++ b/include/linux/nfsd/nfsd4_pnfs.h
> @@ -169,6 +169,8 @@ struct pnfs_export_operations {
> int (*can_merge_layouts) (u32 layout_type);
> };
>
> +#if defined(CONFIG_PNFSD)
> +
> /*
> * fh_fsid_type is overloaded to indicate whether a filehandle was one supplied
> * to a DS by LAYOUTGET. nfs4_preprocess_stateid_op() uses this to decide how
> @@ -196,4 +198,13 @@ static inline int pnfs_fh_fsid_type(struct knfsd_fh *fh)
> return fsid_type;
> }
>
> +#else /* CONFIG_PNFSD */
> +
> +static inline int pnfs_fh_is_ds(struct knfsd_fh *fh)
> +{
> + return 0;
> +}
> +
> +#endif /* CONFIG_PNFSD */
> +
> #endif /* _LINUX_NFSD_NFSD4_PNFS_H */
> diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
> index 8cf7e51..16ee98c 100644
> --- a/include/linux/nfsd/state.h
> +++ b/include/linux/nfsd/state.h
> @@ -415,7 +415,7 @@ extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
> extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
> extern void nfsd4_free_slab(struct kmem_cache **);
> extern struct nfs4_file *find_file(struct inode *);
> -extern struct nfs4_file *alloc_init_file(struct inode *, struct svc_fh *);
> +extern struct nfs4_file *find_alloc_file(struct inode *, struct svc_fh *);
> extern void put_nfs4_file(struct nfs4_file *);
> extern void get_nfs4_file(struct nfs4_file *);
> extern struct nfs4_client *find_confirmed_client(clientid_t *);
> diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
> index 8e36ac3..831151f 100644
> --- a/include/linux/nfsd/xdr4.h
> +++ b/include/linux/nfsd/xdr4.h
> @@ -515,14 +515,6 @@ struct nfsd4_compoundres {
> struct nfsd4_compound_state cstate;
> };
>
> -static inline __be32 *
> -nfsd4_xdr_reserve_space(struct nfsd4_compoundres *resp, size_t nbytes)
> -{
> - __be32 *p = resp->p;
> - BUG_ON(p + XDR_QUADLEN(nbytes) > resp->end);
> - return p;
> -}
> -
> static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
> {
> struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
> _______________________________________________
> pNFS mailing list
> [email protected]
> http://linux-nfs.org/cgi-bin/mailman/listinfo/pnfs