2011-02-23 00:09:46

by Benny Halevy

[permalink] [raw]
Subject: [PATCH] pnfsd-lexp: recall layout on truncate

Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/pnfsd.h | 1 +
fs/nfsd/pnfsd_lexp.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++-
fs/nfsd/vfs.c | 4 ++
3 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/pnfsd.h b/fs/nfsd/pnfsd.h
index 946f334..51dd982 100644
--- a/fs/nfsd/pnfsd.h
+++ b/fs/nfsd/pnfsd.h
@@ -139,6 +139,7 @@ extern struct sockaddr pnfsd_lexp_addr;
extern size_t pnfs_lexp_addr_len;

extern void pnfsd_lexp_init(struct inode *);
+extern int pnfsd_lexp_recall_layout(struct inode *);
#endif /* CONFIG_PNFSD_LOCAL_EXPORT */

#endif /* LINUX_NFSD_PNFSD_H */
diff --git a/fs/nfsd/pnfsd_lexp.c b/fs/nfsd/pnfsd_lexp.c
index bf2f403..736cc3f 100644
--- a/fs/nfsd/pnfsd_lexp.c
+++ b/fs/nfsd/pnfsd_lexp.c
@@ -18,6 +18,8 @@
* by David M. Richter <[email protected]>
*/

+#include <linux/sched.h>
+#include <linux/wait.h>
#include <linux/sunrpc/svc_xprt.h>
#include <linux/nfsd/nfs4layoutxdr.h>

@@ -28,6 +30,8 @@
struct sockaddr pnfsd_lexp_addr;
size_t pnfs_lexp_addr_len;

+static wait_queue_head_t lo_recall_wq;
+
static int
pnfsd_lexp_layout_type(struct super_block *sb)
{
@@ -196,8 +200,7 @@ static int
pnfsd_lexp_layout_return(struct inode *inode,
const struct nfsd4_pnfs_layoutreturn_arg *args)
{
- dprintk("%s: (unimplemented)\n", __func__);
-
+ wake_up_all(&lo_recall_wq);
return 0;
}

@@ -220,6 +223,75 @@ static struct pnfs_export_operations pnfsd_lexp_ops = {
void
pnfsd_lexp_init(struct inode *inode)
{
+ static bool init_once;
+
dprintk("%s: &pnfsd_lexp_ops=%p\n", __func__, &pnfsd_lexp_ops);
inode->i_sb->s_pnfs_op = &pnfsd_lexp_ops;
+
+ if (!init_once++)
+ init_waitqueue_head(&lo_recall_wq);
+}
+
+static bool
+has_no_layout(struct nfs4_file *fp)
+{
+ return list_empty(&fp->fi_layouts);
+}
+
+/*
+ * recalls the layout if needed and waits synchronously for its return
+ */
+int
+pnfsd_lexp_recall_layout(struct inode *inode)
+{
+ struct nfs4_file *fp;
+ struct nfsd4_pnfs_cb_layout cbl;
+ struct pnfsd_cb_ctl cb_ctl;
+ int status = 0;
+
+ dprintk("%s: begin\n", __func__);
+ fp = find_file(inode);
+ BUG_ON(!fp);
+
+ if (has_no_layout(fp))
+ goto out;
+
+ memset(&cb_ctl, 0, sizeof(cb_ctl));
+ status = pnfsd_get_cb_op(&cb_ctl);
+ BUG_ON(status);
+
+ memset(&cbl, 0, sizeof(cbl));
+ cbl.cbl_recall_type = RETURN_FILE;
+ cbl.cbl_seg.layout_type = LAYOUT_NFSV4_1_FILES;
+ /* for now, always recall the whole layout */
+ cbl.cbl_seg.iomode = IOMODE_ANY;
+ cbl.cbl_seg.offset = 0;
+ cbl.cbl_seg.length = NFS4_MAX_UINT64;
+
+ while (!has_no_layout(fp)) {
+ dprintk("%s: recalling layout\n", __func__);
+ status = cb_ctl.cb_op->cb_layout_recall(inode->i_sb, inode, &cbl);
+
+ switch (status) {
+ case 0:
+ case -EAGAIN:
+ break;
+ case -ENOENT: /* no matching layout */
+ status = 0;
+ goto out_put_cb;
+ default:
+ goto out_put_cb;
+ }
+
+ dprintk("%s: waiting\n", __func__);
+ status = wait_event_interruptible(lo_recall_wq, has_no_layout(fp));
+ if (status)
+ break;
+ }
+out_put_cb:
+ pnfsd_put_cb_op(&cb_ctl);
+out:
+ put_nfs4_file(fp);
+ dprintk("%s: status=%d\n", __func__, status);
+ return status;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 79ba25f..021e89e 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -36,6 +36,7 @@
#ifdef CONFIG_NFSD_V4
#include "acl.h"
#include "idmap.h"
+#include "pnfsd.h"
#include <linux/nfsd4_spnfs.h>
#endif /* CONFIG_NFSD_V4 */
#if defined(CONFIG_SPNFS_BLOCK)
@@ -384,6 +385,9 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
if (err)
goto out;
+#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
+ pnfsd_lexp_recall_layout(inode);
+#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
#if defined(CONFIG_SPNFS_BLOCK)
if (pnfs_block_enabled(inode, 0)) {
err = bl_layoutrecall(inode, RETURN_FILE,
--
1.7.3.4



2011-02-23 17:55:59

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH] pnfsd-lexp: recall layout on truncate

On 2011-02-23 09:18, Boaz Harrosh wrote:
> On 02/22/2011 04:09 PM, Benny Halevy wrote:
>> Signed-off-by: Benny Halevy <[email protected]>
>> @@ -384,6 +385,9 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
>> NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
>> if (err)
>> goto out;
>> +#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
>> + pnfsd_lexp_recall_layout(inode);
>
> Ha, I just realized, this is not good, for when CONFIG_PNFSD_LOCAL_EXPORT is set
> but the actual sb is pnfs-exported by the file system like gfs2 and exofs.
> You need to check if the FS is owned by the PNFSD_LOCAL_EXPORT.

Good point. Can you please test the following patch?

>From 9afc5e82390fe889aee2770eb217613146ddc9de Mon Sep 17 00:00:00 2001
From: Benny Halevy <[email protected]>
Date: Wed, 23 Feb 2011 09:53:15 -0800
Subject: [PATCH] SQUASHME: pnfsd-lexp: recall layout on truncate only if inode is exported over pnfsd-lexp

Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfsd/pnfsd.h | 1 +
fs/nfsd/pnfsd_lexp.c | 6 ++++++
fs/nfsd/vfs.c | 3 ++-
3 files changed, 9 insertions(+), 1 deletions(-)

diff --git a/fs/nfsd/pnfsd.h b/fs/nfsd/pnfsd.h
index 51dd982..c11d93d 100644
--- a/fs/nfsd/pnfsd.h
+++ b/fs/nfsd/pnfsd.h
@@ -139,6 +139,7 @@ extern struct sockaddr pnfsd_lexp_addr;
extern size_t pnfs_lexp_addr_len;

extern void pnfsd_lexp_init(struct inode *);
+extern bool is_inode_pnfsd_lexp(struct inode *);
extern int pnfsd_lexp_recall_layout(struct inode *);
#endif /* CONFIG_PNFSD_LOCAL_EXPORT */

diff --git a/fs/nfsd/pnfsd_lexp.c b/fs/nfsd/pnfsd_lexp.c
index 736cc3f..71d503e 100644
--- a/fs/nfsd/pnfsd_lexp.c
+++ b/fs/nfsd/pnfsd_lexp.c
@@ -232,6 +232,12 @@ pnfsd_lexp_init(struct inode *inode)
init_waitqueue_head(&lo_recall_wq);
}

+bool
+is_inode_pnfsd_lexp(struct inode *inode)
+{
+ return inode->i_sb->s_pnfs_op == &pnfsd_lexp_ops;
+}
+
static bool
has_no_layout(struct nfs4_file *fp)
{
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 021e89e..2a3eff2 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -386,7 +386,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
if (err)
goto out;
#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
- pnfsd_lexp_recall_layout(inode);
+ if (is_inode_pnfsd_lexp(inode))
+ pnfsd_lexp_recall_layout(inode);
#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
#if defined(CONFIG_SPNFS_BLOCK)
if (pnfs_block_enabled(inode, 0)) {
--
1.7.3.4


2011-02-23 05:07:37

by Boaz Harrosh

[permalink] [raw]
Subject: Re: [PATCH] pnfsd-lexp: recall layout on truncate

On 02/22/2011 04:09 PM, Benny Halevy wrote:
> Signed-off-by: Benny Halevy <[email protected]>
> ---
> fs/nfsd/pnfsd.h | 1 +
> fs/nfsd/pnfsd_lexp.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++-
> fs/nfsd/vfs.c | 4 ++
> 3 files changed, 79 insertions(+), 2 deletions(-)
>
> diff --git a/fs/nfsd/pnfsd.h b/fs/nfsd/pnfsd.h
> index 946f334..51dd982 100644
> --- a/fs/nfsd/pnfsd.h
> +++ b/fs/nfsd/pnfsd.h
> @@ -139,6 +139,7 @@ extern struct sockaddr pnfsd_lexp_addr;
> extern size_t pnfs_lexp_addr_len;
>
> extern void pnfsd_lexp_init(struct inode *);
> +extern int pnfsd_lexp_recall_layout(struct inode *);
> #endif /* CONFIG_PNFSD_LOCAL_EXPORT */
>
> #endif /* LINUX_NFSD_PNFSD_H */
> diff --git a/fs/nfsd/pnfsd_lexp.c b/fs/nfsd/pnfsd_lexp.c
> index bf2f403..736cc3f 100644
> --- a/fs/nfsd/pnfsd_lexp.c
> +++ b/fs/nfsd/pnfsd_lexp.c
> @@ -18,6 +18,8 @@
> * by David M. Richter <[email protected]>
> */
>
> +#include <linux/sched.h>
> +#include <linux/wait.h>
> #include <linux/sunrpc/svc_xprt.h>
> #include <linux/nfsd/nfs4layoutxdr.h>
>
> @@ -28,6 +30,8 @@
> struct sockaddr pnfsd_lexp_addr;
> size_t pnfs_lexp_addr_len;
>
> +static wait_queue_head_t lo_recall_wq;
> +
> static int
> pnfsd_lexp_layout_type(struct super_block *sb)
> {
> @@ -196,8 +200,7 @@ static int
> pnfsd_lexp_layout_return(struct inode *inode,
> const struct nfsd4_pnfs_layoutreturn_arg *args)
> {
> - dprintk("%s: (unimplemented)\n", __func__);
> -
> + wake_up_all(&lo_recall_wq);
> return 0;
> }
>
> @@ -220,6 +223,75 @@ static struct pnfs_export_operations pnfsd_lexp_ops = {
> void
> pnfsd_lexp_init(struct inode *inode)
> {
> + static bool init_once;
> +
> dprintk("%s: &pnfsd_lexp_ops=%p\n", __func__, &pnfsd_lexp_ops);
> inode->i_sb->s_pnfs_op = &pnfsd_lexp_ops;
> +
> + if (!init_once++)
> + init_waitqueue_head(&lo_recall_wq);
> +}
> +
> +static bool
> +has_no_layout(struct nfs4_file *fp)
> +{
> + return list_empty(&fp->fi_layouts);
> +}

I don't like that negative logic naming it's better
to ask for positive return and do the !empty inside

Note the
+ while (!has_no_layout(fp)) {

Below. Double negative in a single statement is to much
for my 50 years old brain

> +
> +/*
> + * recalls the layout if needed and waits synchronously for its return
> + */
> +int
> +pnfsd_lexp_recall_layout(struct inode *inode)
> +{
> + struct nfs4_file *fp;
> + struct nfsd4_pnfs_cb_layout cbl;
> + struct pnfsd_cb_ctl cb_ctl;
> + int status = 0;
> +
> + dprintk("%s: begin\n", __func__);
> + fp = find_file(inode);
> + BUG_ON(!fp);
> +
> + if (has_no_layout(fp))
> + goto out;
> +
> + memset(&cb_ctl, 0, sizeof(cb_ctl));
> + status = pnfsd_get_cb_op(&cb_ctl);

You are inside nfsd module so you don't need this
reference taking and call through a vector. But
I guess since it's only for testing It is good as
a documentation example of use.

> + BUG_ON(status);
> +
> + memset(&cbl, 0, sizeof(cbl));
> + cbl.cbl_recall_type = RETURN_FILE;
> + cbl.cbl_seg.layout_type = LAYOUT_NFSV4_1_FILES;
> + /* for now, always recall the whole layout */
> + cbl.cbl_seg.iomode = IOMODE_ANY;
> + cbl.cbl_seg.offset = 0;
> + cbl.cbl_seg.length = NFS4_MAX_UINT64;
> +
> + while (!has_no_layout(fp)) {
> + dprintk("%s: recalling layout\n", __func__);
> + status = cb_ctl.cb_op->cb_layout_recall(inode->i_sb, inode, &cbl);
> +
> + switch (status) {
> + case 0:
> + case -EAGAIN:
> + break;
> + case -ENOENT: /* no matching layout */
> + status = 0;
> + goto out_put_cb;
> + default:
> + goto out_put_cb;
> + }
> +
> + dprintk("%s: waiting\n", __func__);
> + status = wait_event_interruptible(lo_recall_wq, has_no_layout(fp));
> + if (status)
> + break;
> + }
> +out_put_cb:
> + pnfsd_put_cb_op(&cb_ctl);
> +out:
> + put_nfs4_file(fp);
> + dprintk("%s: status=%d\n", __func__, status);
> + return status;
> }
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index 79ba25f..021e89e 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -36,6 +36,7 @@
> #ifdef CONFIG_NFSD_V4
> #include "acl.h"
> #include "idmap.h"
> +#include "pnfsd.h"
> #include <linux/nfsd4_spnfs.h>
> #endif /* CONFIG_NFSD_V4 */
> #if defined(CONFIG_SPNFS_BLOCK)
> @@ -384,6 +385,9 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
> NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
> if (err)
> goto out;
> +#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
> + pnfsd_lexp_recall_layout(inode);

This is not enough. You need to lock with the layout_get code
to not give any layouts from before the issued recall till after
the execution of the truncate. Any layout_gets between that time should
return NFS_ERROR_DELAY or so.

See exofs/export.c for an example

Boaz

> +#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
> #if defined(CONFIG_SPNFS_BLOCK)
> if (pnfs_block_enabled(inode, 0)) {
> err = bl_layoutrecall(inode, RETURN_FILE,


2011-02-23 17:19:51

by Boaz Harrosh

[permalink] [raw]
Subject: Re: [PATCH] pnfsd-lexp: recall layout on truncate

On 02/22/2011 04:09 PM, Benny Halevy wrote:
> Signed-off-by: Benny Halevy <[email protected]>
> ---
> fs/nfsd/pnfsd.h | 1 +
> fs/nfsd/pnfsd_lexp.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++-
> fs/nfsd/vfs.c | 4 ++
> 3 files changed, 79 insertions(+), 2 deletions(-)
>
> diff --git a/fs/nfsd/pnfsd.h b/fs/nfsd/pnfsd.h
> index 946f334..51dd982 100644
> --- a/fs/nfsd/pnfsd.h
> +++ b/fs/nfsd/pnfsd.h
> @@ -139,6 +139,7 @@ extern struct sockaddr pnfsd_lexp_addr;
> extern size_t pnfs_lexp_addr_len;
>
> extern void pnfsd_lexp_init(struct inode *);
> +extern int pnfsd_lexp_recall_layout(struct inode *);
> #endif /* CONFIG_PNFSD_LOCAL_EXPORT */
>
> #endif /* LINUX_NFSD_PNFSD_H */
> diff --git a/fs/nfsd/pnfsd_lexp.c b/fs/nfsd/pnfsd_lexp.c
> index bf2f403..736cc3f 100644
> --- a/fs/nfsd/pnfsd_lexp.c
> +++ b/fs/nfsd/pnfsd_lexp.c
> @@ -18,6 +18,8 @@
> * by David M. Richter <[email protected]>
> */
>
> +#include <linux/sched.h>
> +#include <linux/wait.h>
> #include <linux/sunrpc/svc_xprt.h>
> #include <linux/nfsd/nfs4layoutxdr.h>
>
> @@ -28,6 +30,8 @@
> struct sockaddr pnfsd_lexp_addr;
> size_t pnfs_lexp_addr_len;
>
> +static wait_queue_head_t lo_recall_wq;
> +
> static int
> pnfsd_lexp_layout_type(struct super_block *sb)
> {
> @@ -196,8 +200,7 @@ static int
> pnfsd_lexp_layout_return(struct inode *inode,
> const struct nfsd4_pnfs_layoutreturn_arg *args)
> {
> - dprintk("%s: (unimplemented)\n", __func__);
> -
> + wake_up_all(&lo_recall_wq);
> return 0;
> }
>
> @@ -220,6 +223,75 @@ static struct pnfs_export_operations pnfsd_lexp_ops = {
> void
> pnfsd_lexp_init(struct inode *inode)
> {
> + static bool init_once;
> +
> dprintk("%s: &pnfsd_lexp_ops=%p\n", __func__, &pnfsd_lexp_ops);
> inode->i_sb->s_pnfs_op = &pnfsd_lexp_ops;
> +
> + if (!init_once++)
> + init_waitqueue_head(&lo_recall_wq);
> +}
> +
> +static bool
> +has_no_layout(struct nfs4_file *fp)
> +{
> + return list_empty(&fp->fi_layouts);
> +}
> +
> +/*
> + * recalls the layout if needed and waits synchronously for its return
> + */
> +int
> +pnfsd_lexp_recall_layout(struct inode *inode)
> +{
> + struct nfs4_file *fp;
> + struct nfsd4_pnfs_cb_layout cbl;
> + struct pnfsd_cb_ctl cb_ctl;
> + int status = 0;
> +
> + dprintk("%s: begin\n", __func__);
> + fp = find_file(inode);
> + BUG_ON(!fp);
> +
> + if (has_no_layout(fp))
> + goto out;
> +
> + memset(&cb_ctl, 0, sizeof(cb_ctl));
> + status = pnfsd_get_cb_op(&cb_ctl);
> + BUG_ON(status);
> +
> + memset(&cbl, 0, sizeof(cbl));
> + cbl.cbl_recall_type = RETURN_FILE;
> + cbl.cbl_seg.layout_type = LAYOUT_NFSV4_1_FILES;
> + /* for now, always recall the whole layout */
> + cbl.cbl_seg.iomode = IOMODE_ANY;
> + cbl.cbl_seg.offset = 0;
> + cbl.cbl_seg.length = NFS4_MAX_UINT64;
> +
> + while (!has_no_layout(fp)) {
> + dprintk("%s: recalling layout\n", __func__);
> + status = cb_ctl.cb_op->cb_layout_recall(inode->i_sb, inode, &cbl);
> +
> + switch (status) {
> + case 0:
> + case -EAGAIN:
> + break;
> + case -ENOENT: /* no matching layout */
> + status = 0;
> + goto out_put_cb;
> + default:
> + goto out_put_cb;
> + }
> +
> + dprintk("%s: waiting\n", __func__);
> + status = wait_event_interruptible(lo_recall_wq, has_no_layout(fp));
> + if (status)
> + break;
> + }
> +out_put_cb:
> + pnfsd_put_cb_op(&cb_ctl);
> +out:
> + put_nfs4_file(fp);
> + dprintk("%s: status=%d\n", __func__, status);
> + return status;
> }
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index 79ba25f..021e89e 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -36,6 +36,7 @@
> #ifdef CONFIG_NFSD_V4
> #include "acl.h"
> #include "idmap.h"
> +#include "pnfsd.h"
> #include <linux/nfsd4_spnfs.h>
> #endif /* CONFIG_NFSD_V4 */
> #if defined(CONFIG_SPNFS_BLOCK)
> @@ -384,6 +385,9 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
> NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
> if (err)
> goto out;
> +#if defined(CONFIG_PNFSD_LOCAL_EXPORT)
> + pnfsd_lexp_recall_layout(inode);

Ha, I just realized, this is not good, for when CONFIG_PNFSD_LOCAL_EXPORT is set
but the actual sb is pnfs-exported by the file system like gfs2 and exofs.
You need to check if the FS is owned by the PNFSD_LOCAL_EXPORT.

Boaz

> +#endif /* CONFIG_PNFSD_LOCAL_EXPORT */
> #if defined(CONFIG_SPNFS_BLOCK)
> if (pnfs_block_enabled(inode, 0)) {
> err = bl_layoutrecall(inode, RETURN_FILE,