2010-06-08 22:14:49

by Alexandros Batsakis

[permalink] [raw]
Subject: [PATCH 0/7] forgetful client

This set of patches (2.6.35-rc2) includes a first attempt to implement
the forgetful client model for the pNFS client. The model
is explained in patch 6.
It also includes some minor cleanups in the layout management code
that help to improve the maintainability of the current code.

Passed cthon tests against the pyNFS server, and against a modified
version of pyNFS server that randomly issues layout recalls after opens.

Alexandros Batsakis (7):
pnfs-submit: clean struct nfs_inode
pnfs-submit: remove lgetcount, lretcount
pnfs-submit: change stateid to be a union
pnfs-submit: request whole-file layouts only
pnfs-submit: change layout list to be similar to other state lists
pnfs-submit: forgetful client (layouts)
pnfs-submit: support for CB_RECALL_ANY (layouts)

fs/nfs/callback.h | 7 ++
fs/nfs/callback_proc.c | 231 ++++++++++++++++++++++++++++++++++-----------
fs/nfs/callback_xdr.c | 2 +-
fs/nfs/client.c | 2 +-
fs/nfs/delegation.c | 19 ++--
fs/nfs/inode.c | 12 +-
fs/nfs/nfs4_fs.h | 1 +
fs/nfs/nfs4proc.c | 46 +++++----
fs/nfs/nfs4state.c | 4 +-
fs/nfs/nfs4xdr.c | 38 ++++----
fs/nfs/pnfs.c | 214 +++++++++++++++++------------------------
fs/nfs/pnfs.h | 3 +-
fs/nfsd/nfs4callback.c | 1 -
include/linux/nfs4.h | 16 +++-
include/linux/nfs4_pnfs.h | 2 +-
include/linux/nfs_fs.h | 28 +++---
include/linux/nfs_fs_sb.h | 2 +-
17 files changed, 372 insertions(+), 256 deletions(-)



2010-06-08 22:14:50

by Alexandros Batsakis

[permalink] [raw]
Subject: [PATCH 5/7] pnfs-submit: change layout list to be similar to other state lists

The current design keeps a list (in struct nfs_client) of inodes having layouts.
In order to make that code more similar to delegation handling (and in general to the rest of the NFS code),
this patch changes the list element to layouts directly.
No backpointer from the layout to the inode is needed, as the inode can be accessed by a container_of() call.

Signed-off-by: Alexandros Batsakis <[email protected]>
---
fs/nfs/callback_proc.c | 9 +++++++--
fs/nfs/client.c | 2 +-
fs/nfs/inode.c | 4 ++--
fs/nfs/pnfs.c | 10 ++++------
include/linux/nfs_fs.h | 4 +---
include/linux/nfs_fs_sb.h | 2 +-
6 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 16b4510..3bae785 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -76,7 +76,6 @@ static int (*nfs_validate_delegation_stateid(struct nfs_client *clp))(struct nfs
return nfs4_validate_delegation_stateid;
}

-
__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
{
struct nfs_client *clp;
@@ -140,6 +139,7 @@ nfs_layoutrecall_find_inode(struct nfs_client *clp,
const struct cb_pnfs_layoutrecallargs *args)
{
struct nfs_inode *nfsi;
+ struct pnfs_layout_type *layout;
struct nfs_server *server;
struct inode *ino = NULL;

@@ -147,9 +147,14 @@ nfs_layoutrecall_find_inode(struct nfs_client *clp,
__func__, args->cbl_recall_type, clp);

spin_lock(&clp->cl_lock);
- list_for_each_entry(nfsi, &clp->cl_lo_inodes, lo_inodes) {
+ list_for_each_entry(layout, &clp->cl_layouts, lo_layouts) {
+ nfsi = PNFS_NFS_INODE(layout);
+ if (!nfsi)
+ continue;
+
dprintk("%s: Searching inode=%lu\n",
__func__, nfsi->vfs_inode.i_ino);
+
if (args->cbl_recall_type == RETURN_FILE) {
if (nfs_compare_fh(&args->cbl_fh, &nfsi->fh))
continue;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 09ab4ea..b9abf15 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -157,7 +157,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
if (!IS_ERR(cred))
clp->cl_machine_cred = cred;
#if defined(CONFIG_NFS_V4_1)
- INIT_LIST_HEAD(&clp->cl_lo_inodes);
+ INIT_LIST_HEAD(&clp->cl_layouts);
#endif
nfs_fscache_get_client_cookie(clp);

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d43f2c5..b632ba7 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1400,7 +1400,7 @@ static void pnfs_destroy_inode(struct nfs_inode *nfsi)
if (!list_empty(&nfsi->layout.segs))
pnfs_destroy_layout(nfsi);

- BUG_ON(!list_empty(&nfsi->lo_inodes));
+ BUG_ON(!list_empty(&nfsi->layout.lo_layouts));
BUG_ON(!list_empty(&nfsi->layout.segs));
BUG_ON(nfsi->layout.refcount);
BUG_ON(nfsi->layout.ld_data);
@@ -1418,10 +1418,10 @@ void nfs_destroy_inode(struct inode *inode)
static void pnfs_init_once(struct nfs_inode *nfsi)
{
#ifdef CONFIG_NFS_V4_1
- INIT_LIST_HEAD(&nfsi->lo_inodes);
init_waitqueue_head(&nfsi->lo_waitq);
spin_lock_init(&nfsi->lo_lock);
seqlock_init(&nfsi->layout.seqlock);
+ INIT_LIST_HEAD(&nfsi->layout.lo_layouts);
INIT_LIST_HEAD(&nfsi->layout.segs);
nfsi->layout.refcount = 0;
nfsi->layout.ld_data = NULL;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 04a36db..bdd0d19 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -351,10 +351,10 @@ put_unlock_current_layout(struct pnfs_layout_type *lo)
io_ops->free_layout(lo->ld_data);
lo->ld_data = NULL;

- /* Unlist the inode. */
+ /* Unlist the layout. */
clp = NFS_SERVER(&nfsi->vfs_inode)->nfs_client;
spin_lock(&clp->cl_lock);
- list_del_init(&nfsi->lo_inodes);
+ list_del_init(&lo->lo_layouts);
spin_unlock(&clp->cl_lock);
}
spin_unlock(&nfsi->lo_lock);
@@ -866,10 +866,8 @@ alloc_init_layout(struct inode *ino)

BUG_ON(lo->ld_data != NULL);
lo->ld_data = ld_data;
- seqlock_init(&lo->seqlock);
memset(&lo->stateid, 0, NFS4_STATEID_SIZE);
lo->refcount = 1;
- INIT_LIST_HEAD(&lo->segs);
lo->roc_iomode = 0;
return lo;
}
@@ -925,8 +923,8 @@ get_lock_alloc_layout(struct inode *ino)
spin_lock(&nfsi->lo_lock);

spin_lock(&clp->cl_lock);
- if (list_empty(&nfsi->lo_inodes))
- list_add_tail(&nfsi->lo_inodes, &clp->cl_lo_inodes);
+ if (list_empty(&lo->lo_layouts))
+ list_add_tail(&lo->lo_layouts, &clp->cl_layouts);
spin_unlock(&clp->cl_lock);
} else
lo = ERR_PTR(-ENOMEM);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index f6e3e20..ee45eac 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -99,6 +99,7 @@ struct posix_acl;

struct pnfs_layout_type {
int refcount;
+ struct list_head lo_layouts; /* other client layouts */
struct list_head segs; /* layout segments list */
int roc_iomode; /* iomode to return on close, 0=none */
seqlock_t seqlock; /* Protects the stateid */
@@ -204,9 +205,6 @@ struct nfs_inode {

/* pNFS layout information */
#if defined(CONFIG_NFS_V4_1)
- /* Inodes having layouts */
- struct list_head lo_inodes;
-
wait_queue_head_t lo_waitq;
spinlock_t lo_lock;
struct pnfs_layout_type layout;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 4d58efd..b6a23e6 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -85,7 +85,7 @@ struct nfs_client {
/* The flags used for obtaining the clientid during EXCHANGE_ID */
u32 cl_exchange_flags;
struct nfs4_session *cl_session; /* sharred session */
- struct list_head cl_lo_inodes; /* Inodes having layouts */
+ struct list_head cl_layouts;
struct nfs4_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
#endif /* CONFIG_NFS_V4_1 */

--
1.6.2.5


2010-06-08 22:14:50

by Alexandros Batsakis

[permalink] [raw]
Subject: [PATCH 6/7] pnfs-submit: forgetful client (layouts)

Forgetful client model:

If we receive a CB_LAYOUTRECALL:
- we spawn a thread to handle the recall
(XXX: for now only one recall can be active at a time; otherwise we return NFS4ERR_DELAY)
- we check the stateid seqid;
if it does not match, we return NFS4ERR_DELAY
- we check for pending I/O;
if there is any, we return NFS4ERR_DELAY
Otherwise we return NFS4ERR_NOMATCHING_LAYOUT.
Note that for whole-file layouts there is no need to serialize LAYOUTGETs/LAYOUTRETURNs.
For bulk layouts, if there is an active layout, we return NFS4_OK and start
cleaning the layouts asynchronously. At the end we send a bulk LAYOUTRETURN.
Note that there is no need to explicitly prevent new LAYOUTGETs, as the server should reject them.

Signed-off-by: Alexandros Batsakis <[email protected]>
---
fs/nfs/callback_proc.c | 146 ++++++++++++++++++++++++++++++++++--------------
fs/nfs/nfs4_fs.h | 1 +
fs/nfs/pnfs.c | 70 ++++++++++-------------
3 files changed, 136 insertions(+), 81 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 3bae785..abdbf40 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -129,6 +129,38 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf

#if defined(CONFIG_NFS_V4_1)

+static bool
+pnfs_is_next_layout_stateid(const struct pnfs_layout_type *lo,
+ const nfs4_stateid stateid)
+{
+ int seqlock;
+ bool res;
+ u32 oldseqid, newseqid;
+
+ do {
+ seqlock = read_seqbegin(&lo->seqlock);
+ oldseqid = be32_to_cpu(lo->stateid.u.stateid.seqid);
+ newseqid = be32_to_cpu(stateid.u.stateid.seqid);
+ res = !memcmp(lo->stateid.u.stateid.other,
+ stateid.u.stateid.other,
+ NFS4_STATEID_OTHER_SIZE);
+ if (res) { /* comparing layout stateids */
+ if (oldseqid == ~0)
+ res = (newseqid == 1);
+ else
+ res = (newseqid == oldseqid + 1);
+ } else { /* open stateid */
+ res = !memcmp(lo->stateid.u.data,
+ &zero_stateid,
+ NFS4_STATEID_SIZE);
+ if (res)
+ res = (newseqid == 1);
+ }
+ } while (read_seqretry(&lo->seqlock, seqlock));
+
+ return res;
+}
+
/*
* Retrieve an inode based on layout recall parameters
*
@@ -191,9 +223,10 @@ static int pnfs_recall_layout(void *data)
struct inode *inode, *ino;
struct nfs_client *clp;
struct cb_pnfs_layoutrecallargs rl;
+ struct nfs4_pnfs_layoutreturn *lrp;
struct recall_layout_threadargs *args =
(struct recall_layout_threadargs *)data;
- int status;
+ int status = 0;

daemonize("nfsv4-layoutreturn");

@@ -204,47 +237,59 @@ static int pnfs_recall_layout(void *data)
clp = args->clp;
inode = args->inode;
rl = *args->rl;
- args->result = 0;
- complete(&args->started);
- args = NULL;
- /* Note: args must not be used after this point!!! */
-
-/* FIXME: need barrier here:
- pause I/O to data servers
- pause layoutgets
- drain all outstanding writes to storage devices
- wait for any outstanding layoutreturns and layoutgets mentioned in
- cb_sequence.
- then return layouts, resume after layoutreturns complete
- */

/* support whole file layouts only */
rl.cbl_seg.offset = 0;
rl.cbl_seg.length = NFS4_MAX_UINT64;

if (rl.cbl_recall_type == RETURN_FILE) {
- status = pnfs_return_layout(inode, &rl.cbl_seg, &rl.cbl_stateid,
- RETURN_FILE, true);
+ if (pnfs_is_next_layout_stateid(&NFS_I(inode)->layout,
+ rl.cbl_stateid))
+ status = pnfs_return_layout(inode, &rl.cbl_seg,
+ &rl.cbl_stateid, RETURN_FILE,
+ false);
+ else
+ status = cpu_to_be32(NFS4ERR_DELAY);
if (status)
dprintk("%s RETURN_FILE error: %d\n", __func__, status);
+ else
+ status = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
+ args->result = status;
+ complete(&args->started);
goto out;
}

- /* FIXME: This loop is inefficient, running in O(|s_inodes|^2) */
+ status = cpu_to_be32(NFS4_OK);
+ args->result = status;
+ complete(&args->started);
+ args = NULL;
+
+ /* IMPROVEME: This loop is inefficient, running in O(|s_inodes|^2) */
while ((ino = nfs_layoutrecall_find_inode(clp, &rl)) != NULL) {
- /* XXX need to check status on pnfs_return_layout */
- pnfs_return_layout(ino, &rl.cbl_seg, NULL, RETURN_FILE, true);
+ /* FIXME: need to check status on pnfs_return_layout */
+ pnfs_return_layout(ino, &rl.cbl_seg, NULL, RETURN_FILE, false);
iput(ino);
}

+ lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
+ if (!lrp) {
+ dprintk("%s: allocation failed. Cannot send last LAYOUTRETURN\n",
+ __func__);
+ goto out;
+ }
+
/* send final layoutreturn */
- status = pnfs_return_layout(inode, &rl.cbl_seg, NULL,
- rl.cbl_recall_type, true);
- if (status)
- printk(KERN_INFO "%s: ignoring pnfs_return_layout status=%d\n",
- __func__, status);
+ lrp->args.reclaim = 0;
+ lrp->args.layout_type = rl.cbl_layout_type;
+ lrp->args.return_type = rl.cbl_recall_type;
+ lrp->args.lseg = rl.cbl_seg;
+ lrp->args.inode = inode;
+ lrp->lo = NULL;
+ pnfs4_proc_layoutreturn(lrp, true);
+
out:
- iput(inode);
+ clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
+ nfs_put_client(clp);
module_put_and_exit(0);
dprintk("%s: exit status %d\n", __func__, 0);
return 0;
@@ -262,15 +307,18 @@ static int pnfs_async_return_layout(struct nfs_client *clp, struct inode *inode,
.rl = rl,
};
struct task_struct *t;
- int status;
-
- /* should have returned NFS4ERR_NOMATCHING_LAYOUT... */
- BUG_ON(inode == NULL);
+ int status = -EAGAIN;

dprintk("%s: -->\n", __func__);

+ /* FIXME: do not allow two concurrent layout recalls */
+ if (test_and_set_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state))
+ return status;
+
init_completion(&data.started);
__module_get(THIS_MODULE);
+ if (!atomic_inc_not_zero(&clp->cl_count))
+ goto out_put_no_client;

t = kthread_run(pnfs_recall_layout, &data, "%s", "pnfs_recall_layout");
if (IS_ERR(t)) {
@@ -284,6 +332,9 @@ static int pnfs_async_return_layout(struct nfs_client *clp, struct inode *inode,
wait_for_completion(&data.started);
return data.result;
out_module_put:
+ nfs_put_client(clp);
+out_put_no_client:
+ clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
module_put(THIS_MODULE);
return status;
}
@@ -294,35 +345,46 @@ __be32 pnfs_cb_layoutrecall(struct cb_pnfs_layoutrecallargs *args,
struct nfs_client *clp;
struct inode *inode = NULL;
__be32 res;
+ int status;
unsigned int num_client = 0;

dprintk("%s: -->\n", __func__);

- res = htonl(NFS4ERR_INVAL);
- clp = nfs_find_client(args->cbl_addr, 4);
+ res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
+ clp = nfs_find_client(args->cbl_addr, 4);
if (clp == NULL) {
dprintk("%s: no client for addr %u.%u.%u.%u\n",
__func__, NIPQUAD(args->cbl_addr));
goto out;
}

- res = htonl(NFS4ERR_NOMATCHING_LAYOUT);
+ res = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
do {
struct nfs_client *prev = clp;
num_client++;
- inode = nfs_layoutrecall_find_inode(clp, args);
- if (inode != NULL) {
- if (PNFS_LD(&NFS_I(inode)->layout)->id ==
- args->cbl_layout_type) {
- /* Set up a helper thread to actually
- * return the delegation */
- res = pnfs_async_return_layout(clp, inode, args);
- if (res != 0)
- res = htonl(NFS4ERR_RESOURCE);
- break;
+ /* the callback must come from the MDS personality */
+ if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
+ goto loop;
+ if (args->cbl_recall_type == RETURN_FILE) {
+ inode = nfs_layoutrecall_find_inode(clp, args);
+ if (inode != NULL) {
+ status = pnfs_async_return_layout(clp, inode,
+ args);
+ if (status)
+ res = cpu_to_be32(NFS4ERR_DELAY);
+ iput(inode);
}
+ } else { /* _ALL or _FSID */
+ /* we need the inode to get the nfs_server struct */
+ inode = nfs_layoutrecall_find_inode(clp, args);
+ if (!inode)
+ goto loop;
+ status = pnfs_async_return_layout(clp, inode, args);
+ if (status)
+ res = cpu_to_be32(NFS4ERR_DELAY);
iput(inode);
}
+loop:
clp = nfs_find_client_next(prev);
nfs_put_client(prev);
} while (clp != NULL);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ebc9b3b..2f7974b 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -47,6 +47,7 @@ enum nfs4_client_state {
NFS4CLNT_SESSION_RESET,
NFS4CLNT_SESSION_DRAINING,
NFS4CLNT_RECALL_SLOT,
+ NFS4CLNT_LAYOUT_RECALL,
};

/*
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bdd0d19..42c46d8 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -687,6 +687,8 @@ return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,

dprintk("--> %s\n", __func__);

+ BUG_ON(type != RETURN_FILE);
+
lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
if (lrp == NULL) {
if (lo && (type == RETURN_FILE))
@@ -723,13 +725,11 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,

dprintk("--> %s type %d\n", __func__, type);

- if (range)
- arg = *range;
- else {
- arg.iomode = IOMODE_ANY;
- arg.offset = 0;
- arg.length = NFS4_MAX_UINT64;
- }
+
+ arg.iomode = range ? range->iomode : IOMODE_ANY;
+ arg.offset = 0;
+ arg.length = NFS4_MAX_UINT64;
+
if (type == RETURN_FILE) {
lo = get_lock_current_layout(nfsi);
if (lo && !has_layout_to_return(lo, &arg)) {
@@ -738,11 +738,7 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
}
if (!lo) {
dprintk("%s: no layout segments to return\n", __func__);
- /* must send the LAYOUTRETURN in response to recall */
- if (stateid)
- goto send_return;
- else
- goto out;
+ goto out;
}

/* unlock w/o put rebalanced by eventual call to
@@ -751,12 +747,23 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
spin_unlock(&nfsi->lo_lock);

if (pnfs_return_layout_barrier(nfsi, &arg)) {
+ if (stateid) { /* callback */
+ status = -EAGAIN;
+ spin_lock(&nfsi->lo_lock);
+ put_unlock_current_layout(lo);
+ goto out;
+ }
dprintk("%s: waiting\n", __func__);
wait_event(nfsi->lo_waitq,
- !pnfs_return_layout_barrier(nfsi, &arg));
+ !pnfs_return_layout_barrier(nfsi, &arg));
}

if (layoutcommit_needed(nfsi)) {
+ if (stateid && !wait) { /* callback */
+ dprintk("%s: layoutcommit pending\n", __func__);
+ status = -EAGAIN;
+ goto out;
+ }
status = pnfs_layoutcommit_inode(ino, wait);
if (status) {
dprintk("%s: layoutcommit failed, status=%d. "
@@ -765,9 +772,13 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
status = 0;
}
}
+
+ if (stateid && wait)
+ status = return_layout(ino, &arg, stateid, type,
+ lo, wait);
+ else
+ pnfs_layout_release(lo, &arg);
}
-send_return:
- status = return_layout(ino, &arg, stateid, type, lo, wait);
out:
dprintk("<-- %s status: %d\n", __func__, status);
return status;
@@ -1022,7 +1033,7 @@ pnfs_update_layout(struct inode *ino,
struct nfs4_pnfs_layout_segment arg = {
.iomode = iomode,
.offset = 0,
- .length = ~0
+ .length = NFS4_MAX_UINT64,
};
struct nfs_inode *nfsi = NFS_I(ino);
struct pnfs_layout_type *lo;
@@ -1041,31 +1052,12 @@ pnfs_update_layout(struct inode *ino,
/* Check to see if the layout for the given range already exists */
lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
if (lseg && !lseg->valid) {
- spin_unlock(&nfsi->lo_lock);
if (take_ref)
put_lseg(lseg);
- for (;;) {
- prepare_to_wait(&nfsi->lo_waitq, &__wait,
- TASK_KILLABLE);
- spin_lock(&nfsi->lo_lock);
- lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
- if (!lseg || lseg->valid)
- break;
- dprintk("%s: invalid lseg %p ref %d\n", __func__,
- lseg, atomic_read(&lseg->kref.refcount)-1);
- if (take_ref)
- put_lseg(lseg);
- if (signal_pending(current)) {
- lseg = NULL;
- result = -ERESTARTSYS;
- break;
- }
- spin_unlock(&nfsi->lo_lock);
- schedule();
- }
- finish_wait(&nfsi->lo_waitq, &__wait);
- if (result)
- goto out_put;
+
+ /* someone is cleaning the layout */
+ result = -EAGAIN;
+ goto out_put;
}

if (lseg) {
--
1.6.2.5


2010-06-08 22:14:49

by Alexandros Batsakis

[permalink] [raw]
Subject: [PATCH 2/7] pnfs-submit: remove lgetcount, lretcount

This is in order to prepare for the forgetful client. There is no need to explicitly count the number of outstanding layout operations, as the protocol has provision for it (seqid of stateid -- e.g. section 12.5.5.2.1.2). As long as no requests for intersecting layouts are issued, LAYOUTGETs/LAYOUTRETURNs can be sent in parallel.

Signed-off-by: Alexandros Batsakis <[email protected]>
---
fs/nfs/nfs4proc.c | 5 ++---
fs/nfs/pnfs.c | 47 ++++++++++++-----------------------------------
fs/nfs/pnfs.h | 3 +--
include/linux/nfs_fs.h | 2 --
4 files changed, 15 insertions(+), 42 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f962f92..bf854fe 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5540,7 +5540,7 @@ static void nfs4_pnfs_layoutget_release(void *calldata)
struct nfs4_pnfs_layoutget *lgp = calldata;

dprintk("--> %s\n", __func__);
- pnfs_layout_release(lgp->lo, &lgp->lo->lgetcount, NULL);
+ pnfs_layout_release(lgp->lo, NULL);
if (lgp->res.layout.buf != NULL)
free_page((unsigned long) lgp->res.layout.buf);
kfree(calldata);
@@ -5761,8 +5761,7 @@ static void nfs4_pnfs_layoutreturn_release(void *calldata)
if (lrp->lo && (lrp->args.return_type == RETURN_FILE)) {
if (!lrp->res.lrs_present)
pnfs_set_layout_stateid(lrp->lo, &zero_stateid);
- pnfs_layout_release(lrp->lo, &lrp->lo->lretcount,
- &lrp->args.lseg);
+ pnfs_layout_release(lrp->lo, &lrp->args.lseg);
}
kfree(calldata);
dprintk("<-- %s\n", __func__);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8620f68..4e35ad9 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -361,7 +361,7 @@ put_unlock_current_layout(struct pnfs_layout_type *lo)
}

void
-pnfs_layout_release(struct pnfs_layout_type *lo, atomic_t *count,
+pnfs_layout_release(struct pnfs_layout_type *lo,
struct nfs4_pnfs_layout_segment *range)
{
struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
@@ -369,7 +369,6 @@ pnfs_layout_release(struct pnfs_layout_type *lo, atomic_t *count,
spin_lock(&nfsi->lo_lock);
if (range)
pnfs_free_layout(lo, range);
- atomic_dec(count);
put_unlock_current_layout(lo);
wake_up_all(&nfsi->lo_waitq);
}
@@ -552,7 +551,7 @@ get_layout(struct inode *ino,

lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
if (lgp == NULL) {
- pnfs_layout_release(lo, &lo->lgetcount, NULL);
+ pnfs_layout_release(lo, NULL);
return -ENOMEM;
}
lgp->lo = lo;
@@ -626,6 +625,13 @@ has_layout_to_return(struct pnfs_layout_type *lo,
return out;
}

+static inline bool
+_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
+{
+ return atomic_read(&lseg->kref.refcount) == 1;
+}
+
+
static void
pnfs_free_layout(struct pnfs_layout_type *lo,
struct nfs4_pnfs_layout_segment *range)
@@ -636,7 +642,8 @@ pnfs_free_layout(struct pnfs_layout_type *lo,

BUG_ON_UNLOCKED_LO(lo);
list_for_each_entry_safe (lseg, next, &lo->segs, fi_list) {
- if (!should_free_lseg(lseg, range))
+ if (!should_free_lseg(lseg, range) ||
+ !_pnfs_can_return_lseg(lseg))
continue;
dprintk("%s: freeing lseg %p iomode %d "
"offset %llu length %llu\n", __func__,
@@ -649,12 +656,6 @@ pnfs_free_layout(struct pnfs_layout_type *lo,
dprintk("%s:Return\n", __func__);
}

-static inline bool
-_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
-{
- return atomic_read(&lseg->kref.refcount) == 1;
-}
-
static bool
pnfs_return_layout_barrier(struct nfs_inode *nfsi,
struct nfs4_pnfs_layout_segment *range)
@@ -674,10 +675,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
ret = true;
}
}
- if (atomic_read(&nfsi->layout.lgetcount))
- ret = true;
spin_unlock(&nfsi->lo_lock);
-
dprintk("%s:Return %d\n", __func__, ret);
return ret;
}
@@ -697,7 +695,7 @@ return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
if (lrp == NULL) {
if (lo && (type == RETURN_FILE))
- pnfs_layout_release(lo, &lo->lretcount, NULL);
+ pnfs_layout_release(lo, NULL);
goto out;
}
lrp->args.reclaim = 0;
@@ -752,9 +750,6 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
goto out;
}

- /* Matching dec is done in .rpc_release (on non-error paths) */
- atomic_inc(&lo->lretcount);
-
/* unlock w/o put rebalanced by eventual call to
* pnfs_layout_release
*/
@@ -879,8 +874,6 @@ alloc_init_layout(struct inode *ino)
seqlock_init(&lo->seqlock);
memset(&lo->stateid, 0, NFS4_STATEID_SIZE);
lo->refcount = 1;
- atomic_set(&lo->lgetcount, 0);
- atomic_set(&lo->lretcount, 0);
INIT_LIST_HEAD(&lo->segs);
lo->roc_iomode = 0;
return lo;
@@ -1020,19 +1013,6 @@ pnfs_has_layout(struct pnfs_layout_type *lo,
return ret;
}

-/* Called with spin lock held */
-void drain_layoutreturns(struct pnfs_layout_type *lo)
-{
- while (atomic_read(&lo->lretcount)) {
- struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
-
- spin_unlock(&nfsi->lo_lock);
- dprintk("%s: waiting\n", __func__);
- wait_event(nfsi->lo_waitq, (atomic_read(&lo->lretcount) == 0));
- spin_lock(&nfsi->lo_lock);
- }
-}
-
/* Update the file's layout for the given range and iomode.
* Layout is retreived from the server if needed.
* If lsegpp is given, the appropriate layout segment is referenced and
@@ -1120,9 +1100,6 @@ pnfs_update_layout(struct inode *ino,
}
}

- drain_layoutreturns(lo);
- /* Matching dec is done in .rpc_release (on non-error paths) */
- atomic_inc(&lo->lgetcount);
/* Lose lock, but not reference, match this with pnfs_layout_release */
spin_unlock(&nfsi->lo_lock);

diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index c89be78..a71145e 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -61,8 +61,7 @@ void pnfs_update_layout_commit(struct inode *, struct list_head *, pgoff_t, unsi
ssize_t pnfs_file_write(struct file *, const char __user *, size_t, loff_t *);
void pnfs_get_layout_done(struct nfs4_pnfs_layoutget *, int rpc_status);
int pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp);
-void pnfs_layout_release(struct pnfs_layout_type *, atomic_t *,
- struct nfs4_pnfs_layout_segment *range);
+void pnfs_layout_release(struct pnfs_layout_type *, struct nfs4_pnfs_layout_segment *range);
void pnfs_set_layout_stateid(struct pnfs_layout_type *lo,
const nfs4_stateid *stateid);
void pnfs_destroy_layout(struct nfs_inode *);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 45846c5..f6e3e20 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -99,8 +99,6 @@ struct posix_acl;

struct pnfs_layout_type {
int refcount;
- atomic_t lretcount; /* Layoutreturns outstanding */
- atomic_t lgetcount; /* Layoutgets outstanding */
struct list_head segs; /* layout segments list */
int roc_iomode; /* iomode to return on close, 0=none */
seqlock_t seqlock; /* Protects the stateid */
--
1.6.2.5


2010-06-08 22:14:50

by Alexandros Batsakis

[permalink] [raw]
Subject: [PATCH 3/7] pnfs-submit: change stateid to be a union

In NFSv4.1 the stateid consists of the other and seqid fields. For layout
processing we need to numerically compare the seqid value of layout stateids.
To do so, introduce a union to nfs4_stateid to switch between opaque(16 bytes)
and opaque(12 bytes) / __be32.

Signed-off-by: Alexandros Batsakis <[email protected]>
---
fs/nfs/callback_proc.c | 13 +++++++------
fs/nfs/callback_xdr.c | 2 +-
fs/nfs/delegation.c | 19 +++++++++++--------
fs/nfs/nfs4proc.c | 41 +++++++++++++++++++++++++----------------
fs/nfs/nfs4state.c | 4 ++--
fs/nfs/nfs4xdr.c | 38 +++++++++++++++++++++-----------------
fs/nfs/pnfs.c | 11 ++++++-----
fs/nfsd/nfs4callback.c | 1 -
include/linux/nfs4.h | 16 ++++++++++++++--
9 files changed, 87 insertions(+), 58 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 0053fbb..8752239 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -121,8 +121,9 @@ out:

int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
{
- if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data,
- sizeof(delegation->stateid.data)) != 0)
+ if (delegation == NULL || memcmp(delegation->stateid.u.data,
+ stateid->u.data,
+ sizeof(delegation->stateid.u.data)))
return 0;
return 1;
}
@@ -331,11 +332,11 @@ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const n
if (delegation == NULL)
return 0;

- /* seqid is 4-bytes long */
- if (((u32 *) &stateid->data)[0] != 0)
+ if (stateid->u.stateid.seqid != 0)
return 0;
- if (memcmp(&delegation->stateid.data[4], &stateid->data[4],
- sizeof(stateid->data)-4))
+ if (memcmp(&delegation->stateid.u.stateid.other,
+ &stateid->u.stateid.other,
+ NFS4_STATEID_OTHER_SIZE))
return 0;

return 1;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 1856181..b88db4d 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -137,7 +137,7 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
p = read_buf(xdr, 16);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
- memcpy(stateid->data, p, 16);
+ memcpy(stateid->u.data, p, 16);
return 0;
}

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 3016345..13b3e8d 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -104,7 +104,8 @@ again:
continue;
if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
continue;
- if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
+ if (memcmp(state->stateid.u.data, stateid->u.data,
+ sizeof(state->stateid.u.data)) != 0)
continue;
get_nfs_open_context(ctx);
spin_unlock(&inode->i_lock);
@@ -133,8 +134,8 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
if (delegation != NULL) {
spin_lock(&delegation->lock);
if (delegation->inode != NULL) {
- memcpy(delegation->stateid.data, res->delegation.data,
- sizeof(delegation->stateid.data));
+ memcpy(delegation->stateid.u.data, res->delegation.u.data,
+ sizeof(delegation->stateid.u.data));
delegation->type = res->delegation_type;
delegation->maxsize = res->maxsize;
oldcred = delegation->cred;
@@ -187,8 +188,9 @@ static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfs
if (delegation == NULL)
goto nomatch;
spin_lock(&delegation->lock);
- if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
- sizeof(delegation->stateid.data)) != 0)
+ if (stateid != NULL && memcmp(delegation->stateid.u.data,
+ stateid->u.data,
+ sizeof(delegation->stateid.u.data)) != 0)
goto nomatch_unlock;
list_del_rcu(&delegation->super_list);
delegation->inode = NULL;
@@ -216,8 +218,8 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
if (delegation == NULL)
return -ENOMEM;
- memcpy(delegation->stateid.data, res->delegation.data,
- sizeof(delegation->stateid.data));
+ memcpy(delegation->stateid.u.data, res->delegation.u.data,
+ sizeof(delegation->stateid.u.data));
delegation->type = res->delegation_type;
delegation->maxsize = res->maxsize;
delegation->change_attr = nfsi->change_attr;
@@ -562,7 +564,8 @@ int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
rcu_read_lock();
delegation = rcu_dereference(nfsi->delegation);
if (delegation != NULL) {
- memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
+ memcpy(dst->u.data, delegation->stateid.u.data,
+ sizeof(dst->u.data));
ret = 1;
}
rcu_read_unlock();
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index bf854fe..ee3e3bc 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -867,8 +867,10 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode)
static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
{
if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
- memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
- memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
+ memcpy(state->stateid.u.data, stateid->u.data,
+ sizeof(state->stateid.u.data));
+ memcpy(state->open_stateid.u.data, stateid->u.data,
+ sizeof(state->open_stateid.u.data));
switch (fmode) {
case FMODE_READ:
set_bit(NFS_O_RDONLY_STATE, &state->flags);
@@ -896,7 +898,8 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s
*/
write_seqlock(&state->seqlock);
if (deleg_stateid != NULL) {
- memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
+ memcpy(state->stateid.u.data, deleg_stateid->u.data,
+ sizeof(state->stateid.u.data));
set_bit(NFS_DELEGATED_STATE, &state->flags);
}
if (open_stateid != NULL)
@@ -927,7 +930,8 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat

if (delegation == NULL)
delegation = &deleg_cur->stateid;
- else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0)
+ else if (memcmp(deleg_cur->stateid.u.data, delegation->u.data,
+ NFS4_STATEID_SIZE) != 0)
goto no_delegation_unlock;

nfs_mark_delegation_referenced(deleg_cur);
@@ -989,7 +993,8 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
break;
}
/* Save the delegation */
- memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
+ memcpy(stateid.u.data, delegation->stateid.u.data,
+ sizeof(stateid.u.data));
rcu_read_unlock();
ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
if (ret != 0)
@@ -1155,10 +1160,13 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
* Check if we need to update the current stateid.
*/
if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
- memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) {
+ memcmp(state->stateid.u.data, state->open_stateid.u.data,
+ sizeof(state->stateid.u.data)) != 0) {
write_seqlock(&state->seqlock);
if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
- memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data));
+ memcpy(state->stateid.u.data,
+ state->open_stateid.u.data,
+ sizeof(state->stateid.u.data));
write_sequnlock(&state->seqlock);
}
pnfs4_layout_reclaim(state);
@@ -1229,8 +1237,8 @@ static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs
if (IS_ERR(opendata))
return PTR_ERR(opendata);
opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
- memcpy(opendata->o_arg.u.delegation.data, stateid->data,
- sizeof(opendata->o_arg.u.delegation.data));
+ memcpy(opendata->o_arg.u.delegation.u.data, stateid->u.data,
+ sizeof(opendata->o_arg.u.delegation.u.data));
ret = nfs4_open_recover(opendata, state);
nfs4_opendata_put(opendata);
return ret;
@@ -1288,8 +1296,8 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
if (RPC_ASSASSINATED(task))
return;
if (data->rpc_status == 0) {
- memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
- sizeof(data->o_res.stateid.data));
+ memcpy(data->o_res.stateid.u.data, data->c_res.stateid.u.data,
+ sizeof(data->o_res.stateid.u.data));
nfs_confirm_seqid(&data->owner->so_seqid, 0);
renew_lease(data->o_res.server, data->timestamp);
data->rpc_done = 1;
@@ -4132,9 +4140,10 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
return;
switch (task->tk_status) {
case 0:
- memcpy(calldata->lsp->ls_stateid.data,
- calldata->res.stateid.data,
- sizeof(calldata->lsp->ls_stateid.data));
+ memcpy(calldata->lsp->ls_stateid.u.data,
+ calldata->res.stateid.u.data,
+ sizeof(calldata->lsp->ls_stateid.u.
+ data));
renew_lease(calldata->server, calldata->timestamp);
break;
case -NFS4ERR_BAD_STATEID:
@@ -4348,8 +4357,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
goto out;
}
if (data->rpc_status == 0) {
- memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
- sizeof(data->lsp->ls_stateid.data));
+ memcpy(data->lsp->ls_stateid.u.data, data->res.stateid.u.data,
+ sizeof(data->lsp->ls_stateid.u.data));
data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
}
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 8185c1e..0f44704 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1055,8 +1055,8 @@ restart:
* Open state on this file cannot be recovered
* All we can do is revert to using the zero stateid.
*/
- memset(state->stateid.data, 0,
- sizeof(state->stateid.data));
+ memset(state->stateid.u.data, 0,
+ sizeof(state->stateid.u.data));
/* Mark the file as being 'closed' */
state->state = 0;
break;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 31a4b89..301ae14 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -986,7 +986,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
p = reserve_space(xdr, 8+NFS4_STATEID_SIZE);
*p++ = cpu_to_be32(OP_CLOSE);
*p++ = cpu_to_be32(arg->seqid->sequence->counter);
- xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
+ xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
hdr->nops++;
hdr->replen += decode_close_maxsz;
}
@@ -1160,7 +1160,8 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
if (args->new_lock_owner){
p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32);
*p++ = cpu_to_be32(args->open_seqid->sequence->counter);
- p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE);
+ p = xdr_encode_opaque_fixed(p, args->open_stateid->u.data,
+ NFS4_STATEID_SIZE);
*p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
p = xdr_encode_hyper(p, args->lock_owner.clientid);
*p++ = cpu_to_be32(16);
@@ -1169,7 +1170,7 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
}
else {
p = reserve_space(xdr, NFS4_STATEID_SIZE+4);
- p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE);
+ p = xdr_encode_opaque_fixed(p, args->lock_stateid->u.data, NFS4_STATEID_SIZE);
*p = cpu_to_be32(args->lock_seqid->sequence->counter);
}
hdr->nops++;
@@ -1201,7 +1202,8 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar
*p++ = cpu_to_be32(OP_LOCKU);
*p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
*p++ = cpu_to_be32(args->seqid->sequence->counter);
- p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
+ p = xdr_encode_opaque_fixed(p, args->stateid->u.data,
+ NFS4_STATEID_SIZE);
p = xdr_encode_hyper(p, args->fl->fl_start);
xdr_encode_hyper(p, nfs4_lock_length(args->fl));
hdr->nops++;
@@ -1351,7 +1353,7 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc

p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
*p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
- xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
+ xdr_encode_opaque_fixed(p, stateid->u.data, NFS4_STATEID_SIZE);
encode_string(xdr, name->len, name->name);
}

@@ -1382,7 +1384,7 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co

p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
*p++ = cpu_to_be32(OP_OPEN_CONFIRM);
- p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
+ p = xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
*p = cpu_to_be32(arg->seqid->sequence->counter);
hdr->nops++;
hdr->replen += decode_open_confirm_maxsz;
@@ -1394,7 +1396,7 @@ static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_close

p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
*p++ = cpu_to_be32(OP_OPEN_DOWNGRADE);
- p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
+ p = xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
*p = cpu_to_be32(arg->seqid->sequence->counter);
encode_share_access(xdr, arg->fmode);
hdr->nops++;
@@ -1432,9 +1434,10 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context
p = reserve_space(xdr, NFS4_STATEID_SIZE);
if (ctx->state != NULL) {
nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
- xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
+ xdr_encode_opaque_fixed(p, stateid.u.data,
+ NFS4_STATEID_SIZE);
} else
- xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
+ xdr_encode_opaque_fixed(p, zero_stateid.u.data, NFS4_STATEID_SIZE);
}

static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
@@ -1548,7 +1551,7 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun

p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
*p++ = cpu_to_be32(OP_SETATTR);
- xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
+ xdr_encode_opaque_fixed(p, zero_stateid.u.data, NFS4_STATEID_SIZE);
p = reserve_space(xdr, 2*4);
*p++ = cpu_to_be32(1);
*p = cpu_to_be32(FATTR4_WORD0_ACL);
@@ -1579,7 +1582,7 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs

p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
*p++ = cpu_to_be32(OP_SETATTR);
- xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE);
+ xdr_encode_opaque_fixed(p, arg->stateid.u.data, NFS4_STATEID_SIZE);
hdr->nops++;
hdr->replen += decode_setattr_maxsz;
encode_attrs(xdr, arg->iap, server);
@@ -1642,7 +1645,7 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state
p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);

*p++ = cpu_to_be32(OP_DELEGRETURN);
- xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
+ xdr_encode_opaque_fixed(p, stateid->u.data, NFS4_STATEID_SIZE);
hdr->nops++;
hdr->replen += decode_delegreturn_maxsz;
}
@@ -1833,7 +1836,8 @@ encode_layoutget(struct xdr_stream *xdr,
p = xdr_encode_hyper(p, args->lseg.offset);
p = xdr_encode_hyper(p, args->lseg.length);
p = xdr_encode_hyper(p, args->minlength);
- p = xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE);
+ p = xdr_encode_opaque_fixed(p, &args->stateid.u.data,
+ NFS4_STATEID_SIZE);
*p = cpu_to_be32(args->maxcount);

dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
@@ -1865,7 +1869,7 @@ encode_layoutcommit(struct xdr_stream *xdr,
p = xdr_encode_hyper(p, args->lseg.offset);
p = xdr_encode_hyper(p, args->lseg.length);
*p++ = cpu_to_be32(0); /* reclaim */
- p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE);
+ p = xdr_encode_opaque_fixed(p, args->stateid.u.data, NFS4_STATEID_SIZE);
*p++ = cpu_to_be32(1); /* newoffset = TRUE */
p = xdr_encode_hyper(p, args->lastbytewritten);
*p = cpu_to_be32(args->time_modify_changed != 0);
@@ -1912,7 +1916,7 @@ encode_layoutreturn(struct xdr_stream *xdr,
p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
p = xdr_encode_hyper(p, args->lseg.offset);
p = xdr_encode_hyper(p, args->lseg.length);
- p = xdr_encode_opaque_fixed(p, &args->stateid.data,
+ p = xdr_encode_opaque_fixed(p, &args->stateid.u.data,
NFS4_STATEID_SIZE);

dprintk("%s: call %pF\n", __func__,
@@ -3935,7 +3939,7 @@ static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)

static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
{
- return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
+ return decode_opaque_fixed(xdr, stateid->u.data, NFS4_STATEID_SIZE);
}

static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
@@ -5183,7 +5187,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
if (unlikely(!p))
goto out_overflow;
res->return_on_close = be32_to_cpup(p++);
- p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE);
+ p = xdr_decode_opaque_fixed(p, res->stateid.u.data, NFS4_STATEID_SIZE);
layout_count = be32_to_cpup(p);
if (!layout_count) {
dprintk("%s: server responded with empty layout array\n",
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 4e35ad9..8202523 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -489,7 +489,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_type *lo,
const nfs4_stateid *stateid)
{
write_seqlock(&lo->seqlock);
- memcpy(lo->stateid.data, stateid->data, sizeof(lo->stateid.data));
+ memcpy(lo->stateid.u.data, stateid->u.data, sizeof(lo->stateid.u.data));
write_sequnlock(&lo->seqlock);
}

@@ -502,7 +502,8 @@ pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_type *lo)

do {
seq = read_seqbegin(&lo->seqlock);
- memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
+ memcpy(dst->u.data, lo->stateid.u.data,
+ sizeof(lo->stateid.u.data));
} while (read_seqretry(&lo->seqlock, seq));

dprintk("<-- %s\n", __func__);
@@ -517,8 +518,8 @@ pnfs_layout_from_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)

do {
seq = read_seqbegin(&state->seqlock);
- memcpy(dst->data, state->stateid.data,
- sizeof(state->stateid.data));
+ memcpy(dst->u.data, state->stateid.u.data,
+ sizeof(state->stateid.u.data));
} while (read_seqretry(&state->seqlock, seq));

dprintk("<-- %s\n", __func__);
@@ -564,7 +565,7 @@ get_layout(struct inode *ino,
lgp->args.inode = ino;
lgp->lsegpp = lsegpp;

- if (!memcmp(lo->stateid.data, &zero_stateid, NFS4_STATEID_SIZE)) {
+ if (!memcmp(lo->stateid.u.data, &zero_stateid, NFS4_STATEID_SIZE)) {
struct nfs_open_context *oldctx = ctx;

if (!oldctx) {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index eb78e7e..d80356a 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -41,7 +41,6 @@

#define NFSPROC4_CB_NULL 0
#define NFSPROC4_CB_COMPOUND 1
-#define NFS4_STATEID_SIZE 16

/* Index of predefined Linux callback client operations */

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 219e6b4..46bd627 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -17,7 +17,9 @@

#define NFS4_BITMAP_SIZE 2
#define NFS4_VERIFIER_SIZE 8
-#define NFS4_STATEID_SIZE 16
+#define NFS4_STATEID_SEQID_SIZE 4
+#define NFS4_STATEID_OTHER_SIZE 12
+#define NFS4_STATEID_SIZE (NFS4_STATEID_SEQID_SIZE + NFS4_STATEID_OTHER_SIZE)
#define NFS4_FHSIZE 128
#define NFS4_MAXPATHLEN PATH_MAX
#define NFS4_MAXNAMLEN NAME_MAX
@@ -174,7 +176,17 @@ struct nfs4_acl {
};

typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier;
-typedef struct { char data[NFS4_STATEID_SIZE]; } nfs4_stateid;
+
+struct nfs41_stateid {
+ __be32 seqid;
+ char other[NFS4_STATEID_OTHER_SIZE];
+} __attribute__ ((packed));
+typedef struct {
+ union {
+ char data[NFS4_STATEID_SIZE];
+ struct nfs41_stateid stateid;
+ } u;
+} nfs4_stateid;

enum nfs_opnum4 {
OP_ACCESS = 3,
--
1.6.2.5


2010-06-08 22:14:50

by Alexandros Batsakis

[permalink] [raw]
Subject: [PATCH 4/7] pnfs-submit: request whole-file layouts only

In the first iteration of the pNFS code, we support only whole-file layouts.
To facilitate the later move to multiple segments, we keep the segment-processing
code, but the segment list should always contain at most one segment per I/O type.

Signed-off-by: Alexandros Batsakis <[email protected]>
---
fs/nfs/callback_proc.c | 7 ++++---
fs/nfs/pnfs.c | 25 ++++++++-----------------
2 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 8752239..16b4510 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -213,6 +213,10 @@ static int pnfs_recall_layout(void *data)
then return layouts, resume after layoutreturns complete
*/

+ /* support whole file layouts only */
+ rl.cbl_seg.offset = 0;
+ rl.cbl_seg.length = NFS4_MAX_UINT64;
+
if (rl.cbl_recall_type == RETURN_FILE) {
status = pnfs_return_layout(inode, &rl.cbl_seg, &rl.cbl_stateid,
RETURN_FILE, true);
@@ -221,9 +225,6 @@ static int pnfs_recall_layout(void *data)
goto out;
}

- rl.cbl_seg.offset = 0;
- rl.cbl_seg.length = NFS4_MAX_UINT64;
-
/* FIXME: This loop is inefficient, running in O(|s_inodes|^2) */
while ((ino = nfs_layoutrecall_find_inode(clp, &rl)) != NULL) {
/* XXX need to check status on pnfs_return_layout */
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8202523..04a36db 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -530,12 +530,6 @@ pnfs_layout_from_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
* for now, assume that whole file layouts are requested.
* arg->offset: 0
* arg->length: all ones
-*
-* for now, assume the LAYOUTGET operation is triggered by an I/O request.
-* the count field is the count in the I/O request, and will be used
-* as the minlength. for the file operation that piggy-backs
-* the LAYOUTGET operation with an OPEN, s
-* arg->minlength = count.
*/
static int
get_layout(struct inode *ino,
@@ -556,11 +550,11 @@ get_layout(struct inode *ino,
return -ENOMEM;
}
lgp->lo = lo;
- lgp->args.minlength = PAGE_CACHE_SIZE;
+ lgp->args.minlength = NFS4_MAX_UINT64;
lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
lgp->args.lseg.iomode = range->iomode;
- lgp->args.lseg.offset = range->offset;
- lgp->args.lseg.length = max(range->length, lgp->args.minlength);
+ lgp->args.lseg.offset = 0;
+ lgp->args.lseg.length = NFS4_MAX_UINT64;
lgp->args.type = server->pnfs_curr_ld->id;
lgp->args.inode = ino;
lgp->lsegpp = lsegpp;
@@ -734,7 +728,7 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
else {
arg.iomode = IOMODE_ANY;
arg.offset = 0;
- arg.length = ~0;
+ arg.length = NFS4_MAX_UINT64;
}
if (type == RETURN_FILE) {
lo = get_lock_current_layout(nfsi);
@@ -1029,8 +1023,8 @@ pnfs_update_layout(struct inode *ino,
{
struct nfs4_pnfs_layout_segment arg = {
.iomode = iomode,
- .offset = pos,
- .length = count
+ .offset = 0,
+ .length = ~0
};
struct nfs_inode *nfsi = NFS_I(ino);
struct pnfs_layout_type *lo;
@@ -1120,7 +1114,6 @@ out_put:
void
pnfs_get_layout_done(struct nfs4_pnfs_layoutget *lgp, int rpc_status)
{
- struct nfs4_pnfs_layoutget_res *res = &lgp->res;
struct pnfs_layout_segment *lseg = NULL;
struct nfs_inode *nfsi = PNFS_NFS_INODE(lgp->lo);
time_t suspend = 0;
@@ -1129,11 +1122,10 @@ pnfs_get_layout_done(struct nfs4_pnfs_layoutget *lgp, int rpc_status)

lgp->status = rpc_status;
if (likely(!rpc_status)) {
- if (unlikely(res->layout.len <= 0)) {
+ if (unlikely(lgp->res.layout.len < 0)) {
printk(KERN_ERR
- "%s: ERROR! Layout size is ZERO!\n", __func__);
+ "%s: ERROR Returned layout size is ZERO\n", __func__);
lgp->status = -EIO;
- goto get_out;
}
goto out;
}
@@ -1211,7 +1203,6 @@ pnfs_get_layout_done(struct nfs4_pnfs_layoutget *lgp, int rpc_status)
break;
}

-get_out:
/* remember that get layout failed and suspend trying */
nfsi->layout.pnfs_layout_suspend = suspend;
set_bit(lo_fail_bit(lgp->args.lseg.iomode),
--
1.6.2.5


2010-06-08 22:14:49

by Alexandros Batsakis

[permalink] [raw]
Subject: [PATCH 1/7] pnfs-submit: clean struct nfs_inode

Clean up struct nfs_inode by moving layout-specific fields from nfs_inode to struct pnfs_layout_type.

Signed-off-by: Alexandros Batsakis <[email protected]>
---
fs/nfs/inode.c | 8 +++---
fs/nfs/pnfs.c | 55 ++++++++++++++++++++++++--------------------
include/linux/nfs4_pnfs.h | 2 +-
include/linux/nfs_fs.h | 22 +++++++++---------
4 files changed, 46 insertions(+), 41 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b33d1a1..d43f2c5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1366,12 +1366,12 @@ void nfs4_clear_inode(struct inode *inode)
static void pnfs_alloc_init_inode(struct nfs_inode *nfsi)
{
#ifdef CONFIG_NFS_V4_1
- nfsi->pnfs_layout_state = 0;
+ nfsi->layout.pnfs_layout_state = 0;
memset(&nfsi->layout.stateid, 0, NFS4_STATEID_SIZE);
nfsi->layout.roc_iomode = 0;
- nfsi->lo_cred = NULL;
- nfsi->pnfs_write_begin_pos = 0;
- nfsi->pnfs_write_end_pos = 0;
+ nfsi->layout.lo_cred = NULL;
+ nfsi->layout.pnfs_write_begin_pos = 0;
+ nfsi->layout.pnfs_write_end_pos = 0;
#endif /* CONFIG_NFS_V4_1 */
}

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8cc4412..8620f68 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -154,7 +154,7 @@ pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx)
dprintk("%s: has_layout=%d ctx=%p\n", __func__, has_layout(nfsi), ctx);
spin_lock(&nfsi->lo_lock);
if (has_layout(nfsi) && !layoutcommit_needed(nfsi)) {
- nfsi->lo_cred = get_rpccred(ctx->state->owner->so_cred);
+ nfsi->layout.lo_cred = get_rpccred(ctx->state->owner->so_cred);
nfsi->change_attr++;
spin_unlock(&nfsi->lo_lock);
dprintk("%s: Set layoutcommit\n", __func__);
@@ -174,17 +174,17 @@ pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
loff_t end_pos;

spin_lock(&nfsi->lo_lock);
- if (offset < nfsi->pnfs_write_begin_pos)
- nfsi->pnfs_write_begin_pos = offset;
+ if (offset < nfsi->layout.pnfs_write_begin_pos)
+ nfsi->layout.pnfs_write_begin_pos = offset;
end_pos = offset + extent - 1; /* I'm being inclusive */
- if (end_pos > nfsi->pnfs_write_end_pos)
- nfsi->pnfs_write_end_pos = end_pos;
+ if (end_pos > nfsi->layout.pnfs_write_end_pos)
+ nfsi->layout.pnfs_write_end_pos = end_pos;
dprintk("%s: Wrote %lu@%lu bpos %lu, epos: %lu\n",
__func__,
(unsigned long) extent,
(unsigned long) offset ,
- (unsigned long) nfsi->pnfs_write_begin_pos,
- (unsigned long) nfsi->pnfs_write_end_pos);
+ (unsigned long) nfsi->layout.pnfs_write_begin_pos,
+ (unsigned long) nfsi->layout.pnfs_write_end_pos);
spin_unlock(&nfsi->lo_lock);
}

@@ -915,7 +915,8 @@ get_lock_alloc_layout(struct inode *ino)
* wait until bit is cleared if we lost this race.
*/
res = wait_on_bit_lock(
- &nfsi->pnfs_layout_state, NFS_INO_LAYOUT_ALLOC,
+ &nfsi->layout.pnfs_layout_state,
+ NFS_INO_LAYOUT_ALLOC,
pnfs_wait_schedule, TASK_KILLABLE);
if (res) {
lo = ERR_PTR(res);
@@ -943,8 +944,10 @@ get_lock_alloc_layout(struct inode *ino)
lo = ERR_PTR(-ENOMEM);

/* release the NFS_INO_LAYOUT_ALLOC bit and wake up waiters */
- clear_bit_unlock(NFS_INO_LAYOUT_ALLOC, &nfsi->pnfs_layout_state);
- wake_up_bit(&nfsi->pnfs_layout_state, NFS_INO_LAYOUT_ALLOC);
+ clear_bit_unlock(NFS_INO_LAYOUT_ALLOC,
+ &nfsi->layout.pnfs_layout_state);
+ wake_up_bit(&nfsi->layout.pnfs_layout_state,
+ NFS_INO_LAYOUT_ALLOC);
break;
}

@@ -1104,13 +1107,13 @@ pnfs_update_layout(struct inode *ino,
}

/* if get layout already failed once goto out */
- if (test_bit(lo_fail_bit(iomode), &nfsi->pnfs_layout_state)) {
- if (unlikely(nfsi->pnfs_layout_suspend &&
- get_seconds() >= nfsi->pnfs_layout_suspend)) {
+ if (test_bit(lo_fail_bit(iomode), &nfsi->layout.pnfs_layout_state)) {
+ if (unlikely(nfsi->layout.pnfs_layout_suspend &&
+ get_seconds() >= nfsi->layout.pnfs_layout_suspend)) {
dprintk("%s: layout_get resumed\n", __func__);
clear_bit(lo_fail_bit(iomode),
- &nfsi->pnfs_layout_state);
- nfsi->pnfs_layout_suspend = 0;
+ &nfsi->layout.pnfs_layout_state);
+ nfsi->layout.pnfs_layout_suspend = 0;
} else {
result = 1;
goto out_put;
@@ -1126,7 +1129,8 @@ pnfs_update_layout(struct inode *ino,
result = get_layout(ino, ctx, &arg, lsegpp, lo);
out:
dprintk("%s end (err:%d) state 0x%lx lseg %p\n",
- __func__, result, nfsi->pnfs_layout_state, lseg);
+ __func__, result, nfsi->layout.pnfs_layout_state,
+ lseg);
return result;
out_put:
if (lsegpp)
@@ -1231,13 +1235,14 @@ pnfs_get_layout_done(struct nfs4_pnfs_layoutget *lgp, int rpc_status)

get_out:
/* remember that get layout failed and suspend trying */
- nfsi->pnfs_layout_suspend = suspend;
- set_bit(lo_fail_bit(lgp->args.lseg.iomode), &nfsi->pnfs_layout_state);
+ nfsi->layout.pnfs_layout_suspend = suspend;
+ set_bit(lo_fail_bit(lgp->args.lseg.iomode),
+ &nfsi->layout.pnfs_layout_state);
dprintk("%s: layout_get suspended until %ld\n",
__func__, suspend);
out:
dprintk("%s end (err:%d) state 0x%lx lseg %p\n",
- __func__, lgp->status, nfsi->pnfs_layout_state, lseg);
+ __func__, lgp->status, nfsi->layout.pnfs_layout_state, lseg);
return;
}

@@ -2009,12 +2014,12 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
/* Clear layoutcommit properties in the inode so
* new lc info can be generated
*/
- write_begin_pos = nfsi->pnfs_write_begin_pos;
- write_end_pos = nfsi->pnfs_write_end_pos;
- data->cred = nfsi->lo_cred;
- nfsi->pnfs_write_begin_pos = 0;
- nfsi->pnfs_write_end_pos = 0;
- nfsi->lo_cred = NULL;
+ write_begin_pos = nfsi->layout.pnfs_write_begin_pos;
+ write_end_pos = nfsi->layout.pnfs_write_end_pos;
+ data->cred = nfsi->layout.lo_cred;
+ nfsi->layout.pnfs_write_begin_pos = 0;
+ nfsi->layout.pnfs_write_end_pos = 0;
+ nfsi->layout.lo_cred = NULL;
pnfs_get_layout_stateid(&data->args.stateid, &nfsi->layout);

spin_unlock(&nfsi->lo_lock);
diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
index 84d2e95..53626d4 100644
--- a/include/linux/nfs4_pnfs.h
+++ b/include/linux/nfs4_pnfs.h
@@ -83,7 +83,7 @@ has_layout(struct nfs_inode *nfsi)
static inline bool
layoutcommit_needed(struct nfs_inode *nfsi)
{
- return nfsi->lo_cred != NULL;
+ return nfsi->layout.lo_cred != NULL;
}

#endif /* CONFIG_NFS_V4_1 */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 2762b2c..45846c5 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -106,6 +106,17 @@ struct pnfs_layout_type {
seqlock_t seqlock; /* Protects the stateid */
nfs4_stateid stateid;
void *ld_data; /* layout driver private data */
+ unsigned long pnfs_layout_state;
+ #define NFS_INO_RO_LAYOUT_FAILED 0 /* get ro layout failed stop trying */
+ #define NFS_INO_RW_LAYOUT_FAILED 1 /* get rw layout failed stop trying */
+ #define NFS_INO_LAYOUT_ALLOC 2 /* bit lock for layout allocation */
+ time_t pnfs_layout_suspend;
+ struct rpc_cred *lo_cred; /* layoutcommit credential */
+ /* DH: These vars keep track of the maximum write range
+ * so the values can be used for layoutcommit.
+ */
+ loff_t pnfs_write_begin_pos;
+ loff_t pnfs_write_end_pos;
};

/*
@@ -198,20 +209,9 @@ struct nfs_inode {
/* Inodes having layouts */
struct list_head lo_inodes;

- unsigned long pnfs_layout_state;
-#define NFS_INO_RO_LAYOUT_FAILED 0 /* get ro layout failed stop trying */
-#define NFS_INO_RW_LAYOUT_FAILED 1 /* get rw layout failed stop trying */
-#define NFS_INO_LAYOUT_ALLOC 2 /* bit lock for layout allocation */
- time_t pnfs_layout_suspend;
- struct rpc_cred *lo_cred; /* layoutcommit credential */
wait_queue_head_t lo_waitq;
spinlock_t lo_lock;
struct pnfs_layout_type layout;
- /* DH: These vars keep track of the maximum write range
- * so the values can be used for layoutcommit.
- */
- loff_t pnfs_write_begin_pos;
- loff_t pnfs_write_end_pos;
#endif /* CONFIG_NFS_V4_1 */
#endif /* CONFIG_NFS_V4*/
#ifdef CONFIG_NFS_FSCACHE
--
1.6.2.5


2010-06-08 22:14:51

by Alexandros Batsakis

[permalink] [raw]
Subject: [PATCH 7/7] pnfs-submit: support for CB_RECALL_ANY (layouts)

CB_RECALL_ANY serves as a hint to the client to return some server state.
We reply immediately and clean up the layouts asynchronously.

FIXME: currently we return _all_ layouts
FIXME: eventually we should treat layouts like delegations: mark them expired
and fire the state manager to clean them up.

Signed-off-by: Alexandros Batsakis <[email protected]>
---
fs/nfs/callback.h | 7 +++++
fs/nfs/callback_proc.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 73f21bc..b39ac86 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -115,6 +115,13 @@ extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation,

#define RCA4_TYPE_MASK_RDATA_DLG 0
#define RCA4_TYPE_MASK_WDATA_DLG 1
+#define RCA4_TYPE_MASK_DIR_DLG 2
+#define RCA4_TYPE_MASK_FILE_LAYOUT 3
+#define RCA4_TYPE_MASK_BLK_LAYOUT 4
+#define RCA4_TYPE_MASK_OBJ_LAYOUT_MIN 8
+#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX 9
+#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12
+#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15

struct cb_recallanyargs {
struct sockaddr *craa_addr;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index abdbf40..a598b5a 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -339,6 +339,27 @@ out_put_no_client:
return status;
}

+static int pnfs_recall_all_layouts(struct nfs_client *clp)
+{
+ struct cb_pnfs_layoutrecallargs rl;
+ struct inode *inode;
+ int status = 0;
+
+ rl.cbl_recall_type = RETURN_ALL;
+ rl.cbl_seg.iomode = IOMODE_ANY;
+ rl.cbl_seg.offset = 0;
+ rl.cbl_seg.length = NFS4_MAX_UINT64;
+
+ /* we need the inode to get the nfs_server struct */
+ inode = nfs_layoutrecall_find_inode(clp, &rl);
+ if (!inode)
+ return status;
+ status = pnfs_async_return_layout(clp, inode, &rl);
+ iput(inode);
+
+ return status;
+}
+
__be32 pnfs_cb_layoutrecall(struct cb_pnfs_layoutrecallargs *args,
void *dummy)
{
@@ -606,13 +627,37 @@ out:
return status;
}

+static inline bool
+validate_bitmap_values(const unsigned long *mask)
+{
+ int i;
+
+ if (*mask == 0)
+ return true;
+ if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, mask) ||
+ test_bit(RCA4_TYPE_MASK_WDATA_DLG, mask) ||
+ test_bit(RCA4_TYPE_MASK_DIR_DLG, mask) ||
+ test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, mask) ||
+ test_bit(RCA4_TYPE_MASK_BLK_LAYOUT, mask))
+ return true;
+ for (i = RCA4_TYPE_MASK_OBJ_LAYOUT_MIN;
+ i <= RCA4_TYPE_MASK_OBJ_LAYOUT_MAX; i++)
+ if (test_bit(i, mask))
+ return true;
+ for (i = RCA4_TYPE_MASK_OTHER_LAYOUT_MIN;
+ i <= RCA4_TYPE_MASK_OTHER_LAYOUT_MAX; i++)
+ if (test_bit(i, mask))
+ return true;
+ return false;
+}
+
__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
{
struct nfs_client *clp;
__be32 status;
fmode_t flags = 0;

- status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
+ status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
clp = nfs_find_client(args->craa_addr, 4);
if (clp == NULL)
goto out;
@@ -620,16 +665,25 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
dprintk("NFS: RECALL_ANY callback request from %s\n",
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));

+ status = cpu_to_be32(NFS4ERR_INVAL);
+ if (!validate_bitmap_values((const unsigned long *)
+ &args->craa_type_mask))
+ return status;
+
+ status = cpu_to_be32(NFS4_OK);
if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
&args->craa_type_mask))
flags = FMODE_READ;
if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
&args->craa_type_mask))
flags |= FMODE_WRITE;
+ if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
+ &args->craa_type_mask))
+ if (pnfs_recall_all_layouts(clp) == -EAGAIN)
+ status = cpu_to_be32(NFS4ERR_DELAY);

if (flags)
nfs_expire_all_delegation_types(clp, flags);
- status = htonl(NFS4_OK);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
--
1.6.2.5