2010-06-22 15:45:14

by Suresh Jayaraman

[permalink] [raw]
Subject: [RFC][PATCH 06/10] cifs: define inode-level cache object and register them

Define inode-level data storage objects (managed by cifsInodeInfo structs).
Each inode-level object is created in a super-block level object and is itself
a data storage object in to which pages from the inode are stored.

The inode object is keyed by UniqueId. The coherency data being used is
LastWriteTime and the file size.

Signed-off-by: Suresh Jayaraman <[email protected]>
---
fs/cifs/cache.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/cifs/cifsfs.c | 7 ++++
fs/cifs/cifsglob.h | 3 +
fs/cifs/file.c | 6 +++
fs/cifs/fscache.c | 68 +++++++++++++++++++++++++++++++++++++++++++++
fs/cifs/fscache.h | 12 +++++++
fs/cifs/inode.c | 4 ++
7 files changed, 180 insertions(+)

Index: cifs-2.6/fs/cifs/cache.c
===================================================================
--- cifs-2.6.orig/fs/cifs/cache.c
+++ cifs-2.6/fs/cifs/cache.c
@@ -138,3 +138,83 @@ const struct fscache_cookie_def cifs_fsc
.get_key = cifs_super_get_key,
};

+/*
+ * Auxiliary data attached to CIFS inode within the cache
+ */
+struct cifs_fscache_inode_auxdata {
+ struct timespec last_write_time;
+ loff_t size;
+};
+
+static uint16_t cifs_fscache_inode_get_key(const void *cookie_netfs_data,
+ void *buffer, uint16_t maxbuf)
+{
+ const struct cifsInodeInfo *cifsi = cookie_netfs_data;
+ uint16_t keylen;
+
+ /* use the UniqueId as the key */
+ keylen = sizeof(cifsi->uniqueid);
+ if (keylen > maxbuf)
+ keylen = 0;
+ else
+ memcpy(buffer, &cifsi->uniqueid, keylen);
+
+ return keylen;
+}
+
+static void
+cifs_fscache_inode_get_attr(const void *cookie_netfs_data, uint64_t *size)
+{
+ const struct cifsInodeInfo *cifsi = cookie_netfs_data;
+
+ *size = cifsi->vfs_inode.i_size;
+}
+
+static uint16_t
+cifs_fscache_inode_get_aux(const void *cookie_netfs_data, void *buffer,
+ uint16_t maxbuf)
+{
+ struct cifs_fscache_inode_auxdata auxdata;
+ const struct cifsInodeInfo *cifsi = cookie_netfs_data;
+
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.size = cifsi->vfs_inode.i_size;
+ auxdata.last_write_time = cifsi->vfs_inode.i_ctime;
+
+ if (maxbuf > sizeof(auxdata))
+ maxbuf = sizeof(auxdata);
+
+ memcpy(buffer, &auxdata, maxbuf);
+
+ return maxbuf;
+}
+
+static enum
+fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data,
+ const void *data,
+ uint16_t datalen)
+{
+ struct cifs_fscache_inode_auxdata auxdata;
+ struct cifsInodeInfo *cifsi = cookie_netfs_data;
+
+ if (datalen != sizeof(auxdata))
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ memset(&auxdata, 0, sizeof(auxdata));
+ auxdata.size = cifsi->vfs_inode.i_size;
+ auxdata.last_write_time = cifsi->vfs_inode.i_ctime;
+
+ if (memcmp(data, &auxdata, datalen) != 0)
+ return FSCACHE_CHECKAUX_OBSOLETE;
+
+ return FSCACHE_CHECKAUX_OKAY;
+}
+
+const struct fscache_cookie_def cifs_fscache_inode_object_def = {
+ .name = "CIFS.uniqueid",
+ .type = FSCACHE_COOKIE_TYPE_DATAFILE,
+ .get_key = cifs_fscache_inode_get_key,
+ .get_attr = cifs_fscache_inode_get_attr,
+ .get_aux = cifs_fscache_inode_get_aux,
+ .check_aux = cifs_fscache_inode_check_aux,
+};
Index: cifs-2.6/fs/cifs/cifsfs.c
===================================================================
--- cifs-2.6.orig/fs/cifs/cifsfs.c
+++ cifs-2.6/fs/cifs/cifsfs.c
@@ -330,6 +330,12 @@ cifs_destroy_inode(struct inode *inode)
}

static void
+cifs_clear_inode(struct inode *inode)
+{
+ cifs_fscache_release_inode_cookie(inode);
+}
+
+static void
cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
{
seq_printf(s, ",addr=");
@@ -490,6 +496,7 @@ static const struct super_operations cif
.alloc_inode = cifs_alloc_inode,
.destroy_inode = cifs_destroy_inode,
.drop_inode = cifs_drop_inode,
+ .clear_inode = cifs_clear_inode,
/* .delete_inode = cifs_delete_inode, */ /* Do not need above
function unless later we add lazy close of inodes or unless the
kernel forgets to call us with the same number of releases (closes)
Index: cifs-2.6/fs/cifs/cifsglob.h
===================================================================
--- cifs-2.6.orig/fs/cifs/cifsglob.h
+++ cifs-2.6/fs/cifs/cifsglob.h
@@ -407,6 +407,9 @@ struct cifsInodeInfo {
bool invalid_mapping:1; /* pagecache is invalid */
u64 server_eof; /* current file size on server */
u64 uniqueid; /* server inode number */
+#ifdef CONFIG_CIFS_FSCACHE
+ struct fscache_cookie *fscache;
+#endif
struct inode vfs_inode;
};

Index: cifs-2.6/fs/cifs/file.c
===================================================================
--- cifs-2.6.orig/fs/cifs/file.c
+++ cifs-2.6/fs/cifs/file.c
@@ -40,6 +40,7 @@
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
+#include "fscache.h"

static inline int cifs_convert_flags(unsigned int flags)
{
@@ -282,6 +283,9 @@ int cifs_open(struct inode *inode, struc
CIFSSMBClose(xid, tcon, netfid);
rc = -ENOMEM;
}
+
+ cifs_fscache_set_inode_cookie(inode, file);
+
goto out;
} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
if (tcon->ses->serverNOS)
@@ -373,6 +377,8 @@ int cifs_open(struct inode *inode, struc
goto out;
}

+ cifs_fscache_set_inode_cookie(inode, file);
+
if (oplock & CIFS_CREATE_ACTION) {
/* time to set mode which we can not set earlier due to
problems creating new read-only files */
Index: cifs-2.6/fs/cifs/fscache.c
===================================================================
--- cifs-2.6.orig/fs/cifs/fscache.c
+++ cifs-2.6/fs/cifs/fscache.c
@@ -62,3 +62,71 @@ void cifs_fscache_release_super_cookie(s
tcon->fscache = NULL;
}

+static void cifs_fscache_enable_inode_cookie(struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+ if (cifsi->fscache)
+ return;
+
+ cifsi->fscache = fscache_acquire_cookie(cifs_sb->tcon->fscache,
+ &cifs_fscache_inode_object_def,
+ cifsi);
+ cFYI(1, "CIFS: got FH cookie (0x%p/0x%p/0x%p)\n",
+ cifs_sb->tcon, cifsi, cifsi->fscache);
+}
+
+void cifs_fscache_release_inode_cookie(struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+
+ if (cifsi->fscache) {
+ cFYI(1, "CIFS releasing inode cookie (0x%p/0x%p)\n",
+ cifsi, cifsi->fscache);
+ fscache_relinquish_cookie(cifsi->fscache, 0);
+ cifsi->fscache = NULL;
+ }
+}
+
+static void cifs_fscache_disable_inode_cookie(struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+
+ if (cifsi->fscache) {
+ cFYI(1, "CIFS disabling inode cookie (0x%p/0x%p)\n",
+ cifsi, cifsi->fscache);
+ fscache_relinquish_cookie(cifsi->fscache, 1);
+ cifsi->fscache = NULL;
+ }
+}
+
+void cifs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
+{
+ /* BB: parallel opens - need locking? */
+ if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
+ cifs_fscache_disable_inode_cookie(inode);
+ else {
+ cifs_fscache_enable_inode_cookie(inode);
+ cFYI(1, "CIFS: fscache inode cookie set\n");
+ }
+}
+
+void cifs_fscache_reset_inode_cookie(struct inode *inode)
+{
+ struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct fscache_cookie *old = cifsi->fscache;
+
+ if (cifsi->fscache) {
+ /* retire the current fscache cache and get a new one */
+ fscache_relinquish_cookie(cifsi->fscache, 1);
+
+ cifsi->fscache = fscache_acquire_cookie(cifs_sb->tcon->fscache,
+ &cifs_fscache_inode_object_def,
+ cifsi);
+ cFYI(1, "CIFS: new cookie (0x%p/0x%p) oldcookie 0x%p\n",
+ cifsi, cifsi->fscache, old);
+ }
+}
+
Index: cifs-2.6/fs/cifs/fscache.h
===================================================================
--- cifs-2.6.orig/fs/cifs/fscache.h
+++ cifs-2.6/fs/cifs/fscache.h
@@ -29,6 +29,8 @@
extern struct fscache_netfs cifs_fscache_netfs;
extern const struct fscache_cookie_def cifs_fscache_server_index_def;
extern const struct fscache_cookie_def cifs_fscache_super_index_def;
+extern const struct fscache_cookie_def cifs_fscache_inode_object_def;
+

extern int cifs_fscache_register(void);
extern void cifs_fscache_unregister(void);
@@ -41,6 +43,10 @@ extern void cifs_fscache_release_client_
extern void cifs_fscache_get_super_cookie(struct cifsTconInfo *);
extern void cifs_fscache_release_super_cookie(struct cifsTconInfo *);

+extern void cifs_fscache_release_inode_cookie(struct inode *);
+extern void cifs_fscache_set_inode_cookie(struct inode *, struct file *);
+extern void cifs_fscache_reset_inode_cookie(struct inode *);
+
#else /* CONFIG_CIFS_FSCACHE */
static inline int cifs_fscache_register(void) { return 0; }
static inline void cifs_fscache_unregister(void) {}
@@ -53,6 +59,12 @@ static inline void cifs_fscache_get_supe
static inline void
cifs_fscache_release_super_cookie(struct cifsTconInfo *tcon) {}

+static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {}
+static inline void cifs_fscache_set_inode_cookie(struct inode *inode,
+ struct file *filp) {}
+static inline void cifs_fscache_reset_inode_cookie(struct inode *inode) {}
+
+
#endif /* CONFIG_CIFS_FSCACHE */

#endif /* _CIFS_FSCACHE_H */
Index: cifs-2.6/fs/cifs/inode.c
===================================================================
--- cifs-2.6.orig/fs/cifs/inode.c
+++ cifs-2.6/fs/cifs/inode.c
@@ -29,6 +29,7 @@
#include "cifsproto.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
+#include "fscache.h"


static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral)
@@ -776,6 +777,8 @@ retry_iget5_locked:
inode->i_flags |= S_NOATIME | S_NOCMTIME;
if (inode->i_state & I_NEW) {
inode->i_ino = hash;
+ /* initialize per-inode cache cookie pointer */
+ CIFS_I(inode)->fscache = NULL;
unlock_new_inode(inode);
}
}
@@ -1568,6 +1571,7 @@ cifs_invalidate_mapping(struct inode *in
cifs_i->write_behind_rc = rc;
}
invalidate_remote_inode(inode);
+ cifs_fscache_reset_inode_cookie(inode);
}

int cifs_revalidate_file(struct file *filp)


2010-06-23 17:03:03

by David Howells

[permalink] [raw]
Subject: Re: [RFC][PATCH 06/10] cifs: define inode-level cache object and register them

Suresh Jayaraman <[email protected]> wrote:

> Define inode-level data storage objects (managed by cifsInodeInfo structs).
> Each inode-level object is created in a super-block level object and is
> itself a data storage object in to which pages from the inode are stored.
>
> The inode object is keyed by UniqueId. The coherency data being used is
> LastWriteTime and the file size.

Isn't there a file creation time too?

I take it you don't support caching on files that are open for writing at this
time?

David

2010-06-25 12:50:24

by Suresh Jayaraman

[permalink] [raw]
Subject: Re: [RFC][PATCH 06/10] cifs: define inode-level cache object and register them

On 06/23/2010 10:32 PM, David Howells wrote:
> Suresh Jayaraman <[email protected]> wrote:
>
>> Define inode-level data storage objects (managed by cifsInodeInfo structs).
>> Each inode-level object is created in a super-block level object and is
>> itself a data storage object in to which pages from the inode are stored.
>>
>> The inode object is keyed by UniqueId. The coherency data being used is
>> LastWriteTime and the file size.
>
> Isn't there a file creation time too?

I think the creation time is currently being ignored as we won't be able
to accomodate in POSIX stat struct.

> I take it you don't support caching on files that are open for writing at this
> time?
>

Yes.


--
Suresh Jayaraman

2010-06-25 12:56:04

by David Howells

[permalink] [raw]
Subject: Re: [RFC][PATCH 06/10] cifs: define inode-level cache object and register them

Suresh Jayaraman <[email protected]> wrote:

> I think the creation time is currently being ignored as we won't be able
> to accomodate in POSIX stat struct.

The FS-Cache interface doesn't use the POSIX stat struct, but it could be
really useful to save it and use it for cache coherency inside the kernel.

Out of interest, what does Samba do when it comes to generating a creation time
for UNIX where one does not exist?

David

2010-06-25 16:53:11

by Jeffrey Layton

[permalink] [raw]
Subject: Re: [RFC][PATCH 06/10] cifs: define inode-level cache object and register them

On Fri, 25 Jun 2010 13:55:49 +0100
David Howells <[email protected]> wrote:

> Suresh Jayaraman <[email protected]> wrote:
>
> > I think the creation time is currently being ignored as we won't be able
> > to accomodate in POSIX stat struct.
>
> The FS-Cache interface doesn't use the POSIX stat struct, but it could be
> really useful to save it and use it for cache coherency inside the kernel.
>
> Out of interest, what does Samba do when it comes to generating a creation time
> for UNIX where one does not exist?
>

(cc'ing samba-technical since we're talking about the create time)

Looks like it mostly uses the ctime. IMO, the mtime would be a better
choice since it changes less frequently, but I don't guess that it
matters very much.

I have a few patches that make the cifs_iget code do more stringent
checks. One of those makes it use the create time like an i_generation
field to guard against matching inodes that have the same number but
that have undergone a delete/create cycle. They need a bit more testing
but I'm planning to post them in time for 2.6.36.

Because of how samba generates this number, it could be somewhat
problematic to do this. What may save us though is that Linux<->Samba
mostly uses unix extensions unless someone has specifically disabled
them on either end. The unix extension calls don't generally send any
sort of create time field, so we can't rely on it in those codepaths
anyway.

--
Jeff Layton <[email protected]>

2010-06-25 21:46:50

by David Howells

[permalink] [raw]
Subject: Re: [RFC][PATCH 06/10] cifs: define inode-level cache object and register them

Jeff Layton <[email protected]> wrote:

> Looks like it mostly uses the ctime. IMO, the mtime would be a better
> choice since it changes less frequently, but I don't guess that it
> matters very much.

I'd've thought mtime changes more frequently since that's altered when data is
written. ctime is changed when attributes are changed.

Note that Ext4 appears to have a file creation time field in its inode
(struct ext4_inode::i_crtime[_extra]). Can Samba be made to use that?

David

2010-06-25 22:26:56

by Jeffrey Layton

[permalink] [raw]
Subject: Re: [RFC][PATCH 06/10] cifs: define inode-level cache object and register them

On Fri, 25 Jun 2010 22:46:38 +0100
David Howells <[email protected]> wrote:

> Jeff Layton <[email protected]> wrote:
>
> > Looks like it mostly uses the ctime. IMO, the mtime would be a better
> > choice since it changes less frequently, but I don't guess that it
> > matters very much.
>
> I'd've thought mtime changes more frequently since that's altered when data is
> written. ctime is changed when attributes are changed.
>

IIUC, updating mtime for a write is also an attribute change, and that
affects ctime. According to the stat(2) manpage:

The field st_ctime is changed by writing or by setting inode informa-
tion (i.e., owner, group, link count, mode, etc.).

> Note that Ext4 appears to have a file creation time field in its inode
> (struct ext4_inode::i_crtime[_extra]). Can Samba be made to use that?
>

Is it exposed to userspace in any (standard) way? It would be handy to
have that. While we're wishing...it might also be nice to have a
standard way to get at the i_generation from userspace too.

--
Jeff Layton <[email protected]>

2010-06-25 23:04:42

by David Howells

[permalink] [raw]
Subject: Re: [RFC][PATCH 06/10] cifs: define inode-level cache object and register them

Jeff Layton <[email protected]> wrote:

> IIUC, updating mtime for a write is also an attribute change, and that
> affects ctime. According to the stat(2) manpage:

You're right. Okay, ctime is the more frequently changed.

> > Note that Ext4 appears to have a file creation time field in its inode
> > (struct ext4_inode::i_crtime[_extra]). Can Samba be made to use that?
>
> Is it exposed to userspace in any (standard) way? It would be handy to
> have that. While we're wishing...it might also be nice to have a
> standard way to get at the i_generation from userspace too.

Not at present, but it's something that could be exported by ioctl() or
getxattr().

David

2010-06-25 23:05:33

by Steve French

[permalink] [raw]
Subject: Re: [RFC][PATCH 06/10] cifs: define inode-level cache object and register them

On Fri, Jun 25, 2010 at 5:26 PM, Jeff Layton <[email protected]> wrote:
>
> On Fri, 25 Jun 2010 22:46:38 +0100
> David Howells <[email protected]> wrote:
>
> > Jeff Layton <[email protected]> wrote:
> >
> > > Looks like it mostly uses the ctime. IMO, the mtime would be a better
> > > choice since it changes less frequently, but I don't guess that it
> > > matters very much.
> >
> > I'd've thought mtime changes more frequently since that's altered when data is
> > written. ?ctime is changed when attributes are changed.
> >
>
> IIUC, updating mtime for a write is also an attribute change, and that
> affects ctime. According to the stat(2) manpage:
>
> ? ? ? The field st_ctime is changed by writing or by setting ?inode ?informa-
> ? ? ? tion (i.e., owner, group, link count, mode, etc.).
>
> > Note that Ext4 appears to have a file creation time field in its inode
> > (struct ext4_inode::i_crtime[_extra]). ?Can Samba be made to use that?
> >
>
> Is it exposed to userspace in any (standard) way? It would be handy to
> have that. While we're wishing...it might also be nice to have a
> standard way to get at the i_generation from userspace too.
>

Yes - I have talked with MingMing and Aneesh about those (NFS may
someday be able to use those too).? An obstacle in the past had been
that samba server stores its own fake creation time in an ndr encoded
xattr which complicates things.

MingMing/Annesh -
Xattr or other way to get at birth time?


--
Thanks,

Steve

2010-06-27 18:17:46

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [RFC][PATCH 06/10] cifs: define inode-level cache object and register them

On Fri, 25 Jun 2010 17:52:24 -0700, Mingming Cao <[email protected]> wrote:
>
>
> Steve French <[email protected]> wrote on 06/25/2010 04:05:30 PM:
>
> > Steve French <[email protected]>
> > 06/25/2010 04:05 PM
> >
> > To
> >
> > Jeff Layton <[email protected]>, "Aneesh Kumar K.V"
> > <[email protected]>, Mingming Cao/Beaverton/IBM@IBMUS
> >
> > cc
> >
> > David Howells <[email protected]>, Suresh Jayaraman
> > <[email protected]>, [email protected], linux-
> > [email protected], [email protected], samba-
> > [email protected], Jeff Layton <[email protected]>
> >
> > Subject
> >
> > Re: [RFC][PATCH 06/10] cifs: define inode-level cache object and
> > register them
> >
> > On Fri, Jun 25, 2010 at 5:26 PM, Jeff Layton <[email protected]> wrote:
> > >
> > > On Fri, 25 Jun 2010 22:46:38 +0100
> > > David Howells <[email protected]> wrote:
> > >
> > > > Jeff Layton <[email protected]> wrote:
> > > >
> > > > > Looks like it mostly uses the ctime. IMO, the mtime would be a
> better
> > > > > choice since it changes less frequently, but I don't guess that it
> > > > > matters very much.
> > > >
> > > > I'd've thought mtime changes more frequently since that's
> > altered when data is
> > > > written.  ctime is changed when attributes are changed.
> > > >
> > >
> > > IIUC, updating mtime for a write is also an attribute change, and that
> > > affects ctime. According to the stat(2) manpage:
> > >
> > >       The field st_ctime is changed by writing or by setting
> >  inode  informa-
> > >       tion (i.e., owner, group, link count, mode, etc.).
> > >
> > > > Note that Ext4 appears to have a file creation time field in its
> inode
> > > > (struct ext4_inode::i_crtime[_extra]).  Can Samba be made to use
> that?
> > > >
> > >
> > > Is it exposed to userspace in any (standard) way? It would be handy to
> > > have that. While we're wishing...it might also be nice to have a
> > > standard way to get at the i_generation from userspace too.
> > >
> >
> > Yes - I have talked with MingMing and Aneesh about those (NFS may
> > someday be able to use those too).  An obstacle in the past had been
> > that samba server stores its own fake creation time in an ndr encoded
> > xattr which complicates things.
> >
> > MingMing/Annesh -
> > Xattr or other way to get at birth time?
> >
> >
>
> Not yet,
> The ext4 file creation time only accesable from the kernel at the moment.
> There were discussion
> to make this information avaliable via xattr before, but was rejected,
> since most people
> agree that making this info avalibele via stat() is more standard. However
> modifying stat() would imply
> big interface change. thus no action has been taken yet.

NFS ganesha pNFS also had a requirement for getting i_generation and
inode number in userspace. So may be we should now look at updating
stat or add a variant syscall that include i_generation and create time
in the return value

-aneesh

2010-06-27 18:22:45

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [RFC][PATCH 06/10] cifs: define inode-level cache object and register them

On Sun, Jun 27, 2010 at 11:47:21PM +0530, Aneesh Kumar K. V wrote:
> NFS ganesha pNFS also had a requirement for getting i_generation and
> inode number in userspace. So may be we should now look at updating
> stat or add a variant syscall that include i_generation and create time
> in the return value

What's missing in knfsd that you feel the sudden urge to move backwards
to a userspace nfsd (one with a horribly crappy codebase, too).