Make the file creation time, inode data version number and inode generation
number available on Ext4 by as xattrs named:
file.crtime
file.i_generation
file.i_version (directories only for ext4)
This could then be used by Samba as the SMB protocol passes the file creation
time to the client.
With this patch, you can see the xattrs providing binary data:
[root@andromeda ~]# getfattr -d /var/cache/fscache -e hex -m\.*
getfattr: Removing leading '/' from absolute path names
# file: var/cache/fscache
file.crtime=0x53ba244c000000000000000000000000
file.i_generation=0x0000000000000000
file.i_version=0x0400000000000000
security.selinux=0x73797374656d5f753a6f626a6563745f723a636163686566696c65735f7661725f743a733000
[root@andromeda ~]# getfattr -d /var/cache/fscache/cull_atimes -e hex -m\.*
getfattr: Removing leading '/' from absolute path names
# file: var/cache/fscache/cull_atimes
file.crtime=0x83ba244c0000000019a3632a00000000
file.i_generation=0x73ab85f500000000
security.selinux=0x73797374656d5f753a6f626a6563745f723a636163686566696c65735f7661725f743a733000
user.CacheFiles.atime_base=0x30303030303030303463323464363239
Signed-off-by: David Howells <[email protected]>
---
fs/ext4/Makefile | 4 +-
fs/ext4/xattr.c | 39 ++++++++++++------
fs/ext4/xattr.h | 2 +
fs/ext4/xattr_file.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/xattr.h | 3 +
5 files changed, 142 insertions(+), 14 deletions(-)
create mode 100644 fs/ext4/xattr_file.c
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 8867b2a..034dd27 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -8,6 +8,8 @@ ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
-ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ext4-$(CONFIG_EXT4_FS_XATTR) += \
+ xattr.o xattr_user.o xattr_trusted.o xattr_file.o
+
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 0433800..1360f7c 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -111,6 +111,7 @@ static const struct xattr_handler *ext4_xattr_handler_map[] = {
const struct xattr_handler *ext4_xattr_handlers[] = {
&ext4_xattr_user_handler,
+ &ext4_xattr_file_handler,
&ext4_xattr_trusted_handler,
#ifdef CONFIG_EXT4_FS_POSIX_ACL
&ext4_xattr_acl_access_handler,
@@ -427,23 +428,35 @@ cleanup:
static int
ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
- int i_error, b_error;
+ int ret, result = 0;
down_read(&EXT4_I(dentry->d_inode)->xattr_sem);
- i_error = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
- if (i_error < 0) {
- b_error = 0;
- } else {
- if (buffer) {
- buffer += i_error;
- buffer_size -= i_error;
- }
- b_error = ext4_xattr_block_list(dentry, buffer, buffer_size);
- if (b_error < 0)
- i_error = 0;
+ ret = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
+ if (ret < 0)
+ goto error;
+ result += ret;
+ if (buffer) {
+ buffer += ret;
+ buffer_size -= ret;
+ }
+
+ ret = ext4_xattr_block_list(dentry, buffer, buffer_size);
+ if (ret < 0)
+ goto error;
+ result += ret;
+ if (buffer) {
+ buffer += ret;
+ buffer_size -= ret;
}
+
+ ret = ext4_xattr_file_list(dentry, buffer, buffer_size);
+ if (ret < 0)
+ goto error;
+ result += ret;
+
+error:
up_read(&EXT4_I(dentry->d_inode)->xattr_sem);
- return i_error + b_error;
+ return ret < 0 ? ret : result;
}
/*
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 518e96e..f0e3aaf 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -66,12 +66,14 @@ struct ext4_xattr_entry {
# ifdef CONFIG_EXT4_FS_XATTR
extern const struct xattr_handler ext4_xattr_user_handler;
+extern const struct xattr_handler ext4_xattr_file_handler;
extern const struct xattr_handler ext4_xattr_trusted_handler;
extern const struct xattr_handler ext4_xattr_acl_access_handler;
extern const struct xattr_handler ext4_xattr_acl_default_handler;
extern const struct xattr_handler ext4_xattr_security_handler;
extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
+extern int ext4_xattr_file_list(struct dentry *, char *, size_t);
extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
diff --git a/fs/ext4/xattr_file.c b/fs/ext4/xattr_file.c
new file mode 100644
index 0000000..81044c5
--- /dev/null
+++ b/fs/ext4/xattr_file.c
@@ -0,0 +1,108 @@
+/* File-specific xattrs
+ *
+ * Copyright (C) 2010 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells ([email protected])
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/capability.h>
+#include <linux/fs.h>
+#include "ext4_jbd2.h"
+#include "ext4.h"
+#include "xattr.h"
+
+static const char *ext4_file_xattrs[] = {
+ "crtime",
+ "i_generation"
+};
+
+static const char *ext4_dir_xattrs[] = {
+ "i_version",
+};
+
+int
+ext4_xattr_file_list(struct dentry *dentry, char *list, size_t list_size)
+{
+ struct ext4_inode_info *ei = EXT4_I(dentry->d_inode);
+ const size_t prefix_len = XATTR_FILE_PREFIX_LEN;
+ int total_len = 0;
+ int loop;
+
+ for (loop = 0; loop < ARRAY_SIZE(ext4_file_xattrs); loop++) {
+ const char *fxname = ext4_file_xattrs[loop];
+ int fxnlen = strlen(fxname);
+
+ total_len += prefix_len + fxnlen + 1;
+ if (list && total_len <= list_size) {
+ memcpy(list, XATTR_FILE_PREFIX, prefix_len);
+ list += prefix_len;
+ memcpy(list, fxname, fxnlen + 1);
+ list += fxnlen + 1;
+ }
+ }
+
+ if (!S_ISDIR(ei->vfs_inode.i_mode))
+ goto out;
+
+ /* Ext4 only supports i_version on directories */
+ for (loop = 0; loop < ARRAY_SIZE(ext4_dir_xattrs); loop++) {
+ const char *fxname = ext4_dir_xattrs[loop];
+ int fxnlen = strlen(fxname);
+
+ total_len += prefix_len + fxnlen + 1;
+ if (list && total_len <= list_size) {
+ memcpy(list, XATTR_FILE_PREFIX, prefix_len);
+ list += prefix_len;
+ memcpy(list, fxname, fxnlen + 1);
+ list += fxnlen + 1;
+ }
+ }
+
+out:
+ return total_len;
+}
+
+static int
+ext4_xattr_file_get(struct dentry *dentry, const char *name, void *buffer,
+ size_t size, int type)
+{
+ struct ext4_inode_info *ei = EXT4_I(dentry->d_inode);
+ size_t result_size;
+ union {
+ struct timespec ts;
+ u64 val;
+ } result;
+
+ if (strcmp(name, "crtime") == 0) {
+ result_size = sizeof(struct timespec);
+ result.ts = ei->i_crtime;
+ } else if (strcmp(name, "i_version") == 0) {
+ if (!S_ISDIR(ei->vfs_inode.i_mode))
+ return -ENOTDIR;
+ result_size = sizeof(u64);
+ result.val = ei->vfs_inode.i_version;
+ } else if (strcmp(name, "i_generation") == 0) {
+ result_size = sizeof(u64);
+ result.val = ei->vfs_inode.i_generation;
+ } else {
+ return -EINVAL;
+ }
+
+ if (size == 0)
+ return result_size;
+ if (size < result_size)
+ return -E2BIG;
+ memcpy(buffer, &result, result_size);
+ return result_size;
+}
+
+const struct xattr_handler ext4_xattr_file_handler = {
+ .prefix = XATTR_FILE_PREFIX,
+ .get = ext4_xattr_file_get,
+};
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 0cfa1e9..e52a8ce 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -33,6 +33,9 @@
#define XATTR_USER_PREFIX "user."
#define XATTR_USER_PREFIX_LEN (sizeof (XATTR_USER_PREFIX) - 1)
+#define XATTR_FILE_PREFIX "file."
+#define XATTR_FILE_PREFIX_LEN (sizeof (XATTR_FILE_PREFIX) - 1)
+
struct inode;
struct dentry;
On Mon, Jun 28, 2010 at 11:26 AM, David Howells <[email protected]> wrote:
> Make the file creation time, inode data version number and inode generation
> number available on Ext4 by as xattrs named:
>
> ? ? ? ?file.crtime
> ? ? ? ?file.i_generation
> ? ? ? ?file.i_version (directories only for ext4)
>
> This could then be used by Samba as the SMB protocol passes the file creation
> time to the client.
>
> With this patch, you can see the xattrs providing binary data:
>
> [root@andromeda ~]# getfattr -d /var/cache/fscache -e hex -m\.*
> getfattr: Removing leading '/' from absolute path names
> # file: var/cache/fscache
> file.crtime=0x53ba244c000000000000000000000000
> file.i_generation=0x0000000000000000
> file.i_version=0x0400000000000000
It would be easy enough to do something similar for crtime for cifs
(it may also be possible to do something similar to
i_generation and i_version at least for smb2 but haven't
experimented to see which servers could return something
similar to version and generation). I did have a request for
someone doing a backup application over cifs to return creation
time so at least this would make sense.
--
Thanks,
Steve
On Mon, Jun 28, 2010 at 11:33:10AM -0500, Steve French wrote:
>
> It would be easy enough to do something similar for crtime for cifs
> (it may also be possible to do something similar to
> i_generation and i_version at least for smb2 but haven't
> experimented to see which servers could return something
> similar to version and generation). I did have a request for
> someone doing a backup application over cifs to return creation
> time so at least this would make sense.
We already have code in Samba to detect "birthtime"
(st_btime) as a returned member of a stat struct.
This already works on many other platforms, so I'd
rather just have Linux stat fixed to work with
btime.
Jeremy.
On Mon, Jun 28, 2010 at 11:48 AM, Jeremy Allison <[email protected]> wrote:
> On Mon, Jun 28, 2010 at 11:33:10AM -0500, Steve French wrote:
>>
>> It would be easy enough to do something similar for crtime for cifs
>> (it may also be possible to do something similar to
>> i_generation and i_version at least for smb2 but haven't
>> experimented to see which servers could return something
>> similar to version and generation). ?I did have a request for
>> someone doing a backup application over cifs to return creation
>> time so at least this would make sense.
>
> We already have code in Samba to detect "birthtime"
> (st_btime) as a returned member of a stat struct.
>
> This already works on many other platforms, so I'd
> rather just have Linux stat fixed to work with
> btime.
That sounds better but it hits the fs, vfs and libc
and seems like libc is near impossible to get changes into.
Seems like in the short term adding xattrs is harmless, and
doesn't hurt in the longrun
--
Thanks,
Steve
On Mon, Jun 28, 2010 at 12:04:09PM -0500, Steve French wrote:
> On Mon, Jun 28, 2010 at 11:48 AM, Jeremy Allison <[email protected]> wrote:
> > On Mon, Jun 28, 2010 at 11:33:10AM -0500, Steve French wrote:
> >>
> >> It would be easy enough to do something similar for crtime for cifs
> >> (it may also be possible to do something similar to
> >> i_generation and i_version at least for smb2 but haven't
> >> experimented to see which servers could return something
> >> similar to version and generation). ?I did have a request for
> >> someone doing a backup application over cifs to return creation
> >> time so at least this would make sense.
> >
> > We already have code in Samba to detect "birthtime"
> > (st_btime) as a returned member of a stat struct.
> >
> > This already works on many other platforms, so I'd
> > rather just have Linux stat fixed to work with
> > btime.
>
> That sounds better but it hits the fs, vfs and libc
> and seems like libc is near impossible to get changes into.
It's a software project, it gets new features. This
is a new feature that is necessary to support new
file system features.
> Seems like in the short term adding xattrs is harmless, and
> doesn't hurt in the longrun
Yes it does. It's trying to do an end-run around
adding this into the entire stack where it'll be
useful for all applications. Samba releases live
forever. Do you want to be still maintaining the
"btime as xattr" stuff in 20 years ?
Fix it once, fix it right.
Jeremy.
On 2010-06-28, at 10:26, David Howells wrote:
> Make the file creation time, inode data version number and inode generation
> number available on Ext4 by as xattrs named:
>
> file.crtime
> file.i_generation
> file.i_version (directories only for ext4)
Some minor nits:
- I'd prefer calling these "file.generation" and "file.version".
I don't think there is value in the "i_" prefix adds anything,
and it seems more like an internal detail to me
- why not expose the ".version" field for regular files? It seems
that all of them are applicable for all file types.
- it would be good to not introduce a new xattr namespace, since
tools like tar (even the RHEL-patched one) will not backup and
restore these namespaces. Using "trusted." would allow them to
be backed up and restored using existing xattr-patched GNU tar
by root, but wouldn't allow them to be modified by regular users.
I think this is important for proper backup/restore of a filesystem,
but can have correctness implications and shouldn't be accessible
to regular users.
> file.crtime=0x53ba244c000000000000000000000000
Is this a binary (host-endian) struct timespec?
> file.i_generation=0x0000000000000000
This seems odd, i_generation should never be zero, AFAIK.
Cheers, Andreas
On Mon, Jun 28, 2010 at 12:04:09PM -0500, Steve French wrote:
> That sounds better but it hits the fs, vfs and libc
> and seems like libc is near impossible to get changes into.
If you do it properly it's not hard to get changes into libc.
Let's do it properly instead of adding hacks like that. Synthetic
xattrs causes much more problems than they solve.
Andreas Dilger <[email protected]> wrote:
> - I'd prefer calling these "file.generation" and "file.version".
> I don't think there is value in the "i_" prefix adds anything,
> and it seems more like an internal detail to me
That's reasonable.
> - why not expose the ".version" field for regular files? It seems
> that all of them are applicable for all file types.
Because Ext4 doesn't support it for anything other than directories.
> - it would be good to not introduce a new xattr namespace, since
> tools like tar (even the RHEL-patched one) will not backup and
> restore these namespaces. Using "trusted." would allow them to
> be backed up and restored using existing xattr-patched GNU tar
> by root, but wouldn't allow them to be modified by regular users.
> I think this is important for proper backup/restore of a filesystem,
> but can have correctness implications and shouldn't be accessible
> to regular users.
Does backing them up make sense, though? They are filesystem structural
attributes. Can you restore the inode number, for example? If not, then you
can't restore i_generation either. Restoring i_version might make sense, but
what if it winds i_version backwards whilst maintaining i_ino and i_generation,
that means there'll be a time in the future where the three values are once
again what might have been already published - and may already be in someone's
persistent cache.
> > file.crtime=0x53ba244c000000000000000000000000
>
> Is this a binary (host-endian) struct timespec?
Yes. That might not be the best representation, however. It could also be,
say "<decimal-secs>.<decimal-nsecs>", eg: "1234.000000567".
> > file.i_generation=0x0000000000000000
>
> This seems odd, i_generation should never be zero, AFAIK.
That might be because it's the root directory, and so cannot be replaced.
David
On Mon, Jun 28, 2010 at 2:38 PM, David Howells <[email protected]> wrote:
> Andreas Dilger <[email protected]> wrote:
>
>> - I'd prefer calling these "file.generation" and "file.version".
>> ? I don't think there is value in the "i_" prefix adds anything,
>> ? and it seems more like an internal detail to me
>
> That's reasonable.
>
>> - why not expose the ".version" field for regular files? ?It seems
>> ? that all of them are applicable for all file types.
>
> Because Ext4 doesn't support it for anything other than directories.
>
>> - it would be good to not introduce a new xattr namespace, since
>> ? tools like tar (even the RHEL-patched one) will not backup and
>> ? restore these namespaces. ?Using "trusted." would allow them to
>> ? be backed up and restored using existing xattr-patched GNU tar
>> ? by root, but wouldn't allow them to be modified by regular users.
>> ? I think this is important for proper backup/restore of a filesystem,
>> ? but can have correctness implications and shouldn't be accessible
>> ? to regular users.
>
> Does backing them up make sense, though? ?They are filesystem structural
> attributes. ?Can you restore the inode number, for example? ?If not, then you
> can't restore i_generation either. ?Restoring i_version might make sense, but
> what if it winds i_version backwards whilst maintaining i_ino and i_generation,
> that means there'll be a time in the future where the three values are once
> again what might have been already published - and may already be in someone's
> persistent cache.
I think backing them up makes sense, even if they can't easily
be restored (ie just for reporting).
Are there security differences between the "trusted" namespace that
would make it harder for an app to read them (the man page did not list
the security differences between trusted and user xattrs).
--
Thanks,
Steve
Jeremy Allison <[email protected]> wrote:
> We already have code in Samba to detect "birthtime"
> (st_btime) as a returned member of a stat struct.
Is it, though?
Googling for st_btime suggests it could also be taken as the time last
archived. That may just be a NetWareism though.
David
On Tue, Jun 29, 2010 at 11:44:37PM +0100, David Howells wrote:
> Jeremy Allison <[email protected]> wrote:
>
> > We already have code in Samba to detect "birthtime"
> > (st_btime) as a returned member of a stat struct.
>
> Is it, though?
>
> Googling for st_btime suggests it could also be taken as the time last
> archived. That may just be a NetWareism though.
It's a *BSD'ism.
http://www.daemon-systems.org/man/fstat.2.html
#if defined(_NETBSD_SOURCE)
struct timespec st_birthtimespec; /* time of inode creation */
#else
time_t st_birthtime; /* time of inode creation */
long st_birthtimensec; /* nsec of inode creation */
#endif
http://www.unix.com/man-page/FreeBSD/2/stat/
st_birthtime Time when the inode was created.
Of course, for Samba's use we also have to be
able to *write* to st_birthtime as Windows clients
can change this. But that's what the EA is for
(and I'm happy with a system that can only read
st_birthtime, not write it).
Jeremy.
Jeremy Allison <[email protected]> wrote:
> > Googling for st_btime suggests it could also be taken as the time last
> > archived. That may just be a NetWareism though.
>
> It's a *BSD'ism.
I meant that st_btime meaning "last archive time" may be a NetWareism. That
seems to get more hits than anything else.
David