2010-06-15 18:39:44

by Valerie Aurora

[permalink] [raw]
Subject: [PATCH 14/38] fallthru: ext2 fallthru support

Add support for fallthru directory entries to ext2.

XXX What to do for d_ino for fallthrus? If we return the inode from
the underlying file system, it comes from a different inode
"namespace" and that will produce spurious matches. This argues for
implementation of fallthrus as symlinks because they have to allocate
an inode (and inode number) anyway, and we can later reuse it if we
copy the file up.

Cc: Theodore Tso <[email protected]>
Cc: [email protected]
Signed-off-by: Valerie Aurora <[email protected]>
Signed-off-by: Jan Blunck <[email protected]>
---
fs/ext2/dir.c | 92 ++++++++++++++++++++++++++++++++++++++++++++--
fs/ext2/ext2.h | 1 +
fs/ext2/namei.c | 22 +++++++++++
include/linux/ext2_fs.h | 1 +
4 files changed, 112 insertions(+), 4 deletions(-)

diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 030bd46..f3b4aff 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -219,7 +219,8 @@ static inline int ext2_match (int len, const char * const name,
{
if (len != de->name_len)
return 0;
- if (!de->inode && (de->file_type != EXT2_FT_WHT))
+ if (!de->inode && ((de->file_type != EXT2_FT_WHT) &&
+ (de->file_type != EXT2_FT_FALLTHRU)))
return 0;
return !memcmp(name, de->name, len);
}
@@ -256,6 +257,7 @@ static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
[EXT2_FT_SOCK] = DT_SOCK,
[EXT2_FT_SYMLINK] = DT_LNK,
[EXT2_FT_WHT] = DT_WHT,
+ [EXT2_FT_FALLTHRU] = DT_UNKNOWN,
};

#define S_SHIFT 12
@@ -342,6 +344,24 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
ext2_put_page(page);
return 0;
}
+ } else if (de->file_type == EXT2_FT_FALLTHRU) {
+ int over;
+ unsigned char d_type = DT_UNKNOWN;
+
+ offset = (char *)de - kaddr;
+ /* XXX We don't know the inode number
+ * of the directory entry in the
+ * underlying file system. Should
+ * look it up, either on fallthru
+ * creation at first readdir or now at
+ * filldir time. */
+ over = filldir(dirent, de->name, de->name_len,
+ (n<<PAGE_CACHE_SHIFT) | offset,
+ 123 /* Made up ino */, d_type);
+ if (over) {
+ ext2_put_page(page);
+ return 0;
+ }
}
filp->f_pos += ext2_rec_len_from_disk(de->rec_len);
}
@@ -463,6 +483,10 @@ ino_t ext2_inode_by_dentry(struct inode *dir, struct dentry *dentry)
spin_lock(&dentry->d_lock);
dentry->d_flags |= DCACHE_WHITEOUT;
spin_unlock(&dentry->d_lock);
+ } else if(!res && de->file_type == EXT2_FT_FALLTHRU) {
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags |= DCACHE_FALLTHRU;
+ spin_unlock(&dentry->d_lock);
}
ext2_put_page(page);
}
@@ -532,6 +556,7 @@ static ext2_dirent * ext2_append_entry(struct dentry * dentry,
de->name_len = 0;
de->rec_len = ext2_rec_len_to_disk(chunk_size);
de->inode = 0;
+ de->file_type = 0;
goto got_it;
}
if (de->rec_len == 0) {
@@ -545,6 +570,7 @@ static ext2_dirent * ext2_append_entry(struct dentry * dentry,
name_len = EXT2_DIR_REC_LEN(de->name_len);
rec_len = ext2_rec_len_from_disk(de->rec_len);
if (!de->inode && (de->file_type != EXT2_FT_WHT) &&
+ (de->file_type != EXT2_FT_FALLTHRU) &&
(rec_len >= reclen))
goto got_it;
if (rec_len >= name_len + reclen)
@@ -587,7 +613,8 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)

err = -EEXIST;
if (ext2_match (namelen, name, de)) {
- if (de->file_type == EXT2_FT_WHT)
+ if ((de->file_type == EXT2_FT_WHT) ||
+ (de->file_type == EXT2_FT_FALLTHRU))
goto got_it;
goto out_unlock;
}
@@ -602,7 +629,8 @@ got_it:
&page, NULL);
if (err)
goto out_unlock;
- if (de->inode || ((de->file_type == EXT2_FT_WHT) &&
+ if (de->inode || (((de->file_type == EXT2_FT_WHT) ||
+ (de->file_type == EXT2_FT_FALLTHRU)) &&
!ext2_match (namelen, name, de))) {
ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
@@ -627,6 +655,60 @@ out_unlock:
}

/*
+ * Create a fallthru entry.
+ */
+int ext2_fallthru_entry (struct inode *dir, struct dentry *dentry)
+{
+ const char *name = dentry->d_name.name;
+ int namelen = dentry->d_name.len;
+ unsigned short rec_len, name_len;
+ ext2_dirent * de;
+ struct page *page;
+ loff_t pos;
+ int err;
+
+ de = ext2_append_entry(dentry, &page);
+ if (IS_ERR(de))
+ return PTR_ERR(de);
+
+ err = -EEXIST;
+ if (ext2_match (namelen, name, de))
+ goto out_unlock;
+
+ name_len = EXT2_DIR_REC_LEN(de->name_len);
+ rec_len = ext2_rec_len_from_disk(de->rec_len);
+
+ pos = page_offset(page) +
+ (char*)de - (char*)page_address(page);
+ err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0,
+ &page, NULL);
+ if (err)
+ goto out_unlock;
+ if (de->inode || (de->file_type == EXT2_FT_WHT) ||
+ (de->file_type == EXT2_FT_FALLTHRU)) {
+ ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
+ de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
+ de->rec_len = ext2_rec_len_to_disk(name_len);
+ de = de1;
+ }
+ de->name_len = namelen;
+ memcpy(de->name, name, namelen);
+ de->inode = 0;
+ de->file_type = EXT2_FT_FALLTHRU;
+ err = ext2_commit_chunk(page, pos, rec_len);
+ dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
+ EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
+ mark_inode_dirty(dir);
+ /* OFFSET_CACHE */
+out_put:
+ ext2_put_page(page);
+ return err;
+out_unlock:
+ unlock_page(page);
+ goto out_put;
+}
+
+/*
* ext2_delete_entry deletes a directory entry by merging it with the
* previous entry. Page is up-to-date. Releases the page.
*/
@@ -711,7 +793,9 @@ int ext2_whiteout_entry (struct inode * dir, struct dentry * dentry,
*/
if (ext2_match (namelen, name, de))
de->inode = 0;
- if (de->inode || (de->file_type == EXT2_FT_WHT)) {
+ if (de->inode || (((de->file_type == EXT2_FT_WHT) ||
+ (de->file_type == EXT2_FT_FALLTHRU)) &&
+ !ext2_match (namelen, name, de))) {
ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
de->rec_len = ext2_rec_len_to_disk(name_len);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 44d190c..2fa32b3 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -108,6 +108,7 @@ extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *,
extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
extern int ext2_whiteout_entry (struct inode *, struct dentry *,
struct ext2_dir_entry_2 *, struct page *);
+extern int ext2_fallthru_entry (struct inode *, struct dentry *);
extern int ext2_empty_dir (struct inode *);
extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 12195a5..f28154c 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -349,6 +349,7 @@ static int ext2_whiteout(struct inode *dir, struct dentry *dentry,
goto out;

spin_lock(&new_dentry->d_lock);
+ new_dentry->d_flags &= ~DCACHE_FALLTHRU;
new_dentry->d_flags |= DCACHE_WHITEOUT;
spin_unlock(&new_dentry->d_lock);
d_add(new_dentry, NULL);
@@ -367,6 +368,26 @@ out:
return err;
}

+/*
+ * Create a fallthru entry.
+ */
+static int ext2_fallthru (struct inode *dir, struct dentry *dentry)
+{
+ int err;
+
+ dquot_initialize(dir);
+
+ err = ext2_fallthru_entry(dir, dentry);
+ if (err)
+ return err;
+
+ d_instantiate(dentry, NULL);
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags |= DCACHE_FALLTHRU;
+ spin_unlock(&dentry->d_lock);
+ return 0;
+}
+
static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
struct inode * new_dir, struct dentry * new_dentry )
{
@@ -470,6 +491,7 @@ const struct inode_operations ext2_dir_inode_operations = {
.rmdir = ext2_rmdir,
.mknod = ext2_mknod,
.whiteout = ext2_whiteout,
+ .fallthru = ext2_fallthru,
.rename = ext2_rename,
#ifdef CONFIG_EXT2_FS_XATTR
.setxattr = generic_setxattr,
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
index 20468bd..cb3d400 100644
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -577,6 +577,7 @@ enum {
EXT2_FT_SOCK = 6,
EXT2_FT_SYMLINK = 7,
EXT2_FT_WHT = 8,
+ EXT2_FT_FALLTHRU = 9,
EXT2_FT_MAX
};

--
1.6.3.3


2010-07-13 04:30:54

by Ian Kent

[permalink] [raw]
Subject: Re: [PATCH 14/38] fallthru: ext2 fallthru support

On Tue, Jun 15, 2010 at 11:39:44AM -0700, Valerie Aurora wrote:
> Add support for fallthru directory entries to ext2.
>
> XXX What to do for d_ino for fallthrus? If we return the inode from
> the the underlying file system, it comes from a different inode
> "namespace" and that will produce spurious matches. This argues for
> implementation of fallthrus as symlinks because they have to allocate
> an inode (and inode number) anyway, and we can later reuse it if we
> copy the file up.
>
> Cc: Theodore Tso <[email protected]>
> Cc: [email protected]
> Signed-off-by: Valerie Aurora <[email protected]>
> Signed-off-by: Jan Blunck <[email protected]>
> ---
> fs/ext2/dir.c | 92 ++++++++++++++++++++++++++++++++++++++++++++--
> fs/ext2/ext2.h | 1 +
> fs/ext2/namei.c | 22 +++++++++++
> include/linux/ext2_fs.h | 1 +
> 4 files changed, 112 insertions(+), 4 deletions(-)
>
> diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
> index 030bd46..f3b4aff 100644
> --- a/fs/ext2/dir.c
> +++ b/fs/ext2/dir.c
> @@ -219,7 +219,8 @@ static inline int ext2_match (int len, const char * const name,
> {
> if (len != de->name_len)
> return 0;
> - if (!de->inode && (de->file_type != EXT2_FT_WHT))
> + if (!de->inode && ((de->file_type != EXT2_FT_WHT) &&
> + (de->file_type != EXT2_FT_FALLTHRU)))
> return 0;
> return !memcmp(name, de->name, len);
> }
> @@ -256,6 +257,7 @@ static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
> [EXT2_FT_SOCK] = DT_SOCK,
> [EXT2_FT_SYMLINK] = DT_LNK,
> [EXT2_FT_WHT] = DT_WHT,
> + [EXT2_FT_FALLTHRU] = DT_UNKNOWN,
> };
>
> #define S_SHIFT 12
> @@ -342,6 +344,24 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
> ext2_put_page(page);
> return 0;
> }
> + } else if (de->file_type == EXT2_FT_FALLTHRU) {
> + int over;
> + unsigned char d_type = DT_UNKNOWN;
> +
> + offset = (char *)de - kaddr;
> + /* XXX We don't know the inode number
> + * of the directory entry in the
> + * underlying file system. Should
> + * look it up, either on fallthru
> + * creation at first readdir or now at
> + * filldir time. */
> + over = filldir(dirent, de->name, de->name_len,
> + (n<<PAGE_CACHE_SHIFT) | offset,
> + 123 /* Made up ino */, d_type);
> + if (over) {
> + ext2_put_page(page);
> + return 0;
> + }
> }
> filp->f_pos += ext2_rec_len_from_disk(de->rec_len);
> }
> @@ -463,6 +483,10 @@ ino_t ext2_inode_by_dentry(struct inode *dir, struct dentry *dentry)
> spin_lock(&dentry->d_lock);
> dentry->d_flags |= DCACHE_WHITEOUT;
> spin_unlock(&dentry->d_lock);
> + } else if(!res && de->file_type == EXT2_FT_FALLTHRU) {
> + spin_lock(&dentry->d_lock);
> + dentry->d_flags |= DCACHE_FALLTHRU;
> + spin_unlock(&dentry->d_lock);
> }
> ext2_put_page(page);
> }
> @@ -532,6 +556,7 @@ static ext2_dirent * ext2_append_entry(struct dentry * dentry,
> de->name_len = 0;
> de->rec_len = ext2_rec_len_to_disk(chunk_size);
> de->inode = 0;
> + de->file_type = 0;
> goto got_it;
> }
> if (de->rec_len == 0) {
> @@ -545,6 +570,7 @@ static ext2_dirent * ext2_append_entry(struct dentry * dentry,
> name_len = EXT2_DIR_REC_LEN(de->name_len);
> rec_len = ext2_rec_len_from_disk(de->rec_len);
> if (!de->inode && (de->file_type != EXT2_FT_WHT) &&
> + (de->file_type != EXT2_FT_FALLTHRU) &&
> (rec_len >= reclen))
> goto got_it;
> if (rec_len >= name_len + reclen)
> @@ -587,7 +613,8 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
>
> err = -EEXIST;
> if (ext2_match (namelen, name, de)) {
> - if (de->file_type == EXT2_FT_WHT)
> + if ((de->file_type == EXT2_FT_WHT) ||
> + (de->file_type == EXT2_FT_FALLTHRU))
> goto got_it;
> goto out_unlock;
> }
> @@ -602,7 +629,8 @@ got_it:
> &page, NULL);
> if (err)
> goto out_unlock;
> - if (de->inode || ((de->file_type == EXT2_FT_WHT) &&
> + if (de->inode || (((de->file_type == EXT2_FT_WHT) ||
> + (de->file_type == EXT2_FT_FALLTHRU)) &&
> !ext2_match (namelen, name, de))) {
> ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
> de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
> @@ -627,6 +655,60 @@ out_unlock:
> }
>
> /*
> + * Create a fallthru entry.
> + */
> +int ext2_fallthru_entry (struct inode *dir, struct dentry *dentry)
> +{
> + const char *name = dentry->d_name.name;
> + int namelen = dentry->d_name.len;
> + unsigned short rec_len, name_len;
> + ext2_dirent * de;
> + struct page *page;
> + loff_t pos;
> + int err;
> +
> + de = ext2_append_entry(dentry, &page);
> + if (IS_ERR(de))
> + return PTR_ERR(de);
> +
> + err = -EEXIST;
> + if (ext2_match (namelen, name, de))
> + goto out_unlock;
> +
> + name_len = EXT2_DIR_REC_LEN(de->name_len);
> + rec_len = ext2_rec_len_from_disk(de->rec_len);
> +
> + pos = page_offset(page) +
> + (char*)de - (char*)page_address(page);
> + err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0,
> + &page, NULL);
> + if (err)
> + goto out_unlock;
> + if (de->inode || (de->file_type == EXT2_FT_WHT) ||
> + (de->file_type == EXT2_FT_FALLTHRU)) {
> + ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
> + de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
> + de->rec_len = ext2_rec_len_to_disk(name_len);
> + de = de1;
> + }

And again, what's going on here?

> + de->name_len = namelen;
> + memcpy(de->name, name, namelen);
> + de->inode = 0;
> + de->file_type = EXT2_FT_FALLTHRU;
> + err = ext2_commit_chunk(page, pos, rec_len);
> + dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
> + EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
> + mark_inode_dirty(dir);
> + /* OFFSET_CACHE */
> +out_put:
> + ext2_put_page(page);
> + return err;
> +out_unlock:
> + unlock_page(page);
> + goto out_put;
> +}
> +
> +/*
> * ext2_delete_entry deletes a directory entry by merging it with the
> * previous entry. Page is up-to-date. Releases the page.
> */
> @@ -711,7 +793,9 @@ int ext2_whiteout_entry (struct inode * dir, struct dentry * dentry,
> */
> if (ext2_match (namelen, name, de))
> de->inode = 0;
> - if (de->inode || (de->file_type == EXT2_FT_WHT)) {
> + if (de->inode || (((de->file_type == EXT2_FT_WHT) ||
> + (de->file_type == EXT2_FT_FALLTHRU)) &&
> + !ext2_match (namelen, name, de))) {
> ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
> de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
> de->rec_len = ext2_rec_len_to_disk(name_len);
> diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
> index 44d190c..2fa32b3 100644
> --- a/fs/ext2/ext2.h
> +++ b/fs/ext2/ext2.h
> @@ -108,6 +108,7 @@ extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *,
> extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
> extern int ext2_whiteout_entry (struct inode *, struct dentry *,
> struct ext2_dir_entry_2 *, struct page *);
> +extern int ext2_fallthru_entry (struct inode *, struct dentry *);
> extern int ext2_empty_dir (struct inode *);
> extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
> extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
> diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
> index 12195a5..f28154c 100644
> --- a/fs/ext2/namei.c
> +++ b/fs/ext2/namei.c
> @@ -349,6 +349,7 @@ static int ext2_whiteout(struct inode *dir, struct dentry *dentry,
> goto out;
>
> spin_lock(&new_dentry->d_lock);
> + new_dentry->d_flags &= ~DCACHE_FALLTHRU;
> new_dentry->d_flags |= DCACHE_WHITEOUT;
> spin_unlock(&new_dentry->d_lock);
> d_add(new_dentry, NULL);
> @@ -367,6 +368,26 @@ out:
> return err;
> }
>
> +/*
> + * Create a fallthru entry.
> + */
> +static int ext2_fallthru (struct inode *dir, struct dentry *dentry)
> +{
> + int err;
> +
> + dquot_initialize(dir);
> +
> + err = ext2_fallthru_entry(dir, dentry);
> + if (err)
> + return err;
> +
> + d_instantiate(dentry, NULL);
> + spin_lock(&dentry->d_lock);
> + dentry->d_flags |= DCACHE_FALLTHRU;
> + spin_unlock(&dentry->d_lock);
> + return 0;
> +}
> +
> static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
> struct inode * new_dir, struct dentry * new_dentry )
> {
> @@ -470,6 +491,7 @@ const struct inode_operations ext2_dir_inode_operations = {
> .rmdir = ext2_rmdir,
> .mknod = ext2_mknod,
> .whiteout = ext2_whiteout,
> + .fallthru = ext2_fallthru,
> .rename = ext2_rename,
> #ifdef CONFIG_EXT2_FS_XATTR
> .setxattr = generic_setxattr,
> diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h
> index 20468bd..cb3d400 100644
> --- a/include/linux/ext2_fs.h
> +++ b/include/linux/ext2_fs.h
> @@ -577,6 +577,7 @@ enum {
> EXT2_FT_SOCK = 6,
> EXT2_FT_SYMLINK = 7,
> EXT2_FT_WHT = 8,
> + EXT2_FT_FALLTHRU = 9,
> EXT2_FT_MAX
> };
>
> --
> 1.6.3.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

2010-08-04 14:44:10

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [PATCH 14/38] fallthru: ext2 fallthru support

On Tue, 15 Jun 2010, Valerie Aurora wrote:
> Add support for fallthru directory entries to ext2.

If a previously used ext2 filesystem is mounted again then
fallthroughs don't appear to work as expected. Stat returns ENOENT
for these entries.

Can't see anything obviously wrong with the code.

>
> XXX What to do for d_ino for fallthrus? If we return the inode from
> the the underlying file system, it comes from a different inode
> "namespace" and that will produce spurious matches. This argues for
> implementation of fallthrus as symlinks because they have to allocate
> an inode (and inode number) anyway, and we can later reuse it if we
> copy the file up.

That's an idea, but I guess it won't make everyone happy since it
wastes both disk space and memory.

One of the key differentiators for union mounts concept was that it
doesn't duplicate inodes and dentries from the layers. With the
directory copyup on lookup that's already partially lost, but that can
be justified by the fact that non-directories usually far outnumber
directories.

Another idea is to use an internal inode and make all fallthroughs be
hard links to that.

I think the same would work for whiteouts as well. I don't like the
fact that whiteouts are invisible even when not mounted as part of a
union.

Miklos

2010-08-04 22:48:46

by Valerie Aurora

[permalink] [raw]
Subject: Re: [PATCH 14/38] fallthru: ext2 fallthru support

On Wed, Aug 04, 2010 at 04:44:10PM +0200, Miklos Szeredi wrote:
> On Tue, 15 Jun 2010, Valerie Aurora wrote:
> > Add support for fallthru directory entries to ext2.
>
> If a previously used ext2 filesystem with is mounted again then
> fallthroughs don't appear to work as expected. Stat returns ENOENT
> for these entries.
>
> Can't see anything obviously wrong with the code.

Hm, I wrote one test case for this that worked (attached). Can you
give me more details on your test case? Thanks,

-VAL


Attachments:
(No filename) (504.00 B)
ext2_remount_fallthrus.sh (1.22 kB)
Download all attachments

2010-08-04 23:04:54

by Valerie Aurora

[permalink] [raw]
Subject: Re: [PATCH 14/38] fallthru: ext2 fallthru support

On Wed, Aug 04, 2010 at 04:44:10PM +0200, Miklos Szeredi wrote:
> On Tue, 15 Jun 2010, Valerie Aurora wrote:
> > Add support for fallthru directory entries to ext2.
>
> If a previously used ext2 filesystem with is mounted again then
> fallthroughs don't appear to work as expected. Stat returns ENOENT
> for these entries.
>
> Can't see anything obviously wrong with the code.
>
> >
> > XXX What to do for d_ino for fallthrus? If we return the inode from
> > the the underlying file system, it comes from a different inode
> > "namespace" and that will produce spurious matches. This argues for
> > implementation of fallthrus as symlinks because they have to allocate
> > an inode (and inode number) anyway, and we can later reuse it if we
> > copy the file up.
>
> That's an idea, but I guess it won't make everyone happy since it
> wastes both disk space and memory.

Hm, I should probably remove this comment - I've talked over the
symlink implementation with a few people and it seems like it
introduces more problems than it solves.

> One of the key differentiators for union mounts concept was that it
> doesn't duplicate inodes and dentries from the layers. With the
> directory copyup on lookup that's already partially lost, but that can
> be justified by the fact that non-directories usually far outnumber
> directories.

And it solves all the readdir() problems in one go. :)

> Another idea is to use an internal inode and make all fallthroughs be
> hard links to that.
>
> I think the same would work for whiteouts as well. I don't like the
> fact that whiteouts are invisible even when not mounted as part of a
> union.

I don't know if this helps, but I just wrote support for removing ext2
whiteouts and fallthrus using tune2fs and e2fsck. I think this does
what people want from a "visible" whiteout feature without adding more
complexity to the VFS. It also takes away all consideration of race
conditions and dentry conversion that happens with online removal of
whiteouts and fallthrus.

What are your thoughts on what a visible whiteout/fallthru would look
like?

Thanks,

-VAL

2010-08-05 10:37:08

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [PATCH 14/38] fallthru: ext2 fallthru support

On Wed, 4 Aug 2010, Valerie Aurora wrote:
> --zYM0uCDKw75PZbzx
> Content-Type: text/plain; charset=us-ascii
> Content-Disposition: inline
>
> On Wed, Aug 04, 2010 at 04:44:10PM +0200, Miklos Szeredi wrote:
> > On Tue, 15 Jun 2010, Valerie Aurora wrote:
> > > Add support for fallthru directory entries to ext2.
> >
> > If a previously used ext2 filesystem with is mounted again then
> > fallthroughs don't appear to work as expected. Stat returns ENOENT
> > for these entries.
> >
> > Can't see anything obviously wrong with the code.
>
> Hm, I wrote one test case for this that worked (attached). Can you
> give me more details on your test case? Thanks,

uml:~# mount -oloop -r ext3-2.img /mnt/img/
uml:~# mount -oloop -r ext3.img /mnt/img/
uml:~# losetup -f ovl.img
uml:~# mmount -b 8 -t ext2 /dev/loop2 /mnt/img/
uml:~# "ls" /mnt/img
bunion lost+found union
uml:~# "ls" /mnt/img/union
1 2 3
uml:~# "ls" /mnt/img/union/1
a x
uml:~# umount /mnt/img/
uml:~# mmount -b 8 -t ext2 /dev/loop2 /mnt/img/
uml:~# ls -l /mnt/img/
total 14
drwxr-xr-x 2 root root 1024 Aug 5 09:56 bunion
drwx------ 2 root root 12288 Aug 5 09:41 lost+found
drwxr-xr-x 3 root root 1024 Aug 5 09:56 union
uml:~# ls -l /mnt/img/union/
ls: cannot access /mnt/img/union/3: No such file or directory
ls: cannot access /mnt/img/union/2: No such file or directory
total 1
drwxr-xr-x 2 root root 1024 Aug 5 09:56 1
?????????? ? ? ? ? ? 2
?????????? ? ? ? ? ? 3
uml:~# ls -l /mnt/img/union/1
ls: cannot access /mnt/img/union/1/a: No such file or directory
ls: cannot access /mnt/img/union/1/x: No such file or directory
total 0
?????????? ? ? ? ? ? a
?????????? ? ? ? ? ? x
uml:~#

Thanks,
Miklos

2010-08-05 11:14:07

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [PATCH 14/38] fallthru: ext2 fallthru support

On Wed, 4 Aug 2010, Valerie Aurora wrote:
> > Another idea is to use an internal inode and make all fallthroughs be
> > hard links to that.
> >
> > I think the same would work for whiteouts as well. I don't like the
> > fact that whiteouts are invisible even when not mounted as part of a
> > union.
>
> I don't know if this helps, but I just wrote support for removing ext2
> whiteouts and fallthrus using tune2fs and e2fsck. I think this does
> what people want from a "visible" whiteout feature without adding more
> complexity to the VFS. It also takes away all consideration of race
> conditions and dentry conversion that happens with online removal of
> whiteouts and fallthrus.
>
> What are your thoughts on what a visible whiteout/fallthru would look
> like?

Best would be if it didn't need any modification to filesystems. All
this having to upgrade util-linux, e2fsprogs, having incompatible
filesystem features is a pain for users (just been through that).

What we already have in most filesystems:

- extended attributes, e.g. use the system.union.* namespace and
denote whiteouts and fallthroughs with such an attribute

- hard links to make sure a separate inode is not necessary for each
whiteout/fallthrough entry

- some way for the user to easily identify such files when not
mounted as part of a union e.g. make it a symlink pointing to
"(deleted)" or whatever

Later the extended attributes can also be used for other things like
e.g. chmod()/chown() only copying up metadata, not data, and
indicating that data is still found on the lower layers.

Miklos

2010-08-05 23:30:51

by Valerie Aurora

[permalink] [raw]
Subject: Re: [PATCH 14/38] fallthru: ext2 fallthru support

On Thu, Aug 05, 2010 at 12:36:55PM +0200, Miklos Szeredi wrote:
> On Wed, 4 Aug 2010, Valerie Aurora wrote:
> > --zYM0uCDKw75PZbzx
> > Content-Type: text/plain; charset=us-ascii
> > Content-Disposition: inline
> >
> > On Wed, Aug 04, 2010 at 04:44:10PM +0200, Miklos Szeredi wrote:
> > > On Tue, 15 Jun 2010, Valerie Aurora wrote:
> > > > Add support for fallthru directory entries to ext2.
> > >
> > > If a previously used ext2 filesystem with is mounted again then
> > > fallthroughs don't appear to work as expected. Stat returns ENOENT
> > > for these entries.
> > >
> > > Can't see anything obviously wrong with the code.
> >
> > Hm, I wrote one test case for this that worked (attached). Can you
> > give me more details on your test case? Thanks,

Cool, thanks. Yes, I suppose the fallthrus should be ignored if they
don't fall through to anything. If I do a proper lookup for d_ino, I
can kill two birds with one stone, since that will tell us whether
there is anything below the fallthru and thus whether to return this
directory entry.

-VAL

> uml:~# mount -oloop -r ext3-2.img /mnt/img/
> uml:~# mount -oloop -r ext3.img /mnt/img/
> uml:~# losetup -f ovl.img
> uml:~# mmount -b 8 -t ext2 /dev/loop2 /mnt/img/
> uml:~# "ls" /mnt/img
> bunion lost+found union
> uml:~# "ls" /mnt/img/union
> 1 2 3
> uml:~# "ls" /mnt/img/union/1
> a x
> uml:~# umount /mnt/img/
> uml:~# mmount -b 8 -t ext2 /dev/loop2 /mnt/img/
> uml:~# ls -l /mnt/img/
> total 14
> drwxr-xr-x 2 root root 1024 Aug 5 09:56 bunion
> drwx------ 2 root root 12288 Aug 5 09:41 lost+found
> drwxr-xr-x 3 root root 1024 Aug 5 09:56 union
> uml:~# ls -l /mnt/img/union/
> ls: cannot access /mnt/img/union/3: No such file or directory
> ls: cannot access /mnt/img/union/2: No such file or directory
> total 1
> drwxr-xr-x 2 root root 1024 Aug 5 09:56 1
> ?????????? ? ? ? ? ? 2
> ?????????? ? ? ? ? ? 3
> uml:~# ls -l /mnt/img/union/1
> ls: cannot access /mnt/img/union/1/a: No such file or directory
> ls: cannot access /mnt/img/union/1/x: No such file or directory
> total 0
> ?????????? ? ? ? ? ? a
> ?????????? ? ? ? ? ? x
> uml:~#
>
> Thanks,
> Miklos

2010-08-06 08:15:36

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [PATCH 14/38] fallthru: ext2 fallthru support

On Thu, 5 Aug 2010, Valerie Aurora wrote:
> On Thu, Aug 05, 2010 at 12:36:55PM +0200, Miklos Szeredi wrote:
> > On Wed, 4 Aug 2010, Valerie Aurora wrote:
> > > --zYM0uCDKw75PZbzx
> > > Content-Type: text/plain; charset=us-ascii
> > > Content-Disposition: inline
> > >
> > > On Wed, Aug 04, 2010 at 04:44:10PM +0200, Miklos Szeredi wrote:
> > > > On Tue, 15 Jun 2010, Valerie Aurora wrote:
> > > > > Add support for fallthru directory entries to ext2.
> > > >
> > > > If a previously used ext2 filesystem with is mounted again then
> > > > fallthroughs don't appear to work as expected. Stat returns ENOENT
> > > > for these entries.
> > > >
> > > > Can't see anything obviously wrong with the code.
> > >
> > > Hm, I wrote one test case for this that worked (attached). Can you
> > > give me more details on your test case? Thanks,
>
> Cool, thanks. Yes, I suppose the fallthrus should be ignored if they
> don't fall through to anything.

That's not the case here. They _should_ fall through to ext3.img, yet
apparently after ovl.img being mounted again they don't seem to work.

Note, ovl.img starts out as a completely empty filesystem.

The difference between your test case and mine might be that yours
not only reads the directory but stats the files as well, while mine
does not.

Thanks,
Miklos


>
> > uml:~# mount -oloop -r ext3-2.img /mnt/img/
> > uml:~# mount -oloop -r ext3.img /mnt/img/
> > uml:~# losetup -f ovl.img
> > uml:~# mmount -b 8 -t ext2 /dev/loop2 /mnt/img/
> > uml:~# "ls" /mnt/img
> > bunion lost+found union
> > uml:~# "ls" /mnt/img/union
> > 1 2 3
> > uml:~# "ls" /mnt/img/union/1
> > a x
> > uml:~# umount /mnt/img/
> > uml:~# mmount -b 8 -t ext2 /dev/loop2 /mnt/img/
> > uml:~# ls -l /mnt/img/
> > total 14
> > drwxr-xr-x 2 root root 1024 Aug 5 09:56 bunion
> > drwx------ 2 root root 12288 Aug 5 09:41 lost+found
> > drwxr-xr-x 3 root root 1024 Aug 5 09:56 union
> > uml:~# ls -l /mnt/img/union/
> > ls: cannot access /mnt/img/union/3: No such file or directory
> > ls: cannot access /mnt/img/union/2: No such file or directory
> > total 1
> > drwxr-xr-x 2 root root 1024 Aug 5 09:56 1
> > ?????????? ? ? ? ? ? 2
> > ?????????? ? ? ? ? ? 3
> > uml:~# ls -l /mnt/img/union/1
> > ls: cannot access /mnt/img/union/1/a: No such file or directory
> > ls: cannot access /mnt/img/union/1/x: No such file or directory
> > total 0
> > ?????????? ? ? ? ? ? a
> > ?????????? ? ? ? ? ? x
> > uml:~#
> >
> > Thanks,
> > Miklos
>

2010-08-06 17:12:23

by Valerie Aurora

[permalink] [raw]
Subject: Re: [PATCH 14/38] fallthru: ext2 fallthru support

On Thu, Aug 05, 2010 at 01:13:55PM +0200, Miklos Szeredi wrote:
> On Wed, 4 Aug 2010, Valerie Aurora wrote:
> > > Another idea is to use an internal inode and make all fallthroughs be
> > > hard links to that.
> > >
> > > I think the same would work for whiteouts as well. I don't like the
> > > fact that whiteouts are invisible even when not mounted as part of a
> > > union.
> >
> > I don't know if this helps, but I just wrote support for removing ext2
> > whiteouts and fallthrus using tune2fs and e2fsck. I think this does
> > what people want from a "visible" whiteout feature without adding more
> > complexity to the VFS. It also takes away all consideration of race
> > conditions and dentry conversion that happens with online removal of
> > whiteouts and fallthrus.
> >
> > What are your thoughts on what a visible whiteout/fallthru would look
> > like?
>
> Best would be if it didn't need any modification to filesystems. All
> this having to upgrade util-linux, e2fsprogs, having incompatible
> filesystem features is a pain for users (just been through that).
>
> What we already have in most filesystems:
>
> - extended attributes, e.g. use the system.union.* namespace and
> denote whiteouts and falltroughs with such an attribute
>
> - hard links to make sure a separate inode is not necessary for each
> whiteout/fallthrough entry
>
> - some way for the user to easily identify such files when not
> mounted as part of a union e.g. make it a symlink pointing to
> "(deleted)" or whatever
>
> Later the extended attributes can also be used for other things like
> e.g. chmod()/chown() only copying up metadata, not data, and
> indicating that data is still found on the lower layers.

Just a quick note to say that my explicit design was to do as much as
possible in the VFS, except when adding a little support to the
low-level fs would make it significantly faster, simpler, and more
correct. I think for union mounts to perform moderately well, and to
avoid namespace problems, we can't build it 100% out of existing file
system parts like xattrs. However, I could be wrong and I will
definitely give any other implementation serious consideration.

-VAL

2010-08-06 17:16:40

by Valerie Aurora

[permalink] [raw]
Subject: Re: [PATCH 14/38] fallthru: ext2 fallthru support

On Fri, Aug 06, 2010 at 10:15:14AM +0200, Miklos Szeredi wrote:
> On Thu, 5 Aug 2010, Valerie Aurora wrote:
> > On Thu, Aug 05, 2010 at 12:36:55PM +0200, Miklos Szeredi wrote:
> > > On Wed, 4 Aug 2010, Valerie Aurora wrote:
> > > > --zYM0uCDKw75PZbzx
> > > > Content-Type: text/plain; charset=us-ascii
> > > > Content-Disposition: inline
> > > >
> > > > On Wed, Aug 04, 2010 at 04:44:10PM +0200, Miklos Szeredi wrote:
> > > > > On Tue, 15 Jun 2010, Valerie Aurora wrote:
> > > > > > Add support for fallthru directory entries to ext2.
> > > > >
> > > > > > If a previously used ext2 filesystem is mounted again then
> > > > > fallthroughs don't appear to work as expected. Stat returns ENOENT
> > > > > for these entries.
> > > > >
> > > > > Can't see anything obviously wrong with the code.
> > > >
> > > > Hm, I wrote one test case for this that worked (attached). Can you
> > > > give me more details on your test case? Thanks,
> >
> > Cool, thanks. Yes, I suppose the fallthrus should be ignored if they
> > don't fall through to anything.
>
> That's not the case here. They _should_ fall through to ext3.img, yet
> apparently after ovl.img being mounted again they don't seem to work.

Oh, "mmount -b 8" == "mount -o union". Is this the mmount from mtools
or something else?

> Note, ovl.img starts out as a completely empty filesystem.
>
> The difference between your test case and mine might be that yours
> not only reads the directory but stats the files as well, while mine
> does not.

Okay, I'll experiment more and see what I can do.

-VAL

> Thanks,
> Miklos
>
>
> >
> > > uml:~# mount -oloop -r ext3-2.img /mnt/img/
> > > uml:~# mount -oloop -r ext3.img /mnt/img/
> > > uml:~# losetup -f ovl.img
> > > uml:~# mmount -b 8 -t ext2 /dev/loop2 /mnt/img/
> > > uml:~# "ls" /mnt/img
> > > bunion lost+found union
> > > uml:~# "ls" /mnt/img/union
> > > 1 2 3
> > > uml:~# "ls" /mnt/img/union/1
> > > a x
> > > uml:~# umount /mnt/img/
> > > uml:~# mmount -b 8 -t ext2 /dev/loop2 /mnt/img/
> > > uml:~# ls -l /mnt/img/
> > > total 14
> > > drwxr-xr-x 2 root root 1024 Aug 5 09:56 bunion
> > > drwx------ 2 root root 12288 Aug 5 09:41 lost+found
> > > drwxr-xr-x 3 root root 1024 Aug 5 09:56 union
> > > uml:~# ls -l /mnt/img/union/
> > > ls: cannot access /mnt/img/union/3: No such file or directory
> > > ls: cannot access /mnt/img/union/2: No such file or directory
> > > total 1
> > > drwxr-xr-x 2 root root 1024 Aug 5 09:56 1
> > > ?????????? ? ? ? ? ? 2
> > > ?????????? ? ? ? ? ? 3
> > > uml:~# ls -l /mnt/img/union/1
> > > ls: cannot access /mnt/img/union/1/a: No such file or directory
> > > ls: cannot access /mnt/img/union/1/x: No such file or directory
> > > total 0
> > > ?????????? ? ? ? ? ? a
> > > ?????????? ? ? ? ? ? x
> > > uml:~#
> > >
> > > Thanks,
> > > Miklos
> >

2010-08-06 17:44:36

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [PATCH 14/38] fallthru: ext2 fallthru support

On Fri, 6 Aug 2010, Valerie Aurora wrote:
> Oh, "mmount -b 8" == "mount -o union".

Right.

> Is this the mmount from mtools
> or something else?

It's a primitive utility that basically just wraps the mount(2) syscall
without any fstab/mtab support:

http://www.kernel.org/pub/linux/kernel/people/mszeredi/mmount/

Miklos

2010-08-17 22:27:53

by Valerie Aurora

[permalink] [raw]
Subject: Re: [PATCH 14/38] fallthru: ext2 fallthru support

On Thu, Aug 05, 2010 at 01:13:55PM +0200, Miklos Szeredi wrote:
> On Wed, 4 Aug 2010, Valerie Aurora wrote:
> > > Another idea is to use an internal inode and make all fallthroughs be
> > > hard links to that.
> > >
> > > I think the same would work for whiteouts as well. I don't like the
> > > fact that whiteouts are invisible even when not mounted as part of a
> > > union.
> >
> > I don't know if this helps, but I just wrote support for removing ext2
> > whiteouts and fallthrus using tune2fs and e2fsck. I think this does
> > what people want from a "visible" whiteout feature without adding more
> > complexity to the VFS. It also takes away all consideration of race
> > conditions and dentry conversion that happens with online removal of
> > whiteouts and fallthrus.
> >
> > What are your thoughts on what a visible whiteout/fallthru would look
> > like?
>
> Best would be if it didn't need any modification to filesystems. All
> this having to upgrade util-linux, e2fsprogs, having incompatible
> filesystem features is a pain for users (just been through that).
>
> What we already have in most filesystems:
>
> - extended attributes, e.g. use the system.union.* namespace and
> > denote whiteouts and fallthroughs with such an attribute

Jan Kara helped convince me this might be better than fs-specific
fallthrus and whiteouts. See my email on get_unlinked_inode().

> - hard links to make sure a separate inode is not necessary for each
> whiteout/fallthrough entry

The problem with hard links is that you run into hard link limits. I
don't think we can do hard links for whiteouts and fallthrus. Each
whiteout or fallthru will cost an inode if we implement them as
extended attributes. This cost has to be balanced against the cost of
implementing them as dentries, which is mainly code complexity in
individual file systems.

> - some way for the user to easily identify such files when not
> mounted as part of a union e.g. make it a symlink pointing to
> "(deleted)" or whatever

Perhaps we can simply not interpret the whiteout/fallthru extended
attributes when the file system is not unioned and let userland
operate on them via getxattr()/setxattr().

> Later the extended attributes can also be used for other things like
> e.g. chmod()/chown() only copying up metadata, not data, and
> indicating that data is still found on the lower layers.

It would certainly be more extensible than in-dentry flags.

-VAL

2010-08-18 08:26:41

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [PATCH 14/38] fallthru: ext2 fallthru support

On Tue, 17 Aug 2010, Valerie Aurora wrote:
> > - hard links to make sure a separate inode is not necessary for each
> > whiteout/fallthrough entry
>
> The problem with hard links is that you run into hard link limits. I
> don't think we can do hard links for whiteouts and fallthrus. Each
> whiteout or fallthru will cost an inode if we implement them as
> extended attributes. This cost has to be balanced against the cost of
> implementing them as dentries, which is mainly code complexity in
> individual file systems.

get_unlinked_inode() is a great idea. But I feel that individual
inodes for each fallthrough are excessive. It'll make the first
readdir() really really expensive and waste a lot of disk and memory
for no good reason.

Not sure how to fix the hard link limits problem though...

Thanks,
Miklos