Signed-off-by: Hiroyuki Machida <[email protected]>
---
file.c | 4 ++++
1 files changed, 4 insertions(+)
--- linux-2.6.13/fs/fat/file.c 2005-08-29 08:41:01.000000000 +0900
+++ linux-2.6.13.new/fs/fat/file.c 2005-09-11 12:26:51.031743750 +0900
@@ -201,6 +183,10 @@ int fat_notify_change(struct dentry *den
else
mask = sbi->options.fs_fmask;
inode->i_mode &= S_IFMT | (S_IRWXUGO & ~mask);
+
+ if ( (!error) && IS_SYNC(inode)) {
+ error = write_inode_now(inode, 1);
+ }
out:
unlock_kernel();
return error;
"Machida, Hiroyuki" <[email protected]> writes:
> + if ( (!error) && IS_SYNC(inode)) {
> + error = write_inode_now(inode, 1);
> + }
We don't need to sync the data pages at all here. And I think it is
not right place for doing this. If we need this, since we need to see
O_SYNC for fchxxx() VFS would be right place to do it.
But, personally, I'd like to kill the "-o sync" stuff for these
independent meta data rather. Then...
--
OGAWA Hirofumi <[email protected]>
OGAWA Hirofumi wrote:
> "Machida, Hiroyuki" <[email protected]> writes:
>
>
>>+ if ( (!error) && IS_SYNC(inode)) {
>>+ error = write_inode_now(inode, 1);
>>+ }
>
>
> We don't need to sync the data pages at all here. And I think it is
> not right place for doing this. If we need this, since we need to see
> O_SYNC for fchxxx() VFS would be right place to do it.
I see, I'll look into those.
> But, personally, I'd like to kill the "-o sync" stuff for these
> independent meta data rather. Then...
--
Hiroyuki Machida [email protected]
SSW Dept. HENC, Sony Corp.
This patch adds inode-sync after attribute changes, if needed.
* fs-sync-attr.patch for 2.6.13
fs/fs-writeback.c | 19 +++++++++++++++++++
fs/open.c | 12 ++++++++++++
include/linux/fs.h | 1 +
4 files changed, 32 insertions(+)
Signed-off-by: Hiroyuki Machida <[email protected]>
--- linux-2.6.13/fs/fs-writeback.c 2005-08-29 08:41:01.000000000 +0900
+++ linux-2.6.13-sync-attr/fs/fs-writeback.c 2005-09-29 12:56:21.052335295 +0900
@@ -593,6 +593,25 @@ int sync_inode(struct inode *inode, stru
EXPORT_SYMBOL(sync_inode);
/**
+ * sync_inode_wodata - sync(write and wait) inode to disk, without it's data.
+ * @inode: the inode to sync
+ *
+ * sync_inode_wodata() will write an inode then wait. It will also
+ * correctly update the inode on its superblock's dirty inode lists
+ * and will update inode->i_state.
+ *
+ * The caller must have a ref on the inode.
+ */
+int sync_inode_wodata(struct inode *inode)
+{
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL, /* wait */
+ .nr_to_write = 0,/* no data to be written */
+ };
+ return sync_inode(inode, &wbc);
+}
+
+/**
* generic_osync_inode - flush all dirty data for a given inode to disk
* @inode: inode to write
* @mapping: the address_space that should be flushed
diff -upr linux-2.6.13/fs/open.c linux-2.6.13-sync-attr/fs/open.c
--- linux-2.6.13/fs/open.c 2005-08-29 08:41:01.000000000 +0900
+++ linux-2.6.13-sync-attr/fs/open.c 2005-09-30 01:29:45.279437136 +0900
@@ -207,6 +207,8 @@ int do_truncate(struct dentry *dentry, l
down(&dentry->d_inode->i_sem);
err = notify_change(dentry, &newattrs);
+ if (!err && IS_SYNC(dentry->d_inode))
+ sync_inode_wodata(dentry->d_inode);
up(&dentry->d_inode->i_sem);
return err;
}
@@ -394,6 +396,8 @@ asmlinkage long sys_utime(char __user *
}
down(&inode->i_sem);
error = notify_change(nd.dentry, &newattrs);
+ if (!error && IS_SYNC(inode))
+ sync_inode_wodata(inode);
up(&inode->i_sem);
dput_and_out:
path_release(&nd);
@@ -447,6 +451,8 @@ long do_utimes(char __user * filename, s
}
down(&inode->i_sem);
error = notify_change(nd.dentry, &newattrs);
+ if (!error && IS_SYNC(inode))
+ sync_inode_wodata(inode);
up(&inode->i_sem);
dput_and_out:
path_release(&nd);
@@ -620,6 +626,8 @@ asmlinkage long sys_fchmod(unsigned int
newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
err = notify_change(dentry, &newattrs);
+ if (!err && IS_SYNC(inode))
+ sync_inode_wodata(inode);
up(&inode->i_sem);
out_putf:
@@ -654,6 +662,8 @@ asmlinkage long sys_chmod(const char __u
newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
error = notify_change(nd.dentry, &newattrs);
+ if (!error && IS_SYNC(inode))
+ sync_inode_wodata(inode);
up(&inode->i_sem);
dput_and_out:
@@ -692,6 +702,8 @@ static int chown_common(struct dentry *
newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
down(&inode->i_sem);
error = notify_change(dentry, &newattrs);
+ if (!error && IS_SYNC(inode))
+ sync_inode_wodata(inode);
up(&inode->i_sem);
out:
return error;
diff -upr linux-2.6.13/include/linux/fs.h linux-2.6.13-sync-attr/include/linux/fs.h
--- linux-2.6.13/include/linux/fs.h 2005-08-29 08:41:01.000000000 +0900
+++ linux-2.6.13-sync-attr/include/linux/fs.h 2005-09-30 01:29:06.278507000 +0900
@@ -1082,6 +1082,7 @@ static inline void file_accessed(struct
}
int sync_inode(struct inode *inode, struct writeback_control *wbc);
+int sync_inode_wodata(struct inode *inode);
/**
* struct export_operations - for nfsd to communicate with file systems
This patch replaces ext2_sync_inode() with sync_inode_wodata().
* ext2-inode-sync.patch for 2.6.13
ext2.h | 1 -
fsync.c | 2 +-
inode.c | 11 +----------
xattr.c | 2 +-
Signed-off-by: Hiroyuki Machida <[email protected]>
4 files changed, 3 insertions(+), 13 deletions(-)
--- linux-2.6.13/fs/ext2/ext2.h 2005-08-29 08:41:01.000000000 +0900
+++ linux-2.6.13-sync-attr/fs/ext2/ext2.h 2005-09-29 12:56:15.942213423 +0900
@@ -127,7 +127,6 @@ extern void ext2_read_inode (struct inod
extern int ext2_write_inode (struct inode *, int);
extern void ext2_put_inode (struct inode *);
extern void ext2_delete_inode (struct inode *);
-extern int ext2_sync_inode (struct inode *);
extern void ext2_discard_prealloc (struct inode *);
extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
extern void ext2_truncate (struct inode *);
diff -upr linux-2.6.13/fs/ext2/fsync.c linux-2.6.13-sync-attr/fs/ext2/fsync.c
--- linux-2.6.13/fs/ext2/fsync.c 2005-08-29 08:41:01.000000000 +0900
+++ linux-2.6.13-sync-attr/fs/ext2/fsync.c 2005-09-30 01:31:54.492518751 +0900
@@ -44,7 +44,7 @@ int ext2_sync_file(struct file *file, st
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
return ret;
- err = ext2_sync_inode(inode);
+ err = sync_inode_wodata(inode);
if (ret == 0)
ret = err;
return ret;
diff -upr linux-2.6.13/fs/ext2/inode.c linux-2.6.13-sync-attr/fs/ext2/inode.c
--- linux-2.6.13/fs/ext2/inode.c 2005-08-29 08:41:01.000000000 +0900
+++ linux-2.6.13-sync-attr/fs/ext2/inode.c 2005-09-30 01:30:32.750569279 +0900
@@ -993,7 +993,7 @@ do_indirects:
inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
if (inode_needs_sync(inode)) {
sync_mapping_buffers(inode->i_mapping);
- ext2_sync_inode (inode);
+ sync_inode_wodata(inode);
} else {
mark_inode_dirty(inode);
}
@@ -1282,15 +1282,6 @@ int ext2_write_inode(struct inode *inode
return ext2_update_inode(inode, wait);
}
-int ext2_sync_inode(struct inode *inode)
-{
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = 0, /* sys_fsync did this */
- };
- return sync_inode(inode, &wbc);
-}
-
int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = dentry->d_inode;
diff -upr linux-2.6.13/fs/ext2/xattr.c linux-2.6.13-sync-attr/fs/ext2/xattr.c
--- linux-2.6.13/fs/ext2/xattr.c 2005-08-29 08:41:01.000000000 +0900
+++ linux-2.6.13-sync-attr/fs/ext2/xattr.c 2005-09-30 01:30:43.070815408 +0900
@@ -705,7 +705,7 @@ ext2_xattr_set2(struct inode *inode, str
EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
inode->i_ctime = CURRENT_TIME_SEC;
if (IS_SYNC(inode)) {
- error = ext2_sync_inode (inode);
+ error = sync_inode_wodata(inode);
/* In case sync failed due to ENOSPC the inode was actually
* written (only some dirty data were not) so we just proceed
* as if nothing happened and cleanup the unused block */
This patch adds inode-sync after attribute changes, if needed.
* fs-sync-attr_2.patch for 2.6.13
fs/fs-writeback.c | 20 ++++++++++++++++++++
fs/open.c | 12 ++++++++++++
include/linux/fs.h | 1 +
3 files changed, 33 insertions(+)
Signed-off-by: Hiroyuki Machida <[email protected]>
diff -upr linux-2.6.13/fs/fs-writeback.c linux-2.6.13-sync-attr/fs/fs-writeback.c
--- linux-2.6.13/fs/fs-writeback.c 2005-08-29 08:41:01.000000000 +0900
+++ linux-2.6.13-sync-attr/fs/fs-writeback.c 2005-09-30 04:07:27.000000000 +0900
@@ -593,6 +593,26 @@ int sync_inode(struct inode *inode, stru
EXPORT_SYMBOL(sync_inode);
/**
+ * sync_inode_wodata - sync(write and wait) inode to disk, without it's data.
+ * @inode: the inode to sync
+ *
+ * sync_inode_wodata() will write an inode then wait. It will also
+ * correctly update the inode on its superblock's dirty inode lists
+ * and will update inode->i_state.
+ *
+ * The caller must have a ref on the inode.
+ */
+int sync_inode_wodata(struct inode *inode)
+{
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL, /* wait */
+ .nr_to_write = 0,/* no data to be written */
+ };
+ return sync_inode(inode, &wbc);
+}
+EXPORT_SYMBOL(sync_inode_wodata);
+
+/**
* generic_osync_inode - flush all dirty data for a given inode to disk
* @inode: inode to write
* @mapping: the address_space that should be flushed
diff -upr linux-2.6.13/fs/open.c linux-2.6.13-sync-attr/fs/open.c
--- linux-2.6.13/fs/open.c 2005-08-29 08:41:01.000000000 +0900
+++ linux-2.6.13-sync-attr/fs/open.c 2005-09-30 01:29:45.000000000 +0900
@@ -207,6 +207,8 @@ int do_truncate(struct dentry *dentry, l
down(&dentry->d_inode->i_sem);
err = notify_change(dentry, &newattrs);
+ if (!err && IS_SYNC(dentry->d_inode))
+ sync_inode_wodata(dentry->d_inode);
up(&dentry->d_inode->i_sem);
return err;
}
@@ -394,6 +396,8 @@ asmlinkage long sys_utime(char __user *
}
down(&inode->i_sem);
error = notify_change(nd.dentry, &newattrs);
+ if (!error && IS_SYNC(inode))
+ sync_inode_wodata(inode);
up(&inode->i_sem);
dput_and_out:
path_release(&nd);
@@ -447,6 +451,8 @@ long do_utimes(char __user * filename, s
}
down(&inode->i_sem);
error = notify_change(nd.dentry, &newattrs);
+ if (!error && IS_SYNC(inode))
+ sync_inode_wodata(inode);
up(&inode->i_sem);
dput_and_out:
path_release(&nd);
@@ -620,6 +626,8 @@ asmlinkage long sys_fchmod(unsigned int
newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
err = notify_change(dentry, &newattrs);
+ if (!err && IS_SYNC(inode))
+ sync_inode_wodata(inode);
up(&inode->i_sem);
out_putf:
@@ -654,6 +662,8 @@ asmlinkage long sys_chmod(const char __u
newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
error = notify_change(nd.dentry, &newattrs);
+ if (!error && IS_SYNC(inode))
+ sync_inode_wodata(inode);
up(&inode->i_sem);
dput_and_out:
@@ -692,6 +702,8 @@ static int chown_common(struct dentry *
newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
down(&inode->i_sem);
error = notify_change(dentry, &newattrs);
+ if (!error && IS_SYNC(inode))
+ sync_inode_wodata(inode);
up(&inode->i_sem);
out:
return error;
diff -upr linux-2.6.13/include/linux/fs.h linux-2.6.13-sync-attr/include/linux/fs.h
--- linux-2.6.13/include/linux/fs.h 2005-08-29 08:41:01.000000000 +0900
+++ linux-2.6.13-sync-attr/include/linux/fs.h 2005-09-30 01:29:06.000000000 +0900
@@ -1082,6 +1082,7 @@ static inline void file_accessed(struct
}
int sync_inode(struct inode *inode, struct writeback_control *wbc);
+int sync_inode_wodata(struct inode *inode);
/**
* struct export_operations - for nfsd to communicate with file systems
"Machida, Hiroyuki" <[email protected]> wrote:
>
> This patch adds inode-sync after attribute changes, if needed.
>
> * fs-sync-attr.patch for 2.6.13
>
> fs/fs-writeback.c | 19 +++++++++++++++++++
> fs/open.c | 12 ++++++++++++
> include/linux/fs.h | 1 +
> 4 files changed, 32 insertions(+)
>
> Signed-off-by: Hiroyuki Machida <[email protected]>
>
> --- linux-2.6.13/fs/fs-writeback.c 2005-08-29 08:41:01.000000000 +0900
> +++ linux-2.6.13-sync-attr/fs/fs-writeback.c 2005-09-29 12:56:21.052335295 +0900
> @@ -593,6 +593,25 @@ int sync_inode(struct inode *inode, stru
> EXPORT_SYMBOL(sync_inode);
>
> /**
> + * sync_inode_wodata - sync(write and wait) inode to disk, without it's data.
> + * @inode: the inode to sync
> + *
> + * sync_inode_wodata() will write an inode then wait. It will also
> + * correctly update the inode on its superblock's dirty inode lists
> + * and will update inode->i_state.
> + *
> + * The caller must have a ref on the inode.
> + */
> +int sync_inode_wodata(struct inode *inode)
> +{
> + struct writeback_control wbc = {
> + .sync_mode = WB_SYNC_ALL, /* wait */
> + .nr_to_write = 0,/* no data to be written */
> + };
> + return sync_inode(inode, &wbc);
> +}
> +
I think this function duplicates write_inode_now()?
Andrew Morton <[email protected]> writes:
>> /**
>> + * sync_inode_wodata - sync(write and wait) inode to disk, without it's data.
>> + * @inode: the inode to sync
>> + *
>> + * sync_inode_wodata() will write an inode then wait. It will also
>> + * correctly update the inode on its superblock's dirty inode lists
>> + * and will update inode->i_state.
>> + *
>> + * The caller must have a ref on the inode.
>> + */
>> +int sync_inode_wodata(struct inode *inode)
>> +{
>> + struct writeback_control wbc = {
>> + .sync_mode = WB_SYNC_ALL, /* wait */
>> + .nr_to_write = 0,/* no data to be written */
>> + };
>> + return sync_inode(inode, &wbc);
>> +}
>> +
>
> I think this function duplicates write_inode_now()?
write_inode_now() seems to write data pages, but this doesn't write
(.nr_to_write = 0).
--
OGAWA Hirofumi <[email protected]>
OGAWA Hirofumi <[email protected]> wrote:
>
> Andrew Morton <[email protected]> writes:
>
> >> /**
> >> + * sync_inode_wodata - sync(write and wait) inode to disk, without it's data.
> >> + * @inode: the inode to sync
> >> + *
> >> + * sync_inode_wodata() will write an inode then wait. It will also
> >> + * correctly update the inode on its superblock's dirty inode lists
> >> + * and will update inode->i_state.
> >> + *
> >> + * The caller must have a ref on the inode.
> >> + */
> >> +int sync_inode_wodata(struct inode *inode)
> >> +{
> >> + struct writeback_control wbc = {
> >> + .sync_mode = WB_SYNC_ALL, /* wait */
> >> + .nr_to_write = 0,/* no data to be written */
> >> + };
> >> + return sync_inode(inode, &wbc);
> >> +}
> >> +
> >
> > I think this function duplicates write_inode_now()?
>
> write_inode_now() seems to write data pages, but this doesn't write
> (.nr_to_write = 0).
hm, OK.
However there's not much point in writing a brand-new function when
write_inode_now() almost does the right thing. We can share the
implementation within fs-writeback.c.
<looks>
Isn't write_inode_now() buggy? If !mapping_cap_writeback_dirty() we
should still write the inode itself?
diff -puN fs/fs-writeback.c~write_inode_now-write-inode-if-not-bdi_cap_no_writeback fs/fs-writeback.c
--- devel/fs/fs-writeback.c~write_inode_now-write-inode-if-not-bdi_cap_no_writeback 2005-10-11 21:13:25.000000000 -0700
+++ devel-akpm/fs/fs-writeback.c 2005-10-11 21:13:40.000000000 -0700
@@ -558,7 +558,7 @@ int write_inode_now(struct inode *inode,
};
if (!mapping_cap_writeback_dirty(inode->i_mapping))
- return 0;
+ wbc.nr_to_write = 0;
might_sleep();
spin_lock(&inode_lock);
_
Andrew Morton <[email protected]> writes:
> However there's not much point in writing a brand-new function when
> write_inode_now() almost does the right thing. We can share the
> implementation within fs-writeback.c.
Indeed. We use the generic_osync_inode() for it?
> Isn't write_inode_now() buggy? If !mapping_cap_writeback_dirty() we
> should still write the inode itself?
Indeed. It seems we should write the dirty inode to backing device's buffers.
sync_sb_inodes() too? If so, really buggy.. I'll check it.
--
OGAWA Hirofumi <[email protected]>
OGAWA Hirofumi <[email protected]> writes:
>> Isn't write_inode_now() buggy? If !mapping_cap_writeback_dirty() we
>> should still write the inode itself?
>
> Indeed. It seems we should write the dirty inode to backing device's buffers.
> sync_sb_inodes() too? If so, really buggy.. I'll check it.
write_inode_now() seems ok.
If !mapping_cap_writeback_dirty(), the inode is bdev_inode itself or
ram-based fs's inode, so ->write_inode() is not needed. And if
backing_dev is ramdisk, mapping->backing_dev_info was setted the
following special bdi.
static struct backing_dev_info rd_file_backing_dev_info = {
.ra_pages = 0, /* No readahead */
.capabilities = BDI_CAP_MAP_COPY, /* Does contribute to dirty memory */
.unplug_io_fn = default_unplug_io_fn,
};
--
OGAWA Hirofumi <[email protected]>
OGAWA Hirofumi wrote:
> Andrew Morton <[email protected]> writes:
>
>
>>However there's not much point in writing a brand-new function when
>>write_inode_now() almost does the right thing. We can share the
>>implementation within fs-writeback.c.
>
>
> Indeed. We use the generic_osync_inode() for it?
Please let me confirm.
Using generic_osync_inode(inode, NULL, OSYNC_INODE) instaed of
sync_inode_wodata(inode) is peferable for changes on fs/open.c,
even it would write data. Is it correct?
--
Hiroyuki Machida [email protected]
"Machida, Hiroyuki" <[email protected]> wrote:
>
>
>
> OGAWA Hirofumi wrote:
> > Andrew Morton <[email protected]> writes:
> >
> >
> >>However there's not much point in writing a brand-new function when
> >>write_inode_now() almost does the right thing. We can share the
> >>implementation within fs-writeback.c.
> >
> >
> > Indeed. We use the generic_osync_inode() for it?
>
> Please let me confirm.
> Using generic_osync_inode(inode, NULL, OSYNC_INODE) instaed of
> sync_inode_wodata(inode) is peferable for changes on fs/open.c,
> even it would write data. Is it correct?
>
I don't know. It depends on what you're actually trying to do, and I don't
recall anyone having described that!
"Machida, Hiroyuki" <[email protected]> writes:
> OGAWA Hirofumi wrote:
>> Andrew Morton <[email protected]> writes:
>>
>>>However there's not much point in writing a brand-new function when
>>>write_inode_now() almost does the right thing. We can share the
>>>implementation within fs-writeback.c.
>> Indeed. We use the generic_osync_inode() for it?
>
> Please let me confirm.
> Using generic_osync_inode(inode, NULL, OSYNC_INODE) instaed of
> sync_inode_wodata(inode) is peferable for changes on fs/open.c,
> even it would write data. Is it correct?
No, I only thought the interface is good. I don't know why it writes
data pages even if OSYNC_INODE only.
--
OGAWA Hirofumi <[email protected]>
Andrew Morton wrote:
> "Machida, Hiroyuki" <[email protected]> wrote:
>
>>
>>
>>OGAWA Hirofumi wrote:
>>
>>>Andrew Morton <[email protected]> writes:
>>>
>>>
>>>
>>>>However there's not much point in writing a brand-new function when
>>>>write_inode_now() almost does the right thing. We can share the
>>>>implementation within fs-writeback.c.
>>>
>>>
>>>Indeed. We use the generic_osync_inode() for it?
>>
>>Please let me confirm.
>>Using generic_osync_inode(inode, NULL, OSYNC_INODE) instaed of
>>sync_inode_wodata(inode) is peferable for changes on fs/open.c,
>>even it would write data. Is it correct?
>>
>
>
> I don't know. It depends on what you're actually trying to do, and I don't
> recall anyone having described that!
I'm just little confused, because I realized generic_osync_inode(,,OSYNC_INODE)
calls sync_inode_now(), however Ogawasa-san pointed out sync_inode_now() which
my first patch used is writing data page.
--
Hiroyuki Machida [email protected]
SSW Dept. HENC, Sony Corp.
OGAWA Hirofumi wrote:
> "Machida, Hiroyuki" <[email protected]> writes:
>
>
>>OGAWA Hirofumi wrote:
>>
>>>Andrew Morton <[email protected]> writes:
>>>
>>>
>>>>However there's not much point in writing a brand-new function when
>>>>write_inode_now() almost does the right thing. We can share the
>>>>implementation within fs-writeback.c.
>>>
>>>Indeed. We use the generic_osync_inode() for it?
>>
>>Please let me confirm.
>>Using generic_osync_inode(inode, NULL, OSYNC_INODE) instaed of
>>sync_inode_wodata(inode) is peferable for changes on fs/open.c,
>>even it would write data. Is it correct?
>
>
> No, I only thought the interface is good. I don't know why it writes
> data pages even if OSYNC_INODE only.
I checked 2.6.13 tree, following functions call generic_osync_inode().
However noone calls it with OSYNC_INODE. SO I can't find intention of
it's usage.
Does anyone know why generic_osync_inode() trys to write data page,
even if OSYNC_INODE only passed ?
- fs/reiserfs/file.c
reiserfs_file_write() OSYNC_METADATA | OSYNC_DATA
- mm/filemap.c
sync_page_range() OSYNC_METADATA
sync_page_range_nolock() OSYNC_METADATA
generic_file_direct_write OSYNC_METADATA
--
Hiroyuki Machida [email protected]
Signed-off-by: Hiroyuki Machida <[email protected]>
--- fs/fs-writeback.c.ORG 2005-08-29 08:41:01.000000000 +0900
+++ fs/fs-writeback.c 2005-10-14 21:22:37.329301605 +0900
@@ -614,6 +614,7 @@ int generic_osync_inode(struct inode *in
int err = 0;
int need_write_inode_now = 0;
int err2;
+ long nr_write;
current->flags |= PF_SYNCWRITE;
if (what & OSYNC_DATA)
@@ -632,13 +633,23 @@ int generic_osync_inode(struct inode *in
spin_lock(&inode_lock);
if ((inode->i_state & I_DIRTY) &&
- ((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC)))
+ ((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC))) {
need_write_inode_now = 1;
+ nr_write = (what == OSYNC_INODE) ? 0 : LONG_MAX;
+ }
spin_unlock(&inode_lock);
- if (need_write_inode_now) {
- err2 = write_inode_now(inode, 1);
- if (!err)
+ if (need_write_inode_now &&
+ mapping_cap_writeback_dirty(inode->i_mapping)) {
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ };
+
+ wbc.nr_to_write = nr_write;
+ might_sleep();
+ err2 = sync_inode(inode, &wbc);
+ wait_on_inode(inode);
+ if (!err)
err = err2;
}
else
generic_osync_inode() seems to do over-writing, if OSYNC_INODE only passed.
According comments of generic_osync_inode(), it is expected to write inode
itself only. Current implementation trys to write data page, even if
OSYNC_INODE only passed. This patch fixes it.
This patch is preparetion of other patch to fix "miss-sync changes on
attributes".
Machida, Hiroyuki wrote:
>
>
> OGAWA Hirofumi wrote:
>
>> "Machida, Hiroyuki" <[email protected]> writes:
>>
>>
>>> OGAWA Hirofumi wrote:
>>>
>>>> Andrew Morton <[email protected]> writes:
>>>>
>>>>
>>>>> However there's not much point in writing a brand-new function when
>>>>> write_inode_now() almost does the right thing. We can share the
>>>>> implementation within fs-writeback.c.
>>>>
>>>>
>>>> Indeed. We use the generic_osync_inode() for it?
>>>
>>>
>>> Please let me confirm.
>>> Using generic_osync_inode(inode, NULL, OSYNC_INODE) instaed of
>>> sync_inode_wodata(inode) is peferable for changes on fs/open.c,
>>> even it would write data. Is it correct?
>>
>>
>>
>> No, I only thought the interface is good. I don't know why it writes
>> data pages even if OSYNC_INODE only.
>
>
>
> I checked 2.6.13 tree, following functions call generic_osync_inode().
> However noone calls it with OSYNC_INODE. SO I can't find intention of
> it's usage.
>
> Does anyone know why generic_osync_inode() trys to write data page,
> even if OSYNC_INODE only passed ?
>
> - fs/reiserfs/file.c
> reiserfs_file_write() OSYNC_METADATA | OSYNC_DATA
> - mm/filemap.c
> sync_page_range() OSYNC_METADATA
> sync_page_range_nolock() OSYNC_METADATA
> generic_file_direct_write OSYNC_METADATA
>
--
Hiroyuki Machida [email protected]
Andrew Morton <[email protected]> writes:
> Isn't write_inode_now() buggy? If !mapping_cap_writeback_dirty() we
> should still write the inode itself?
>
> diff -puN fs/fs-writeback.c~write_inode_now-write-inode-if-not-bdi_cap_no_writeback fs/fs-writeback.c
> --- devel/fs/fs-writeback.c~write_inode_now-write-inode-if-not-bdi_cap_no_writeback 2005-10-11 21:13:25.000000000 -0700
> +++ devel-akpm/fs/fs-writeback.c 2005-10-11 21:13:40.000000000 -0700
> @@ -558,7 +558,7 @@ int write_inode_now(struct inode *inode,
> };
>
> if (!mapping_cap_writeback_dirty(inode->i_mapping))
> - return 0;
> + wbc.nr_to_write = 0;
>
> might_sleep();
> spin_lock(&inode_lock);
You are right, and I was wrong.
Yes, if block device has BDI_CAP_NO_WRITEBACK and inode->i_mapping
was changing to bd_inode->i_mapping,
the !mapping_cap_writeback_dirty(inode->i_mapping) is true, so we need
to write the inode itself of block special file.
I think we need to fix sync_sb_inodes() too. What do you think?
Since wbc.nr_to_write includes the information on how many pages were
written, we can't change wbc.nr_to_write in sync_sb_inodes().
This patch fixed the following case,
# mke2fs -m0 -F file
# mount -t ext2 file mnt -o loop
# cd mnt
# mknod ram b 1 0
# cat ram
# cd ..
# umount mnt
# e2fsck -f file
"mnt/ram" wasn't flushed and it was corrupted
Thanks.
--
OGAWA Hirofumi <[email protected]>
Signed-off-by: OGAWA Hirofumi <[email protected]>
---
fs/fs-writeback.c | 34 +++++++++++++++++-----------------
1 file changed, 17 insertions(+), 17 deletions(-)
diff -puN fs/fs-writeback.c~device-inode-write-fix fs/fs-writeback.c
--- linux-2.6.14/fs/fs-writeback.c~device-inode-write-fix 2005-10-29 08:06:28.000000000 +0900
+++ linux-2.6.14-hirofumi/fs/fs-writeback.c 2005-10-29 08:06:28.000000000 +0900
@@ -151,7 +151,8 @@ static int write_inode(struct inode *ino
* Called under inode_lock.
*/
static int
-__sync_single_inode(struct inode *inode, struct writeback_control *wbc)
+__sync_single_inode(struct inode *inode, struct writeback_control *wbc,
+ int inode_only)
{
unsigned dirty;
struct address_space *mapping = inode->i_mapping;
@@ -168,7 +169,9 @@ __sync_single_inode(struct inode *inode,
spin_unlock(&inode_lock);
- ret = do_writepages(mapping, wbc);
+ ret = 0;
+ if (!inode_only)
+ ret = do_writepages(mapping, wbc);
/* Don't write the inode if only I_DIRTY_PAGES was set */
if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
@@ -177,7 +180,7 @@ __sync_single_inode(struct inode *inode,
ret = err;
}
- if (wait) {
+ if (!inode_only && wait) {
int err = filemap_fdatawait(mapping);
if (ret == 0)
ret = err;
@@ -242,7 +245,7 @@ __sync_single_inode(struct inode *inode,
*/
static int
__writeback_single_inode(struct inode *inode,
- struct writeback_control *wbc)
+ struct writeback_control *wbc, int inode_only)
{
wait_queue_head_t *wqh;
@@ -267,7 +270,7 @@ __writeback_single_inode(struct inode *i
spin_lock(&inode_lock);
} while (inode->i_state & I_LOCK);
}
- return __sync_single_inode(inode, wbc);
+ return __sync_single_inode(inode, wbc, inode_only);
}
/*
@@ -304,6 +307,7 @@ static void
sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
{
const unsigned long start = jiffies; /* livelock avoidance */
+ int inode_only;
if (!wbc->for_kupdate || list_empty(&sb->s_io))
list_splice_init(&sb->s_dirty, &sb->s_io);
@@ -315,7 +319,8 @@ sync_sb_inodes(struct super_block *sb, s
struct backing_dev_info *bdi = mapping->backing_dev_info;
long pages_skipped;
- if (!bdi_cap_writeback_dirty(bdi)) {
+ inode_only = 0;
+ if (!mapping_cap_writeback_dirty(mapping)) {
list_move(&inode->i_list, &sb->s_dirty);
if (sb == blockdev_superblock) {
/*
@@ -324,12 +329,7 @@ sync_sb_inodes(struct super_block *sb, s
*/
continue;
}
- /*
- * Dirty memory-backed inode against a filesystem other
- * than the kernel-internal bdev filesystem. Skip the
- * entire superblock.
- */
- break;
+ inode_only = 1;
}
if (wbc->nonblocking && bdi_write_congested(bdi)) {
@@ -363,7 +363,7 @@ sync_sb_inodes(struct super_block *sb, s
BUG_ON(inode->i_state & I_FREEING);
__iget(inode);
pages_skipped = wbc->pages_skipped;
- __writeback_single_inode(inode, wbc);
+ __writeback_single_inode(inode, wbc, inode_only);
if (wbc->sync_mode == WB_SYNC_HOLD) {
inode->dirtied_when = jiffies;
list_move(&inode->i_list, &sb->s_dirty);
@@ -551,18 +551,18 @@ void sync_inodes(int wait)
int write_inode_now(struct inode *inode, int sync)
{
- int ret;
struct writeback_control wbc = {
.nr_to_write = LONG_MAX,
.sync_mode = WB_SYNC_ALL,
};
+ int ret, inode_only = 0;
if (!mapping_cap_writeback_dirty(inode->i_mapping))
- return 0;
+ inode_only = 1;
might_sleep();
spin_lock(&inode_lock);
- ret = __writeback_single_inode(inode, &wbc);
+ ret = __writeback_single_inode(inode, &wbc, inode_only);
spin_unlock(&inode_lock);
if (sync)
wait_on_inode(inode);
@@ -586,7 +586,7 @@ int sync_inode(struct inode *inode, stru
int ret;
spin_lock(&inode_lock);
- ret = __writeback_single_inode(inode, wbc);
+ ret = __writeback_single_inode(inode, wbc, 0);
spin_unlock(&inode_lock);
return ret;
}
_
Hi,
If inodes of filesystem has BDI_CAP_NO_WRITEBACK, it shouldn't be
needed to flush the inodes by pdflush at all. So, this patch stops
puting the inode on the sb->s_dirty, by setting I_DIRTY as initial
state.
With this patch, I think unnecessary flush is stopped.
BTW, probably all most of on memory filesystems also doesn't need to
flush by pdflush...?
Thanks.
--
OGAWA Hirofumi <[email protected]>
Signed-off-by: OGAWA Hirofumi <[email protected]>
---
fs/hugetlbfs/inode.c | 1 +
fs/pipe.c | 8 +-------
fs/ramfs/inode.c | 1 +
fs/relayfs/inode.c | 1 +
fs/sysfs/inode.c | 1 +
include/linux/fs.h | 10 ++++++++++
kernel/cpuset.c | 1 +
mm/shmem.c | 1 +
8 files changed, 17 insertions(+), 7 deletions(-)
diff -puN fs/hugetlbfs/inode.c~add-set_inode_noflush fs/hugetlbfs/inode.c
--- linux-2.6.14/fs/hugetlbfs/inode.c~add-set_inode_noflush 2005-10-29 08:13:57.000000000 +0900
+++ linux-2.6.14-hirofumi/fs/hugetlbfs/inode.c 2005-10-29 08:13:57.000000000 +0900
@@ -394,6 +394,7 @@ static struct inode *hugetlbfs_get_inode
inode = new_inode(sb);
if (inode) {
struct hugetlbfs_inode_info *info;
+ set_inode_noflush(inode);
inode->i_mode = mode;
inode->i_uid = uid;
inode->i_gid = gid;
diff -puN fs/pipe.c~add-set_inode_noflush fs/pipe.c
--- linux-2.6.14/fs/pipe.c~add-set_inode_noflush 2005-10-29 08:13:57.000000000 +0900
+++ linux-2.6.14-hirofumi/fs/pipe.c 2005-10-29 08:13:57.000000000 +0900
@@ -697,13 +697,7 @@ static struct inode * get_pipe_inode(voi
PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
inode->i_fop = &rdwr_pipe_fops;
- /*
- * Mark the inode dirty from the very beginning,
- * that way it will never be moved to the dirty
- * list because "mark_inode_dirty()" will think
- * that it already _is_ on the dirty list.
- */
- inode->i_state = I_DIRTY;
+ set_inode_noflush(inode);
inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
diff -puN fs/ramfs/inode.c~add-set_inode_noflush fs/ramfs/inode.c
--- linux-2.6.14/fs/ramfs/inode.c~add-set_inode_noflush 2005-10-29 08:13:57.000000000 +0900
+++ linux-2.6.14-hirofumi/fs/ramfs/inode.c 2005-10-29 08:13:57.000000000 +0900
@@ -55,6 +55,7 @@ struct inode *ramfs_get_inode(struct sup
struct inode * inode = new_inode(sb);
if (inode) {
+ set_inode_noflush(inode);
inode->i_mode = mode;
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
diff -puN fs/relayfs/inode.c~add-set_inode_noflush fs/relayfs/inode.c
--- linux-2.6.14/fs/relayfs/inode.c~add-set_inode_noflush 2005-10-29 08:13:57.000000000 +0900
+++ linux-2.6.14-hirofumi/fs/relayfs/inode.c 2005-10-29 08:13:57.000000000 +0900
@@ -52,6 +52,7 @@ static struct inode *relayfs_get_inode(s
return NULL;
}
+ set_inode_noflush(inode);
inode->i_mode = mode;
inode->i_uid = 0;
inode->i_gid = 0;
diff -puN fs/sysfs/inode.c~add-set_inode_noflush fs/sysfs/inode.c
--- linux-2.6.14/fs/sysfs/inode.c~add-set_inode_noflush 2005-10-29 08:13:57.000000000 +0900
+++ linux-2.6.14-hirofumi/fs/sysfs/inode.c 2005-10-29 08:13:57.000000000 +0900
@@ -113,6 +113,7 @@ struct inode * sysfs_new_inode(mode_t mo
{
struct inode * inode = new_inode(sysfs_sb);
if (inode) {
+ set_inode_noflush(inode);
inode->i_blksize = PAGE_CACHE_SIZE;
inode->i_blocks = 0;
inode->i_mapping->a_ops = &sysfs_aops;
diff -puN include/linux/fs.h~add-set_inode_noflush include/linux/fs.h
--- linux-2.6.14/include/linux/fs.h~add-set_inode_noflush 2005-10-29 08:13:57.000000000 +0900
+++ linux-2.6.14-hirofumi/include/linux/fs.h 2005-10-29 08:13:57.000000000 +0900
@@ -1075,6 +1075,16 @@ static inline void file_accessed(struct
touch_atime(file->f_vfsmnt, file->f_dentry);
}
+static inline void set_inode_noflush(struct inode *inode)
+{
+ /*
+ * Mark the inode dirty from the very beginning, that way it
+ * will never be moved to the dirty list because "mark_inode_dirty()"
+ * will think that it already _is_ on the dirty list.
+ */
+ inode->i_state |= I_DIRTY;
+}
+
int sync_inode(struct inode *inode, struct writeback_control *wbc);
/**
diff -puN kernel/cpuset.c~add-set_inode_noflush kernel/cpuset.c
--- linux-2.6.14/kernel/cpuset.c~add-set_inode_noflush 2005-10-29 08:13:57.000000000 +0900
+++ linux-2.6.14-hirofumi/kernel/cpuset.c 2005-10-29 08:13:57.000000000 +0900
@@ -236,6 +236,7 @@ static struct inode *cpuset_new_inode(mo
struct inode *inode = new_inode(cpuset_sb);
if (inode) {
+ set_inode_noflush(inode);
inode->i_mode = mode;
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
diff -puN mm/shmem.c~add-set_inode_noflush mm/shmem.c
--- linux-2.6.14/mm/shmem.c~add-set_inode_noflush 2005-10-29 08:13:57.000000000 +0900
+++ linux-2.6.14-hirofumi/mm/shmem.c 2005-10-29 08:13:57.000000000 +0900
@@ -1283,6 +1283,7 @@ shmem_get_inode(struct super_block *sb,
inode = new_inode(sb);
if (inode) {
+ set_inode_noflush(inode);
inode->i_mode = mode;
inode->i_uid = current->fsuid;
inode->i_gid = current->fsgid;
_
OGAWA Hirofumi <[email protected]> writes:
> diff -puN fs/hugetlbfs/inode.c~add-set_inode_noflush fs/hugetlbfs/inode.c
> --- linux-2.6.14/fs/hugetlbfs/inode.c~add-set_inode_noflush 2005-10-29 08:13:57.000000000 +0900
> +++ linux-2.6.14-hirofumi/fs/hugetlbfs/inode.c 2005-10-29 08:13:57.000000000 +0900
> @@ -394,6 +394,7 @@ static struct inode *hugetlbfs_get_inode
> inode = new_inode(sb);
> if (inode) {
> struct hugetlbfs_inode_info *info;
> + set_inode_noflush(inode);
> inode->i_mode = mode;
> inode->i_uid = uid;
> inode->i_gid = gid;
Sigh, I'm forgetting block special file again. Sorry for wrong patch.
--
OGAWA Hirofumi <[email protected]>