We would like to get notified when we are doing a write on mmap section.
This is needed with respect to preallocated area. We split the preallocated
area into initialzed extent and uninitialzed extent in the call back. This
let us handle ENOSPC better. Otherwise we get ENOSPC in the writepage and
that would result in data loss. The changes are also needed to handle ENOSPC
when writing to an mmap section of files with holes.
Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/ext4/file.c | 19 ++++++++++++++++++-
fs/ext4/inode.c | 6 ++++++
include/linux/ext4_fs.h | 1 +
3 files changed, 25 insertions(+), 1 deletions(-)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 20507a2..77341c1 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -123,6 +123,23 @@ force_commit:
return ret;
}
+static struct vm_operations_struct ext4_file_vm_ops = {
+ .fault = filemap_fault,
+ .page_mkwrite = ext4_page_mkwrite,
+};
+
+static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct address_space *mapping = file->f_mapping;
+
+ if (!mapping->a_ops->readpage)
+ return -ENOEXEC;
+ file_accessed(file);
+ vma->vm_ops = &ext4_file_vm_ops;
+ vma->vm_flags |= VM_CAN_NONLINEAR;
+ return 0;
+}
+
const struct file_operations ext4_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
@@ -133,7 +150,7 @@ const struct file_operations ext4_file_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = ext4_compat_ioctl,
#endif
- .mmap = generic_file_mmap,
+ .mmap = ext4_file_mmap,
.open = generic_file_open,
.release = ext4_release_file,
.fsync = ext4_sync_file,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 34f3eb6..81faa67 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3466,3 +3466,9 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
return err;
}
+
+int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+ return block_page_mkwrite(vma, page, ext4_get_block);
+}
+
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index 22810b1..8f5a563 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -1059,6 +1059,7 @@ extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
struct address_space *mapping, loff_t from);
+extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
/* ioctl.c */
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
--
1.5.4.1.97.g40aab-dirty
IF we fail allocate blocks don't call ext4_error. Also don't hide errors
from ext4_get_blocks_wrap
Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/ext4/extents.c | 11 ++++++-----
1 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 5b22f71..9a27e88 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2653,13 +2653,14 @@ retry:
ret = ext4_get_blocks_wrap(handle, inode, block,
max_blocks, &map_bh,
EXT4_CREATE_UNINITIALIZED_EXT, 0);
- WARN_ON(ret <= 0);
if (ret <= 0) {
- ext4_error(inode->i_sb, "ext4_fallocate",
- "ext4_ext_get_blocks returned error: "
- "inode#%lu, block=%u, max_blocks=%lu",
+#ifdef EXT4FS_DEBUG
+ WARN_ON(ret <= 0);
+ printk(KERN_ERR "%s: ext4_ext_get_blocks "
+ "returned error inode#%lu, block=%u, "
+ "max_blocks=%lu", __FUNCTION__,
inode->i_ino, block, max_blocks);
- ret = -EIO;
+#endif
ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle);
break;
--
1.5.4.1.97.g40aab-dirty
On Tue, 2008-02-19 at 09:14 +0530, Aneesh Kumar K.V wrote:
> IF we fail allocate blocks don't call ext4_error. Also don't hide errors
> from ext4_get_blocks_wrap
>
Aced. And added to patch queue.
Thanks,
Mingming
> Signed-off-by: Aneesh Kumar K.V <[email protected]>
> ---
> fs/ext4/extents.c | 11 ++++++-----
> 1 files changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 5b22f71..9a27e88 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2653,13 +2653,14 @@ retry:
> ret = ext4_get_blocks_wrap(handle, inode, block,
> max_blocks, &map_bh,
> EXT4_CREATE_UNINITIALIZED_EXT, 0);
> - WARN_ON(ret <= 0);
> if (ret <= 0) {
> - ext4_error(inode->i_sb, "ext4_fallocate",
> - "ext4_ext_get_blocks returned error: "
> - "inode#%lu, block=%u, max_blocks=%lu",
> +#ifdef EXT4FS_DEBUG
> + WARN_ON(ret <= 0);
> + printk(KERN_ERR "%s: ext4_ext_get_blocks "
> + "returned error inode#%lu, block=%u, "
> + "max_blocks=%lu", __FUNCTION__,
> inode->i_ino, block, max_blocks);
> - ret = -EIO;
> +#endif
> ext4_mark_inode_dirty(handle, inode);
> ret2 = ext4_journal_stop(handle);
> break;
On Tue, 2008-02-19 at 09:13 +0530, Aneesh Kumar K.V wrote:
> We would like to get notified when we are doing a write on mmap section.
> This is needed with respect to preallocated area. We split the preallocated
> area into initialzed extent and uninitialzed extent in the call back. This
> let us handle ENOSPC better. Otherwise we get ENOSPC in the writepage and
> that would result in data loss. The changes are also needed to handle ENOSPC
> when writing to an mmap section of files with holes.
>
Acked. Added to patch queue.
Thanks,
Mingming
> Signed-off-by: Aneesh Kumar K.V <[email protected]>
> ---
> fs/ext4/file.c | 19 ++++++++++++++++++-
> fs/ext4/inode.c | 6 ++++++
> include/linux/ext4_fs.h | 1 +
> 3 files changed, 25 insertions(+), 1 deletions(-)
>
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index 20507a2..77341c1 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -123,6 +123,23 @@ force_commit:
> return ret;
> }
>
> +static struct vm_operations_struct ext4_file_vm_ops = {
> + .fault = filemap_fault,
> + .page_mkwrite = ext4_page_mkwrite,
> +};
> +
> +static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> + struct address_space *mapping = file->f_mapping;
> +
> + if (!mapping->a_ops->readpage)
> + return -ENOEXEC;
> + file_accessed(file);
> + vma->vm_ops = &ext4_file_vm_ops;
> + vma->vm_flags |= VM_CAN_NONLINEAR;
> + return 0;
> +}
> +
> const struct file_operations ext4_file_operations = {
> .llseek = generic_file_llseek,
> .read = do_sync_read,
> @@ -133,7 +150,7 @@ const struct file_operations ext4_file_operations = {
> #ifdef CONFIG_COMPAT
> .compat_ioctl = ext4_compat_ioctl,
> #endif
> - .mmap = generic_file_mmap,
> + .mmap = ext4_file_mmap,
> .open = generic_file_open,
> .release = ext4_release_file,
> .fsync = ext4_sync_file,
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 34f3eb6..81faa67 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3466,3 +3466,9 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
>
> return err;
> }
> +
> +int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
> +{
> + return block_page_mkwrite(vma, page, ext4_get_block);
> +}
> +
> diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
> index 22810b1..8f5a563 100644
> --- a/include/linux/ext4_fs.h
> +++ b/include/linux/ext4_fs.h
> @@ -1059,6 +1059,7 @@ extern void ext4_set_aops(struct inode *inode);
> extern int ext4_writepage_trans_blocks(struct inode *);
> extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
> struct address_space *mapping, loff_t from);
> +extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
>
> /* ioctl.c */
> extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
On Tue, 2008-02-19 at 09:13 +0530, Aneesh Kumar K.V wrote:
> We would like to get notified when we are doing a write on mmap section.
> This is needed with respect to preallocated area. We split the preallocated
> area into initialzed extent and uninitialzed extent in the call back. This
> let us handle ENOSPC better. Otherwise we get ENOSPC in the writepage and
> that would result in data loss. The changes are also needed to handle ENOSPC
> when writing to an mmap section of files with holes.
>
Hi Aneesh,
I have a concern, it seems we missed journalling the allocation activity
for the mmaped write. See comments below...
Another thing, perhaps similar patch should be ported to ext2/3, as this
also addressed the mmaped write ENOSPC error without
preallocation/deleyed allocation.
> Signed-off-by: Aneesh Kumar K.V <[email protected]>
> ---
> fs/ext4/file.c | 19 ++++++++++++++++++-
> fs/ext4/inode.c | 6 ++++++
> include/linux/ext4_fs.h | 1 +
> 3 files changed, 25 insertions(+), 1 deletions(-)
>
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index 20507a2..77341c1 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -123,6 +123,23 @@ force_commit:
> return ret;
> }
>
> +static struct vm_operations_struct ext4_file_vm_ops = {
> + .fault = filemap_fault,
> + .page_mkwrite = ext4_page_mkwrite,
> +};
> +
> +static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> + struct address_space *mapping = file->f_mapping;
> +
> + if (!mapping->a_ops->readpage)
> + return -ENOEXEC;
> + file_accessed(file);
> + vma->vm_ops = &ext4_file_vm_ops;
> + vma->vm_flags |= VM_CAN_NONLINEAR;
> + return 0;
> +}
> +
> const struct file_operations ext4_file_operations = {
> .llseek = generic_file_llseek,
> .read = do_sync_read,
> @@ -133,7 +150,7 @@ const struct file_operations ext4_file_operations = {
> #ifdef CONFIG_COMPAT
> .compat_ioctl = ext4_compat_ioctl,
> #endif
> - .mmap = generic_file_mmap,
> + .mmap = ext4_file_mmap,
> .open = generic_file_open,
> .release = ext4_release_file,
> .fsync = ext4_sync_file,
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 34f3eb6..81faa67 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3466,3 +3466,9 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
>
> return err;
> }
> +
> +int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
> +{
> + return block_page_mkwrite(vma, page, ext4_get_block);
> +}
> +
I don't see block allocation being journalled here. block_page_mkwrite()
eventually calling block_prepare_write() which invokes ext4_get_block()
without starting a new journal handle.
Perhaps call ext4 write_begin() and write_end() inode operations, that
would taking care of different write_begin and write_end for three
different journalling mode.
Mingming
> diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
> index 22810b1..8f5a563 100644
> --- a/include/linux/ext4_fs.h
> +++ b/include/linux/ext4_fs.h
> @@ -1059,6 +1059,7 @@ extern void ext4_set_aops(struct inode *inode);
> extern int ext4_writepage_trans_blocks(struct inode *);
> extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
> struct address_space *mapping, loff_t from);
> +extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
>
> /* ioctl.c */
> extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
On Thu, Feb 21, 2008 at 09:39:20AM -0800, Mingming Cao wrote:
> On Tue, 2008-02-19 at 09:13 +0530, Aneesh Kumar K.V wrote:
> > We would like to get notified when we are doing a write on mmap section.
> > This is needed with respect to preallocated area. We split the preallocated
> > area into initialzed extent and uninitialzed extent in the call back. This
> > let us handle ENOSPC better. Otherwise we get ENOSPC in the writepage and
> > that would result in data loss. The changes are also needed to handle ENOSPC
> > when writing to an mmap section of files with holes.
> >
>
> Hi Aneesh,
>
> I have a concern, it seems we missed journalling the allocation activity
> for the mmaped write. See comments below...
>
> Another thing, perhaps similar patch should be ported to ext2/3, as this
> also addressed the mmaped write ENOSPC error without
> preallocation/deleyed allocation.
>
> > Signed-off-by: Aneesh Kumar K.V <[email protected]>
......
> > +
> > +int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
> > +{
> > + return block_page_mkwrite(vma, page, ext4_get_block);
> > +}
> > +
>
> I don't see block allocation being journalled here. block_page_mkwrite()
> eventually calling block_prepare_write() which invokes ext4_get_block()
> without starting a new journal handle.
>
> Perhaps call ext4 write_begin() and write_end() inode operations, that
> would taking care of different write_begin and write_end for three
> different journalling mode.
>
You are correct. I missed the journalling details when working on the
patch. Will send the one with fix.
-aneesh