2018-06-20 22:15:03

by Ross Zwisler

[permalink] [raw]
Subject: [PATCH 2/2] ext4: handle layout changes to pinned DAX mappings

Follow the lead of xfs_break_dax_layouts() and add synchronization between
operations in ext4 which remove blocks from an inode (hole punch, truncate
down, etc.) and pages which are pinned due to DAX DMA operations.

Signed-off-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/[email protected]>
---
fs/ext4/ext4.h | 1 +
fs/ext4/extents.c | 12 ++++++++++++
fs/ext4/inode.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
fs/ext4/truncate.h | 1 +
4 files changed, 62 insertions(+)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0b127853c584..34bccd64d83d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2460,6 +2460,7 @@ extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
extern int ext4_inode_attach_jinode(struct inode *inode);
extern int ext4_can_truncate(struct inode *inode);
extern int ext4_truncate(struct inode *);
+extern int ext4_break_layouts(struct inode *);
extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
extern void ext4_set_inode_flags(struct inode *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 0057fe3f248d..a6aef06f455b 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4820,6 +4820,13 @@ static long ext4_zero_range(struct file *file, loff_t offset,
* released from page cache.
*/
down_write(&EXT4_I(inode)->i_mmap_sem);
+
+ ret = ext4_break_layouts(inode);
+ if (ret) {
+ up_write(&EXT4_I(inode)->i_mmap_sem);
+ goto out_mutex;
+ }
+
ret = ext4_update_disksize_before_punch(inode, offset, len);
if (ret) {
up_write(&EXT4_I(inode)->i_mmap_sem);
@@ -5493,6 +5500,11 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
* page cache.
*/
down_write(&EXT4_I(inode)->i_mmap_sem);
+
+ ret = ext4_break_layouts(inode);
+ if (ret)
+ goto out_mmap;
+
/*
* Need to round down offset to be aligned with page size boundary
* for page size > block size.
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2ea07efbe016..c795e5118745 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4193,6 +4193,41 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
return 0;
}

+static void ext4_wait_dax_page(struct ext4_inode_info *ei, bool *did_unlock)
+{
+ *did_unlock = true;
+ up_write(&ei->i_mmap_sem);
+ schedule();
+ down_write(&ei->i_mmap_sem);
+}
+
+int ext4_break_layouts(struct inode *inode)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ struct page *page;
+ bool retry;
+ int error;
+
+ if (unlikely(!rwsem_is_locked(&ei->i_mmap_sem))) {
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+
+ do {
+ retry = false;
+ page = dax_layout_busy_page(inode->i_mapping);
+ if (!page)
+ return 0;
+
+ error = ___wait_var_event(&page->_refcount,
+ atomic_read(&page->_refcount) == 1,
+ TASK_INTERRUPTIBLE, 0, 0,
+ ext4_wait_dax_page(ei, &retry));
+ } while (error == 0 && retry);
+
+ return error;
+}
+
/*
* ext4_punch_hole: punches a hole in a file by releasing the blocks
* associated with the given offset and length
@@ -4266,6 +4301,11 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
* page cache.
*/
down_write(&EXT4_I(inode)->i_mmap_sem);
+
+ ret = ext4_break_layouts(inode);
+ if (ret)
+ goto out_dio;
+
first_block_offset = round_up(offset, sb->s_blocksize);
last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;

@@ -5554,6 +5594,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
ext4_wait_for_tail_page_commit(inode);
}
down_write(&EXT4_I(inode)->i_mmap_sem);
+
+ rc = ext4_break_layouts(inode);
+ if (rc) {
+ up_write(&EXT4_I(inode)->i_mmap_sem);
+ error = rc;
+ goto err_out;
+ }
+
/*
* Truncate pagecache after we've waited for commit
* in data=journal mode to make pages freeable.
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
index 0cb13badf473..a3b78241e9f6 100644
--- a/fs/ext4/truncate.h
+++ b/fs/ext4/truncate.h
@@ -12,6 +12,7 @@
static inline void ext4_truncate_failed_write(struct inode *inode)
{
down_write(&EXT4_I(inode)->i_mmap_sem);
+ ext4_break_layouts(inode);
truncate_inode_pages(inode->i_mapping, inode->i_size);
ext4_truncate(inode);
up_write(&EXT4_I(inode)->i_mmap_sem);
--
2.14.4


2018-06-22 08:19:15

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 2/2] ext4: handle layout changes to pinned DAX mappings

On Wed 20-06-18 16:15:03, Ross Zwisler wrote:
> Follow the lead of xfs_break_dax_layouts() and add synchronization between
> operations in ext4 which remove blocks from an inode (hole punch, truncate
> down, etc.) and pages which are pinned due to DAX DMA operations.
>
> Signed-off-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/[email protected]>
> ---
> fs/ext4/ext4.h | 1 +
> fs/ext4/extents.c | 12 ++++++++++++
> fs/ext4/inode.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
> fs/ext4/truncate.h | 1 +
> 4 files changed, 62 insertions(+)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 0b127853c584..34bccd64d83d 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2460,6 +2460,7 @@ extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
> extern int ext4_inode_attach_jinode(struct inode *inode);
> extern int ext4_can_truncate(struct inode *inode);
> extern int ext4_truncate(struct inode *);
> +extern int ext4_break_layouts(struct inode *);
> extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
> extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
> extern void ext4_set_inode_flags(struct inode *);
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 0057fe3f248d..a6aef06f455b 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -4820,6 +4820,13 @@ static long ext4_zero_range(struct file *file, loff_t offset,
> * released from page cache.
> */
> down_write(&EXT4_I(inode)->i_mmap_sem);
> +
> + ret = ext4_break_layouts(inode);
> + if (ret) {
> + up_write(&EXT4_I(inode)->i_mmap_sem);
> + goto out_mutex;
> + }
> +
> ret = ext4_update_disksize_before_punch(inode, offset, len);
> if (ret) {
> up_write(&EXT4_I(inode)->i_mmap_sem);
> @@ -5493,6 +5500,11 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
> * page cache.
> */
> down_write(&EXT4_I(inode)->i_mmap_sem);
> +
> + ret = ext4_break_layouts(inode);
> + if (ret)
> + goto out_mmap;
> +
> /*
> * Need to round down offset to be aligned with page size boundary
> * for page size > block size.
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 2ea07efbe016..c795e5118745 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4193,6 +4193,41 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
> return 0;
> }
>
> +static void ext4_wait_dax_page(struct ext4_inode_info *ei, bool *did_unlock)
> +{
> + *did_unlock = true;
> + up_write(&ei->i_mmap_sem);
> + schedule();
> + down_write(&ei->i_mmap_sem);
> +}
> +
> +int ext4_break_layouts(struct inode *inode)
> +{
> + struct ext4_inode_info *ei = EXT4_I(inode);
> + struct page *page;
> + bool retry;
> + int error;
> +
> + if (unlikely(!rwsem_is_locked(&ei->i_mmap_sem))) {
> + WARN_ON_ONCE(1);
> + return -EINVAL;
> + }

This could be shortened as:

if (WARN_ON_ONCE(!rwsem_is_locked(&ei->i_mmap_sem))) {
}

couldn't it?

Besides that the patch looks to me. You can add:

Reviewed-by: Jan Kara <[email protected]>

And I'm really wondering which protection are we still missing that you are
still able to hit the warning with these patches applied.

Honza
--
Jan Kara <jack-IBi9RG/[email protected]>
SUSE Labs, CR

2018-06-22 08:25:33

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 2/2] ext4: handle layout changes to pinned DAX mappings

On Wed 20-06-18 16:15:03, Ross Zwisler wrote:
> diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
> index 0cb13badf473..a3b78241e9f6 100644
> --- a/fs/ext4/truncate.h
> +++ b/fs/ext4/truncate.h
> @@ -12,6 +12,7 @@
> static inline void ext4_truncate_failed_write(struct inode *inode)
> {
> down_write(&EXT4_I(inode)->i_mmap_sem);
> + ext4_break_layouts(inode);
> truncate_inode_pages(inode->i_mapping, inode->i_size);
> ext4_truncate(inode);
> up_write(&EXT4_I(inode)->i_mmap_sem);

One comment here: I don't think ext4_break_layouts() is necessary here.
ext4_truncate_failed_write() exists to truncate blocks beyond EOF. As such
these blocks could never be even mmaped, let alone pinned by GUP. Maybe
just add a comment that blocks we are truncating here were never visible to
userspace and so we don't need ext4_break_layouts() protection.

Honza
--
Jan Kara <jack-IBi9RG/[email protected]>
SUSE Labs, CR

2018-06-27 20:10:37

by Ross Zwisler

[permalink] [raw]
Subject: Re: [PATCH 2/2] ext4: handle layout changes to pinned DAX mappings

On Fri, Jun 22, 2018 at 10:19:15AM +0200, Jan Kara wrote:
> On Wed 20-06-18 16:15:03, Ross Zwisler wrote:
> > Follow the lead of xfs_break_dax_layouts() and add synchronization between
> > operations in ext4 which remove blocks from an inode (hole punch, truncate
> > down, etc.) and pages which are pinned due to DAX DMA operations.
> >
> > Signed-off-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/[email protected]>
> > ---
> > fs/ext4/ext4.h | 1 +
> > fs/ext4/extents.c | 12 ++++++++++++
> > fs/ext4/inode.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
> > fs/ext4/truncate.h | 1 +
> > 4 files changed, 62 insertions(+)
> >
> > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> > index 0b127853c584..34bccd64d83d 100644
> > --- a/fs/ext4/ext4.h
> > +++ b/fs/ext4/ext4.h
> > @@ -2460,6 +2460,7 @@ extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
> > extern int ext4_inode_attach_jinode(struct inode *inode);
> > extern int ext4_can_truncate(struct inode *inode);
> > extern int ext4_truncate(struct inode *);
> > +extern int ext4_break_layouts(struct inode *);
> > extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
> > extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
> > extern void ext4_set_inode_flags(struct inode *);
> > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> > index 0057fe3f248d..a6aef06f455b 100644
> > --- a/fs/ext4/extents.c
> > +++ b/fs/ext4/extents.c
> > @@ -4820,6 +4820,13 @@ static long ext4_zero_range(struct file *file, loff_t offset,
> > * released from page cache.
> > */
> > down_write(&EXT4_I(inode)->i_mmap_sem);
> > +
> > + ret = ext4_break_layouts(inode);
> > + if (ret) {
> > + up_write(&EXT4_I(inode)->i_mmap_sem);
> > + goto out_mutex;
> > + }
> > +
> > ret = ext4_update_disksize_before_punch(inode, offset, len);
> > if (ret) {
> > up_write(&EXT4_I(inode)->i_mmap_sem);
> > @@ -5493,6 +5500,11 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
> > * page cache.
> > */
> > down_write(&EXT4_I(inode)->i_mmap_sem);
> > +
> > + ret = ext4_break_layouts(inode);
> > + if (ret)
> > + goto out_mmap;
> > +
> > /*
> > * Need to round down offset to be aligned with page size boundary
> > * for page size > block size.
> > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> > index 2ea07efbe016..c795e5118745 100644
> > --- a/fs/ext4/inode.c
> > +++ b/fs/ext4/inode.c
> > @@ -4193,6 +4193,41 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
> > return 0;
> > }
> >
> > +static void ext4_wait_dax_page(struct ext4_inode_info *ei, bool *did_unlock)
> > +{
> > + *did_unlock = true;
> > + up_write(&ei->i_mmap_sem);
> > + schedule();
> > + down_write(&ei->i_mmap_sem);
> > +}
> > +
> > +int ext4_break_layouts(struct inode *inode)
> > +{
> > + struct ext4_inode_info *ei = EXT4_I(inode);
> > + struct page *page;
> > + bool retry;
> > + int error;
> > +
> > + if (unlikely(!rwsem_is_locked(&ei->i_mmap_sem))) {
> > + WARN_ON_ONCE(1);
> > + return -EINVAL;
> > + }
>
> This could be shortened as:
>
> if (WARN_ON_ONCE(!rwsem_is_locked(&ei->i_mmap_sem))) {
> }
>
> couldn't it?

Yep, that's much better. Thank you for the review.

> Besides that the patch looks to me. You can add:
>
> Reviewed-by: Jan Kara <[email protected]>
>
> And I'm really wondering which protection are we still missing that you are
> still able to hit the warning with these patches applied.

I'm also very curious and am going to dig into that this week.