2021-05-12 13:47:17

by Jan Kara

[permalink] [raw]
Subject: [PATCH 10/11] ceph: Fix race between hole punch and page fault

Ceph has the following race between hole punching and page fault:

CPU1                                    CPU2
ceph_fallocate()
  ...
  ceph_zero_pagecache_range()
                                        ceph_filemap_fault()
                                          faults in page in the range being
                                          punched
  ceph_zero_objects()

And now we have a page in the punched range with invalid data. Fix the
problem by using mapping->invalidate_lock similarly to other
filesystems. Note that using invalidate_lock also fixes a similar race
wrt ->readpage().

CC: Jeff Layton <[email protected]>
CC: [email protected]
Signed-off-by: Jan Kara <[email protected]>
---
fs/ceph/addr.c | 9 ++++++---
fs/ceph/file.c | 2 ++
2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index c1570fada3d8..6d868faf97b5 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1401,9 +1401,11 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
ret = VM_FAULT_SIGBUS;
} else {
struct address_space *mapping = inode->i_mapping;
- struct page *page = find_or_create_page(mapping, 0,
- mapping_gfp_constraint(mapping,
- ~__GFP_FS));
+ struct page *page;
+
+ down_read(&mapping->invalidate_lock);
+ page = find_or_create_page(mapping, 0,
+ mapping_gfp_constraint(mapping, ~__GFP_FS));
if (!page) {
ret = VM_FAULT_OOM;
goto out_inline;
@@ -1424,6 +1426,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
vmf->page = page;
ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
out_inline:
+ up_read(&mapping->invalidate_lock);
dout("filemap_fault %p %llu read inline data ret %x\n",
inode, off, ret);
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 77fc037d5beb..91693d8b458e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -2083,6 +2083,7 @@ static long ceph_fallocate(struct file *file, int mode,
if (ret < 0)
goto unlock;

+ down_write(&inode->i_mapping->invalidate_lock);
ceph_zero_pagecache_range(inode, offset, length);
ret = ceph_zero_objects(inode, offset, length);

@@ -2095,6 +2096,7 @@ static long ceph_fallocate(struct file *file, int mode,
if (dirty)
__mark_inode_dirty(inode, dirty);
}
+ up_write(&inode->i_mapping->invalidate_lock);

ceph_put_cap_refs(ci, got);
unlock:
--
2.26.2


2021-05-12 15:50:44

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH 10/11] ceph: Fix race between hole punch and page fault

On Wed, 2021-05-12 at 15:46 +0200, Jan Kara wrote:
> Ceph has a following race between hole punching and page fault:
>
> CPU1 CPU2
> ceph_fallocate()
> ...
> ceph_zero_pagecache_range()
> ceph_filemap_fault()
> faults in page in the range being
> punched
> ceph_zero_objects()
>
> And now we have a page in punched range with invalid data. Fix the
> problem by using mapping->invalidate_lock similarly to other
> filesystems. Note that using invalidate_lock also fixes a similar race
> wrt ->readpage().
>
> CC: Jeff Layton <[email protected]>
> CC: [email protected]
> Signed-off-by: Jan Kara <[email protected]>
> ---
> fs/ceph/addr.c | 9 ++++++---
> fs/ceph/file.c | 2 ++
> 2 files changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index c1570fada3d8..6d868faf97b5 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -1401,9 +1401,11 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
> ret = VM_FAULT_SIGBUS;
> } else {
> struct address_space *mapping = inode->i_mapping;
> - struct page *page = find_or_create_page(mapping, 0,
> - mapping_gfp_constraint(mapping,
> - ~__GFP_FS));
> + struct page *page;
> +
> + down_read(&mapping->invalidate_lock);
> + page = find_or_create_page(mapping, 0,
> + mapping_gfp_constraint(mapping, ~__GFP_FS));
> if (!page) {
> ret = VM_FAULT_OOM;
> goto out_inline;
> @@ -1424,6 +1426,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
> vmf->page = page;
> ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
> out_inline:
> + up_read(&mapping->invalidate_lock);
> dout("filemap_fault %p %llu read inline data ret %x\n",
> inode, off, ret);
> }
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 77fc037d5beb..91693d8b458e 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -2083,6 +2083,7 @@ static long ceph_fallocate(struct file *file, int mode,
> if (ret < 0)
> goto unlock;
>
> + down_write(&inode->i_mapping->invalidate_lock);
> ceph_zero_pagecache_range(inode, offset, length);
> ret = ceph_zero_objects(inode, offset, length);
>
> @@ -2095,6 +2096,7 @@ static long ceph_fallocate(struct file *file, int mode,
> if (dirty)
> __mark_inode_dirty(inode, dirty);
> }
> + up_write(&inode->i_mapping->invalidate_lock);
>
> ceph_put_cap_refs(ci, got);
> unlock:

Assuming the basic concept is sound, then this looks reasonable.

Reviewed-by: Jeff Layton <[email protected]>