2023-05-31 07:52:21

by Christoph Hellwig

[permalink] [raw]
Subject: cleanup the filemap / direct I/O interaction v3 (full series now)

[Sorry for the previous attempt that stopped at patch 8]

Hi all,

this series cleans up some of the generic write helper calling
conventions and the page cache writeback / invalidation for
direct I/O. This is a spinoff from the no-bufferhead kernel
project, for which we'll want to an use iomap based buffered
write path in the block layer.

Changes since v2:
- stick to the existing behavior of returning a short write
if the buffer fallback write or sync fails
- bring back "fuse: use direct_write_fallback" which accidentally
got lost in v2

Changes since v1:
- remove current->backing_dev_info entirely
- fix the pos/end calculation in direct_write_fallback
- rename kiocb_invalidate_post_write to
kiocb_invalidate_post_direct_write
- typo fixes

diffstat:
block/fops.c | 18 +-----
fs/btrfs/file.c | 6 --
fs/ceph/file.c | 6 --
fs/direct-io.c | 10 ---
fs/ext4/file.c | 11 +---
fs/f2fs/file.c | 3 -
fs/fuse/file.c | 4 -
fs/gfs2/file.c | 6 --
fs/iomap/buffered-io.c | 9 ++-
fs/iomap/direct-io.c | 88 ++++++++++++---------------------
fs/nfs/file.c | 6 --
fs/ntfs/file.c | 2
fs/ntfs3/file.c | 3 -
fs/xfs/xfs_file.c | 6 --
fs/zonefs/file.c | 4 -
include/linux/fs.h | 5 -
include/linux/pagemap.h | 4 +
include/linux/sched.h | 3 -
mm/filemap.c | 126 ++++++++++++++++++++++++++----------------------
19 files changed, 125 insertions(+), 195 deletions(-)


2023-05-31 07:52:22

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 01/12] backing_dev: remove current->backing_dev_info

The last user of current->backing_dev_info disappeared in commit
b9b1335e6403 ("remove bdi_congested() and wb_congested() and related
functions"). Remove the field and all assignments to it.

Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Hannes Reinecke <[email protected]>
Reviewed-by: Darrick J. Wong <[email protected]>
---
fs/btrfs/file.c | 6 +-----
fs/ceph/file.c | 4 ----
fs/ext4/file.c | 2 --
fs/f2fs/file.c | 2 --
fs/fuse/file.c | 4 ----
fs/gfs2/file.c | 2 --
fs/nfs/file.c | 5 +----
fs/ntfs/file.c | 2 --
fs/ntfs3/file.c | 3 ---
fs/xfs/xfs_file.c | 4 ----
include/linux/sched.h | 3 ---
mm/filemap.c | 3 ---
12 files changed, 2 insertions(+), 38 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index f649647392e0e4..ecd43ab66fa6c7 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1145,7 +1145,6 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
return -EAGAIN;

- current->backing_dev_info = inode_to_bdi(inode);
ret = file_remove_privs(file);
if (ret)
return ret;
@@ -1165,10 +1164,8 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
loff_t end_pos = round_up(pos + count, fs_info->sectorsize);

ret = btrfs_cont_expand(BTRFS_I(inode), oldsize, end_pos);
- if (ret) {
- current->backing_dev_info = NULL;
+ if (ret)
return ret;
- }
}

return 0;
@@ -1689,7 +1686,6 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
if (sync)
atomic_dec(&inode->sync_writers);

- current->backing_dev_info = NULL;
return num_written;
}

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index f4d8bf7dec88a8..c8ef72f723badd 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1791,9 +1791,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
else
ceph_start_io_write(inode);

- /* We can write back this queue in page reclaim */
- current->backing_dev_info = inode_to_bdi(inode);
-
if (iocb->ki_flags & IOCB_APPEND) {
err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
if (err < 0)
@@ -1940,7 +1937,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
ceph_end_io_write(inode);
out_unlocked:
ceph_free_cap_flush(prealloc_cf);
- current->backing_dev_info = NULL;
return written ? written : err;
}

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d101b3b0c7dad8..bc430270c23c19 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -285,9 +285,7 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
if (ret <= 0)
goto out;

- current->backing_dev_info = inode_to_bdi(inode);
ret = generic_perform_write(iocb, from);
- current->backing_dev_info = NULL;

out:
inode_unlock(inode);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 5ac53d2627d20d..4f423d367a44b9 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4517,9 +4517,7 @@ static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
if (iocb->ki_flags & IOCB_NOWAIT)
return -EOPNOTSUPP;

- current->backing_dev_info = inode_to_bdi(inode);
ret = generic_perform_write(iocb, from);
- current->backing_dev_info = NULL;

if (ret > 0) {
iocb->ki_pos += ret;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 89d97f6188e05e..97d435874b14aa 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1362,9 +1362,6 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
writethrough:
inode_lock(inode);

- /* We can write back this queue in page reclaim */
- current->backing_dev_info = inode_to_bdi(inode);
-
err = generic_write_checks(iocb, from);
if (err <= 0)
goto out;
@@ -1409,7 +1406,6 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
iocb->ki_pos += written;
}
out:
- current->backing_dev_info = NULL;
inode_unlock(inode);
if (written > 0)
written = generic_write_sync(iocb, written);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 300844f50dcd28..904a0d6ac1a1a9 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1041,11 +1041,9 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
goto out_unlock;
}

- current->backing_dev_info = inode_to_bdi(inode);
pagefault_disable();
ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
pagefault_enable();
- current->backing_dev_info = NULL;
if (ret > 0) {
iocb->ki_pos += ret;
written += ret;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index f0edf5a36237d1..665ce3fc62eaf4 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -648,11 +648,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
since = filemap_sample_wb_err(file->f_mapping);
nfs_start_io_write(inode);
result = generic_write_checks(iocb, from);
- if (result > 0) {
- current->backing_dev_info = inode_to_bdi(inode);
+ if (result > 0)
result = generic_perform_write(iocb, from);
- current->backing_dev_info = NULL;
- }
nfs_end_io_write(inode);
if (result <= 0)
goto out;
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index c481b14e4fd989..e296f804a9c442 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1911,11 +1911,9 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)

inode_lock(vi);
/* We can write back this queue in page reclaim. */
- current->backing_dev_info = inode_to_bdi(vi);
err = ntfs_prepare_file_for_write(iocb, from);
if (iov_iter_count(from) && !err)
written = ntfs_perform_write(file, from, iocb->ki_pos);
- current->backing_dev_info = NULL;
inode_unlock(vi);
iocb->ki_pos += written;
if (likely(written > 0))
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 9a3d55c367d920..86d16a2c8339ca 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -820,7 +820,6 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
if (!pages)
return -ENOMEM;

- current->backing_dev_info = inode_to_bdi(inode);
err = file_remove_privs(file);
if (err)
goto out;
@@ -993,8 +992,6 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
out:
kfree(pages);

- current->backing_dev_info = NULL;
-
if (err < 0)
return err;

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index aede746541f8ae..431c3fd0e2b598 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -717,9 +717,6 @@ xfs_file_buffered_write(
if (ret)
goto out;

- /* We can write back this queue in page reclaim */
- current->backing_dev_info = inode_to_bdi(inode);
-
trace_xfs_file_buffered_write(iocb, from);
ret = iomap_file_buffered_write(iocb, from,
&xfs_buffered_write_iomap_ops);
@@ -753,7 +750,6 @@ xfs_file_buffered_write(
goto write_retry;
}

- current->backing_dev_info = NULL;
out:
if (iolock)
xfs_iunlock(ip, iolock);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index eed5d65b8d1f4d..54780571fe9a07 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -41,7 +41,6 @@

/* task_struct member predeclarations (sorted alphabetically): */
struct audit_context;
-struct backing_dev_info;
struct bio_list;
struct blk_plug;
struct bpf_local_storage;
@@ -1186,8 +1185,6 @@ struct task_struct {
/* VM state: */
struct reclaim_state *reclaim_state;

- struct backing_dev_info *backing_dev_info;
-
struct io_context *io_context;

#ifdef CONFIG_COMPACTION
diff --git a/mm/filemap.c b/mm/filemap.c
index b4c9bd368b7e58..33b54660ad2b39 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3991,8 +3991,6 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
ssize_t err;
ssize_t status;

- /* We can write back this queue in page reclaim */
- current->backing_dev_info = inode_to_bdi(inode);
err = file_remove_privs(file);
if (err)
goto out;
@@ -4053,7 +4051,6 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
iocb->ki_pos += written;
}
out:
- current->backing_dev_info = NULL;
return written ? written : err;
}
EXPORT_SYMBOL(__generic_file_write_iter);
--
2.39.2


2023-05-31 07:52:26

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 02/12] iomap: update ki_pos a little later in iomap_dio_complete

Move the ki_pos update down a bit to prepare for a better common
helper that invalidates pages based of an iocb.

Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Damien Le Moal <[email protected]>
Reviewed-by: Hannes Reinecke <[email protected]>
Reviewed-by: Darrick J. Wong <[email protected]>
---
fs/iomap/direct-io.c | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 019cc87d0fb339..6207a59d2162e1 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -94,7 +94,6 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
if (offset + ret > dio->i_size &&
!(dio->flags & IOMAP_DIO_WRITE))
ret = dio->i_size - offset;
- iocb->ki_pos += ret;
}

/*
@@ -120,19 +119,21 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
}

inode_dio_end(file_inode(iocb->ki_filp));
- /*
- * If this is a DSYNC write, make sure we push it to stable storage now
- * that we've written data.
- */
- if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC))
- ret = generic_write_sync(iocb, ret);

- if (ret > 0)
- ret += dio->done_before;
+ if (ret > 0) {
+ iocb->ki_pos += ret;

+ /*
+ * If this is a DSYNC write, make sure we push it to stable
+ * storage now that we've written data.
+ */
+ if (dio->flags & IOMAP_DIO_NEED_SYNC)
+ ret = generic_write_sync(iocb, ret);
+ if (ret > 0)
+ ret += dio->done_before;
+ }
trace_iomap_dio_complete(iocb, dio->error, ret);
kfree(dio);
-
return ret;
}
EXPORT_SYMBOL_GPL(iomap_dio_complete);
--
2.39.2


2023-05-31 07:52:29

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 03/12] filemap: update ki_pos in generic_perform_write

All callers of generic_perform_write need to updated ki_pos, move it into
common code.

Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Xiubo Li <[email protected]>
Reviewed-by: Damien Le Moal <[email protected]>
Reviewed-by: Hannes Reinecke <[email protected]>
Acked-by: Darrick J. Wong <[email protected]>
---
fs/ceph/file.c | 2 --
fs/ext4/file.c | 9 +++------
fs/f2fs/file.c | 1 -
fs/nfs/file.c | 1 -
mm/filemap.c | 8 ++++----
5 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index c8ef72f723badd..767f4dfe7def64 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1891,8 +1891,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
* can not run at the same time
*/
written = generic_perform_write(iocb, from);
- if (likely(written >= 0))
- iocb->ki_pos = pos + written;
ceph_end_io_write(inode);
}

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index bc430270c23c19..ea0ada3985cba2 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -289,12 +289,9 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,

out:
inode_unlock(inode);
- if (likely(ret > 0)) {
- iocb->ki_pos += ret;
- ret = generic_write_sync(iocb, ret);
- }
-
- return ret;
+ if (unlikely(ret <= 0))
+ return ret;
+ return generic_write_sync(iocb, ret);
}

static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 4f423d367a44b9..7134fe8bd008cb 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4520,7 +4520,6 @@ static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
ret = generic_perform_write(iocb, from);

if (ret > 0) {
- iocb->ki_pos += ret;
f2fs_update_iostat(F2FS_I_SB(inode), inode,
APP_BUFFERED_IO, ret);
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 665ce3fc62eaf4..e8bb4c48a3210a 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -655,7 +655,6 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
goto out;

written = result;
- iocb->ki_pos += written;
nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);

if (mntflags & NFS_MOUNT_WRITE_EAGER) {
diff --git a/mm/filemap.c b/mm/filemap.c
index 33b54660ad2b39..15907af4a57ff5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3957,7 +3957,10 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
balance_dirty_pages_ratelimited(mapping);
} while (iov_iter_count(i));

- return written ? written : status;
+ if (!written)
+ return status;
+ iocb->ki_pos += written;
+ return written;
}
EXPORT_SYMBOL(generic_perform_write);

@@ -4034,7 +4037,6 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
endbyte = pos + status - 1;
err = filemap_write_and_wait_range(mapping, pos, endbyte);
if (err == 0) {
- iocb->ki_pos = endbyte + 1;
written += status;
invalidate_mapping_pages(mapping,
pos >> PAGE_SHIFT,
@@ -4047,8 +4049,6 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
} else {
written = generic_perform_write(iocb, from);
- if (likely(written > 0))
- iocb->ki_pos += written;
}
out:
return written ? written : err;
--
2.39.2


2023-05-31 07:52:43

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 04/12] filemap: add a kiocb_write_and_wait helper

Factor out a helper that does filemap_write_and_wait_range for the range
covered by a read kiocb, or returns -EAGAIN if the kiocb is marked as
nowait and there would be pages to write.

Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Damien Le Moal <[email protected]>
Reviewed-by: Hannes Reinecke <[email protected]>
Acked-by: Darrick J. Wong <[email protected]>
---
block/fops.c | 18 +++---------------
include/linux/pagemap.h | 2 ++
mm/filemap.c | 30 ++++++++++++++++++------------
3 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/block/fops.c b/block/fops.c
index 58d0aebc7313a8..575171049c5d83 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -576,21 +576,9 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
goto reexpand; /* skip atime */

if (iocb->ki_flags & IOCB_DIRECT) {
- struct address_space *mapping = iocb->ki_filp->f_mapping;
-
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (filemap_range_needs_writeback(mapping, pos,
- pos + count - 1)) {
- ret = -EAGAIN;
- goto reexpand;
- }
- } else {
- ret = filemap_write_and_wait_range(mapping, pos,
- pos + count - 1);
- if (ret < 0)
- goto reexpand;
- }
-
+ ret = kiocb_write_and_wait(iocb, count);
+ if (ret < 0)
+ goto reexpand;
file_accessed(iocb->ki_filp);

ret = blkdev_direct_IO(iocb, to);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a56308a9d1a450..36fc2cea13ce20 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -30,6 +30,7 @@ static inline void invalidate_remote_inode(struct inode *inode)
int invalidate_inode_pages2(struct address_space *mapping);
int invalidate_inode_pages2_range(struct address_space *mapping,
pgoff_t start, pgoff_t end);
+
int write_inode_now(struct inode *, int sync);
int filemap_fdatawrite(struct address_space *);
int filemap_flush(struct address_space *);
@@ -54,6 +55,7 @@ int filemap_check_errors(struct address_space *mapping);
void __filemap_set_wb_err(struct address_space *mapping, int err);
int filemap_fdatawrite_wbc(struct address_space *mapping,
struct writeback_control *wbc);
+int kiocb_write_and_wait(struct kiocb *iocb, size_t count);

static inline int filemap_write_and_wait(struct address_space *mapping)
{
diff --git a/mm/filemap.c b/mm/filemap.c
index 15907af4a57ff5..5fcd5227f9cae2 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2762,6 +2762,21 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
}
EXPORT_SYMBOL_GPL(filemap_read);

+int kiocb_write_and_wait(struct kiocb *iocb, size_t count)
+{
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
+ loff_t pos = iocb->ki_pos;
+ loff_t end = pos + count - 1;
+
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (filemap_range_needs_writeback(mapping, pos, end))
+ return -EAGAIN;
+ return 0;
+ }
+
+ return filemap_write_and_wait_range(mapping, pos, end);
+}
+
/**
* generic_file_read_iter - generic filesystem read routine
* @iocb: kernel I/O control block
@@ -2797,18 +2812,9 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;

- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (filemap_range_needs_writeback(mapping, iocb->ki_pos,
- iocb->ki_pos + count - 1))
- return -EAGAIN;
- } else {
- retval = filemap_write_and_wait_range(mapping,
- iocb->ki_pos,
- iocb->ki_pos + count - 1);
- if (retval < 0)
- return retval;
- }
-
+ retval = kiocb_write_and_wait(iocb, count);
+ if (retval < 0)
+ return retval;
file_accessed(file);

retval = mapping->a_ops->direct_IO(iocb, iter);
--
2.39.2


2023-05-31 07:52:47

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 05/12] filemap: add a kiocb_invalidate_pages helper

Factor out a helper that calls filemap_write_and_wait_range and
invalidate_inode_pages2_range for the range covered by a write kiocb or
returns -EAGAIN if the kiocb is marked as nowait and there would be pages
to write or invalidate.

Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Damien Le Moal <[email protected]>
Reviewed-by: Hannes Reinecke <[email protected]>
Acked-by: Darrick J. Wong <[email protected]>
---
include/linux/pagemap.h | 1 +
mm/filemap.c | 48 ++++++++++++++++++++++++-----------------
2 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 36fc2cea13ce20..6e4c9ee40baa99 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -30,6 +30,7 @@ static inline void invalidate_remote_inode(struct inode *inode)
int invalidate_inode_pages2(struct address_space *mapping);
int invalidate_inode_pages2_range(struct address_space *mapping,
pgoff_t start, pgoff_t end);
+int kiocb_invalidate_pages(struct kiocb *iocb, size_t count);

int write_inode_now(struct inode *, int sync);
int filemap_fdatawrite(struct address_space *);
diff --git a/mm/filemap.c b/mm/filemap.c
index 5fcd5227f9cae2..a1cb01a4b8046a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2777,6 +2777,33 @@ int kiocb_write_and_wait(struct kiocb *iocb, size_t count)
return filemap_write_and_wait_range(mapping, pos, end);
}

+int kiocb_invalidate_pages(struct kiocb *iocb, size_t count)
+{
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
+ loff_t pos = iocb->ki_pos;
+ loff_t end = pos + count - 1;
+ int ret;
+
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ /* we could block if there are any pages in the range */
+ if (filemap_range_has_page(mapping, pos, end))
+ return -EAGAIN;
+ } else {
+ ret = filemap_write_and_wait_range(mapping, pos, end);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * After a write we want buffered reads to be sure to go to disk to get
+ * the new data. We invalidate clean cached page from the region we're
+ * about to write. We do this *before* the write so that we can return
+ * without clobbering -EIOCBQUEUED from ->direct_IO().
+ */
+ return invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
+ end >> PAGE_SHIFT);
+}
+
/**
* generic_file_read_iter - generic filesystem read routine
* @iocb: kernel I/O control block
@@ -3820,30 +3847,11 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
write_len = iov_iter_count(from);
end = (pos + write_len - 1) >> PAGE_SHIFT;

- if (iocb->ki_flags & IOCB_NOWAIT) {
- /* If there are pages to writeback, return */
- if (filemap_range_has_page(file->f_mapping, pos,
- pos + write_len - 1))
- return -EAGAIN;
- } else {
- written = filemap_write_and_wait_range(mapping, pos,
- pos + write_len - 1);
- if (written)
- goto out;
- }
-
- /*
- * After a write we want buffered reads to be sure to go to disk to get
- * the new data. We invalidate clean cached page from the region we're
- * about to write. We do this *before* the write so that we can return
- * without clobbering -EIOCBQUEUED from ->direct_IO().
- */
- written = invalidate_inode_pages2_range(mapping,
- pos >> PAGE_SHIFT, end);
/*
* If a page can not be invalidated, return 0 to fall back
* to buffered write.
*/
+ written = kiocb_invalidate_pages(iocb, write_len);
if (written) {
if (written == -EBUSY)
return 0;
--
2.39.2


2023-05-31 07:53:09

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 07/12] iomap: update ki_pos in iomap_file_buffered_write

All callers of iomap_file_buffered_write need to updated ki_pos, move it
into common code.

Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Andreas Gruenbacher <[email protected]>
Reviewed-by: Hannes Reinecke <[email protected]>
Reviewed-by: Darrick J. Wong <[email protected]>
Acked-by: Damien Le Moal <[email protected]>
---
fs/gfs2/file.c | 4 +---
fs/iomap/buffered-io.c | 9 ++++++---
fs/xfs/xfs_file.c | 2 --
fs/zonefs/file.c | 4 +---
4 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 904a0d6ac1a1a9..c6a7555d5ad8bb 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1044,10 +1044,8 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
pagefault_disable();
ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
pagefault_enable();
- if (ret > 0) {
- iocb->ki_pos += ret;
+ if (ret > 0)
written += ret;
- }

if (inode == sdp->sd_rindex)
gfs2_glock_dq_uninit(statfs_gh);
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 063133ec77f49e..550525a525c45c 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -864,16 +864,19 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
.len = iov_iter_count(i),
.flags = IOMAP_WRITE,
};
- int ret;
+ ssize_t ret;

if (iocb->ki_flags & IOCB_NOWAIT)
iter.flags |= IOMAP_NOWAIT;

while ((ret = iomap_iter(&iter, ops)) > 0)
iter.processed = iomap_write_iter(&iter, i);
- if (iter.pos == iocb->ki_pos)
+
+ if (unlikely(ret < 0))
return ret;
- return iter.pos - iocb->ki_pos;
+ ret = iter.pos - iocb->ki_pos;
+ iocb->ki_pos += ret;
+ return ret;
}
EXPORT_SYMBOL_GPL(iomap_file_buffered_write);

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 431c3fd0e2b598..d57443db633637 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -720,8 +720,6 @@ xfs_file_buffered_write(
trace_xfs_file_buffered_write(iocb, from);
ret = iomap_file_buffered_write(iocb, from,
&xfs_buffered_write_iomap_ops);
- if (likely(ret >= 0))
- iocb->ki_pos += ret;

/*
* If we hit a space limit, try to free up some lingering preallocated
diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
index 132f01d3461f14..e212d0636f848e 100644
--- a/fs/zonefs/file.c
+++ b/fs/zonefs/file.c
@@ -643,9 +643,7 @@ static ssize_t zonefs_file_buffered_write(struct kiocb *iocb,
goto inode_unlock;

ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops);
- if (ret > 0)
- iocb->ki_pos += ret;
- else if (ret == -EIO)
+ if (ret == -EIO)
zonefs_io_error(inode, true);

inode_unlock:
--
2.39.2


2023-05-31 07:53:15

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 08/12] iomap: use kiocb_write_and_wait and kiocb_invalidate_pages

Use the common helpers for direct I/O page invalidation instead of
open coding the logic. This leads to a slight reordering of checks
in __iomap_dio_rw to keep the logic straight.

Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Damien Le Moal <[email protected]>
Reviewed-by: Hannes Reinecke <[email protected]>
Reviewed-by: Darrick J. Wong <[email protected]>
---
fs/iomap/direct-io.c | 55 ++++++++++++++++----------------------------
1 file changed, 20 insertions(+), 35 deletions(-)

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 0795c54a745bca..6bd14691f96e07 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -472,7 +472,6 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
unsigned int dio_flags, void *private, size_t done_before)
{
- struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = file_inode(iocb->ki_filp);
struct iomap_iter iomi = {
.inode = inode,
@@ -481,11 +480,11 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
.flags = IOMAP_DIRECT,
.private = private,
};
- loff_t end = iomi.pos + iomi.len - 1, ret = 0;
bool wait_for_completion =
is_sync_kiocb(iocb) || (dio_flags & IOMAP_DIO_FORCE_WAIT);
struct blk_plug plug;
struct iomap_dio *dio;
+ loff_t ret = 0;

trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before);

@@ -509,31 +508,29 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
dio->submit.waiter = current;
dio->submit.poll_bio = NULL;

+ if (iocb->ki_flags & IOCB_NOWAIT)
+ iomi.flags |= IOMAP_NOWAIT;
+
if (iov_iter_rw(iter) == READ) {
if (iomi.pos >= dio->i_size)
goto out_free_dio;

- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (filemap_range_needs_writeback(mapping, iomi.pos,
- end)) {
- ret = -EAGAIN;
- goto out_free_dio;
- }
- iomi.flags |= IOMAP_NOWAIT;
- }
-
if (user_backed_iter(iter))
dio->flags |= IOMAP_DIO_DIRTY;
+
+ ret = kiocb_write_and_wait(iocb, iomi.len);
+ if (ret)
+ goto out_free_dio;
} else {
iomi.flags |= IOMAP_WRITE;
dio->flags |= IOMAP_DIO_WRITE;

- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (filemap_range_has_page(mapping, iomi.pos, end)) {
- ret = -EAGAIN;
+ if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
+ ret = -EAGAIN;
+ if (iomi.pos >= dio->i_size ||
+ iomi.pos + iomi.len > dio->i_size)
goto out_free_dio;
- }
- iomi.flags |= IOMAP_NOWAIT;
+ iomi.flags |= IOMAP_OVERWRITE_ONLY;
}

/* for data sync or sync, we need sync completion processing */
@@ -549,31 +546,19 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (!(iocb->ki_flags & IOCB_SYNC))
dio->flags |= IOMAP_DIO_WRITE_FUA;
}
- }
-
- if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
- ret = -EAGAIN;
- if (iomi.pos >= dio->i_size ||
- iomi.pos + iomi.len > dio->i_size)
- goto out_free_dio;
- iomi.flags |= IOMAP_OVERWRITE_ONLY;
- }

- ret = filemap_write_and_wait_range(mapping, iomi.pos, end);
- if (ret)
- goto out_free_dio;
-
- if (iov_iter_rw(iter) == WRITE) {
/*
* Try to invalidate cache pages for the range we are writing.
* If this invalidation fails, let the caller fall back to
* buffered I/O.
*/
- if (invalidate_inode_pages2_range(mapping,
- iomi.pos >> PAGE_SHIFT, end >> PAGE_SHIFT)) {
- trace_iomap_dio_invalidate_fail(inode, iomi.pos,
- iomi.len);
- ret = -ENOTBLK;
+ ret = kiocb_invalidate_pages(iocb, iomi.len);
+ if (ret) {
+ if (ret != -EAGAIN) {
+ trace_iomap_dio_invalidate_fail(inode, iomi.pos,
+ iomi.len);
+ ret = -ENOTBLK;
+ }
goto out_free_dio;
}

--
2.39.2


2023-05-31 07:54:13

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 12/12] fuse: use direct_write_fallback

Use the generic direct_write_fallback helper instead of duplicating the
logic.

Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Damien Le Moal <[email protected]>
---
fs/fuse/file.c | 24 ++----------------------
1 file changed, 2 insertions(+), 22 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 025973ad813e05..7a72dc0a691201 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1340,7 +1340,6 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
ssize_t written = 0;
- ssize_t written_buffered = 0;
struct inode *inode = mapping->host;
ssize_t err;
struct fuse_conn *fc = get_fuse_conn(inode);
@@ -1377,30 +1376,11 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out;

if (iocb->ki_flags & IOCB_DIRECT) {
- loff_t pos, endbyte;
-
written = generic_file_direct_write(iocb, from);
if (written < 0 || !iov_iter_count(from))
goto out;
-
- written_buffered = fuse_perform_write(iocb, from);
- if (written_buffered < 0) {
- err = written_buffered;
- goto out;
- }
- pos = iocb->ki_pos - written_buffered;
- endbyte = iocb->ki_pos - 1;
-
- err = filemap_write_and_wait_range(file->f_mapping, pos,
- endbyte);
- if (err)
- goto out;
-
- invalidate_mapping_pages(file->f_mapping,
- pos >> PAGE_SHIFT,
- endbyte >> PAGE_SHIFT);
-
- written += written_buffered;
+ written = direct_write_fallback(iocb, from, written,
+ generic_perform_write(iocb, from));
} else {
written = fuse_perform_write(iocb, from);
}
--
2.39.2


2023-05-31 07:54:23

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 11/12] fuse: drop redundant arguments to fuse_perform_write

pos is always equal to iocb->ki_pos, and mapping is always equal to
iocb->ki_filp->f_mapping.

Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Damien Le Moal <[email protected]>
Reviewed-by: Hannes Reinecke <[email protected]>
Acked-by: Miklos Szeredi <[email protected]>
---
fs/fuse/file.c | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index e60e48bf392d49..025973ad813e05 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1280,13 +1280,13 @@ static inline unsigned int fuse_wr_pages(loff_t pos, size_t len,
max_pages);
}

-static ssize_t fuse_perform_write(struct kiocb *iocb,
- struct address_space *mapping,
- struct iov_iter *ii, loff_t pos)
+static ssize_t fuse_perform_write(struct kiocb *iocb, struct iov_iter *ii)
{
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
+ loff_t pos = iocb->ki_pos;
int err = 0;
ssize_t res = 0;

@@ -1383,8 +1383,7 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (written < 0 || !iov_iter_count(from))
goto out;

- written_buffered = fuse_perform_write(iocb, mapping, from,
- iocb->ki_pos);
+ written_buffered = fuse_perform_write(iocb, from);
if (written_buffered < 0) {
err = written_buffered;
goto out;
@@ -1403,7 +1402,7 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)

written += written_buffered;
} else {
- written = fuse_perform_write(iocb, mapping, from, iocb->ki_pos);
+ written = fuse_perform_write(iocb, from);
}
out:
inode_unlock(inode);
--
2.39.2


2023-05-31 09:05:45

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [PATCH 12/12] fuse: use direct_write_fallback

On Wed, 31 May 2023 at 09:51, Christoph Hellwig <[email protected]> wrote:
>
> Use the generic direct_write_fallback helper instead of duplicating the
> logic.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> Reviewed-by: Damien Le Moal <[email protected]>
> ---
> fs/fuse/file.c | 24 ++----------------------
> 1 file changed, 2 insertions(+), 22 deletions(-)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 025973ad813e05..7a72dc0a691201 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -1340,7 +1340,6 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
> struct file *file = iocb->ki_filp;
> struct address_space *mapping = file->f_mapping;
> ssize_t written = 0;
> - ssize_t written_buffered = 0;
> struct inode *inode = mapping->host;
> ssize_t err;
> struct fuse_conn *fc = get_fuse_conn(inode);
> @@ -1377,30 +1376,11 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
> goto out;
>
> if (iocb->ki_flags & IOCB_DIRECT) {
> - loff_t pos, endbyte;
> -
> written = generic_file_direct_write(iocb, from);
> if (written < 0 || !iov_iter_count(from))
> goto out;
> -
> - written_buffered = fuse_perform_write(iocb, from);
> - if (written_buffered < 0) {
> - err = written_buffered;
> - goto out;
> - }
> - pos = iocb->ki_pos - written_buffered;
> - endbyte = iocb->ki_pos - 1;
> -
> - err = filemap_write_and_wait_range(file->f_mapping, pos,
> - endbyte);
> - if (err)
> - goto out;
> -
> - invalidate_mapping_pages(file->f_mapping,
> - pos >> PAGE_SHIFT,
> - endbyte >> PAGE_SHIFT);
> -
> - written += written_buffered;
> + written = direct_write_fallback(iocb, from, written,
> + generic_perform_write(iocb, from));

This should use fuse_perform_write().

Last version of the patch was correct; copy-paste error?

Thanks,
Miklos

2023-05-31 09:06:20

by Damien Le Moal

[permalink] [raw]
Subject: Re: [PATCH 01/12] backing_dev: remove current->backing_dev_info

On 5/31/23 16:50, Christoph Hellwig wrote:
> The last user of current->backing_dev_info disappeared in commit
> b9b1335e6403 ("remove bdi_congested() and wb_congested() and related
> functions"). Remove the field and all assignments to it.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> Reviewed-by: Hannes Reinecke <[email protected]>
> Reviewed-by: Darrick J. Wong <[email protected]>

Nice cleanup !

Reviewed-by: Damien Le Moal <[email protected]>

--
Damien Le Moal
Western Digital Research


2023-05-31 14:28:19

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH 01/12] backing_dev: remove current->backing_dev_info

On Wed, May 31, 2023 at 09:50:15AM +0200, Christoph Hellwig wrote:
> The last user of current->backing_dev_info disappeared in commit
> b9b1335e6403 ("remove bdi_congested() and wb_congested() and related
> functions"). Remove the field and all assignments to it.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> Reviewed-by: Hannes Reinecke <[email protected]>
> Reviewed-by: Darrick J. Wong <[email protected]>
> ---

I somehow thought I'd already acked this... Anyway,
Reviewed-by: Christian Brauner <[email protected]>