2024-05-28 16:48:52

by Matthew Wilcox

[permalink] [raw]
Subject: [PATCH 0/7] Start moving write_begin/write_end out of aops

Christoph wants to remove write_begin/write_end from aops and pass them
to filemap as callback functions. Here's one possible route to do this.
I combined it with the folio conversion (because why touch the same code
twice?) and tweaked some of the other things (support for ridiculously
large folios with size_t lengths, remove the need to initialise fsdata
by passing only a pointer to the fsdata pointer). And then I converted
ext4, which is probably the worst filesystem to convert because it needs
three different bwops. Most fs will only need one.

Not written yet: convert all the other fs, remove wrappers.

v2:
- Redo how we pass fsdata around so it can persist across multiple
invocations of filemap_perform_write()
- Add ext2
- Minor tweak to iomap

This is against 2bfcfd584ff5 (Linus current head) and will conflict with
other patches in flight.

Matthew Wilcox (Oracle) (7):
fs: Introduce buffered_write_operations
fs: Supply optional buffered_write_operations in buffer.c
buffer: Add buffer_write_begin, buffer_write_end and
__buffer_write_end
fs: Add filemap_symlink()
ext2: Convert to buffered_write_operations
ext4: Convert to buffered_write_operations
iomap: Return the folio from iomap_write_begin()

Documentation/filesystems/locking.rst | 23 ++++
fs/buffer.c | 158 ++++++++++++++++++--------
fs/ext2/ext2.h | 1 +
fs/ext2/file.c | 4 +-
fs/ext2/inode.c | 55 ++++-----
fs/ext2/namei.c | 2 +-
fs/ext4/ext4.h | 24 ++--
fs/ext4/file.c | 12 +-
fs/ext4/inline.c | 66 +++++------
fs/ext4/inode.c | 134 ++++++++++------------
fs/iomap/buffered-io.c | 35 +++---
fs/jfs/file.c | 3 +-
fs/namei.c | 25 ++++
fs/ramfs/file-mmu.c | 3 +-
fs/ufs/file.c | 2 +-
include/linux/buffer_head.h | 28 ++++-
include/linux/fs.h | 3 -
include/linux/pagemap.h | 23 ++++
mm/filemap.c | 77 ++++++++-----
19 files changed, 417 insertions(+), 261 deletions(-)

--
2.43.0



2024-05-28 16:49:10

by Matthew Wilcox

[permalink] [raw]
Subject: [PATCH 5/7] ext2: Convert to buffered_write_operations

Move write_begin and write_end from the address_space_operations to
the new buffered_write_operations. Removes a number of hidden calls
to compound_head().

Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
---
fs/ext2/ext2.h | 1 +
fs/ext2/file.c | 4 ++--
fs/ext2/inode.c | 55 ++++++++++++++++++++++++++-----------------------
fs/ext2/namei.c | 2 +-
4 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index f38bdd46e4f7..89b498c81f18 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -784,6 +784,7 @@ extern const struct file_operations ext2_file_operations;
extern void ext2_set_file_ops(struct inode *inode);
extern const struct address_space_operations ext2_aops;
extern const struct iomap_ops ext2_iomap_ops;
+extern const struct buffered_write_operations ext2_bw_ops;

/* namei.c */
extern const struct inode_operations ext2_dir_inode_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 10b061ac5bc0..108e8d2e9654 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -252,7 +252,7 @@ static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)

iocb->ki_flags &= ~IOCB_DIRECT;
pos = iocb->ki_pos;
- status = generic_perform_write(iocb, from);
+ status = filemap_perform_write(iocb, from, &ext2_bw_ops, NULL);
if (unlikely(status < 0)) {
ret = status;
goto out_unlock;
@@ -299,7 +299,7 @@ static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (iocb->ki_flags & IOCB_DIRECT)
return ext2_dio_write_iter(iocb, from);

- return generic_file_write_iter(iocb, from);
+ return filemap_write_iter(iocb, from, &ext2_bw_ops, NULL);
}

static int ext2_file_open(struct inode *inode, struct file *filp)
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 0caa1650cee8..a89525d08aa3 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -914,30 +914,6 @@ static void ext2_readahead(struct readahead_control *rac)
mpage_readahead(rac, ext2_get_block);
}

-static int
-ext2_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep, void **fsdata)
-{
- int ret;
-
- ret = block_write_begin(mapping, pos, len, pagep, ext2_get_block);
- if (ret < 0)
- ext2_write_failed(mapping, pos + len);
- return ret;
-}
-
-static int ext2_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
-{
- int ret;
-
- ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
- if (ret < len)
- ext2_write_failed(mapping, pos + len);
- return ret;
-}
-
static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
{
return generic_block_bmap(mapping,block,ext2_get_block);
@@ -962,8 +938,6 @@ const struct address_space_operations ext2_aops = {
.invalidate_folio = block_invalidate_folio,
.read_folio = ext2_read_folio,
.readahead = ext2_readahead,
- .write_begin = ext2_write_begin,
- .write_end = ext2_write_end,
.bmap = ext2_bmap,
.writepages = ext2_writepages,
.migrate_folio = buffer_migrate_folio,
@@ -976,6 +950,35 @@ static const struct address_space_operations ext2_dax_aops = {
.dirty_folio = noop_dirty_folio,
};

+static struct folio *ext2_write_begin(struct file *file,
+ struct address_space *mapping, loff_t pos, size_t len,
+ void **fsdata)
+{
+ struct folio *folio;
+
+ folio = buffer_write_begin(mapping, pos, len, ext2_get_block);
+
+ if (IS_ERR(folio))
+ ext2_write_failed(mapping, pos + len);
+ return folio;
+}
+
+static size_t ext2_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, size_t len, size_t copied, struct folio *folio,
+ void **fsdata)
+{
+ size_t ret = buffer_write_end(file, mapping, pos, len, copied, folio);
+
+ if (ret < len)
+ ext2_write_failed(mapping, pos + len);
+ return ret;
+}
+
+const struct buffered_write_operations ext2_bw_ops = {
+ .write_begin = ext2_write_begin,
+ .write_end = ext2_write_end,
+};
+
/*
* Probably it should be a library function... search for first non-zero word
* or memcmp with zero_page, whatever is better for particular architecture.
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 8346ab9534c1..a0edb11be826 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -179,7 +179,7 @@ static int ext2_symlink (struct mnt_idmap * idmap, struct inode * dir,
inode->i_op = &ext2_symlink_inode_operations;
inode_nohighmem(inode);
inode->i_mapping->a_ops = &ext2_aops;
- err = page_symlink(inode, symname, l);
+ err = filemap_symlink(inode, symname, l, &ext2_bw_ops, NULL);
if (err)
goto out_fail;
} else {
--
2.43.0


2024-05-28 16:49:21

by Matthew Wilcox

[permalink] [raw]
Subject: [PATCH 2/7] fs: Supply optional buffered_write_operations in buffer.c

generic_cont_expand_simple() and cont_expand_zero() currently call
->write_begin and ->write_end, so will also need to be converted to
use buffered_write_operations. Use macro magic to pass NULL if no
buffered_write_operations is supplied.

Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
---
fs/buffer.c | 78 ++++++++++++++++++++++++++-----------
include/linux/buffer_head.h | 22 +++++++++--
2 files changed, 74 insertions(+), 26 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 8c19e705b9c3..58ac52f20bf6 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2466,23 +2466,32 @@ EXPORT_SYMBOL(block_read_full_folio);
* truncates. Uses filesystem pagecache writes to allow the filesystem to
* deal with the hole.
*/
-int generic_cont_expand_simple(struct inode *inode, loff_t size)
+int generic_cont_expand_simple(struct inode *inode, loff_t size,
+ const struct buffered_write_operations *ops, void *fsdata)
{
struct address_space *mapping = inode->i_mapping;
const struct address_space_operations *aops = mapping->a_ops;
struct page *page;
- void *fsdata = NULL;
+ struct folio *folio;
int err;

err = inode_newsize_ok(inode, size);
if (err)
goto out;

- err = aops->write_begin(NULL, mapping, size, 0, &page, &fsdata);
+ if (ops) {
+ folio = ops->write_begin(NULL, mapping, size, 0, &fsdata);
+ if (IS_ERR(folio))
+ err = PTR_ERR(folio);
+ } else
+ err = aops->write_begin(NULL, mapping, size, 0, &page, &fsdata);
if (err)
goto out;

- err = aops->write_end(NULL, mapping, size, 0, 0, page, fsdata);
+ if (ops)
+ err = ops->write_end(NULL, mapping, size, 0, 0, folio, &fsdata);
+ else
+ err = aops->write_end(NULL, mapping, size, 0, 0, page, fsdata);
BUG_ON(err > 0);

out:
@@ -2491,13 +2500,14 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size)
EXPORT_SYMBOL(generic_cont_expand_simple);

static int cont_expand_zero(struct file *file, struct address_space *mapping,
- loff_t pos, loff_t *bytes)
+ loff_t pos, loff_t *bytes,
+ const struct buffered_write_operations *ops, void **fsdata)
{
struct inode *inode = mapping->host;
const struct address_space_operations *aops = mapping->a_ops;
unsigned int blocksize = i_blocksize(inode);
struct page *page;
- void *fsdata = NULL;
+ struct folio *folio;
pgoff_t index, curidx;
loff_t curpos;
unsigned zerofrom, offset, len;
@@ -2514,13 +2524,25 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
}
len = PAGE_SIZE - zerofrom;

- err = aops->write_begin(file, mapping, curpos, len,
- &page, &fsdata);
- if (err)
- goto out;
+ if (ops) {
+ folio = ops->write_begin(file, mapping, curpos, len,
+ fsdata);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ page = &folio->page;
+ } else {
+ err = aops->write_begin(file, mapping, curpos, len,
+ &page, fsdata);
+ if (err)
+ goto out;
+ }
zero_user(page, zerofrom, len);
- err = aops->write_end(file, mapping, curpos, len, len,
- page, fsdata);
+ if (ops)
+ err = ops->write_end(file, mapping, curpos, len, len,
+ folio, fsdata);
+ else
+ err = aops->write_end(file, mapping, curpos, len, len,
+ page, *fsdata);
if (err < 0)
goto out;
BUG_ON(err != len);
@@ -2547,13 +2569,25 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
}
len = offset - zerofrom;

- err = aops->write_begin(file, mapping, curpos, len,
- &page, &fsdata);
- if (err)
- goto out;
+ if (ops) {
+ folio = ops->write_begin(file, mapping, curpos, len,
+ fsdata);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ page = &folio->page;
+ } else {
+ err = aops->write_begin(file, mapping, curpos, len,
+ &page, fsdata);
+ if (err)
+ goto out;
+ }
zero_user(page, zerofrom, len);
- err = aops->write_end(file, mapping, curpos, len, len,
- page, fsdata);
+ if (ops)
+ err = ops->write_end(file, mapping, curpos, len, len,
+ folio, fsdata);
+ else
+ err = aops->write_end(file, mapping, curpos, len, len,
+ page, *fsdata);
if (err < 0)
goto out;
BUG_ON(err != len);
@@ -2568,16 +2602,16 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
* We may have to extend the file.
*/
int cont_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct page **pagep, void **fsdata,
- get_block_t *get_block, loff_t *bytes)
+ loff_t pos, unsigned len, struct page **pagep, void **fsdata,
+ get_block_t *get_block, loff_t *bytes,
+ const struct buffered_write_operations *ops)
{
struct inode *inode = mapping->host;
unsigned int blocksize = i_blocksize(inode);
unsigned int zerofrom;
int err;

- err = cont_expand_zero(file, mapping, pos, bytes);
+ err = cont_expand_zero(file, mapping, pos, bytes, ops, fsdata);
if (err)
return err;

diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index e022e40b099e..1b7f14e39ab8 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -268,16 +268,30 @@ int generic_write_end(struct file *, struct address_space *,
loff_t, unsigned, unsigned,
struct page *, void *);
void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to);
-int cont_write_begin(struct file *, struct address_space *, loff_t,
- unsigned, struct page **, void **,
- get_block_t *, loff_t *);
-int generic_cont_expand_simple(struct inode *inode, loff_t size);
+int cont_write_begin(struct file *, struct address_space *, loff_t pos,
+ unsigned len, struct page **, void **fsdata, get_block_t *,
+ loff_t *bytes, const struct buffered_write_operations *);
+int generic_cont_expand_simple(struct inode *inode, loff_t size,
+ const struct buffered_write_operations *ops, void *fsdata);
void block_commit_write(struct page *page, unsigned int from, unsigned int to);
int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
get_block_t get_block);
sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
int block_truncate_page(struct address_space *, loff_t, get_block_t *);

+#define _cont_write_begin(file, mapping, pos, len, pagep, fsdata, \
+ getblk, bytes, ops, extra...) \
+ cont_write_begin(file, mapping, pos, len, pagep, fsdata, \
+ getblk, bytes, ops)
+#define cont_write_begin(file, mapping, pos, len, pagep, fsdata, \
+ getblk, bytes, ops...) \
+ _cont_write_begin(file, mapping, pos, len, pagep, fsdata, \
+ getblk, bytes, ## ops, NULL)
+#define _generic_cont_expand_simple(inode, size, ops, fsdata, extra...) \
+ generic_cont_expand_simple(inode, size, ops, fsdata)
+#define generic_cont_expand_simple(inode, size, ops_data...) \
+ _generic_cont_expand_simple(inode, size, ## ops_data, NULL, NULL)
+
#ifdef CONFIG_MIGRATION
extern int buffer_migrate_folio(struct address_space *,
struct folio *dst, struct folio *src, enum migrate_mode);
--
2.43.0


2024-05-28 16:49:41

by Matthew Wilcox

[permalink] [raw]
Subject: [PATCH 1/7] fs: Introduce buffered_write_operations

Start the process of moving write_begin and write_end out from the
address_space_operations to their own struct.

The new write_begin returns the folio or an ERR_PTR instead of returning
the folio by reference. It also accepts len as a size_t and (as
documented) the len may be larger than PAGE_SIZE.

Pass an optional buffered_write_operations pointer to various functions
in filemap.c. The old names are available as macros for now, except
for generic_file_write_iter() which is used as a function pointer by
many filesystems. If using the new functions, the filesystem can have
per-operation fsdata instead of per-page fsdata.

Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
---
Documentation/filesystems/locking.rst | 23 ++++++++
fs/jfs/file.c | 3 +-
fs/ramfs/file-mmu.c | 3 +-
fs/ufs/file.c | 2 +-
include/linux/fs.h | 3 --
include/linux/pagemap.h | 21 ++++++++
mm/filemap.c | 77 +++++++++++++++++----------
7 files changed, 97 insertions(+), 35 deletions(-)

diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index e664061ed55d..e62a8a83ea9e 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -413,6 +413,29 @@ path after ->swap_activate() returned success.

->swap_rw will be called for swap IO if SWP_FS_OPS was set by ->swap_activate().

+buffered_write_operations
+=========================
+
+ struct folio *(*write_begin)(struct file *, struct address_space *,
+ loff_t pos, size_t len, void **fsdata);
+ size_t (*write_end)(struct file *, struct address_space *,
+ loff_t pos, size_t len, size_t copied,
+ struct folio *folio, void **fsdata);
+
+For write_begin, 'len' is typically the remaining length of the write,
+and is not capped to PAGE_SIZE. For write_end, len will be limited so
+that pos + len <= folio_pos(folio) + folio_size(folio). copied will
+not exceed len. pos + len will not exceed MAX_LFS_FILESIZE.
+
+write_begin may return an ERR_PTR. write_end may return a number less
+than copied if it needs the caller to retry from an earlier position in
+the file. It cannot signal an error; that must be done by write_begin().
+
+write_begin should lock the folio and increase its refcount. write_end
+should unlock it and drop the refcount, possibly after setting it
+uptodate (it can only use folio_end_read() for this if it knows the folio
+was not previously uptodate; probably best not to use it).
+
file_lock_operations
====================

diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 01b6912e60f8..9c62445ea6be 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -4,8 +4,7 @@
* Portions Copyright (C) Christoph Hellwig, 2001-2002
*/

-#include <linux/mm.h>
-#include <linux/fs.h>
+#include <linux/pagemap.h>
#include <linux/posix_acl.h>
#include <linux/quotaops.h>
#include "jfs_incore.h"
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index b45c7edc3225..5de2a232d07f 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -24,8 +24,7 @@
* caches is sufficient.
*/

-#include <linux/fs.h>
-#include <linux/mm.h>
+#include <linux/pagemap.h>
#include <linux/ramfs.h>
#include <linux/sched.h>

diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 6558882a89ef..b557d4a14143 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -24,7 +24,7 @@
* ext2 fs regular file handling primitives
*/

-#include <linux/fs.h>
+#include <linux/pagemap.h>

#include "ufs_fs.h"
#include "ufs.h"
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0283cf366c2a..4c6c2042cbeb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3115,10 +3115,7 @@ extern int generic_file_rw_checks(struct file *file_in, struct file *file_out);
ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *to,
ssize_t already_read);
extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
-extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
-extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
-ssize_t generic_perform_write(struct kiocb *, struct iov_iter *);
ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter,
ssize_t direct_written, ssize_t buffered_written);

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ee633712bba0..2921c1cc6335 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -18,6 +18,27 @@

struct folio_batch;

+struct buffered_write_operations {
+ struct folio *(*write_begin)(struct file *, struct address_space *,
+ loff_t pos, size_t len, void **fsdata);
+ size_t (*write_end)(struct file *, struct address_space *,
+ loff_t pos, size_t len, size_t copied,
+ struct folio *folio, void **fsdata);
+};
+
+ssize_t filemap_write_iter(struct kiocb *, struct iov_iter *,
+ const struct buffered_write_operations *, void *fsdata);
+ssize_t __filemap_write_iter(struct kiocb *, struct iov_iter *,
+ const struct buffered_write_operations *, void *fsdata);
+ssize_t filemap_perform_write(struct kiocb *, struct iov_iter *,
+ const struct buffered_write_operations *, void *fsdata);
+
+ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
+#define generic_perform_write(kiocb, iter) \
+ filemap_perform_write(kiocb, iter, NULL, NULL)
+#define __generic_file_write_iter(kiocb, iter) \
+ __filemap_write_iter(kiocb, iter, NULL, NULL)
+
unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end);

diff --git a/mm/filemap.c b/mm/filemap.c
index 382c3d06bfb1..162ac110c423 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -95,7 +95,7 @@
* ->invalidate_lock (filemap_fault)
* ->lock_page (filemap_fault, access_process_vm)
*
- * ->i_rwsem (generic_perform_write)
+ * ->i_rwsem (filemap_perform_write)
* ->mmap_lock (fault_in_readable->do_page_fault)
*
* bdi->wb.list_lock
@@ -3975,7 +3975,8 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
}
EXPORT_SYMBOL(generic_file_direct_write);

-ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
+ssize_t filemap_perform_write(struct kiocb *iocb, struct iov_iter *i,
+ const struct buffered_write_operations *ops, void *fsdata)
{
struct file *file = iocb->ki_filp;
loff_t pos = iocb->ki_pos;
@@ -3985,11 +3986,10 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
ssize_t written = 0;

do {
- struct page *page;
- unsigned long offset; /* Offset into pagecache page */
- unsigned long bytes; /* Bytes to write to page */
+ struct folio *folio;
+ size_t offset; /* Offset into pagecache folio */
+ size_t bytes; /* Bytes to write to page */
size_t copied; /* Bytes copied from user */
- void *fsdata = NULL;

offset = (pos & (PAGE_SIZE - 1));
bytes = min_t(unsigned long, PAGE_SIZE - offset,
@@ -4012,19 +4012,33 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
break;
}

- status = a_ops->write_begin(file, mapping, pos, bytes,
+ if (ops) {
+ folio = ops->write_begin(file, mapping, pos, bytes, &fsdata);
+ if (IS_ERR(folio)) {
+ status = PTR_ERR(folio);
+ break;
+ }
+ } else {
+ struct page *page;
+ status = a_ops->write_begin(file, mapping, pos, bytes,
&page, &fsdata);
- if (unlikely(status < 0))
- break;
+ if (unlikely(status < 0))
+ break;
+ folio = page_folio(page);
+ }

if (mapping_writably_mapped(mapping))
- flush_dcache_page(page);
+ flush_dcache_folio(folio);

- copied = copy_page_from_iter_atomic(page, offset, bytes, i);
- flush_dcache_page(page);
+ copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
+ flush_dcache_folio(folio);

- status = a_ops->write_end(file, mapping, pos, bytes, copied,
- page, fsdata);
+ if (ops)
+ status = ops->write_end(file, mapping, pos, bytes,
+ copied, folio, &fsdata);
+ else
+ status = a_ops->write_end(file, mapping, pos, bytes,
+ copied, &folio->page, fsdata);
if (unlikely(status != copied)) {
iov_iter_revert(i, copied - max(status, 0L));
if (unlikely(status < 0))
@@ -4054,12 +4068,13 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i)
iocb->ki_pos += written;
return written;
}
-EXPORT_SYMBOL(generic_perform_write);
+EXPORT_SYMBOL(filemap_perform_write);

/**
- * __generic_file_write_iter - write data to a file
- * @iocb: IO state structure (file, offset, etc.)
- * @from: iov_iter with data to write
+ * __filemap_write_iter - write data to a file
+ * @iocb: IO state structure (file, offset, etc.)
+ * @from: iov_iter with data to write
+ * @ops: How to inform the filesystem that a write is starting/finishing.
*
* This function does all the work needed for actually writing data to a
* file. It does all basic checks, removes SUID from the file, updates
@@ -4077,7 +4092,8 @@ EXPORT_SYMBOL(generic_perform_write);
* * number of bytes written, even for truncated writes
* * negative error code if no data has been written at all
*/
-ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ssize_t __filemap_write_iter(struct kiocb *iocb, struct iov_iter *from,
+ const struct buffered_write_operations *ops, void *fsdata)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -4104,27 +4120,29 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (ret < 0 || !iov_iter_count(from) || IS_DAX(inode))
return ret;
return direct_write_fallback(iocb, from, ret,
- generic_perform_write(iocb, from));
+ filemap_perform_write(iocb, from, ops, fsdata));
}

- return generic_perform_write(iocb, from);
+ return filemap_perform_write(iocb, from, ops, fsdata);
}
-EXPORT_SYMBOL(__generic_file_write_iter);
+EXPORT_SYMBOL(__filemap_write_iter);

/**
- * generic_file_write_iter - write data to a file
+ * filemap_write_iter - write data to a file
* @iocb: IO state structure
* @from: iov_iter with data to write
*
- * This is a wrapper around __generic_file_write_iter() to be used by most
+ * This is a wrapper around __filemap_write_iter() to be used by most
* filesystems. It takes care of syncing the file in case of O_SYNC file
* and acquires i_rwsem as needed.
+ *
* Return:
- * * negative error code if no data has been written at all of
+ * * negative error code if no data has been written at all or if
* vfs_fsync_range() failed for a synchronous write
* * number of bytes written, even for truncated writes
*/
-ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ssize_t filemap_write_iter(struct kiocb *iocb, struct iov_iter *from,
+ const struct buffered_write_operations *ops, void *fsdata)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
@@ -4133,13 +4151,18 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret > 0)
- ret = __generic_file_write_iter(iocb, from);
+ ret = __filemap_write_iter(iocb, from, ops, fsdata);
inode_unlock(inode);

if (ret > 0)
ret = generic_write_sync(iocb, ret);
return ret;
}
+
+ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ return filemap_write_iter(iocb, from, NULL, NULL);
+}
EXPORT_SYMBOL(generic_file_write_iter);

/**
--
2.43.0


2024-05-28 18:02:17

by Matthew Wilcox

[permalink] [raw]
Subject: [PATCH 7/7] iomap: Return the folio from iomap_write_begin()

Use an ERR_PTR to return any error that may have occurred, otherwise
return the folio directly instead of returning it by reference. This
mirrors changes which are going into the filemap ->write_begin callbacks.

Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
---
fs/iomap/buffered-io.c | 35 ++++++++++++++++-------------------
1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index c5802a459334..f0c40ac425ce 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -764,27 +764,27 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter,
return iomap_read_inline_data(iter, folio);
}

-static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
- size_t len, struct folio **foliop)
+static struct folio *iomap_write_begin(struct iomap_iter *iter, loff_t pos,
+ size_t len)
{
const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
const struct iomap *srcmap = iomap_iter_srcmap(iter);
struct folio *folio;
- int status = 0;
+ int status;

BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
if (srcmap != &iter->iomap)
BUG_ON(pos + len > srcmap->offset + srcmap->length);

if (fatal_signal_pending(current))
- return -EINTR;
+ return ERR_PTR(-EINTR);

if (!mapping_large_folio_support(iter->inode->i_mapping))
len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));

folio = __iomap_get_folio(iter, pos, len);
if (IS_ERR(folio))
- return PTR_ERR(folio);
+ return folio;

/*
* Now we have a locked folio, before we do anything with it we need to
@@ -801,7 +801,6 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
&iter->iomap);
if (!iomap_valid) {
iter->iomap.flags |= IOMAP_F_STALE;
- status = 0;
goto out_unlock;
}
}
@@ -819,13 +818,12 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
if (unlikely(status))
goto out_unlock;

- *foliop = folio;
- return 0;
+ return folio;

out_unlock:
__iomap_put_folio(iter, pos, 0, folio);

- return status;
+ return ERR_PTR(status);
}

static bool __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
@@ -940,9 +938,10 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
break;
}

- status = iomap_write_begin(iter, pos, bytes, &folio);
- if (unlikely(status)) {
+ folio = iomap_write_begin(iter, pos, bytes);
+ if (IS_ERR(folio)) {
iomap_write_failed(iter->inode, pos, bytes);
+ status = PTR_ERR(folio);
break;
}
if (iter->iomap.flags & IOMAP_F_STALE)
@@ -1330,14 +1329,13 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter)

do {
struct folio *folio;
- int status;
size_t offset;
size_t bytes = min_t(u64, SIZE_MAX, length);
bool ret;

- status = iomap_write_begin(iter, pos, bytes, &folio);
- if (unlikely(status))
- return status;
+ folio = iomap_write_begin(iter, pos, bytes);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
if (iomap->flags & IOMAP_F_STALE)
break;

@@ -1393,14 +1391,13 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)

do {
struct folio *folio;
- int status;
size_t offset;
size_t bytes = min_t(u64, SIZE_MAX, length);
bool ret;

- status = iomap_write_begin(iter, pos, bytes, &folio);
- if (status)
- return status;
+ folio = iomap_write_begin(iter, pos, bytes);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
if (iter->iomap.flags & IOMAP_F_STALE)
break;

--
2.43.0


2024-05-28 18:07:35

by Matthew Wilcox

[permalink] [raw]
Subject: [PATCH 6/7] ext4: Convert to buffered_write_operations

Pass the appropriate buffered_write_operations to filemap_perform_write().
Saves a lot of page<->folio conversions.

Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
---
fs/buffer.c | 2 +-
fs/ext4/ext4.h | 24 ++++-----
fs/ext4/file.c | 12 ++++-
fs/ext4/inline.c | 66 ++++++++++-------------
fs/ext4/inode.c | 134 ++++++++++++++++++++---------------------------
5 files changed, 108 insertions(+), 130 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 4064b21fe499..98f116e8abde 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2299,7 +2299,7 @@ int block_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
- return buffer_write_end(file, mapping, pos, len, copied,
+ return __buffer_write_end(file, mapping, pos, len, copied,
page_folio(page));
}
EXPORT_SYMBOL(block_write_end);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 983dad8c07ec..b6f7509e3f55 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2971,8 +2971,6 @@ int ext4_walk_page_buffers(handle_t *handle,
struct buffer_head *bh));
int do_journal_get_write_access(handle_t *handle, struct inode *inode,
struct buffer_head *bh);
-#define FALL_BACK_TO_NONDELALLOC 1
-#define CONVERT_INLINE_DATA 2

typedef enum {
EXT4_IGET_NORMAL = 0,
@@ -3011,6 +3009,7 @@ extern int ext4_break_layouts(struct inode *);
extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
extern void ext4_set_inode_flags(struct inode *, bool init);
extern int ext4_alloc_da_blocks(struct inode *inode);
+int ext4_nonda_switch(struct super_block *sb);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_normal_submit_inode_data_buffers(struct jbd2_inode *jinode);
@@ -3026,6 +3025,10 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
ext4_fsblk_t pblk, ext4_lblk_t len);

+extern const struct buffered_write_operations ext4_bw_ops;
+extern const struct buffered_write_operations ext4_journalled_bw_ops;
+extern const struct buffered_write_operations ext4_da_bw_ops;
+
/* indirect.c */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags);
@@ -3551,17 +3554,12 @@ extern int ext4_find_inline_data_nolock(struct inode *inode);
extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);

int ext4_readpage_inline(struct inode *inode, struct folio *folio);
-extern int ext4_try_to_write_inline_data(struct address_space *mapping,
- struct inode *inode,
- loff_t pos, unsigned len,
- struct page **pagep);
-int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
- unsigned copied, struct folio *folio);
-extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
- struct inode *inode,
- loff_t pos, unsigned len,
- struct page **pagep,
- void **fsdata);
+struct folio *ext4_try_to_write_inline_data(struct address_space *mapping,
+ struct inode *inode, loff_t pos, size_t len);
+size_t ext4_write_inline_data_end(struct inode *inode, loff_t pos, size_t len,
+ size_t copied, struct folio *folio);
+struct folio *ext4_da_write_inline_data_begin(struct address_space *mapping,
+ struct inode *inode, loff_t pos, size_t len);
extern int ext4_try_add_inline_entry(handle_t *handle,
struct ext4_filename *fname,
struct inode *dir, struct inode *inode);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index c89e434db6b7..08c2772966a9 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -287,16 +287,26 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
{
ssize_t ret;
struct inode *inode = file_inode(iocb->ki_filp);
+ const struct buffered_write_operations *ops;

if (iocb->ki_flags & IOCB_NOWAIT)
return -EOPNOTSUPP;

+ if (ext4_should_journal_data(inode))
+ ops = &ext4_journalled_bw_ops;
+ else if (test_opt(inode->i_sb, DELALLOC) &&
+ !ext4_nonda_switch(inode->i_sb) &&
+ !ext4_verity_in_progress(inode))
+ ops = &ext4_da_bw_ops;
+ else
+ ops = &ext4_bw_ops;
+
inode_lock(inode);
ret = ext4_write_checks(iocb, from);
if (ret <= 0)
goto out;

- ret = generic_perform_write(iocb, from);
+ ret = filemap_perform_write(iocb, from, ops, NULL);

out:
inode_unlock(inode);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index d5bd1e3a5d36..cb5cb2cc9c2b 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -538,8 +538,9 @@ int ext4_readpage_inline(struct inode *inode, struct folio *folio)
return ret >= 0 ? 0 : ret;
}

-static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
- struct inode *inode)
+/* Returns NULL on success, ERR_PTR on failure */
+static void *ext4_convert_inline_data_to_extent(struct address_space *mapping,
+ struct inode *inode)
{
int ret, needed_blocks, no_expand;
handle_t *handle = NULL;
@@ -554,14 +555,14 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
* will trap here again.
*/
ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
- return 0;
+ return NULL;
}

needed_blocks = ext4_writepage_trans_blocks(inode);

ret = ext4_get_inode_loc(inode, &iloc);
if (ret)
- return ret;
+ return ERR_PTR(ret);

retry:
handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
@@ -648,7 +649,7 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
if (handle)
ext4_journal_stop(handle);
brelse(iloc.bh);
- return ret;
+ return ERR_PTR(ret);
}

/*
@@ -657,10 +658,8 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
* in the inode also. If not, create the page the handle, move the data
* to the page make it update and let the later codes create extent for it.
*/
-int ext4_try_to_write_inline_data(struct address_space *mapping,
- struct inode *inode,
- loff_t pos, unsigned len,
- struct page **pagep)
+struct folio *ext4_try_to_write_inline_data(struct address_space *mapping,
+ struct inode *inode, loff_t pos, size_t len)
{
int ret;
handle_t *handle;
@@ -672,7 +671,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,

ret = ext4_get_inode_loc(inode, &iloc);
if (ret)
- return ret;
+ return ERR_PTR(ret);

/*
* The possible write could happen in the inode,
@@ -680,7 +679,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
*/
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
+ folio = ERR_CAST(handle);
handle = NULL;
goto out;
}
@@ -703,17 +702,14 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,

folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
mapping_gfp_mask(mapping));
- if (IS_ERR(folio)) {
- ret = PTR_ERR(folio);
+ if (IS_ERR(folio))
goto out;
- }

- *pagep = &folio->page;
down_read(&EXT4_I(inode)->xattr_sem);
if (!ext4_has_inline_data(inode)) {
- ret = 0;
folio_unlock(folio);
folio_put(folio);
+ folio = NULL;
goto out_up_read;
}

@@ -726,21 +722,22 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
}
}

- ret = 1;
handle = NULL;
out_up_read:
up_read(&EXT4_I(inode)->xattr_sem);
out:
- if (handle && (ret != 1))
+ if (ret < 0)
+ folio = ERR_PTR(ret);
+ if (handle && IS_ERR_OR_NULL(folio))
ext4_journal_stop(handle);
brelse(iloc.bh);
- return ret;
+ return folio;
convert:
return ext4_convert_inline_data_to_extent(mapping, inode);
}

-int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
- unsigned copied, struct folio *folio)
+size_t ext4_write_inline_data_end(struct inode *inode, loff_t pos, size_t len,
+ size_t copied, struct folio *folio)
{
handle_t *handle = ext4_journal_current_handle();
int no_expand;
@@ -831,8 +828,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
* need to start the journal since the file's metadata isn't changed now.
*/
static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
- struct inode *inode,
- void **fsdata)
+ struct inode *inode)
{
int ret = 0, inline_size;
struct folio *folio;
@@ -869,7 +865,6 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
folio_mark_dirty(folio);
folio_mark_uptodate(folio);
ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
- *fsdata = (void *)CONVERT_INLINE_DATA;

out:
up_read(&EXT4_I(inode)->xattr_sem);
@@ -888,11 +883,8 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
* handle in writepages(the i_disksize update is left to the
* normal ext4_da_write_end).
*/
-int ext4_da_write_inline_data_begin(struct address_space *mapping,
- struct inode *inode,
- loff_t pos, unsigned len,
- struct page **pagep,
- void **fsdata)
+struct folio *ext4_da_write_inline_data_begin(struct address_space *mapping,
+ struct inode *inode, loff_t pos, size_t len)
{
int ret;
handle_t *handle;
@@ -902,7 +894,7 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,

ret = ext4_get_inode_loc(inode, &iloc);
if (ret)
- return ret;
+ return ERR_PTR(ret);

retry_journal:
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
@@ -918,8 +910,7 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
if (ret == -ENOSPC) {
ext4_journal_stop(handle);
ret = ext4_da_convert_inline_data_to_extent(mapping,
- inode,
- fsdata);
+ inode);
if (ret == -ENOSPC &&
ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry_journal;
@@ -932,10 +923,8 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
*/
folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
mapping_gfp_mask(mapping));
- if (IS_ERR(folio)) {
- ret = PTR_ERR(folio);
+ if (IS_ERR(folio))
goto out_journal;
- }

down_read(&EXT4_I(inode)->xattr_sem);
if (!ext4_has_inline_data(inode)) {
@@ -954,18 +943,17 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
goto out_release_page;

up_read(&EXT4_I(inode)->xattr_sem);
- *pagep = &folio->page;
- brelse(iloc.bh);
- return 1;
+ goto out;
out_release_page:
up_read(&EXT4_I(inode)->xattr_sem);
folio_unlock(folio);
folio_put(folio);
+ folio = ERR_PTR(ret);
out_journal:
ext4_journal_stop(handle);
out:
brelse(iloc.bh);
- return ret;
+ return folio;
}

#ifdef INLINE_DIR_DEBUG
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4bae9ccf5fe0..e9526e55e86c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1014,7 +1014,7 @@ int do_journal_get_write_access(handle_t *handle, struct inode *inode,

#ifdef CONFIG_FS_ENCRYPTION
static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
- get_block_t *get_block)
+ get_block_t *get_block, void **fsdata)
{
unsigned from = pos & (PAGE_SIZE - 1);
unsigned to = from + len;
@@ -1114,9 +1114,9 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
* and the ext4_write_end(). So doing the jbd2_journal_start at the start of
* ext4_write_begin() is the right place.
*/
-static int ext4_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+static struct folio *ext4_write_begin(struct file *file,
+ struct address_space *mapping, loff_t pos, size_t len,
+ void **fsdata)
{
struct inode *inode = mapping->host;
int ret, needed_blocks;
@@ -1127,7 +1127,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
unsigned from, to;

if (unlikely(ext4_forced_shutdown(inode->i_sb)))
- return -EIO;
+ return ERR_PTR(-EIO);

trace_ext4_write_begin(inode, pos, len);
/*
@@ -1140,12 +1140,9 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
to = from + len;

if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
- ret = ext4_try_to_write_inline_data(mapping, inode, pos, len,
- pagep);
- if (ret < 0)
- return ret;
- if (ret == 1)
- return 0;
+ folio = ext4_try_to_write_inline_data(mapping, inode, pos, len);
+ if (folio)
+ return folio;
}

/*
@@ -1159,7 +1156,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
mapping_gfp_mask(mapping));
if (IS_ERR(folio))
- return PTR_ERR(folio);
+ return folio;
/*
* The same as page allocation, we prealloc buffer heads before
* starting the handle.
@@ -1173,7 +1170,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
if (IS_ERR(handle)) {
folio_put(folio);
- return PTR_ERR(handle);
+ return ERR_CAST(handle);
}

folio_lock(folio);
@@ -1190,9 +1187,10 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
#ifdef CONFIG_FS_ENCRYPTION
if (ext4_should_dioread_nolock(inode))
ret = ext4_block_write_begin(folio, pos, len,
- ext4_get_block_unwritten);
+ ext4_get_block_unwritten, fsdata);
else
- ret = ext4_block_write_begin(folio, pos, len, ext4_get_block);
+ ret = ext4_block_write_begin(folio, pos, len, ext4_get_block,
+ fsdata);
#else
if (ext4_should_dioread_nolock(inode))
ret = __block_write_begin(&folio->page, pos, len,
@@ -1239,10 +1237,9 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry_journal;
folio_put(folio);
- return ret;
+ return ERR_PTR(ret);
}
- *pagep = &folio->page;
- return ret;
+ return folio;
}

/* For write_end() in data=journal mode */
@@ -1266,12 +1263,10 @@ static int write_end_fn(handle_t *handle, struct inode *inode,
* ext4 never places buffers on inode->i_mapping->i_private_list. metadata
* buffers are managed internally.
*/
-static int ext4_write_end(struct file *file,
- struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+static size_t ext4_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, size_t len, size_t copied, struct folio *folio,
+ void **fsdata)
{
- struct folio *folio = page_folio(page);
handle_t *handle = ext4_journal_current_handle();
struct inode *inode = mapping->host;
loff_t old_size = inode->i_size;
@@ -1286,7 +1281,7 @@ static int ext4_write_end(struct file *file,
return ext4_write_inline_data_end(inode, pos, len, copied,
folio);

- copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+ copied = __buffer_write_end(file, mapping, pos, len, copied, folio);
/*
* it's important to update i_size while still holding folio lock:
* page writeout could otherwise come in and zero beyond i_size.
@@ -1370,12 +1365,10 @@ static void ext4_journalled_zero_new_buffers(handle_t *handle,
} while (bh != head);
}

-static int ext4_journalled_write_end(struct file *file,
- struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+static size_t ext4_journalled_write_end(struct file *file,
+ struct address_space *mapping, loff_t pos, size_t len,
+ size_t copied, struct folio *folio, void **fsdata)
{
- struct folio *folio = page_folio(page);
handle_t *handle = ext4_journal_current_handle();
struct inode *inode = mapping->host;
loff_t old_size = inode->i_size;
@@ -2816,7 +2809,7 @@ static int ext4_dax_writepages(struct address_space *mapping,
return ret;
}

-static int ext4_nonda_switch(struct super_block *sb)
+int ext4_nonda_switch(struct super_block *sb)
{
s64 free_clusters, dirty_clusters;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2850,45 +2843,35 @@ static int ext4_nonda_switch(struct super_block *sb)
return 0;
}

-static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+static struct folio *ext4_da_write_begin(struct file *file,
+ struct address_space *mapping, loff_t pos, size_t len,
+ void **fsdata)
{
int ret, retries = 0;
struct folio *folio;
- pgoff_t index;
struct inode *inode = mapping->host;

if (unlikely(ext4_forced_shutdown(inode->i_sb)))
- return -EIO;
+ return ERR_PTR(-EIO);

- index = pos >> PAGE_SHIFT;
-
- if (ext4_nonda_switch(inode->i_sb) || ext4_verity_in_progress(inode)) {
- *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
- return ext4_write_begin(file, mapping, pos,
- len, pagep, fsdata);
- }
- *fsdata = (void *)0;
trace_ext4_da_write_begin(inode, pos, len);

if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
- ret = ext4_da_write_inline_data_begin(mapping, inode, pos, len,
- pagep, fsdata);
- if (ret < 0)
- return ret;
- if (ret == 1)
- return 0;
+ folio = ext4_da_write_inline_data_begin(mapping, inode, pos,
+ len);
+ if (folio)
+ return folio;
}

retry:
- folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ folio = __filemap_get_folio(mapping, pos / PAGE_SIZE, FGP_WRITEBEGIN,
mapping_gfp_mask(mapping));
if (IS_ERR(folio))
- return PTR_ERR(folio);
+ return folio;

#ifdef CONFIG_FS_ENCRYPTION
- ret = ext4_block_write_begin(folio, pos, len, ext4_da_get_block_prep);
+ ret = ext4_block_write_begin(folio, pos, len, ext4_da_get_block_prep,
+ fsdata);
#else
ret = __block_write_begin(&folio->page, pos, len, ext4_da_get_block_prep);
#endif
@@ -2906,11 +2889,10 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
if (ret == -ENOSPC &&
ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
- return ret;
+ return ERR_PTR(ret);
}

- *pagep = &folio->page;
- return ret;
+ return folio;
}

/*
@@ -2936,9 +2918,8 @@ static int ext4_da_should_update_i_disksize(struct folio *folio,
return 1;
}

-static int ext4_da_do_write_end(struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct folio *folio)
+static size_t ext4_da_do_write_end(struct address_space *mapping, loff_t pos,
+ size_t len, size_t copied, struct folio *folio)
{
struct inode *inode = mapping->host;
loff_t old_size = inode->i_size;
@@ -2998,23 +2979,15 @@ static int ext4_da_do_write_end(struct address_space *mapping,
return copied;
}

-static int ext4_da_write_end(struct file *file,
- struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+static size_t ext4_da_write_end(struct file *file,
+ struct address_space *mapping, loff_t pos, size_t len,
+ size_t copied, struct folio *folio, void **fsdata)
{
struct inode *inode = mapping->host;
- int write_mode = (int)(unsigned long)fsdata;
- struct folio *folio = page_folio(page);
-
- if (write_mode == FALL_BACK_TO_NONDELALLOC)
- return ext4_write_end(file, mapping, pos,
- len, copied, &folio->page, fsdata);

trace_ext4_da_write_end(inode, pos, len, copied);

- if (write_mode != CONVERT_INLINE_DATA &&
- ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) &&
+ if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) &&
ext4_has_inline_data(inode))
return ext4_write_inline_data_end(inode, pos, len, copied,
folio);
@@ -3521,8 +3494,6 @@ static const struct address_space_operations ext4_aops = {
.read_folio = ext4_read_folio,
.readahead = ext4_readahead,
.writepages = ext4_writepages,
- .write_begin = ext4_write_begin,
- .write_end = ext4_write_end,
.dirty_folio = ext4_dirty_folio,
.bmap = ext4_bmap,
.invalidate_folio = ext4_invalidate_folio,
@@ -3537,8 +3508,6 @@ static const struct address_space_operations ext4_journalled_aops = {
.read_folio = ext4_read_folio,
.readahead = ext4_readahead,
.writepages = ext4_writepages,
- .write_begin = ext4_write_begin,
- .write_end = ext4_journalled_write_end,
.dirty_folio = ext4_journalled_dirty_folio,
.bmap = ext4_bmap,
.invalidate_folio = ext4_journalled_invalidate_folio,
@@ -3553,8 +3522,6 @@ static const struct address_space_operations ext4_da_aops = {
.read_folio = ext4_read_folio,
.readahead = ext4_readahead,
.writepages = ext4_writepages,
- .write_begin = ext4_da_write_begin,
- .write_end = ext4_da_write_end,
.dirty_folio = ext4_dirty_folio,
.bmap = ext4_bmap,
.invalidate_folio = ext4_invalidate_folio,
@@ -3572,6 +3539,21 @@ static const struct address_space_operations ext4_dax_aops = {
.swap_activate = ext4_iomap_swap_activate,
};

+const struct buffered_write_operations ext4_bw_ops = {
+ .write_begin = ext4_write_begin,
+ .write_end = ext4_write_end,
+};
+
+const struct buffered_write_operations ext4_journalled_bw_ops = {
+ .write_begin = ext4_write_begin,
+ .write_end = ext4_journalled_write_end,
+};
+
+const struct buffered_write_operations ext4_da_bw_ops = {
+ .write_begin = ext4_da_write_begin,
+ .write_end = ext4_da_write_end,
+};
+
void ext4_set_aops(struct inode *inode)
{
switch (ext4_inode_journal_mode(inode)) {
--
2.43.0


2024-05-28 18:16:05

by Matthew Wilcox

[permalink] [raw]
Subject: [PATCH 3/7] buffer: Add buffer_write_begin, buffer_write_end and __buffer_write_end

These functions are to be called from filesystem implementations of
write_begin and write_end. They correspond to block_write_begin,
generic_write_end and block_write_end. The old functions need to
be kept around as they're used as function pointers.

Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
---
fs/buffer.c | 80 ++++++++++++++++++++++++++-----------
include/linux/buffer_head.h | 6 +++
2 files changed, 63 insertions(+), 23 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 58ac52f20bf6..4064b21fe499 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2217,39 +2217,55 @@ static void __block_commit_write(struct folio *folio, size_t from, size_t to)
}

/*
- * block_write_begin takes care of the basic task of block allocation and
+ * buffer_write_begin - Helper for filesystem write_begin implementations
+ * @mapping: Address space being written to.
+ * @pos: Position in bytes within the file.
+ * @len: Number of bytes being written.
+ * @get_block: How to get buffer_heads for this filesystem.
+ *
+ * Take care of the basic task of block allocation and
* bringing partial write blocks uptodate first.
*
* The filesystem needs to handle block truncation upon failure.
+ *
+ * Return: The folio to write to, or an ERR_PTR on failure.
*/
-int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
- struct page **pagep, get_block_t *get_block)
+struct folio *buffer_write_begin(struct address_space *mapping, loff_t pos,
+ size_t len, get_block_t *get_block)
{
- pgoff_t index = pos >> PAGE_SHIFT;
- struct page *page;
+ struct folio *folio = __filemap_get_folio(mapping, pos / PAGE_SIZE,
+ FGP_WRITEBEGIN, mapping_gfp_mask(mapping));
int status;

- page = grab_cache_page_write_begin(mapping, index);
- if (!page)
- return -ENOMEM;
+ if (IS_ERR(folio))
+ return folio;

- status = __block_write_begin(page, pos, len, get_block);
+ status = __block_write_begin_int(folio, pos, len, get_block, NULL);
if (unlikely(status)) {
- unlock_page(page);
- put_page(page);
- page = NULL;
+ folio_unlock(folio);
+ folio_put(folio);
+ folio = ERR_PTR(status);
}

- *pagep = page;
- return status;
+ return folio;
+}
+EXPORT_SYMBOL(buffer_write_begin);
+
+int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
+ struct page **pagep, get_block_t *get_block)
+{
+ struct folio *folio = buffer_write_begin(mapping, pos, len, get_block);
+
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ *pagep = &folio->page;
+ return 0;
}
EXPORT_SYMBOL(block_write_begin);

-int block_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+size_t __buffer_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, size_t len, size_t copied, struct folio *folio)
{
- struct folio *folio = page_folio(page);
size_t start = pos - folio_pos(folio);

if (unlikely(copied < len)) {
@@ -2277,17 +2293,26 @@ int block_write_end(struct file *file, struct address_space *mapping,

return copied;
}
-EXPORT_SYMBOL(block_write_end);
+EXPORT_SYMBOL(__buffer_write_end);

-int generic_write_end(struct file *file, struct address_space *mapping,
+int block_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
+{
+ return buffer_write_end(file, mapping, pos, len, copied,
+ page_folio(page));
+}
+EXPORT_SYMBOL(block_write_end);
+
+size_t buffer_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, size_t len, size_t copied,
+ struct folio *folio)
{
struct inode *inode = mapping->host;
loff_t old_size = inode->i_size;
bool i_size_changed = false;

- copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+ copied = __buffer_write_end(file, mapping, pos, len, copied, folio);

/*
* No need to use i_size_read() here, the i_size cannot change under us
@@ -2301,8 +2326,8 @@ int generic_write_end(struct file *file, struct address_space *mapping,
i_size_changed = true;
}

- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);

if (old_size < pos)
pagecache_isize_extended(inode, old_size, pos);
@@ -2316,6 +2341,15 @@ int generic_write_end(struct file *file, struct address_space *mapping,
mark_inode_dirty(inode);
return copied;
}
+EXPORT_SYMBOL(buffer_write_end);
+
+int generic_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
+{
+ return buffer_write_end(file, mapping, pos, len, copied,
+ page_folio(page));
+}
EXPORT_SYMBOL(generic_write_end);

/*
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 1b7f14e39ab8..44e4b2b18cc0 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -267,6 +267,12 @@ int block_write_end(struct file *, struct address_space *,
int generic_write_end(struct file *, struct address_space *,
loff_t, unsigned, unsigned,
struct page *, void *);
+struct folio *buffer_write_begin(struct address_space *mapping, loff_t pos,
+ size_t len, get_block_t *get_block);
+size_t buffer_write_end(struct file *, struct address_space *, loff_t pos,
+ size_t len, size_t copied, struct folio *);
+size_t __buffer_write_end(struct file *, struct address_space *, loff_t pos,
+ size_t len, size_t copied, struct folio *);
void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to);
int cont_write_begin(struct file *, struct address_space *, loff_t pos,
unsigned len, struct page **, void **fsdata, get_block_t *,
--
2.43.0


2024-05-28 18:16:06

by Matthew Wilcox

[permalink] [raw]
Subject: [PATCH 4/7] fs: Add filemap_symlink()

This is the equivalent of page_symlink() but takes a
buffered_write_operations structure. It also doesn't handle GFP_NOFS
for you; if you need that, use memalloc_nofs_save() yourself.

Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
---
fs/namei.c | 25 +++++++++++++++++++++++++
include/linux/pagemap.h | 2 ++
2 files changed, 27 insertions(+)

diff --git a/fs/namei.c b/fs/namei.c
index 37fb0a8aa09a..4352206b0408 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -5255,6 +5255,31 @@ int page_symlink(struct inode *inode, const char *symname, int len)
}
EXPORT_SYMBOL(page_symlink);

+int filemap_symlink(struct inode *inode, const char *symname, int len,
+ const struct buffered_write_operations *ops, void **fsdata)
+{
+ struct address_space *mapping = inode->i_mapping;
+ struct folio *folio;
+ int err;
+
+retry:
+ folio = ops->write_begin(NULL, mapping, 0, len-1, fsdata);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+
+ memcpy(folio_address(folio), symname, len-1);
+
+ err = ops->write_end(NULL, mapping, 0, len-1, len-1, folio, fsdata);
+ if (err < 0)
+ return err;
+ if (err < len-1)
+ goto retry;
+
+ mark_inode_dirty(inode);
+ return 0;
+}
+EXPORT_SYMBOL(filemap_symlink);
+
const struct inode_operations page_symlink_inode_operations = {
.get_link = page_get_link,
};
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2921c1cc6335..a7540f757368 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -39,6 +39,8 @@ ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
#define __generic_file_write_iter(kiocb, iter) \
__filemap_write_iter(kiocb, iter, NULL, NULL)

+int filemap_symlink(struct inode *inode, const char *symname, int len,
+ const struct buffered_write_operations *bw, void **fsdata);
unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end);

--
2.43.0


2024-05-28 23:34:30

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH 7/7] iomap: Return the folio from iomap_write_begin()

Hi Matthew,

kernel test robot noticed the following build warnings:

[auto build test WARNING on linus/master]
[also build test WARNING on v6.10-rc1 next-20240528]
[cannot apply to tytso-ext4/dev jack-fs/for_next hch-configfs/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Matthew-Wilcox-Oracle/fs-Introduce-buffered_write_operations/20240529-005213
base: linus/master
patch link: https://lore.kernel.org/r/20240528164829.2105447-8-willy%40infradead.org
patch subject: [PATCH 7/7] iomap: Return the folio from iomap_write_begin()
config: hexagon-allnoconfig (https://download.01.org/0day-ci/archive/20240529/[email protected]/config)
compiler: clang version 19.0.0git (https://github.com/llvm/llvm-project bafda89a0944d947fc4b3b5663185e07a397ac30)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240529/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All warnings (new ones prefixed by >>):

In file included from fs/iomap/buffered-io.c:9:
In file included from include/linux/iomap.h:7:
In file included from include/linux/blk_types.h:10:
In file included from include/linux/bvec.h:10:
In file included from include/linux/highmem.h:10:
In file included from include/linux/mm.h:2253:
include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
514 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
| ~~~~~~~~~~~ ^ ~~~
In file included from fs/iomap/buffered-io.c:9:
In file included from include/linux/iomap.h:7:
In file included from include/linux/blk_types.h:10:
In file included from include/linux/bvec.h:10:
In file included from include/linux/highmem.h:12:
In file included from include/linux/hardirq.h:11:
In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:14:
In file included from arch/hexagon/include/asm/io.h:328:
include/asm-generic/io.h:548:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
548 | val = __raw_readb(PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:561:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
561 | val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
| ~~~~~~~~~~ ^
include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
37 | #define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
| ^
In file included from fs/iomap/buffered-io.c:9:
In file included from include/linux/iomap.h:7:
In file included from include/linux/blk_types.h:10:
In file included from include/linux/bvec.h:10:
In file included from include/linux/highmem.h:12:
In file included from include/linux/hardirq.h:11:
In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:14:
In file included from arch/hexagon/include/asm/io.h:328:
include/asm-generic/io.h:574:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
574 | val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
| ~~~~~~~~~~ ^
include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
35 | #define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
| ^
In file included from fs/iomap/buffered-io.c:9:
In file included from include/linux/iomap.h:7:
In file included from include/linux/blk_types.h:10:
In file included from include/linux/bvec.h:10:
In file included from include/linux/highmem.h:12:
In file included from include/linux/hardirq.h:11:
In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:14:
In file included from arch/hexagon/include/asm/io.h:328:
include/asm-generic/io.h:585:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
585 | __raw_writeb(value, PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:595:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
595 | __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:605:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
605 | __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
>> fs/iomap/buffered-io.c:802:7: warning: variable 'status' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
802 | if (!iomap_valid) {
| ^~~~~~~~~~~~
fs/iomap/buffered-io.c:826:17: note: uninitialized use occurs here
826 | return ERR_PTR(status);
| ^~~~~~
fs/iomap/buffered-io.c:802:3: note: remove the 'if' if its condition is always false
802 | if (!iomap_valid) {
| ^~~~~~~~~~~~~~~~~~~
803 | iter->iomap.flags |= IOMAP_F_STALE;
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
804 | goto out_unlock;
| ~~~~~~~~~~~~~~~~
805 | }
| ~
fs/iomap/buffered-io.c:773:12: note: initialize the variable 'status' to silence this warning
773 | int status;
| ^
| = 0
8 warnings generated.


vim +802 fs/iomap/buffered-io.c

69f4a26c1e0c7c Gao Xiang 2021-08-03 766
07d07cfe38427e Matthew Wilcox (Oracle 2024-05-28 767) static struct folio *iomap_write_begin(struct iomap_iter *iter, loff_t pos,
07d07cfe38427e Matthew Wilcox (Oracle 2024-05-28 768) size_t len)
afc51aaa22f26c Darrick J. Wong 2019-07-15 769 {
471859f57d4253 Andreas Gruenbacher 2023-01-15 770 const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
fad0a1ab34f777 Christoph Hellwig 2021-08-10 771 const struct iomap *srcmap = iomap_iter_srcmap(iter);
d1bd0b4ebfe052 Matthew Wilcox (Oracle 2021-11-03 772) struct folio *folio;
07d07cfe38427e Matthew Wilcox (Oracle 2024-05-28 773) int status;
afc51aaa22f26c Darrick J. Wong 2019-07-15 774
1b5c1e36dc0e0f Christoph Hellwig 2021-08-10 775 BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
1b5c1e36dc0e0f Christoph Hellwig 2021-08-10 776 if (srcmap != &iter->iomap)
c039b997927263 Goldwyn Rodrigues 2019-10-18 777 BUG_ON(pos + len > srcmap->offset + srcmap->length);
afc51aaa22f26c Darrick J. Wong 2019-07-15 778
afc51aaa22f26c Darrick J. Wong 2019-07-15 779 if (fatal_signal_pending(current))
07d07cfe38427e Matthew Wilcox (Oracle 2024-05-28 780) return ERR_PTR(-EINTR);
afc51aaa22f26c Darrick J. Wong 2019-07-15 781
d454ab82bc7f4a Matthew Wilcox (Oracle 2021-12-09 782) if (!mapping_large_folio_support(iter->inode->i_mapping))
d454ab82bc7f4a Matthew Wilcox (Oracle 2021-12-09 783) len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));
d454ab82bc7f4a Matthew Wilcox (Oracle 2021-12-09 784)
07c22b56685dd7 Andreas Gruenbacher 2023-01-15 785 folio = __iomap_get_folio(iter, pos, len);
9060bc4d3aca61 Andreas Gruenbacher 2023-01-15 786 if (IS_ERR(folio))
07d07cfe38427e Matthew Wilcox (Oracle 2024-05-28 787) return folio;
d7b64041164ca1 Dave Chinner 2022-11-29 788
d7b64041164ca1 Dave Chinner 2022-11-29 789 /*
d7b64041164ca1 Dave Chinner 2022-11-29 790 * Now we have a locked folio, before we do anything with it we need to
d7b64041164ca1 Dave Chinner 2022-11-29 791 * check that the iomap we have cached is not stale. The inode extent
d7b64041164ca1 Dave Chinner 2022-11-29 792 * mapping can change due to concurrent IO in flight (e.g.
d7b64041164ca1 Dave Chinner 2022-11-29 793 * IOMAP_UNWRITTEN state can change and memory reclaim could have
d7b64041164ca1 Dave Chinner 2022-11-29 794 * reclaimed a previously partially written page at this index after IO
d7b64041164ca1 Dave Chinner 2022-11-29 795 * completion before this write reaches this file offset) and hence we
d7b64041164ca1 Dave Chinner 2022-11-29 796 * could do the wrong thing here (zero a page range incorrectly or fail
d7b64041164ca1 Dave Chinner 2022-11-29 797 * to zero) and corrupt data.
d7b64041164ca1 Dave Chinner 2022-11-29 798 */
471859f57d4253 Andreas Gruenbacher 2023-01-15 799 if (folio_ops && folio_ops->iomap_valid) {
471859f57d4253 Andreas Gruenbacher 2023-01-15 800 bool iomap_valid = folio_ops->iomap_valid(iter->inode,
d7b64041164ca1 Dave Chinner 2022-11-29 801 &iter->iomap);
d7b64041164ca1 Dave Chinner 2022-11-29 @802 if (!iomap_valid) {
d7b64041164ca1 Dave Chinner 2022-11-29 803 iter->iomap.flags |= IOMAP_F_STALE;
d7b64041164ca1 Dave Chinner 2022-11-29 804 goto out_unlock;
d7b64041164ca1 Dave Chinner 2022-11-29 805 }
d7b64041164ca1 Dave Chinner 2022-11-29 806 }
d7b64041164ca1 Dave Chinner 2022-11-29 807
d454ab82bc7f4a Matthew Wilcox (Oracle 2021-12-09 808) if (pos + len > folio_pos(folio) + folio_size(folio))
d454ab82bc7f4a Matthew Wilcox (Oracle 2021-12-09 809) len = folio_pos(folio) + folio_size(folio) - pos;
afc51aaa22f26c Darrick J. Wong 2019-07-15 810
c039b997927263 Goldwyn Rodrigues 2019-10-18 811 if (srcmap->type == IOMAP_INLINE)
bc6123a84a71b5 Matthew Wilcox (Oracle 2021-05-02 812) status = iomap_write_begin_inline(iter, folio);
1b5c1e36dc0e0f Christoph Hellwig 2021-08-10 813 else if (srcmap->flags & IOMAP_F_BUFFER_HEAD)
d1bd0b4ebfe052 Matthew Wilcox (Oracle 2021-11-03 814) status = __block_write_begin_int(folio, pos, len, NULL, srcmap);
afc51aaa22f26c Darrick J. Wong 2019-07-15 815 else
bc6123a84a71b5 Matthew Wilcox (Oracle 2021-05-02 816) status = __iomap_write_begin(iter, pos, len, folio);
afc51aaa22f26c Darrick J. Wong 2019-07-15 817
afc51aaa22f26c Darrick J. Wong 2019-07-15 818 if (unlikely(status))
afc51aaa22f26c Darrick J. Wong 2019-07-15 819 goto out_unlock;
afc51aaa22f26c Darrick J. Wong 2019-07-15 820
07d07cfe38427e Matthew Wilcox (Oracle 2024-05-28 821) return folio;
afc51aaa22f26c Darrick J. Wong 2019-07-15 822
afc51aaa22f26c Darrick J. Wong 2019-07-15 823 out_unlock:
7a70a5085ed028 Andreas Gruenbacher 2023-01-15 824 __iomap_put_folio(iter, pos, 0, folio);
afc51aaa22f26c Darrick J. Wong 2019-07-15 825
07d07cfe38427e Matthew Wilcox (Oracle 2024-05-28 826) return ERR_PTR(status);
afc51aaa22f26c Darrick J. Wong 2019-07-15 827 }
afc51aaa22f26c Darrick J. Wong 2019-07-15 828

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2024-05-28 23:43:22

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH 6/7] ext4: Convert to buffered_write_operations

Hi Matthew,

kernel test robot noticed the following build warnings:

[auto build test WARNING on linus/master]
[also build test WARNING on v6.10-rc1 next-20240528]
[cannot apply to tytso-ext4/dev jack-fs/for_next hch-configfs/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Matthew-Wilcox-Oracle/fs-Introduce-buffered_write_operations/20240529-005213
base: linus/master
patch link: https://lore.kernel.org/r/20240528164829.2105447-7-willy%40infradead.org
patch subject: [PATCH 6/7] ext4: Convert to buffered_write_operations
config: hexagon-defconfig (https://download.01.org/0day-ci/archive/20240529/[email protected]/config)
compiler: clang version 19.0.0git (https://github.com/llvm/llvm-project bafda89a0944d947fc4b3b5663185e07a397ac30)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240529/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All warnings (new ones prefixed by >>):

In file included from fs/ext4/inline.c:7:
In file included from include/linux/iomap.h:7:
In file included from include/linux/blk_types.h:10:
In file included from include/linux/bvec.h:10:
In file included from include/linux/highmem.h:10:
In file included from include/linux/mm.h:2253:
include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
514 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
| ~~~~~~~~~~~ ^ ~~~
In file included from fs/ext4/inline.c:7:
In file included from include/linux/iomap.h:7:
In file included from include/linux/blk_types.h:10:
In file included from include/linux/bvec.h:10:
In file included from include/linux/highmem.h:12:
In file included from include/linux/hardirq.h:11:
In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:14:
In file included from arch/hexagon/include/asm/io.h:328:
include/asm-generic/io.h:548:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
548 | val = __raw_readb(PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:561:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
561 | val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
| ~~~~~~~~~~ ^
include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
37 | #define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
| ^
In file included from fs/ext4/inline.c:7:
In file included from include/linux/iomap.h:7:
In file included from include/linux/blk_types.h:10:
In file included from include/linux/bvec.h:10:
In file included from include/linux/highmem.h:12:
In file included from include/linux/hardirq.h:11:
In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:14:
In file included from arch/hexagon/include/asm/io.h:328:
include/asm-generic/io.h:574:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
574 | val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
| ~~~~~~~~~~ ^
include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
35 | #define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
| ^
In file included from fs/ext4/inline.c:7:
In file included from include/linux/iomap.h:7:
In file included from include/linux/blk_types.h:10:
In file included from include/linux/bvec.h:10:
In file included from include/linux/highmem.h:12:
In file included from include/linux/hardirq.h:11:
In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:14:
In file included from arch/hexagon/include/asm/io.h:328:
include/asm-generic/io.h:585:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
585 | __raw_writeb(value, PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:595:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
595 | __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:605:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
605 | __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
>> fs/ext4/inline.c:914:7: warning: variable 'folio' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
914 | if (ret == -ENOSPC &&
| ^~~~~~~~~~~~~~~~~
915 | ext4_should_retry_alloc(inode->i_sb, &retries))
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
fs/ext4/inline.c:956:9: note: uninitialized use occurs here
956 | return folio;
| ^~~~~
fs/ext4/inline.c:914:3: note: remove the 'if' if its condition is always true
914 | if (ret == -ENOSPC &&
| ^~~~~~~~~~~~~~~~~~~~~
915 | ext4_should_retry_alloc(inode->i_sb, &retries))
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
916 | goto retry_journal;
>> fs/ext4/inline.c:914:7: warning: variable 'folio' is used uninitialized whenever '&&' condition is false [-Wsometimes-uninitialized]
914 | if (ret == -ENOSPC &&
| ^~~~~~~~~~~~~~
fs/ext4/inline.c:956:9: note: uninitialized use occurs here
956 | return folio;
| ^~~~~
fs/ext4/inline.c:914:7: note: remove the '&&' if its condition is always true
914 | if (ret == -ENOSPC &&
| ^~~~~~~~~~~~~~~~~
>> fs/ext4/inline.c:907:6: warning: variable 'folio' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
907 | if (ret && ret != -ENOSPC)
| ^~~~~~~~~~~~~~~~~~~~~
fs/ext4/inline.c:956:9: note: uninitialized use occurs here
956 | return folio;
| ^~~~~
fs/ext4/inline.c:907:2: note: remove the 'if' if its condition is always false
907 | if (ret && ret != -ENOSPC)
| ^~~~~~~~~~~~~~~~~~~~~~~~~~
908 | goto out_journal;
| ~~~~~~~~~~~~~~~~
fs/ext4/inline.c:901:6: warning: variable 'folio' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
901 | if (IS_ERR(handle)) {
| ^~~~~~~~~~~~~~
fs/ext4/inline.c:956:9: note: uninitialized use occurs here
956 | return folio;
| ^~~~~
fs/ext4/inline.c:901:2: note: remove the 'if' if its condition is always false
901 | if (IS_ERR(handle)) {
| ^~~~~~~~~~~~~~~~~~~~~
902 | ret = PTR_ERR(handle);
| ~~~~~~~~~~~~~~~~~~~~~~
903 | goto out;
| ~~~~~~~~~
904 | }
| ~
fs/ext4/inline.c:891:21: note: initialize the variable 'folio' to silence this warning
891 | struct folio *folio;
| ^
| = NULL
11 warnings generated.


vim +914 fs/ext4/inline.c

9c3569b50f12e47 Tao Ma 2012-12-10 877
9c3569b50f12e47 Tao Ma 2012-12-10 878 /*
9c3569b50f12e47 Tao Ma 2012-12-10 879 * Prepare the write for the inline data.
8d6ce136790268f Shijie Luo 2020-01-23 880 * If the data can be written into the inode, we just read
9c3569b50f12e47 Tao Ma 2012-12-10 881 * the page and make it uptodate, and start the journal.
9c3569b50f12e47 Tao Ma 2012-12-10 882 * Otherwise read the page, makes it dirty so that it can be
9c3569b50f12e47 Tao Ma 2012-12-10 883 * handle in writepages(the i_disksize update is left to the
9c3569b50f12e47 Tao Ma 2012-12-10 884 * normal ext4_da_write_end).
9c3569b50f12e47 Tao Ma 2012-12-10 885 */
8ca000469995a1f Matthew Wilcox (Oracle 2024-05-28 886) struct folio *ext4_da_write_inline_data_begin(struct address_space *mapping,
8ca000469995a1f Matthew Wilcox (Oracle 2024-05-28 887) struct inode *inode, loff_t pos, size_t len)
9c3569b50f12e47 Tao Ma 2012-12-10 888 {
09355d9d038a159 Ritesh Harjani 2022-01-17 889 int ret;
9c3569b50f12e47 Tao Ma 2012-12-10 890 handle_t *handle;
9a9d01f081ea29a Matthew Wilcox 2023-03-24 891 struct folio *folio;
9c3569b50f12e47 Tao Ma 2012-12-10 892 struct ext4_iloc iloc;
625ef8a3acd111d Lukas Czerner 2018-10-02 893 int retries = 0;
9c3569b50f12e47 Tao Ma 2012-12-10 894
9c3569b50f12e47 Tao Ma 2012-12-10 895 ret = ext4_get_inode_loc(inode, &iloc);
9c3569b50f12e47 Tao Ma 2012-12-10 896 if (ret)
8ca000469995a1f Matthew Wilcox (Oracle 2024-05-28 897) return ERR_PTR(ret);
9c3569b50f12e47 Tao Ma 2012-12-10 898
bc0ca9df3b2abb1 Jan Kara 2014-01-06 899 retry_journal:
9924a92a8c21757 Theodore Ts'o 2013-02-08 900 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
9c3569b50f12e47 Tao Ma 2012-12-10 901 if (IS_ERR(handle)) {
9c3569b50f12e47 Tao Ma 2012-12-10 902 ret = PTR_ERR(handle);
9c3569b50f12e47 Tao Ma 2012-12-10 903 goto out;
9c3569b50f12e47 Tao Ma 2012-12-10 904 }
9c3569b50f12e47 Tao Ma 2012-12-10 905
9c3569b50f12e47 Tao Ma 2012-12-10 906 ret = ext4_prepare_inline_data(handle, inode, pos + len);
9c3569b50f12e47 Tao Ma 2012-12-10 @907 if (ret && ret != -ENOSPC)
52e4477758eef45 Jan Kara 2014-01-06 908 goto out_journal;
9c3569b50f12e47 Tao Ma 2012-12-10 909
9c3569b50f12e47 Tao Ma 2012-12-10 910 if (ret == -ENOSPC) {
8bc1379b82b8e80 Theodore Ts'o 2018-06-16 911 ext4_journal_stop(handle);
9c3569b50f12e47 Tao Ma 2012-12-10 912 ret = ext4_da_convert_inline_data_to_extent(mapping,
8ca000469995a1f Matthew Wilcox (Oracle 2024-05-28 913) inode);
bc0ca9df3b2abb1 Jan Kara 2014-01-06 @914 if (ret == -ENOSPC &&
bc0ca9df3b2abb1 Jan Kara 2014-01-06 915 ext4_should_retry_alloc(inode->i_sb, &retries))
bc0ca9df3b2abb1 Jan Kara 2014-01-06 916 goto retry_journal;
9c3569b50f12e47 Tao Ma 2012-12-10 917 goto out;
9c3569b50f12e47 Tao Ma 2012-12-10 918 }
9c3569b50f12e47 Tao Ma 2012-12-10 919
36d116e99da7e45 Matthew Wilcox (Oracle 2022-02-22 920) /*
36d116e99da7e45 Matthew Wilcox (Oracle 2022-02-22 921) * We cannot recurse into the filesystem as the transaction
36d116e99da7e45 Matthew Wilcox (Oracle 2022-02-22 922) * is already started.
36d116e99da7e45 Matthew Wilcox (Oracle 2022-02-22 923) */
9a9d01f081ea29a Matthew Wilcox 2023-03-24 924 folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
9a9d01f081ea29a Matthew Wilcox 2023-03-24 925 mapping_gfp_mask(mapping));
8ca000469995a1f Matthew Wilcox (Oracle 2024-05-28 926) if (IS_ERR(folio))
52e4477758eef45 Jan Kara 2014-01-06 927 goto out_journal;
9c3569b50f12e47 Tao Ma 2012-12-10 928
9c3569b50f12e47 Tao Ma 2012-12-10 929 down_read(&EXT4_I(inode)->xattr_sem);
9c3569b50f12e47 Tao Ma 2012-12-10 930 if (!ext4_has_inline_data(inode)) {
9c3569b50f12e47 Tao Ma 2012-12-10 931 ret = 0;
9c3569b50f12e47 Tao Ma 2012-12-10 932 goto out_release_page;
9c3569b50f12e47 Tao Ma 2012-12-10 933 }
9c3569b50f12e47 Tao Ma 2012-12-10 934
9a9d01f081ea29a Matthew Wilcox 2023-03-24 935 if (!folio_test_uptodate(folio)) {
6b87fbe4155007c Matthew Wilcox 2023-03-24 936 ret = ext4_read_inline_folio(inode, folio);
9c3569b50f12e47 Tao Ma 2012-12-10 937 if (ret < 0)
9c3569b50f12e47 Tao Ma 2012-12-10 938 goto out_release_page;
9c3569b50f12e47 Tao Ma 2012-12-10 939 }
188c299e2a26cc3 Jan Kara 2021-08-16 940 ret = ext4_journal_get_write_access(handle, inode->i_sb, iloc.bh,
188c299e2a26cc3 Jan Kara 2021-08-16 941 EXT4_JTR_NONE);
362eca70b53389b Theodore Ts'o 2018-07-10 942 if (ret)
362eca70b53389b Theodore Ts'o 2018-07-10 943 goto out_release_page;
9c3569b50f12e47 Tao Ma 2012-12-10 944
9c3569b50f12e47 Tao Ma 2012-12-10 945 up_read(&EXT4_I(inode)->xattr_sem);
8ca000469995a1f Matthew Wilcox (Oracle 2024-05-28 946) goto out;
9c3569b50f12e47 Tao Ma 2012-12-10 947 out_release_page:
9c3569b50f12e47 Tao Ma 2012-12-10 948 up_read(&EXT4_I(inode)->xattr_sem);
9a9d01f081ea29a Matthew Wilcox 2023-03-24 949 folio_unlock(folio);
9a9d01f081ea29a Matthew Wilcox 2023-03-24 950 folio_put(folio);
8ca000469995a1f Matthew Wilcox (Oracle 2024-05-28 951) folio = ERR_PTR(ret);
52e4477758eef45 Jan Kara 2014-01-06 952 out_journal:
9c3569b50f12e47 Tao Ma 2012-12-10 953 ext4_journal_stop(handle);
52e4477758eef45 Jan Kara 2014-01-06 954 out:
9c3569b50f12e47 Tao Ma 2012-12-10 955 brelse(iloc.bh);
8ca000469995a1f Matthew Wilcox (Oracle 2024-05-28 956) return folio;
9c3569b50f12e47 Tao Ma 2012-12-10 957 }
9c3569b50f12e47 Tao Ma 2012-12-10 958

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2024-05-28 23:45:11

by Dave Chinner

[permalink] [raw]
Subject: Re: [PATCH 7/7] iomap: Return the folio from iomap_write_begin()

On Tue, May 28, 2024 at 05:48:28PM +0100, Matthew Wilcox (Oracle) wrote:
> Use an ERR_PTR to return any error that may have occurred, otherwise
> return the folio directly instead of returning it by reference. This
> mirrors changes which are going into the filemap ->write_begin callbacks.
>
> Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
> ---
> fs/iomap/buffered-io.c | 35 ++++++++++++++++-------------------
> 1 file changed, 16 insertions(+), 19 deletions(-)
>
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index c5802a459334..f0c40ac425ce 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -764,27 +764,27 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter,
> return iomap_read_inline_data(iter, folio);
> }
>
> -static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
> - size_t len, struct folio **foliop)
> +static struct folio *iomap_write_begin(struct iomap_iter *iter, loff_t pos,
> + size_t len)
> {
> const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
> const struct iomap *srcmap = iomap_iter_srcmap(iter);
> struct folio *folio;
> - int status = 0;
> + int status;

Uninitialised return value.

>
> BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
> if (srcmap != &iter->iomap)
> BUG_ON(pos + len > srcmap->offset + srcmap->length);
>
> if (fatal_signal_pending(current))
> - return -EINTR;
> + return ERR_PTR(-EINTR);
>
> if (!mapping_large_folio_support(iter->inode->i_mapping))
> len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));
>
> folio = __iomap_get_folio(iter, pos, len);
> if (IS_ERR(folio))
> - return PTR_ERR(folio);
> + return folio;
>
> /*
> * Now we have a locked folio, before we do anything with it we need to
> @@ -801,7 +801,6 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
> &iter->iomap);
> if (!iomap_valid) {
> iter->iomap.flags |= IOMAP_F_STALE;
> - status = 0;
> goto out_unlock;
> }
> }

That looks wrong - status is now uninitialised when we jump to
the error handling. This case needs to return "no error, no folio"
so that the caller can detect the IOMAP_F_STALE flag and do the
right thing....

> @@ -819,13 +818,12 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
> if (unlikely(status))
> goto out_unlock;
>
> - *foliop = folio;
> - return 0;
> + return folio;
>
> out_unlock:
> __iomap_put_folio(iter, pos, 0, folio);
>
> - return status;
> + return ERR_PTR(status);

This returns the uninitialised status value....

> }
>
> static bool __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
> @@ -940,9 +938,10 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
> break;
> }
>
> - status = iomap_write_begin(iter, pos, bytes, &folio);
> - if (unlikely(status)) {
> + folio = iomap_write_begin(iter, pos, bytes);
> + if (IS_ERR(folio)) {
> iomap_write_failed(iter->inode, pos, bytes);
> + status = PTR_ERR(folio);
> break;
> }
> if (iter->iomap.flags & IOMAP_F_STALE)

So this will now fail the write rather than iterating again at the
same offset with a new iomap.

-Dave.
--
Dave Chinner
[email protected]

2024-05-28 23:45:23

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH 1/7] fs: Introduce buffered_write_operations

Hi Matthew,

kernel test robot noticed the following build warnings:

[auto build test WARNING on linus/master]
[also build test WARNING on v6.10-rc1 next-20240528]
[cannot apply to tytso-ext4/dev jack-fs/for_next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Matthew-Wilcox-Oracle/fs-Introduce-buffered_write_operations/20240529-005213
base: linus/master
patch link: https://lore.kernel.org/r/20240528164829.2105447-2-willy%40infradead.org
patch subject: [PATCH 1/7] fs: Introduce buffered_write_operations
config: arm64-allnoconfig (https://download.01.org/0day-ci/archive/20240529/[email protected]/config)
compiler: aarch64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240529/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All warnings (new ones prefixed by >>):

>> mm/filemap.c:4097: warning: Function parameter or struct member 'fsdata' not described in '__filemap_write_iter'
mm/filemap.c:4146: warning: Function parameter or struct member 'ops' not described in 'filemap_write_iter'
>> mm/filemap.c:4146: warning: Function parameter or struct member 'fsdata' not described in 'filemap_write_iter'


vim +4097 mm/filemap.c

^1da177e4c3f41 Linus Torvalds 2005-04-16 4072
e4dd9de3c66bc7 Jan Kara 2009-08-17 4073 /**
1e90da36c016f4 Matthew Wilcox (Oracle 2024-05-28 4074) * __filemap_write_iter - write data to a file
e4dd9de3c66bc7 Jan Kara 2009-08-17 4075 * @iocb: IO state structure (file, offset, etc.)
8174202b34c30e Al Viro 2014-04-03 4076 * @from: iov_iter with data to write
1e90da36c016f4 Matthew Wilcox (Oracle 2024-05-28 4077) * @ops: How to inform the filesystem that a write is starting/finishing.
e4dd9de3c66bc7 Jan Kara 2009-08-17 4078 *
e4dd9de3c66bc7 Jan Kara 2009-08-17 4079 * This function does all the work needed for actually writing data to a
e4dd9de3c66bc7 Jan Kara 2009-08-17 4080 * file. It does all basic checks, removes SUID from the file, updates
e4dd9de3c66bc7 Jan Kara 2009-08-17 4081 * modification times and calls proper subroutines depending on whether we
e4dd9de3c66bc7 Jan Kara 2009-08-17 4082 * do direct IO or a standard buffered write.
e4dd9de3c66bc7 Jan Kara 2009-08-17 4083 *
9608703e488cf7 Jan Kara 2021-04-12 4084 * It expects i_rwsem to be grabbed unless we work on a block device or similar
e4dd9de3c66bc7 Jan Kara 2009-08-17 4085 * object which does not need locking at all.
e4dd9de3c66bc7 Jan Kara 2009-08-17 4086 *
e4dd9de3c66bc7 Jan Kara 2009-08-17 4087 * This function does *not* take care of syncing data in case of O_SYNC write.
e4dd9de3c66bc7 Jan Kara 2009-08-17 4088 * A caller has to handle it. This is mainly due to the fact that we want to
9608703e488cf7 Jan Kara 2021-04-12 4089 * avoid syncing under i_rwsem.
a862f68a8b3600 Mike Rapoport 2019-03-05 4090 *
a862f68a8b3600 Mike Rapoport 2019-03-05 4091 * Return:
a862f68a8b3600 Mike Rapoport 2019-03-05 4092 * * number of bytes written, even for truncated writes
a862f68a8b3600 Mike Rapoport 2019-03-05 4093 * * negative error code if no data has been written at all
e4dd9de3c66bc7 Jan Kara 2009-08-17 4094 */
1e90da36c016f4 Matthew Wilcox (Oracle 2024-05-28 4095) ssize_t __filemap_write_iter(struct kiocb *iocb, struct iov_iter *from,
1e90da36c016f4 Matthew Wilcox (Oracle 2024-05-28 4096) const struct buffered_write_operations *ops, void *fsdata)
^1da177e4c3f41 Linus Torvalds 2005-04-16 @4097 {
^1da177e4c3f41 Linus Torvalds 2005-04-16 4098 struct file *file = iocb->ki_filp;
fb5527e68d4956 Jeff Moyer 2006-10-19 4099 struct address_space *mapping = file->f_mapping;
^1da177e4c3f41 Linus Torvalds 2005-04-16 4100 struct inode *inode = mapping->host;
44fff0fa08ec5a Christoph Hellwig 2023-06-01 4101 ssize_t ret;
^1da177e4c3f41 Linus Torvalds 2005-04-16 4102
44fff0fa08ec5a Christoph Hellwig 2023-06-01 4103 ret = file_remove_privs(file);
44fff0fa08ec5a Christoph Hellwig 2023-06-01 4104 if (ret)
44fff0fa08ec5a Christoph Hellwig 2023-06-01 4105 return ret;
^1da177e4c3f41 Linus Torvalds 2005-04-16 4106
44fff0fa08ec5a Christoph Hellwig 2023-06-01 4107 ret = file_update_time(file);
44fff0fa08ec5a Christoph Hellwig 2023-06-01 4108 if (ret)
44fff0fa08ec5a Christoph Hellwig 2023-06-01 4109 return ret;
^1da177e4c3f41 Linus Torvalds 2005-04-16 4110
2ba48ce513c4e5 Al Viro 2015-04-09 4111 if (iocb->ki_flags & IOCB_DIRECT) {
44fff0fa08ec5a Christoph Hellwig 2023-06-01 4112 ret = generic_file_direct_write(iocb, from);
^1da177e4c3f41 Linus Torvalds 2005-04-16 4113 /*
fbbbad4bc2101e Matthew Wilcox 2015-02-16 4114 * If the write stopped short of completing, fall back to
fbbbad4bc2101e Matthew Wilcox 2015-02-16 4115 * buffered writes. Some filesystems do this for writes to
fbbbad4bc2101e Matthew Wilcox 2015-02-16 4116 * holes, for example. For DAX files, a buffered write will
fbbbad4bc2101e Matthew Wilcox 2015-02-16 4117 * not succeed (even if it did, DAX does not handle dirty
fbbbad4bc2101e Matthew Wilcox 2015-02-16 4118 * page-cache pages correctly).
^1da177e4c3f41 Linus Torvalds 2005-04-16 4119 */
44fff0fa08ec5a Christoph Hellwig 2023-06-01 4120 if (ret < 0 || !iov_iter_count(from) || IS_DAX(inode))
44fff0fa08ec5a Christoph Hellwig 2023-06-01 4121 return ret;
44fff0fa08ec5a Christoph Hellwig 2023-06-01 4122 return direct_write_fallback(iocb, from, ret,
1e90da36c016f4 Matthew Wilcox (Oracle 2024-05-28 4123) filemap_perform_write(iocb, from, ops, fsdata));
fb5527e68d4956 Jeff Moyer 2006-10-19 4124 }
44fff0fa08ec5a Christoph Hellwig 2023-06-01 4125
1e90da36c016f4 Matthew Wilcox (Oracle 2024-05-28 4126) return filemap_perform_write(iocb, from, ops, fsdata);
^1da177e4c3f41 Linus Torvalds 2005-04-16 4127 }
1e90da36c016f4 Matthew Wilcox (Oracle 2024-05-28 4128) EXPORT_SYMBOL(__filemap_write_iter);
^1da177e4c3f41 Linus Torvalds 2005-04-16 4129
e4dd9de3c66bc7 Jan Kara 2009-08-17 4130 /**
1e90da36c016f4 Matthew Wilcox (Oracle 2024-05-28 4131) * filemap_write_iter - write data to a file
e4dd9de3c66bc7 Jan Kara 2009-08-17 4132 * @iocb: IO state structure
8174202b34c30e Al Viro 2014-04-03 4133 * @from: iov_iter with data to write
e4dd9de3c66bc7 Jan Kara 2009-08-17 4134 *
1e90da36c016f4 Matthew Wilcox (Oracle 2024-05-28 4135) * This is a wrapper around __filemap_write_iter() to be used by most
e4dd9de3c66bc7 Jan Kara 2009-08-17 4136 * filesystems. It takes care of syncing the file in case of O_SYNC file
9608703e488cf7 Jan Kara 2021-04-12 4137 * and acquires i_rwsem as needed.
1e90da36c016f4 Matthew Wilcox (Oracle 2024-05-28 4138) *
a862f68a8b3600 Mike Rapoport 2019-03-05 4139 * Return:
1e90da36c016f4 Matthew Wilcox (Oracle 2024-05-28 4140) * * negative error code if no data has been written at all or if
a862f68a8b3600 Mike Rapoport 2019-03-05 4141 * vfs_fsync_range() failed for a synchronous write
a862f68a8b3600 Mike Rapoport 2019-03-05 4142 * * number of bytes written, even for truncated writes
e4dd9de3c66bc7 Jan Kara 2009-08-17 4143 */
1e90da36c016f4 Matthew Wilcox (Oracle 2024-05-28 4144) ssize_t filemap_write_iter(struct kiocb *iocb, struct iov_iter *from,
1e90da36c016f4 Matthew Wilcox (Oracle 2024-05-28 4145) const struct buffered_write_operations *ops, void *fsdata)
^1da177e4c3f41 Linus Torvalds 2005-04-16 @4146 {
^1da177e4c3f41 Linus Torvalds 2005-04-16 4147 struct file *file = iocb->ki_filp;
148f948ba877f4 Jan Kara 2009-08-17 4148 struct inode *inode = file->f_mapping->host;
^1da177e4c3f41 Linus Torvalds 2005-04-16 4149 ssize_t ret;
^1da177e4c3f41 Linus Torvalds 2005-04-16 4150
5955102c9984fa Al Viro 2016-01-22 4151 inode_lock(inode);
3309dd04cbcd2c Al Viro 2015-04-09 4152 ret = generic_write_checks(iocb, from);
3309dd04cbcd2c Al Viro 2015-04-09 4153 if (ret > 0)
1e90da36c016f4 Matthew Wilcox (Oracle 2024-05-28 4154) ret = __filemap_write_iter(iocb, from, ops, fsdata);
5955102c9984fa Al Viro 2016-01-22 4155 inode_unlock(inode);
^1da177e4c3f41 Linus Torvalds 2005-04-16 4156
e259221763a404 Christoph Hellwig 2016-04-07 4157 if (ret > 0)
e259221763a404 Christoph Hellwig 2016-04-07 4158 ret = generic_write_sync(iocb, ret);
^1da177e4c3f41 Linus Torvalds 2005-04-16 4159 return ret;
^1da177e4c3f41 Linus Torvalds 2005-04-16 4160 }
1e90da36c016f4 Matthew Wilcox (Oracle 2024-05-28 4161)

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2024-05-29 05:20:31

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 0/7] Start moving write_begin/write_end out of aops

On Tue, May 28, 2024 at 05:48:21PM +0100, Matthew Wilcox (Oracle) wrote:
> Christoph wants to remove write_begin/write_end from aops and pass them
> to filemap as callback functions. Here's one possible route to do this.
> I combined it with the folio conversion (because why touch the same code
> twice?) and tweaked some of the other things (support for ridiculously
> large folios with size_t lengths, remove the need to initialise fsdata
> by passing only a pointer to the fsdata pointer). And then I converted
> ext4, which is probably the worst filesystem to convert because it needs
> three different bwops. Most fs will only need one.

Hopefully ext4 will get convert to iomap before we need this.. :)

More seriously, there is an ext4 iomap conversion in progress and a
ext2 one, which is a really good copy & paste model for a lot of the
simple file systems. Maybe just wait for some of this to settle
to avoid a lot of duplicate work?


2024-05-29 05:21:26

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 1/7] fs: Introduce buffered_write_operations

On Tue, May 28, 2024 at 05:48:22PM +0100, Matthew Wilcox (Oracle) wrote:
> Start the process of moving write_begin and write_end out from the
> address_space_operations to their own struct.
>
> The new write_begin returns the folio or an ERR_PTR instead of returning
> the folio by reference. It also accepts len as a size_t and (as
> documented) the len may be larger than PAGE_SIZE.
>
> Pass an optional buffered_write_operations pointer to various functions
> in filemap.c. The old names are available as macros for now, except
> for generic_file_write_iter() which is used as a function pointer by
> many filesystems. If using the new functions, the filesystem can have
> per-operation fsdata instead of per-page fsdata.

The model looks good, but buffered_write_operations sounds a little
too generic for a helper that hopefully won't have too many users in
the end.


2024-05-29 05:26:11

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 7/7] iomap: Return the folio from iomap_write_begin()

On Tue, May 28, 2024 at 05:48:28PM +0100, Matthew Wilcox (Oracle) wrote:
> Use an ERR_PTR to return any error that may have occurred, otherwise
> return the folio directly instead of returning it by reference. This
> mirrors changes which are going into the filemap ->write_begin callbacks.

Besides the mechanical errors pointed out by Dave I really like the
new calling convention.