2022-12-31 15:12:01

by Andreas Gruenbacher

[permalink] [raw]
Subject: [PATCH v5 0/9] Turn iomap_page_ops into iomap_folio_ops

Here's an updated version of this patch queue. Changes since v4 [*]:

* I've removed "fs: Add folio_may_straddle_isize helper" as I couldn't
get any feedback from Al Viro; the patch isn't essential for this
patch queue.

* The iomap_folio_ops operations have been renamed to ->get_folio() and
->put_folio(), and the helpers have been renamed to iomap_get_folio()
and iomap_put_folio().

* Patch "xfs: Make xfs_iomap_folio_ops static" has been added at the
end.

The patches are split up into relatively small pieces. That may seem
unnecessary, but at least it makes reviewing the patches easier.

If there are no more objections, can this go into iomap-for-next?

Thanks,
Andreas

[*] https://lore.kernel.org/linux-xfs/[email protected]/

Andreas Gruenbacher (9):
iomap: Add iomap_put_folio helper
iomap/gfs2: Unlock and put folio in page_done handler
iomap: Rename page_done handler to put_folio
iomap: Add iomap_get_folio helper
iomap/gfs2: Get page in page_prepare handler
iomap: Rename page_prepare handler to get_folio
iomap/xfs: Eliminate the iomap_valid handler
iomap: Rename page_ops to folio_ops
xfs: Make xfs_iomap_folio_ops static

fs/gfs2/bmap.c | 38 ++++++++++------
fs/iomap/buffered-io.c | 98 ++++++++++++++++++++++--------------------
fs/xfs/xfs_iomap.c | 41 ++++++++++++------
include/linux/iomap.h | 51 +++++++++-------------
4 files changed, 127 insertions(+), 101 deletions(-)

--
2.38.1


2022-12-31 15:12:24

by Andreas Gruenbacher

[permalink] [raw]
Subject: [PATCH v5 5/9] iomap/gfs2: Get page in page_prepare handler

Change the iomap ->page_prepare() handler to get and return a locked
folio instead of doing that in iomap_write_begin(). This allows
recovery from out-of-memory situations in ->page_prepare(), which
eliminates the corresponding error handling code in iomap_write_begin().
The ->put_folio() handler now also isn't called with NULL as the folio
value anymore.

Filesystems are expected to use the iomap_get_folio() helper for getting
locked folios in their ->page_prepare() handlers.

Signed-off-by: Andreas Gruenbacher <[email protected]>
---
fs/gfs2/bmap.c | 21 +++++++++++++--------
fs/iomap/buffered-io.c | 17 ++++++-----------
include/linux/iomap.h | 9 +++++----
3 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 0c041459677b..41349e09558b 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -956,15 +956,25 @@ static int __gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
goto out;
}

-static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
- unsigned len)
+static struct folio *
+gfs2_iomap_page_prepare(struct iomap_iter *iter, loff_t pos, unsigned len)
{
+ struct inode *inode = iter->inode;
unsigned int blockmask = i_blocksize(inode) - 1;
struct gfs2_sbd *sdp = GFS2_SB(inode);
unsigned int blocks;
+ struct folio *folio;
+ int status;

blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
- return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
+ status = gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
+ if (status)
+ return ERR_PTR(status);
+
+ folio = iomap_get_folio(iter, pos);
+ if (IS_ERR(folio))
+ gfs2_trans_end(sdp);
+ return folio;
}

static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
@@ -974,11 +984,6 @@ static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);

- if (!folio) {
- gfs2_trans_end(sdp);
- return;
- }
-
if (!gfs2_is_stuffed(ip))
gfs2_page_add_databufs(ip, &folio->page, offset_in_page(pos),
copied);
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index b84838d2b5d8..7decd8cdc755 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -609,7 +609,7 @@ static void iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,

if (page_ops && page_ops->put_folio) {
page_ops->put_folio(iter->inode, pos, ret, folio);
- } else if (folio) {
+ } else {
folio_unlock(folio);
folio_put(folio);
}
@@ -642,17 +642,12 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
if (!mapping_large_folio_support(iter->inode->i_mapping))
len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));

- if (page_ops && page_ops->page_prepare) {
- status = page_ops->page_prepare(iter->inode, pos, len);
- if (status)
- return status;
- }
-
- folio = iomap_get_folio(iter, pos);
- if (IS_ERR(folio)) {
- iomap_put_folio(iter, pos, 0, NULL);
+ if (page_ops && page_ops->page_prepare)
+ folio = page_ops->page_prepare(iter, pos, len);
+ else
+ folio = iomap_get_folio(iter, pos);
+ if (IS_ERR(folio))
return PTR_ERR(folio);
- }

/*
* Now we have a locked folio, before we do anything with it we need to
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index e5732cc5716b..87b5d0f8e578 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -13,6 +13,7 @@
struct address_space;
struct fiemap_extent_info;
struct inode;
+struct iomap_iter;
struct iomap_dio;
struct iomap_writepage_ctx;
struct iov_iter;
@@ -131,12 +132,12 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
* associated with them.
*
* When page_prepare succeeds, put_folio will always be called to do any
- * cleanup work necessary. In that put_folio call, @folio will be NULL if the
- * associated folio could not be obtained. When folio is not NULL, put_folio
- * is responsible for unlocking and putting the folio.
+ * cleanup work necessary. put_folio is responsible for unlocking and putting
+ * @folio.
*/
struct iomap_page_ops {
- int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len);
+ struct folio *(*page_prepare)(struct iomap_iter *iter, loff_t pos,
+ unsigned len);
void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
struct folio *folio);

--
2.38.1

2022-12-31 15:12:24

by Andreas Gruenbacher

[permalink] [raw]
Subject: [PATCH v5 4/9] iomap: Add iomap_get_folio helper

Add an iomap_get_folio() helper that gets a folio reference based on
an iomap iterator and an offset into the address space. Use it in
iomap_write_begin().

Signed-off-by: Andreas Gruenbacher <[email protected]>
---
fs/iomap/buffered-io.c | 39 ++++++++++++++++++++++++++++++---------
include/linux/iomap.h | 1 +
2 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 2a9bab4f3c79..b84838d2b5d8 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -457,6 +457,33 @@ bool iomap_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
}
EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);

+/**
+ * iomap_get_folio - get a folio reference for writing
+ * @iter: iteration structure
+ * @pos: start offset of write
+ *
+ * Returns a locked reference to the folio at @pos, or an error pointer if the
+ * folio could not be obtained.
+ */
+struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos)
+{
+ unsigned fgp = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE | FGP_NOFS;
+ struct folio *folio;
+
+ if (iter->flags & IOMAP_NOWAIT)
+ fgp |= FGP_NOWAIT;
+
+ folio = __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT,
+ fgp, mapping_gfp_mask(iter->inode->i_mapping));
+ if (folio)
+ return folio;
+
+ if (iter->flags & IOMAP_NOWAIT)
+ return ERR_PTR(-EAGAIN);
+ return ERR_PTR(-ENOMEM);
+}
+EXPORT_SYMBOL_GPL(iomap_get_folio);
+
bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags)
{
trace_iomap_release_folio(folio->mapping->host, folio_pos(folio),
@@ -603,12 +630,8 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
const struct iomap *srcmap = iomap_iter_srcmap(iter);
struct folio *folio;
- unsigned fgp = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE | FGP_NOFS;
int status = 0;

- if (iter->flags & IOMAP_NOWAIT)
- fgp |= FGP_NOWAIT;
-
BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
if (srcmap != &iter->iomap)
BUG_ON(pos + len > srcmap->offset + srcmap->length);
@@ -625,12 +648,10 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
return status;
}

- folio = __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT,
- fgp, mapping_gfp_mask(iter->inode->i_mapping));
- if (!folio) {
- status = (iter->flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOMEM;
+ folio = iomap_get_folio(iter, pos);
+ if (IS_ERR(folio)) {
iomap_put_folio(iter, pos, 0, NULL);
- return status;
+ return PTR_ERR(folio);
}

/*
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 10ec36f373f4..e5732cc5716b 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -261,6 +261,7 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
+struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos);
bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags);
void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len);
int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
--
2.38.1

2022-12-31 15:12:28

by Andreas Gruenbacher

[permalink] [raw]
Subject: [PATCH v5 3/9] iomap: Rename page_done handler to put_folio

The ->page_done() handler in struct iomap_page_ops is now somewhat
misnamed in that it mainly deals with unlocking and putting a folio, so
rename it to ->put_folio().

Signed-off-by: Andreas Gruenbacher <[email protected]>
---
fs/gfs2/bmap.c | 4 ++--
fs/iomap/buffered-io.c | 4 ++--
include/linux/iomap.h | 10 +++++-----
3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 46206286ad42..0c041459677b 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -967,7 +967,7 @@ static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
}

-static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
+static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
unsigned copied, struct folio *folio)
{
struct gfs2_trans *tr = current->journal_info;
@@ -994,7 +994,7 @@ static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,

static const struct iomap_page_ops gfs2_iomap_page_ops = {
.page_prepare = gfs2_iomap_page_prepare,
- .page_done = gfs2_iomap_page_done,
+ .put_folio = gfs2_iomap_put_folio,
};

static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index e13d5694e299..2a9bab4f3c79 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -580,8 +580,8 @@ static void iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
{
const struct iomap_page_ops *page_ops = iter->iomap.page_ops;

- if (page_ops && page_ops->page_done) {
- page_ops->page_done(iter->inode, pos, ret, folio);
+ if (page_ops && page_ops->put_folio) {
+ page_ops->put_folio(iter->inode, pos, ret, folio);
} else if (folio) {
folio_unlock(folio);
folio_put(folio);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 743e2a909162..10ec36f373f4 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -126,18 +126,18 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)

/*
* When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
- * and page_done will be called for each page written to. This only applies to
+ * and put_folio will be called for each page written to. This only applies to
* buffered writes as unbuffered writes will not typically have pages
* associated with them.
*
- * When page_prepare succeeds, page_done will always be called to do any
- * cleanup work necessary. In that page_done call, @folio will be NULL if the
- * associated folio could not be obtained. When folio is not NULL, page_done
+ * When page_prepare succeeds, put_folio will always be called to do any
+ * cleanup work necessary. In that put_folio call, @folio will be NULL if the
+ * associated folio could not be obtained. When folio is not NULL, put_folio
* is responsible for unlocking and putting the folio.
*/
struct iomap_page_ops {
int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len);
- void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
+ void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
struct folio *folio);

/*
--
2.38.1

2022-12-31 15:12:36

by Andreas Gruenbacher

[permalink] [raw]
Subject: [PATCH v5 2/9] iomap/gfs2: Unlock and put folio in page_done handler

When an iomap defines a ->page_done() handler in its page_ops, delegate
unlocking the folio and putting the folio reference to that handler.

This allows to fix a race between journaled data writes and folio
writeback in gfs2: before this change, gfs2_iomap_page_done() was called
after unlocking the folio, so writeback could start writing back the
folio's buffers before they could be marked for writing to the journal.
Also, try_to_free_buffers() could free the buffers before
gfs2_iomap_page_done() was done adding the buffers to the current
transaction. With this change, gfs2_iomap_page_done() adds the
buffers to the current transaction while the folio is still locked, so
the problems described above can no longer occur.

The only current user of ->page_done() is gfs2, so other filesystems are
not affected. To catch out any out-of-tree users, switch from a page to
a folio in ->page_done().

Signed-off-by: Andreas Gruenbacher <[email protected]>
---
fs/gfs2/bmap.c | 15 ++++++++++++---
fs/iomap/buffered-io.c | 8 ++++----
include/linux/iomap.h | 7 ++++---
3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index e7537fd305dd..46206286ad42 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -968,14 +968,23 @@ static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
}

static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
- unsigned copied, struct page *page)
+ unsigned copied, struct folio *folio)
{
struct gfs2_trans *tr = current->journal_info;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);

- if (page && !gfs2_is_stuffed(ip))
- gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
+ if (!folio) {
+ gfs2_trans_end(sdp);
+ return;
+ }
+
+ if (!gfs2_is_stuffed(ip))
+ gfs2_page_add_databufs(ip, &folio->page, offset_in_page(pos),
+ copied);
+
+ folio_unlock(folio);
+ folio_put(folio);

if (tr->tr_num_buf_new)
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index c30d150a9303..e13d5694e299 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -580,12 +580,12 @@ static void iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
{
const struct iomap_page_ops *page_ops = iter->iomap.page_ops;

- if (folio)
+ if (page_ops && page_ops->page_done) {
+ page_ops->page_done(iter->inode, pos, ret, folio);
+ } else if (folio) {
folio_unlock(folio);
- if (page_ops && page_ops->page_done)
- page_ops->page_done(iter->inode, pos, ret, &folio->page);
- if (folio)
folio_put(folio);
+ }
}

static int iomap_write_begin_inline(const struct iomap_iter *iter,
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 0983dfc9a203..743e2a909162 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -131,13 +131,14 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
* associated with them.
*
* When page_prepare succeeds, page_done will always be called to do any
- * cleanup work necessary. In that page_done call, @page will be NULL if the
- * associated page could not be obtained.
+ * cleanup work necessary. In that page_done call, @folio will be NULL if the
+ * associated folio could not be obtained. When folio is not NULL, page_done
+ * is responsible for unlocking and putting the folio.
*/
struct iomap_page_ops {
int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len);
void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
- struct page *page);
+ struct folio *folio);

/*
* Check that the cached iomap still maps correctly to the filesystem's
--
2.38.1

2022-12-31 15:12:57

by Andreas Gruenbacher

[permalink] [raw]
Subject: [PATCH v5 7/9] iomap/xfs: Eliminate the iomap_valid handler

Eliminate the ->iomap_valid() handler by switching to a ->get_folio()
handler and validating the mapping there.

Signed-off-by: Andreas Gruenbacher <[email protected]>
---
fs/iomap/buffered-io.c | 25 +++++--------------------
fs/xfs/xfs_iomap.c | 37 ++++++++++++++++++++++++++-----------
include/linux/iomap.h | 22 +++++-----------------
3 files changed, 36 insertions(+), 48 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 4f363d42dbaf..df6fca11f18c 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -630,7 +630,7 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
const struct iomap *srcmap = iomap_iter_srcmap(iter);
struct folio *folio;
- int status = 0;
+ int status;

BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
if (srcmap != &iter->iomap)
@@ -646,27 +646,12 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
folio = page_ops->get_folio(iter, pos, len);
else
folio = iomap_get_folio(iter, pos);
- if (IS_ERR(folio))
- return PTR_ERR(folio);
-
- /*
- * Now we have a locked folio, before we do anything with it we need to
- * check that the iomap we have cached is not stale. The inode extent
- * mapping can change due to concurrent IO in flight (e.g.
- * IOMAP_UNWRITTEN state can change and memory reclaim could have
- * reclaimed a previously partially written page at this index after IO
- * completion before this write reaches this file offset) and hence we
- * could do the wrong thing here (zero a page range incorrectly or fail
- * to zero) and corrupt data.
- */
- if (page_ops && page_ops->iomap_valid) {
- bool iomap_valid = page_ops->iomap_valid(iter->inode,
- &iter->iomap);
- if (!iomap_valid) {
+ if (IS_ERR(folio)) {
+ if (folio == ERR_PTR(-ESTALE)) {
iter->iomap.flags |= IOMAP_F_STALE;
- status = 0;
- goto out_unlock;
+ return 0;
}
+ return PTR_ERR(folio);
}

if (pos + len > folio_pos(folio) + folio_size(folio))
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 669c1bc5c3a7..d0bf99539180 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -62,29 +62,44 @@ xfs_iomap_inode_sequence(
return cookie | READ_ONCE(ip->i_df.if_seq);
}

-/*
- * Check that the iomap passed to us is still valid for the given offset and
- * length.
- */
-static bool
-xfs_iomap_valid(
- struct inode *inode,
- const struct iomap *iomap)
+static struct folio *
+xfs_get_folio(
+ struct iomap_iter *iter,
+ loff_t pos,
+ unsigned len)
{
+ struct inode *inode = iter->inode;
+ struct iomap *iomap = &iter->iomap;
struct xfs_inode *ip = XFS_I(inode);
+ struct folio *folio;

+ folio = iomap_get_folio(iter, pos);
+ if (IS_ERR(folio))
+ return folio;
+
+ /*
+ * Now that we have a locked folio, we need to check that the iomap we
+ * have cached is not stale. The inode extent mapping can change due to
+ * concurrent IO in flight (e.g., IOMAP_UNWRITTEN state can change and
+ * memory reclaim could have reclaimed a previously partially written
+ * page at this index after IO completion before this write reaches
+ * this file offset) and hence we could do the wrong thing here (zero a
+ * page range incorrectly or fail to zero) and corrupt data.
+ */
if (iomap->validity_cookie !=
xfs_iomap_inode_sequence(ip, iomap->flags)) {
trace_xfs_iomap_invalid(ip, iomap);
- return false;
+ folio_unlock(folio);
+ folio_put(folio);
+ return ERR_PTR(-ESTALE);
}

XFS_ERRORTAG_DELAY(ip->i_mount, XFS_ERRTAG_WRITE_DELAY_MS);
- return true;
+ return folio;
}

const struct iomap_page_ops xfs_iomap_page_ops = {
- .iomap_valid = xfs_iomap_valid,
+ .get_folio = xfs_get_folio,
};

int
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index dd3575ada5d1..6f8e3321e475 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -134,29 +134,17 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
* When get_folio succeeds, put_folio will always be called to do any
* cleanup work necessary. put_folio is responsible for unlocking and putting
* @folio.
+ *
+ * When an iomap is created, the filesystem can store internal state (e.g., a
+ * sequence number) in iomap->validity_cookie. When it then detects in the
+ * get_folio handler that the iomap is no longer up to date and needs to be
+ * refreshed, it can return ERR_PTR(-ESTALE) to trigger a retry.
*/
struct iomap_page_ops {
struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos,
unsigned len);
void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
struct folio *folio);
-
- /*
- * Check that the cached iomap still maps correctly to the filesystem's
- * internal extent map. FS internal extent maps can change while iomap
- * is iterating a cached iomap, so this hook allows iomap to detect that
- * the iomap needs to be refreshed during a long running write
- * operation.
- *
- * The filesystem can store internal state (e.g. a sequence number) in
- * iomap->validity_cookie when the iomap is first mapped to be able to
- * detect changes between mapping time and whenever .iomap_valid() is
- * called.
- *
- * This is called with the folio over the specified file position held
- * locked by the iomap code.
- */
- bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap);
};

/*
--
2.38.1

2022-12-31 15:13:24

by Andreas Gruenbacher

[permalink] [raw]
Subject: [PATCH v5 6/9] iomap: Rename page_prepare handler to get_folio

The ->page_prepare() handler in struct iomap_page_ops is now somewhat
misnamed, so rename it to ->get_folio().

Signed-off-by: Andreas Gruenbacher <[email protected]>
---
fs/gfs2/bmap.c | 6 +++---
fs/iomap/buffered-io.c | 4 ++--
include/linux/iomap.h | 6 +++---
3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 41349e09558b..d3adb715ac8c 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -957,7 +957,7 @@ static int __gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
}

static struct folio *
-gfs2_iomap_page_prepare(struct iomap_iter *iter, loff_t pos, unsigned len)
+gfs2_iomap_get_folio(struct iomap_iter *iter, loff_t pos, unsigned len)
{
struct inode *inode = iter->inode;
unsigned int blockmask = i_blocksize(inode) - 1;
@@ -998,7 +998,7 @@ static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
}

static const struct iomap_page_ops gfs2_iomap_page_ops = {
- .page_prepare = gfs2_iomap_page_prepare,
+ .get_folio = gfs2_iomap_get_folio,
.put_folio = gfs2_iomap_put_folio,
};

@@ -1291,7 +1291,7 @@ int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock,
/*
* NOTE: Never call gfs2_block_zero_range with an open transaction because it
* uses iomap write to perform its actions, which begin their own transactions
- * (iomap_begin, page_prepare, etc.)
+ * (iomap_begin, get_folio, etc.)
*/
static int gfs2_block_zero_range(struct inode *inode, loff_t from,
unsigned int length)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 7decd8cdc755..4f363d42dbaf 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -642,8 +642,8 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
if (!mapping_large_folio_support(iter->inode->i_mapping))
len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));

- if (page_ops && page_ops->page_prepare)
- folio = page_ops->page_prepare(iter, pos, len);
+ if (page_ops && page_ops->get_folio)
+ folio = page_ops->get_folio(iter, pos, len);
else
folio = iomap_get_folio(iter, pos);
if (IS_ERR(folio))
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 87b5d0f8e578..dd3575ada5d1 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -126,17 +126,17 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
}

/*
- * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
+ * When a filesystem sets page_ops in an iomap mapping it returns, get_folio
* and put_folio will be called for each page written to. This only applies to
* buffered writes as unbuffered writes will not typically have pages
* associated with them.
*
- * When page_prepare succeeds, put_folio will always be called to do any
+ * When get_folio succeeds, put_folio will always be called to do any
* cleanup work necessary. put_folio is responsible for unlocking and putting
* @folio.
*/
struct iomap_page_ops {
- struct folio *(*page_prepare)(struct iomap_iter *iter, loff_t pos,
+ struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos,
unsigned len);
void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
struct folio *folio);
--
2.38.1

2022-12-31 15:13:42

by Andreas Gruenbacher

[permalink] [raw]
Subject: [PATCH v5 8/9] iomap: Rename page_ops to folio_ops

The operations in struct page_ops all operate on folios, so rename
struct page_ops to struct folio_ops.

Signed-off-by: Andreas Gruenbacher <[email protected]>
---
fs/gfs2/bmap.c | 4 ++--
fs/iomap/buffered-io.c | 12 ++++++------
fs/xfs/xfs_iomap.c | 4 ++--
include/linux/iomap.h | 14 +++++++-------
4 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index d3adb715ac8c..e191ecfb1fde 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -997,7 +997,7 @@ static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
gfs2_trans_end(sdp);
}

-static const struct iomap_page_ops gfs2_iomap_page_ops = {
+static const struct iomap_folio_ops gfs2_iomap_folio_ops = {
.get_folio = gfs2_iomap_get_folio,
.put_folio = gfs2_iomap_put_folio,
};
@@ -1075,7 +1075,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
}

if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip))
- iomap->page_ops = &gfs2_iomap_page_ops;
+ iomap->folio_ops = &gfs2_iomap_folio_ops;
return 0;

out_trans_end:
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index df6fca11f18c..c4a7aef2a272 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -605,10 +605,10 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
static void iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
struct folio *folio)
{
- const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
+ const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;

- if (page_ops && page_ops->put_folio) {
- page_ops->put_folio(iter->inode, pos, ret, folio);
+ if (folio_ops && folio_ops->put_folio) {
+ folio_ops->put_folio(iter->inode, pos, ret, folio);
} else {
folio_unlock(folio);
folio_put(folio);
@@ -627,7 +627,7 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter,
static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
size_t len, struct folio **foliop)
{
- const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
+ const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
const struct iomap *srcmap = iomap_iter_srcmap(iter);
struct folio *folio;
int status;
@@ -642,8 +642,8 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
if (!mapping_large_folio_support(iter->inode->i_mapping))
len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));

- if (page_ops && page_ops->get_folio)
- folio = page_ops->get_folio(iter, pos, len);
+ if (folio_ops && folio_ops->get_folio)
+ folio = folio_ops->get_folio(iter, pos, len);
else
folio = iomap_get_folio(iter, pos);
if (IS_ERR(folio)) {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index d0bf99539180..5bddf31e21eb 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -98,7 +98,7 @@ xfs_get_folio(
return folio;
}

-const struct iomap_page_ops xfs_iomap_page_ops = {
+const struct iomap_folio_ops xfs_iomap_folio_ops = {
.get_folio = xfs_get_folio,
};

@@ -148,7 +148,7 @@ xfs_bmbt_to_iomap(
iomap->flags |= IOMAP_F_DIRTY;

iomap->validity_cookie = sequence_cookie;
- iomap->page_ops = &xfs_iomap_page_ops;
+ iomap->folio_ops = &xfs_iomap_folio_ops;
return 0;
}

diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 6f8e3321e475..2e2be828af86 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -86,7 +86,7 @@ struct vm_fault;
*/
#define IOMAP_NULL_ADDR -1ULL /* addr is not valid */

-struct iomap_page_ops;
+struct iomap_folio_ops;

struct iomap {
u64 addr; /* disk offset of mapping, bytes */
@@ -98,7 +98,7 @@ struct iomap {
struct dax_device *dax_dev; /* dax_dev for dax operations */
void *inline_data;
void *private; /* filesystem private */
- const struct iomap_page_ops *page_ops;
+ const struct iomap_folio_ops *folio_ops;
u64 validity_cookie; /* used with .iomap_valid() */
};

@@ -126,10 +126,10 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
}

/*
- * When a filesystem sets page_ops in an iomap mapping it returns, get_folio
- * and put_folio will be called for each page written to. This only applies to
- * buffered writes as unbuffered writes will not typically have pages
- * associated with them.
+ * When a filesystem sets folio_ops in an iomap mapping it returns,
+ * get_folio and put_folio will be called for each page written to. This
+ * only applies to buffered writes as unbuffered writes will not typically have
+ * pages associated with them.
*
* When get_folio succeeds, put_folio will always be called to do any
* cleanup work necessary. put_folio is responsible for unlocking and putting
@@ -140,7 +140,7 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
* get_folio handler that the iomap is no longer up to date and needs to be
* refreshed, it can return ERR_PTR(-ESTALE) to trigger a retry.
*/
-struct iomap_page_ops {
+struct iomap_folio_ops {
struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos,
unsigned len);
void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
--
2.38.1

2022-12-31 15:13:52

by Andreas Gruenbacher

[permalink] [raw]
Subject: [PATCH v5 9/9] xfs: Make xfs_iomap_folio_ops static

Variable xfs_iomap_folio_ops isn't used outside xfs_iomap.c, so it
should be static.

Signed-off-by: Andreas Gruenbacher <[email protected]>
---
fs/xfs/xfs_iomap.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5bddf31e21eb..7d1795a9c742 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -98,7 +98,7 @@ xfs_get_folio(
return folio;
}

-const struct iomap_folio_ops xfs_iomap_folio_ops = {
+static const struct iomap_folio_ops xfs_iomap_folio_ops = {
.get_folio = xfs_get_folio,
};

--
2.38.1

2023-01-04 17:47:36

by Darrick J. Wong

[permalink] [raw]
Subject: Re: [PATCH v5 3/9] iomap: Rename page_done handler to put_folio

On Sat, Dec 31, 2022 at 04:09:13PM +0100, Andreas Gruenbacher wrote:
> The ->page_done() handler in struct iomap_page_ops is now somewhat
> misnamed in that it mainly deals with unlocking and putting a folio, so
> rename it to ->put_folio().
>
> Signed-off-by: Andreas Gruenbacher <[email protected]>
> ---
> fs/gfs2/bmap.c | 4 ++--
> fs/iomap/buffered-io.c | 4 ++--
> include/linux/iomap.h | 10 +++++-----
> 3 files changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> index 46206286ad42..0c041459677b 100644
> --- a/fs/gfs2/bmap.c
> +++ b/fs/gfs2/bmap.c
> @@ -967,7 +967,7 @@ static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
> return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
> }
>
> -static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
> +static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
> unsigned copied, struct folio *folio)
> {
> struct gfs2_trans *tr = current->journal_info;
> @@ -994,7 +994,7 @@ static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
>
> static const struct iomap_page_ops gfs2_iomap_page_ops = {
> .page_prepare = gfs2_iomap_page_prepare,
> - .page_done = gfs2_iomap_page_done,
> + .put_folio = gfs2_iomap_put_folio,
> };
>
> static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index e13d5694e299..2a9bab4f3c79 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -580,8 +580,8 @@ static void iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
> {
> const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
>
> - if (page_ops && page_ops->page_done) {
> - page_ops->page_done(iter->inode, pos, ret, folio);
> + if (page_ops && page_ops->put_folio) {
> + page_ops->put_folio(iter->inode, pos, ret, folio);
> } else if (folio) {
> folio_unlock(folio);
> folio_put(folio);
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 743e2a909162..10ec36f373f4 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -126,18 +126,18 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
>
> /*
> * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
> - * and page_done will be called for each page written to. This only applies to
> + * and put_folio will be called for each page written to. This only applies to

"...for each folio written to."

With that fixed,
Reviewed-by: Darrick J. Wong <[email protected]>

--D


> * buffered writes as unbuffered writes will not typically have pages
> * associated with them.
> *
> - * When page_prepare succeeds, page_done will always be called to do any
> - * cleanup work necessary. In that page_done call, @folio will be NULL if the
> - * associated folio could not be obtained. When folio is not NULL, page_done
> + * When page_prepare succeeds, put_folio will always be called to do any
> + * cleanup work necessary. In that put_folio call, @folio will be NULL if the
> + * associated folio could not be obtained. When folio is not NULL, put_folio
> * is responsible for unlocking and putting the folio.
> */
> struct iomap_page_ops {
> int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len);
> - void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
> + void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
> struct folio *folio);
>
> /*
> --
> 2.38.1
>

2023-01-04 17:47:37

by Darrick J. Wong

[permalink] [raw]
Subject: Re: [PATCH v5 4/9] iomap: Add iomap_get_folio helper

On Sat, Dec 31, 2022 at 04:09:14PM +0100, Andreas Gruenbacher wrote:
> Add an iomap_get_folio() helper that gets a folio reference based on
> an iomap iterator and an offset into the address space. Use it in
> iomap_write_begin().
>
> Signed-off-by: Andreas Gruenbacher <[email protected]>

Pretty straightforward,
Reviewed-by: Darrick J. Wong <[email protected]>

--D

> ---
> fs/iomap/buffered-io.c | 39 ++++++++++++++++++++++++++++++---------
> include/linux/iomap.h | 1 +
> 2 files changed, 31 insertions(+), 9 deletions(-)
>
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 2a9bab4f3c79..b84838d2b5d8 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -457,6 +457,33 @@ bool iomap_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
> }
> EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
>
> +/**
> + * iomap_get_folio - get a folio reference for writing
> + * @iter: iteration structure
> + * @pos: start offset of write
> + *
> + * Returns a locked reference to the folio at @pos, or an error pointer if the
> + * folio could not be obtained.
> + */
> +struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos)
> +{
> + unsigned fgp = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE | FGP_NOFS;
> + struct folio *folio;
> +
> + if (iter->flags & IOMAP_NOWAIT)
> + fgp |= FGP_NOWAIT;
> +
> + folio = __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT,
> + fgp, mapping_gfp_mask(iter->inode->i_mapping));
> + if (folio)
> + return folio;
> +
> + if (iter->flags & IOMAP_NOWAIT)
> + return ERR_PTR(-EAGAIN);
> + return ERR_PTR(-ENOMEM);
> +}
> +EXPORT_SYMBOL_GPL(iomap_get_folio);
> +
> bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags)
> {
> trace_iomap_release_folio(folio->mapping->host, folio_pos(folio),
> @@ -603,12 +630,8 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
> const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
> const struct iomap *srcmap = iomap_iter_srcmap(iter);
> struct folio *folio;
> - unsigned fgp = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE | FGP_NOFS;
> int status = 0;
>
> - if (iter->flags & IOMAP_NOWAIT)
> - fgp |= FGP_NOWAIT;
> -
> BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
> if (srcmap != &iter->iomap)
> BUG_ON(pos + len > srcmap->offset + srcmap->length);
> @@ -625,12 +648,10 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
> return status;
> }
>
> - folio = __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT,
> - fgp, mapping_gfp_mask(iter->inode->i_mapping));
> - if (!folio) {
> - status = (iter->flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOMEM;
> + folio = iomap_get_folio(iter, pos);
> + if (IS_ERR(folio)) {
> iomap_put_folio(iter, pos, 0, NULL);
> - return status;
> + return PTR_ERR(folio);
> }
>
> /*
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 10ec36f373f4..e5732cc5716b 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -261,6 +261,7 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
> int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
> void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
> bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
> +struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos);
> bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags);
> void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len);
> int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
> --
> 2.38.1
>

2023-01-04 17:47:57

by Darrick J. Wong

[permalink] [raw]
Subject: Re: [PATCH v5 2/9] iomap/gfs2: Unlock and put folio in page_done handler

On Sat, Dec 31, 2022 at 04:09:12PM +0100, Andreas Gruenbacher wrote:
> When an iomap defines a ->page_done() handler in its page_ops, delegate
> unlocking the folio and putting the folio reference to that handler.
>
> This allows to fix a race between journaled data writes and folio
> writeback in gfs2: before this change, gfs2_iomap_page_done() was called
> after unlocking the folio, so writeback could start writing back the
> folio's buffers before they could be marked for writing to the journal.
> Also, try_to_free_buffers() could free the buffers before
> gfs2_iomap_page_done() was done adding the buffers to the current
> transaction. With this change, gfs2_iomap_page_done() adds the
> buffers to the current transaction while the folio is still locked, so
> the problems described above can no longer occur.
>
> The only current user of ->page_done() is gfs2, so other filesystems are
> not affected. To catch out any out-of-tree users, switch from a page to
> a folio in ->page_done().

I really hope there aren't any out of tree users...

> Signed-off-by: Andreas Gruenbacher <[email protected]>

Reviewed-by: Darrick J. Wong <[email protected]>

--D

> ---
> fs/gfs2/bmap.c | 15 ++++++++++++---
> fs/iomap/buffered-io.c | 8 ++++----
> include/linux/iomap.h | 7 ++++---
> 3 files changed, 20 insertions(+), 10 deletions(-)
>
> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> index e7537fd305dd..46206286ad42 100644
> --- a/fs/gfs2/bmap.c
> +++ b/fs/gfs2/bmap.c
> @@ -968,14 +968,23 @@ static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
> }
>
> static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
> - unsigned copied, struct page *page)
> + unsigned copied, struct folio *folio)
> {
> struct gfs2_trans *tr = current->journal_info;
> struct gfs2_inode *ip = GFS2_I(inode);
> struct gfs2_sbd *sdp = GFS2_SB(inode);
>
> - if (page && !gfs2_is_stuffed(ip))
> - gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
> + if (!folio) {
> + gfs2_trans_end(sdp);
> + return;
> + }
> +
> + if (!gfs2_is_stuffed(ip))
> + gfs2_page_add_databufs(ip, &folio->page, offset_in_page(pos),
> + copied);
> +
> + folio_unlock(folio);
> + folio_put(folio);
>
> if (tr->tr_num_buf_new)
> __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index c30d150a9303..e13d5694e299 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -580,12 +580,12 @@ static void iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
> {
> const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
>
> - if (folio)
> + if (page_ops && page_ops->page_done) {
> + page_ops->page_done(iter->inode, pos, ret, folio);
> + } else if (folio) {
> folio_unlock(folio);
> - if (page_ops && page_ops->page_done)
> - page_ops->page_done(iter->inode, pos, ret, &folio->page);
> - if (folio)
> folio_put(folio);
> + }
> }
>
> static int iomap_write_begin_inline(const struct iomap_iter *iter,
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 0983dfc9a203..743e2a909162 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -131,13 +131,14 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
> * associated with them.
> *
> * When page_prepare succeeds, page_done will always be called to do any
> - * cleanup work necessary. In that page_done call, @page will be NULL if the
> - * associated page could not be obtained.
> + * cleanup work necessary. In that page_done call, @folio will be NULL if the
> + * associated folio could not be obtained. When folio is not NULL, page_done
> + * is responsible for unlocking and putting the folio.
> */
> struct iomap_page_ops {
> int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len);
> void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
> - struct page *page);
> + struct folio *folio);
>
> /*
> * Check that the cached iomap still maps correctly to the filesystem's
> --
> 2.38.1
>

2023-01-04 17:48:20

by Darrick J. Wong

[permalink] [raw]
Subject: Re: [PATCH v5 5/9] iomap/gfs2: Get page in page_prepare handler

On Sat, Dec 31, 2022 at 04:09:15PM +0100, Andreas Gruenbacher wrote:
> Change the iomap ->page_prepare() handler to get and return a locked
> folio instead of doing that in iomap_write_begin(). This allows to
> recover from out-of-memory situations in ->page_prepare(), which
> eliminates the corresponding error handling code in iomap_write_begin().
> The ->put_folio() handler now also isn't called with NULL as the folio
> value anymore.
>
> Filesystems are expected to use the iomap_get_folio() helper for getting
> locked folios in their ->page_prepare() handlers.
>
> Signed-off-by: Andreas Gruenbacher <[email protected]>

This patchset makes the page ops make a lot more sense to me now. I
very much like the way that the new ->get_folio ->put_folio functions
split the responsibilities for setting up the page cache write and
tearing it down. Thank you for cleaning this up. :)

(I would still like hch or willy to take a second look at this, however.)

Reviewed-by: Darrick J. Wong <[email protected]>

--D

> ---
> fs/gfs2/bmap.c | 21 +++++++++++++--------
> fs/iomap/buffered-io.c | 17 ++++++-----------
> include/linux/iomap.h | 9 +++++----
> 3 files changed, 24 insertions(+), 23 deletions(-)
>
> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> index 0c041459677b..41349e09558b 100644
> --- a/fs/gfs2/bmap.c
> +++ b/fs/gfs2/bmap.c
> @@ -956,15 +956,25 @@ static int __gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
> goto out;
> }
>
> -static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
> - unsigned len)
> +static struct folio *
> +gfs2_iomap_page_prepare(struct iomap_iter *iter, loff_t pos, unsigned len)
> {
> + struct inode *inode = iter->inode;
> unsigned int blockmask = i_blocksize(inode) - 1;
> struct gfs2_sbd *sdp = GFS2_SB(inode);
> unsigned int blocks;
> + struct folio *folio;
> + int status;
>
> blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
> - return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
> + status = gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
> + if (status)
> + return ERR_PTR(status);
> +
> + folio = iomap_get_folio(iter, pos);
> + if (IS_ERR(folio))
> + gfs2_trans_end(sdp);
> + return folio;
> }
>
> static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
> @@ -974,11 +984,6 @@ static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
> struct gfs2_inode *ip = GFS2_I(inode);
> struct gfs2_sbd *sdp = GFS2_SB(inode);
>
> - if (!folio) {
> - gfs2_trans_end(sdp);
> - return;
> - }
> -
> if (!gfs2_is_stuffed(ip))
> gfs2_page_add_databufs(ip, &folio->page, offset_in_page(pos),
> copied);
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index b84838d2b5d8..7decd8cdc755 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -609,7 +609,7 @@ static void iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
>
> if (page_ops && page_ops->put_folio) {
> page_ops->put_folio(iter->inode, pos, ret, folio);
> - } else if (folio) {
> + } else {
> folio_unlock(folio);
> folio_put(folio);
> }
> @@ -642,17 +642,12 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
> if (!mapping_large_folio_support(iter->inode->i_mapping))
> len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));
>
> - if (page_ops && page_ops->page_prepare) {
> - status = page_ops->page_prepare(iter->inode, pos, len);
> - if (status)
> - return status;
> - }
> -
> - folio = iomap_get_folio(iter, pos);
> - if (IS_ERR(folio)) {
> - iomap_put_folio(iter, pos, 0, NULL);
> + if (page_ops && page_ops->page_prepare)
> + folio = page_ops->page_prepare(iter, pos, len);
> + else
> + folio = iomap_get_folio(iter, pos);
> + if (IS_ERR(folio))
> return PTR_ERR(folio);
> - }
>
> /*
> * Now we have a locked folio, before we do anything with it we need to
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index e5732cc5716b..87b5d0f8e578 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -13,6 +13,7 @@
> struct address_space;
> struct fiemap_extent_info;
> struct inode;
> +struct iomap_iter;
> struct iomap_dio;
> struct iomap_writepage_ctx;
> struct iov_iter;
> @@ -131,12 +132,12 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
> * associated with them.
> *
> * When page_prepare succeeds, put_folio will always be called to do any
> - * cleanup work necessary. In that put_folio call, @folio will be NULL if the
> - * associated folio could not be obtained. When folio is not NULL, put_folio
> - * is responsible for unlocking and putting the folio.
> + * cleanup work necessary. put_folio is responsible for unlocking and putting
> + * @folio.
> */
> struct iomap_page_ops {
> - int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len);
> + struct folio *(*page_prepare)(struct iomap_iter *iter, loff_t pos,
> + unsigned len);
> void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
> struct folio *folio);
>
> --
> 2.38.1
>

2023-01-04 17:48:46

by Darrick J. Wong

[permalink] [raw]
Subject: Re: [PATCH v5 6/9] iomap: Rename page_prepare handler to get_folio

On Sat, Dec 31, 2022 at 04:09:16PM +0100, Andreas Gruenbacher wrote:
> The ->page_prepare() handler in struct iomap_page_ops is now somewhat
> misnamed, so rename it to ->get_folio().
>
> Signed-off-by: Andreas Gruenbacher <[email protected]>

Looks good to me,
Reviewed-by: Darrick J. Wong <[email protected]>

--D

> ---
> fs/gfs2/bmap.c | 6 +++---
> fs/iomap/buffered-io.c | 4 ++--
> include/linux/iomap.h | 6 +++---
> 3 files changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> index 41349e09558b..d3adb715ac8c 100644
> --- a/fs/gfs2/bmap.c
> +++ b/fs/gfs2/bmap.c
> @@ -957,7 +957,7 @@ static int __gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
> }
>
> static struct folio *
> -gfs2_iomap_page_prepare(struct iomap_iter *iter, loff_t pos, unsigned len)
> +gfs2_iomap_get_folio(struct iomap_iter *iter, loff_t pos, unsigned len)
> {
> struct inode *inode = iter->inode;
> unsigned int blockmask = i_blocksize(inode) - 1;
> @@ -998,7 +998,7 @@ static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
> }
>
> static const struct iomap_page_ops gfs2_iomap_page_ops = {
> - .page_prepare = gfs2_iomap_page_prepare,
> + .get_folio = gfs2_iomap_get_folio,
> .put_folio = gfs2_iomap_put_folio,
> };
>
> @@ -1291,7 +1291,7 @@ int gfs2_alloc_extent(struct inode *inode, u64 lblock, u64 *dblock,
> /*
> * NOTE: Never call gfs2_block_zero_range with an open transaction because it
> * uses iomap write to perform its actions, which begin their own transactions
> - * (iomap_begin, page_prepare, etc.)
> + * (iomap_begin, get_folio, etc.)
> */
> static int gfs2_block_zero_range(struct inode *inode, loff_t from,
> unsigned int length)
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 7decd8cdc755..4f363d42dbaf 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -642,8 +642,8 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
> if (!mapping_large_folio_support(iter->inode->i_mapping))
> len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));
>
> - if (page_ops && page_ops->page_prepare)
> - folio = page_ops->page_prepare(iter, pos, len);
> + if (page_ops && page_ops->get_folio)
> + folio = page_ops->get_folio(iter, pos, len);
> else
> folio = iomap_get_folio(iter, pos);
> if (IS_ERR(folio))
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 87b5d0f8e578..dd3575ada5d1 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -126,17 +126,17 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
> }
>
> /*
> - * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
> + * When a filesystem sets page_ops in an iomap mapping it returns, get_folio
> * and put_folio will be called for each page written to. This only applies to
> * buffered writes as unbuffered writes will not typically have pages
> * associated with them.
> *
> - * When page_prepare succeeds, put_folio will always be called to do any
> + * When get_folio succeeds, put_folio will always be called to do any
> * cleanup work necessary. put_folio is responsible for unlocking and putting
> * @folio.
> */
> struct iomap_page_ops {
> - struct folio *(*page_prepare)(struct iomap_iter *iter, loff_t pos,
> + struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos,
> unsigned len);
> void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
> struct folio *folio);
> --
> 2.38.1
>

2023-01-04 18:00:51

by Darrick J. Wong

[permalink] [raw]
Subject: Re: [PATCH v5 7/9] iomap/xfs: Eliminate the iomap_valid handler

On Sat, Dec 31, 2022 at 04:09:17PM +0100, Andreas Gruenbacher wrote:
> Eliminate the ->iomap_valid() handler by switching to a ->get_folio()
> handler and validating the mapping there.
>
> Signed-off-by: Andreas Gruenbacher <[email protected]>
> ---
> fs/iomap/buffered-io.c | 25 +++++--------------------
> fs/xfs/xfs_iomap.c | 37 ++++++++++++++++++++++++++-----------
> include/linux/iomap.h | 22 +++++-----------------
> 3 files changed, 36 insertions(+), 48 deletions(-)
>
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 4f363d42dbaf..df6fca11f18c 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -630,7 +630,7 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
> const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
> const struct iomap *srcmap = iomap_iter_srcmap(iter);
> struct folio *folio;
> - int status = 0;
> + int status;
>
> BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
> if (srcmap != &iter->iomap)
> @@ -646,27 +646,12 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
> folio = page_ops->get_folio(iter, pos, len);
> else
> folio = iomap_get_folio(iter, pos);
> - if (IS_ERR(folio))
> - return PTR_ERR(folio);
> -
> - /*
> - * Now we have a locked folio, before we do anything with it we need to
> - * check that the iomap we have cached is not stale. The inode extent
> - * mapping can change due to concurrent IO in flight (e.g.
> - * IOMAP_UNWRITTEN state can change and memory reclaim could have
> - * reclaimed a previously partially written page at this index after IO
> - * completion before this write reaches this file offset) and hence we
> - * could do the wrong thing here (zero a page range incorrectly or fail
> - * to zero) and corrupt data.
> - */
> - if (page_ops && page_ops->iomap_valid) {
> - bool iomap_valid = page_ops->iomap_valid(iter->inode,
> - &iter->iomap);
> - if (!iomap_valid) {
> + if (IS_ERR(folio)) {
> + if (folio == ERR_PTR(-ESTALE)) {
> iter->iomap.flags |= IOMAP_F_STALE;
> - status = 0;
> - goto out_unlock;
> + return 0;
> }
> + return PTR_ERR(folio);

I wonder if this should be reworked a bit to reduce indenting:

if (PTR_ERR(folio) == -ESTALE) {
iter->iomap.flags |= IOMAP_F_STALE;
return 0;
}
if (IS_ERR(folio))
return PTR_ERR(folio);

But I don't have any strong opinions about that.

> }
>
> if (pos + len > folio_pos(folio) + folio_size(folio))
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 669c1bc5c3a7..d0bf99539180 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -62,29 +62,44 @@ xfs_iomap_inode_sequence(
> return cookie | READ_ONCE(ip->i_df.if_seq);
> }
>
> -/*
> - * Check that the iomap passed to us is still valid for the given offset and
> - * length.
> - */
> -static bool
> -xfs_iomap_valid(
> - struct inode *inode,
> - const struct iomap *iomap)
> +static struct folio *
> +xfs_get_folio(
> + struct iomap_iter *iter,
> + loff_t pos,
> + unsigned len)
> {
> + struct inode *inode = iter->inode;
> + struct iomap *iomap = &iter->iomap;
> struct xfs_inode *ip = XFS_I(inode);
> + struct folio *folio;
>
> + folio = iomap_get_folio(iter, pos);
> + if (IS_ERR(folio))
> + return folio;
> +
> + /*
> + * Now that we have a locked folio, we need to check that the iomap we
> + * have cached is not stale. The inode extent mapping can change due to
> + * concurrent IO in flight (e.g., IOMAP_UNWRITTEN state can change and
> + * memory reclaim could have reclaimed a previously partially written
> + * page at this index after IO completion before this write reaches
> + * this file offset) and hence we could do the wrong thing here (zero a
> + * page range incorrectly or fail to zero) and corrupt data.
> + */
> if (iomap->validity_cookie !=
> xfs_iomap_inode_sequence(ip, iomap->flags)) {
> trace_xfs_iomap_invalid(ip, iomap);
> - return false;
> + folio_unlock(folio);
> + folio_put(folio);
> + return ERR_PTR(-ESTALE);
> }
>
> XFS_ERRORTAG_DELAY(ip->i_mount, XFS_ERRTAG_WRITE_DELAY_MS);
> - return true;
> + return folio;
> }
>
> const struct iomap_page_ops xfs_iomap_page_ops = {
> - .iomap_valid = xfs_iomap_valid,
> + .get_folio = xfs_get_folio,
> };
>
> int
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index dd3575ada5d1..6f8e3321e475 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -134,29 +134,17 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
> * When get_folio succeeds, put_folio will always be called to do any
> * cleanup work necessary. put_folio is responsible for unlocking and putting
> * @folio.
> + *
> + * When an iomap is created, the filesystem can store internal state (e.g., a
> + * sequence number) in iomap->validity_cookie. When it then detects in the

I would reword this to:

"When an iomap is created, the filesystem can store internal state (e.g.
sequence number) in iomap->validity_cookie. The get_folio handler can
use this validity cookie to detect if the iomap is no longer up to date
and needs to be refreshed. If this is the case, the function should
return ERR_PTR(-ESTALE) to retry the operation with a fresh mapping."

--D

> + * get_folio handler that the iomap is no longer up to date and needs to be
> + * refreshed, it can return ERR_PTR(-ESTALE) to trigger a retry.
> */
> struct iomap_page_ops {
> struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos,
> unsigned len);
> void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
> struct folio *folio);
> -
> - /*
> - * Check that the cached iomap still maps correctly to the filesystem's
> - * internal extent map. FS internal extent maps can change while iomap
> - * is iterating a cached iomap, so this hook allows iomap to detect that
> - * the iomap needs to be refreshed during a long running write
> - * operation.
> - *
> - * The filesystem can store internal state (e.g. a sequence number) in
> - * iomap->validity_cookie when the iomap is first mapped to be able to
> - * detect changes between mapping time and whenever .iomap_valid() is
> - * called.
> - *
> - * This is called with the folio over the specified file position held
> - * locked by the iomap code.
> - */
> - bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap);
> };
>
> /*
> --
> 2.38.1
>

2023-01-04 18:01:09

by Darrick J. Wong

[permalink] [raw]
Subject: Re: [PATCH v5 8/9] iomap: Rename page_ops to folio_ops

On Sat, Dec 31, 2022 at 04:09:18PM +0100, Andreas Gruenbacher wrote:
> The operations in struct page_ops all operate on folios, so rename
> struct page_ops to struct folio_ops.
>
> Signed-off-by: Andreas Gruenbacher <[email protected]>

Yup.
Reviewed-by: Darrick J. Wong <[email protected]>

--D

> ---
> fs/gfs2/bmap.c | 4 ++--
> fs/iomap/buffered-io.c | 12 ++++++------
> fs/xfs/xfs_iomap.c | 4 ++--
> include/linux/iomap.h | 14 +++++++-------
> 4 files changed, 17 insertions(+), 17 deletions(-)
>
> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> index d3adb715ac8c..e191ecfb1fde 100644
> --- a/fs/gfs2/bmap.c
> +++ b/fs/gfs2/bmap.c
> @@ -997,7 +997,7 @@ static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
> gfs2_trans_end(sdp);
> }
>
> -static const struct iomap_page_ops gfs2_iomap_page_ops = {
> +static const struct iomap_folio_ops gfs2_iomap_folio_ops = {
> .get_folio = gfs2_iomap_get_folio,
> .put_folio = gfs2_iomap_put_folio,
> };
> @@ -1075,7 +1075,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
> }
>
> if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip))
> - iomap->page_ops = &gfs2_iomap_page_ops;
> + iomap->folio_ops = &gfs2_iomap_folio_ops;
> return 0;
>
> out_trans_end:
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index df6fca11f18c..c4a7aef2a272 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -605,10 +605,10 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
> static void iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
> struct folio *folio)
> {
> - const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
> + const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
>
> - if (page_ops && page_ops->put_folio) {
> - page_ops->put_folio(iter->inode, pos, ret, folio);
> + if (folio_ops && folio_ops->put_folio) {
> + folio_ops->put_folio(iter->inode, pos, ret, folio);
> } else {
> folio_unlock(folio);
> folio_put(folio);
> @@ -627,7 +627,7 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter,
> static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
> size_t len, struct folio **foliop)
> {
> - const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
> + const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
> const struct iomap *srcmap = iomap_iter_srcmap(iter);
> struct folio *folio;
> int status;
> @@ -642,8 +642,8 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
> if (!mapping_large_folio_support(iter->inode->i_mapping))
> len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));
>
> - if (page_ops && page_ops->get_folio)
> - folio = page_ops->get_folio(iter, pos, len);
> + if (folio_ops && folio_ops->get_folio)
> + folio = folio_ops->get_folio(iter, pos, len);
> else
> folio = iomap_get_folio(iter, pos);
> if (IS_ERR(folio)) {
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index d0bf99539180..5bddf31e21eb 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -98,7 +98,7 @@ xfs_get_folio(
> return folio;
> }
>
> -const struct iomap_page_ops xfs_iomap_page_ops = {
> +const struct iomap_folio_ops xfs_iomap_folio_ops = {
> .get_folio = xfs_get_folio,
> };
>
> @@ -148,7 +148,7 @@ xfs_bmbt_to_iomap(
> iomap->flags |= IOMAP_F_DIRTY;
>
> iomap->validity_cookie = sequence_cookie;
> - iomap->page_ops = &xfs_iomap_page_ops;
> + iomap->folio_ops = &xfs_iomap_folio_ops;
> return 0;
> }
>
> diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> index 6f8e3321e475..2e2be828af86 100644
> --- a/include/linux/iomap.h
> +++ b/include/linux/iomap.h
> @@ -86,7 +86,7 @@ struct vm_fault;
> */
> #define IOMAP_NULL_ADDR -1ULL /* addr is not valid */
>
> -struct iomap_page_ops;
> +struct iomap_folio_ops;
>
> struct iomap {
> u64 addr; /* disk offset of mapping, bytes */
> @@ -98,7 +98,7 @@ struct iomap {
> struct dax_device *dax_dev; /* dax_dev for dax operations */
> void *inline_data;
> void *private; /* filesystem private */
> - const struct iomap_page_ops *page_ops;
> + const struct iomap_folio_ops *folio_ops;
> u64 validity_cookie; /* used with .iomap_valid() */
> };
>
> @@ -126,10 +126,10 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
> }
>
> /*
> - * When a filesystem sets page_ops in an iomap mapping it returns, get_folio
> - * and put_folio will be called for each page written to. This only applies to
> - * buffered writes as unbuffered writes will not typically have pages
> - * associated with them.
> + * When a filesystem sets folio_ops in an iomap mapping it returns,
> + * get_folio and put_folio will be called for each page written to. This
> + * only applies to buffered writes as unbuffered writes will not typically have
> + * pages associated with them.
> *
> * When get_folio succeeds, put_folio will always be called to do any
> * cleanup work necessary. put_folio is responsible for unlocking and putting
> @@ -140,7 +140,7 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
> * get_folio handler that the iomap is no longer up to date and needs to be
> * refreshed, it can return ERR_PTR(-ESTALE) to trigger a retry.
> */
> -struct iomap_page_ops {
> +struct iomap_folio_ops {
> struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos,
> unsigned len);
> void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
> --
> 2.38.1
>

2023-01-04 19:01:31

by Andreas Grünbacher

[permalink] [raw]
Subject: Re: [PATCH v5 3/9] iomap: Rename page_done handler to put_folio

Am Mi., 4. Jan. 2023 um 18:47 Uhr schrieb Darrick J. Wong <[email protected]>:
>
> On Sat, Dec 31, 2022 at 04:09:13PM +0100, Andreas Gruenbacher wrote:
> > The ->page_done() handler in struct iomap_page_ops is now somewhat
> > misnamed in that it mainly deals with unlocking and putting a folio, so
> > rename it to ->put_folio().
> >
> > Signed-off-by: Andreas Gruenbacher <[email protected]>
> > ---
> > fs/gfs2/bmap.c | 4 ++--
> > fs/iomap/buffered-io.c | 4 ++--
> > include/linux/iomap.h | 10 +++++-----
> > 3 files changed, 9 insertions(+), 9 deletions(-)
> >
> > diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> > index 46206286ad42..0c041459677b 100644
> > --- a/fs/gfs2/bmap.c
> > +++ b/fs/gfs2/bmap.c
> > @@ -967,7 +967,7 @@ static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
> > return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
> > }
> >
> > -static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
> > +static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
> > unsigned copied, struct folio *folio)
> > {
> > struct gfs2_trans *tr = current->journal_info;
> > @@ -994,7 +994,7 @@ static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
> >
> > static const struct iomap_page_ops gfs2_iomap_page_ops = {
> > .page_prepare = gfs2_iomap_page_prepare,
> > - .page_done = gfs2_iomap_page_done,
> > + .put_folio = gfs2_iomap_put_folio,
> > };
> >
> > static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
> > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> > index e13d5694e299..2a9bab4f3c79 100644
> > --- a/fs/iomap/buffered-io.c
> > +++ b/fs/iomap/buffered-io.c
> > @@ -580,8 +580,8 @@ static void iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
> > {
> > const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
> >
> > - if (page_ops && page_ops->page_done) {
> > - page_ops->page_done(iter->inode, pos, ret, folio);
> > + if (page_ops && page_ops->put_folio) {
> > + page_ops->put_folio(iter->inode, pos, ret, folio);
> > } else if (folio) {
> > folio_unlock(folio);
> > folio_put(folio);
> > diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> > index 743e2a909162..10ec36f373f4 100644
> > --- a/include/linux/iomap.h
> > +++ b/include/linux/iomap.h
> > @@ -126,18 +126,18 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
> >
> > /*
> > * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare
> > - * and page_done will be called for each page written to. This only applies to
> > + * and put_folio will be called for each page written to. This only applies to
>
> "...for each folio written to."

Ah, yes.

> With that fixed,
> Reviewed-by: Darrick J. Wong <[email protected]>
>
> --D
>
>
> > * buffered writes as unbuffered writes will not typically have pages

And here, it should be "folios" as well I'd say.

Thanks,
Andreas

> > * associated with them.
> > *
> > - * When page_prepare succeeds, page_done will always be called to do any
> > - * cleanup work necessary. In that page_done call, @folio will be NULL if the
> > - * associated folio could not be obtained. When folio is not NULL, page_done
> > + * When page_prepare succeeds, put_folio will always be called to do any
> > + * cleanup work necessary. In that put_folio call, @folio will be NULL if the
> > + * associated folio could not be obtained. When folio is not NULL, put_folio
> > * is responsible for unlocking and putting the folio.
> > */
> > struct iomap_page_ops {
> > int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len);
> > - void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
> > + void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
> > struct folio *folio);
> >
> > /*
> > --
> > 2.38.1
> >

2023-01-04 19:08:02

by Andreas Grünbacher

[permalink] [raw]
Subject: Re: [PATCH v5 7/9] iomap/xfs: Eliminate the iomap_valid handler

Am Mi., 4. Jan. 2023 um 19:00 Uhr schrieb Darrick J. Wong <[email protected]>:
> On Sat, Dec 31, 2022 at 04:09:17PM +0100, Andreas Gruenbacher wrote:
> > Eliminate the ->iomap_valid() handler by switching to a ->get_folio()
> > handler and validating the mapping there.
> >
> > Signed-off-by: Andreas Gruenbacher <[email protected]>
> > ---
> > fs/iomap/buffered-io.c | 25 +++++--------------------
> > fs/xfs/xfs_iomap.c | 37 ++++++++++++++++++++++++++-----------
> > include/linux/iomap.h | 22 +++++-----------------
> > 3 files changed, 36 insertions(+), 48 deletions(-)
> >
> > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> > index 4f363d42dbaf..df6fca11f18c 100644
> > --- a/fs/iomap/buffered-io.c
> > +++ b/fs/iomap/buffered-io.c
> > @@ -630,7 +630,7 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
> > const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
> > const struct iomap *srcmap = iomap_iter_srcmap(iter);
> > struct folio *folio;
> > - int status = 0;
> > + int status;
> >
> > BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
> > if (srcmap != &iter->iomap)
> > @@ -646,27 +646,12 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
> > folio = page_ops->get_folio(iter, pos, len);
> > else
> > folio = iomap_get_folio(iter, pos);
> > - if (IS_ERR(folio))
> > - return PTR_ERR(folio);
> > -
> > - /*
> > - * Now we have a locked folio, before we do anything with it we need to
> > - * check that the iomap we have cached is not stale. The inode extent
> > - * mapping can change due to concurrent IO in flight (e.g.
> > - * IOMAP_UNWRITTEN state can change and memory reclaim could have
> > - * reclaimed a previously partially written page at this index after IO
> > - * completion before this write reaches this file offset) and hence we
> > - * could do the wrong thing here (zero a page range incorrectly or fail
> > - * to zero) and corrupt data.
> > - */
> > - if (page_ops && page_ops->iomap_valid) {
> > - bool iomap_valid = page_ops->iomap_valid(iter->inode,
> > - &iter->iomap);
> > - if (!iomap_valid) {
> > + if (IS_ERR(folio)) {
> > + if (folio == ERR_PTR(-ESTALE)) {
> > iter->iomap.flags |= IOMAP_F_STALE;
> > - status = 0;
> > - goto out_unlock;
> > + return 0;
> > }
> > + return PTR_ERR(folio);
>
> I wonder if this should be reworked a bit to reduce indenting:
>
> if (PTR_ERR(folio) == -ESTALE) {
> iter->iomap.flags |= IOMAP_F_STALE;
> return 0;
> }
> if (IS_ERR(folio))
> return PTR_ERR(folio);
>
> But I don't have any strong opinions about that.

This is a relatively hot path; the compiler would have to convert the
flattened version back to the nested version for the best possible
result to avoid a redundant check. Not sure it would always do that.

> > }
> >
> > if (pos + len > folio_pos(folio) + folio_size(folio))
> > diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> > index 669c1bc5c3a7..d0bf99539180 100644
> > --- a/fs/xfs/xfs_iomap.c
> > +++ b/fs/xfs/xfs_iomap.c
> > @@ -62,29 +62,44 @@ xfs_iomap_inode_sequence(
> > return cookie | READ_ONCE(ip->i_df.if_seq);
> > }
> >
> > -/*
> > - * Check that the iomap passed to us is still valid for the given offset and
> > - * length.
> > - */
> > -static bool
> > -xfs_iomap_valid(
> > - struct inode *inode,
> > - const struct iomap *iomap)
> > +static struct folio *
> > +xfs_get_folio(
> > + struct iomap_iter *iter,
> > + loff_t pos,
> > + unsigned len)
> > {
> > + struct inode *inode = iter->inode;
> > + struct iomap *iomap = &iter->iomap;
> > struct xfs_inode *ip = XFS_I(inode);
> > + struct folio *folio;
> >
> > + folio = iomap_get_folio(iter, pos);
> > + if (IS_ERR(folio))
> > + return folio;
> > +
> > + /*
> > + * Now that we have a locked folio, we need to check that the iomap we
> > + * have cached is not stale. The inode extent mapping can change due to
> > + * concurrent IO in flight (e.g., IOMAP_UNWRITTEN state can change and
> > + * memory reclaim could have reclaimed a previously partially written
> > + * page at this index after IO completion before this write reaches
> > + * this file offset) and hence we could do the wrong thing here (zero a
> > + * page range incorrectly or fail to zero) and corrupt data.
> > + */
> > if (iomap->validity_cookie !=
> > xfs_iomap_inode_sequence(ip, iomap->flags)) {
> > trace_xfs_iomap_invalid(ip, iomap);
> > - return false;
> > + folio_unlock(folio);
> > + folio_put(folio);
> > + return ERR_PTR(-ESTALE);
> > }
> >
> > XFS_ERRORTAG_DELAY(ip->i_mount, XFS_ERRTAG_WRITE_DELAY_MS);
> > - return true;
> > + return folio;
> > }
> >
> > const struct iomap_page_ops xfs_iomap_page_ops = {
> > - .iomap_valid = xfs_iomap_valid,
> > + .get_folio = xfs_get_folio,
> > };
> >
> > int
> > diff --git a/include/linux/iomap.h b/include/linux/iomap.h
> > index dd3575ada5d1..6f8e3321e475 100644
> > --- a/include/linux/iomap.h
> > +++ b/include/linux/iomap.h
> > @@ -134,29 +134,17 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap)
> > * When get_folio succeeds, put_folio will always be called to do any
> > * cleanup work necessary. put_folio is responsible for unlocking and putting
> > * @folio.
> > + *
> > + * When an iomap is created, the filesystem can store internal state (e.g., a
> > + * sequence number) in iomap->validity_cookie. When it then detects in the
>
> I would reword this to:
>
> "When an iomap is created, the filesystem can store internal state (e.g.
> sequence number) in iomap->validity_cookie. The get_folio handler can
> use this validity cookie to detect if the iomap is no longer up to date
> and needs to be refreshed. If this is the case, the function should
> return ERR_PTR(-ESTALE) to retry the operation with a fresh mapping."

Yes, but "e.g." is always followed by a comma and it's "when", not
"if" here. So how about:

* When an iomap is created, the filesystem can store internal state (e.g., a
* sequence number) in iomap->validity_cookie. The get_folio handler can use
* this validity cookie to detect when the iomap needs to be refreshed because
* it is no longer up to date. In that case, the function should return
* ERR_PTR(-ESTALE) to retry the operation with a fresh mapping.

I've incorporated all your feedback into this branch for now:

https://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git/log/?h=iomap-race

Thank you,
Andreas


> --D
>
> > + * get_folio handler that the iomap is no longer up to date and needs to be
> > + * refreshed, it can return ERR_PTR(-ESTALE) to trigger a retry.
> > */
> > struct iomap_page_ops {
> > struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos,
> > unsigned len);
> > void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied,
> > struct folio *folio);
> > -
> > - /*
> > - * Check that the cached iomap still maps correctly to the filesystem's
> > - * internal extent map. FS internal extent maps can change while iomap
> > - * is iterating a cached iomap, so this hook allows iomap to detect that
> > - * the iomap needs to be refreshed during a long running write
> > - * operation.
> > - *
> > - * The filesystem can store internal state (e.g. a sequence number) in
> > - * iomap->validity_cookie when the iomap is first mapped to be able to
> > - * detect changes between mapping time and whenever .iomap_valid() is
> > - * called.
> > - *
> > - * This is called with the folio over the specified file position held
> > - * locked by the iomap code.
> > - */
> > - bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap);
> > };
> >
> > /*
> > --
> > 2.38.1
> >

2023-01-04 19:15:36

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH v5 7/9] iomap/xfs: Eliminate the iomap_valid handler

On Wed, Jan 04, 2023 at 09:53:17AM -0800, Darrick J. Wong wrote:
> I wonder if this should be reworked a bit to reduce indenting:
>
> if (PTR_ERR(folio) == -ESTALE) {

FYI this is a bad habit to be in. The compiler can optimise

if (folio == ERR_PTR(-ESTALE))

better than it can optimise the other way around.

2023-01-08 17:30:09

by Christoph Hellwig

[permalink] [raw]

2023-01-08 17:31:14

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH v5 4/9] iomap: Add iomap_get_folio helper

Looks good:

Reviewed-by: Christoph Hellwig <[email protected]>

2023-01-08 17:33:00

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH v5 6/9] iomap: Rename page_prepare handler to get_folio

Looks good:

Reviewed-by: Christoph Hellwig <[email protected]>

although it might make sense to just do the rename in patch 5 which
changes the signature to return a folio?

2023-01-08 17:33:01

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH v5 5/9] iomap/gfs2: Get page in page_prepare handler

> + if (page_ops && page_ops->page_prepare)
> + folio = page_ops->page_prepare(iter, pos, len);
> + else
> + folio = iomap_get_folio(iter, pos);
> + if (IS_ERR(folio))
> return PTR_ERR(folio);

I'd love to have an iomap_get_folio helper for this sequence so that
we match iomap_put_folio. That would require renaming the current
iomap_get_folio to __iomap_get_folio.

Otherwise this looks good:

Reviewed-by: Christoph Hellwig <[email protected]>

2023-01-08 17:34:05

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH v5 7/9] iomap/xfs: Eliminate the iomap_valid handler

On Wed, Jan 04, 2023 at 07:08:17PM +0000, Matthew Wilcox wrote:
> On Wed, Jan 04, 2023 at 09:53:17AM -0800, Darrick J. Wong wrote:
> > I wonder if this should be reworked a bit to reduce indenting:
> >
> > if (PTR_ERR(folio) == -ESTALE) {
>
> FYI this is a bad habit to be in. The compiler can optimise
>
> if (folio == ERR_PTR(-ESTALE))
>
> better than it can optimise the other way around.

Yes. I think doing the rewording that Darrick suggested combined
with this style would be best:

if (folio == ERR_PTR(-ESTALE)) {
iter->iomap.flags |= IOMAP_F_STALE;
return 0;
}
if (IS_ERR(folio))
return PTR_ERR(folio);

2023-01-08 17:34:35

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH v5 8/9] iomap: Rename page_ops to folio_ops

Looks good:

Reviewed-by: Christoph Hellwig <[email protected]>

2023-01-08 17:34:36

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH v5 9/9] xfs: Make xfs_iomap_folio_ops static

On Sat, Dec 31, 2022 at 04:09:19PM +0100, Andreas Gruenbacher wrote:
> Variable xfs_iomap_folio_ops isn't used outside xfs_iomap.c, so it
> should be static.

Looks good:

Reviewed-by: Christoph Hellwig <[email protected]>

2023-01-08 18:58:50

by Andreas Gruenbacher

[permalink] [raw]
Subject: Re: [PATCH v5 7/9] iomap/xfs: Eliminate the iomap_valid handler

On Sun, Jan 8, 2023 at 6:32 PM Christoph Hellwig <[email protected]> wrote:
> On Wed, Jan 04, 2023 at 07:08:17PM +0000, Matthew Wilcox wrote:
> > On Wed, Jan 04, 2023 at 09:53:17AM -0800, Darrick J. Wong wrote:
> > > I wonder if this should be reworked a bit to reduce indenting:
> > >
> > > if (PTR_ERR(folio) == -ESTALE) {
> >
> > FYI this is a bad habit to be in. The compiler can optimise
> >
> > if (folio == ERR_PTR(-ESTALE))
> >
> > better than it can optimise the other way around.
>
> Yes. I think doing the rewording that Darrick suggested combined
> with this style would be best:
>
> if (folio == ERR_PTR(-ESTALE)) {
> iter->iomap.flags |= IOMAP_F_STALE;
> return 0;
> }
> if (IS_ERR(folio))
> return PTR_ERR(folio);

Again, I've implemented this as a nested if because the -ESTALE case
should be pretty rare, and if we unnest, we end up with an additional
check on the main code path. To be specific, the "before" code here on
my current system is this:

------------------------------------
if (IS_ERR(folio)) {
22ad: 48 81 fd 00 f0 ff ff cmp $0xfffffffffffff000,%rbp
22b4: 0f 87 bf 03 00 00 ja 2679 <iomap_write_begin+0x499>
return 0;
}
return PTR_ERR(folio);
}
[...]
2679: 89 e8 mov %ebp,%eax
if (folio == ERR_PTR(-ESTALE)) {
267b: 48 83 fd 8c cmp $0xffffffffffffff8c,%rbp
267f: 0f 85 b7 fc ff ff jne 233c <iomap_write_begin+0x15c>
iter->iomap.flags |= IOMAP_F_STALE;
2685: 66 81 4b 42 00 02 orw $0x200,0x42(%rbx)
return 0;
268b: e9 aa fc ff ff jmp 233a <iomap_write_begin+0x15a>
------------------------------------

While the "after" code is this:

------------------------------------
if (folio == ERR_PTR(-ESTALE)) {
22ad: 48 83 fd 8c cmp $0xffffffffffffff8c,%rbp
22b1: 0f 84 bc 00 00 00 je 2373 <iomap_write_begin+0x193>
iter->iomap.flags |= IOMAP_F_STALE;
return 0;
}
if (IS_ERR(folio))
return PTR_ERR(folio);
22b7: 89 e8 mov %ebp,%eax
if (IS_ERR(folio))
22b9: 48 81 fd 00 f0 ff ff cmp $0xfffffffffffff000,%rbp
22c0: 0f 87 82 00 00 00 ja 2348 <iomap_write_begin+0x168>
------------------------------------

The compiler isn't smart enough to re-nest the ifs by recognizing that
folio == ERR_PTR(-ESTALE) is a subset of IS_ERR(folio).

So do you still insist on that un-nesting even though it produces worse code?

Thanks,
Andreas

2023-01-08 19:43:17

by Andreas Gruenbacher

[permalink] [raw]
Subject: Re: [PATCH v5 5/9] iomap/gfs2: Get page in page_prepare handler

On Sun, Jan 8, 2023 at 6:29 PM Christoph Hellwig <[email protected]> wrote:
> > + if (page_ops && page_ops->page_prepare)
> > + folio = page_ops->page_prepare(iter, pos, len);
> > + else
> > + folio = iomap_get_folio(iter, pos);
> > + if (IS_ERR(folio))
> > return PTR_ERR(folio);
>
> I'd love to have an iomap_get_folio helper for this sequence so that
> we match iomap_put_folio. That would require renaming the current
> iomap_get_folio to __iomap_get_folio.

That's the wrong way around though. iomap_get_folio() is exported to
filesystems, so if at all, we should rename iomap_put_folio() which is
a static function only used in the iomap code.

I'll post an update.

Thanks,
Andreas

2023-01-10 22:01:48

by Darrick J. Wong

[permalink] [raw]
Subject: Re: [PATCH v5 7/9] iomap/xfs: Eliminate the iomap_valid handler

On Sun, Jan 08, 2023 at 07:50:01PM +0100, Andreas Gruenbacher wrote:
> On Sun, Jan 8, 2023 at 6:32 PM Christoph Hellwig <[email protected]> wrote:
> > On Wed, Jan 04, 2023 at 07:08:17PM +0000, Matthew Wilcox wrote:
> > > On Wed, Jan 04, 2023 at 09:53:17AM -0800, Darrick J. Wong wrote:
> > > > I wonder if this should be reworked a bit to reduce indenting:
> > > >
> > > > if (PTR_ERR(folio) == -ESTALE) {
> > >
> > > FYI this is a bad habit to be in. The compiler can optimise
> > >
> > > if (folio == ERR_PTR(-ESTALE))
> > >
> > > better than it can optimise the other way around.
> >
> > Yes. I think doing the rewording that Darrick suggested combined
> > with this style would be best:
> >
> > if (folio == ERR_PTR(-ESTALE)) {
> > iter->iomap.flags |= IOMAP_F_STALE;
> > return 0;
> > }
> > if (IS_ERR(folio))
> > return PTR_ERR(folio);
>
> Again, I've implemented this as a nested if because the -ESTALE case
> should be pretty rare, and if we unnest, we end up with an additional
> check on the main code path. To be specific, the "before" code here on
> my current system is this:
>
> ------------------------------------
> if (IS_ERR(folio)) {
> 22ad: 48 81 fd 00 f0 ff ff cmp $0xfffffffffffff000,%rbp
> 22b4: 0f 87 bf 03 00 00 ja 2679 <iomap_write_begin+0x499>
> return 0;
> }
> return PTR_ERR(folio);
> }
> [...]
> 2679: 89 e8 mov %ebp,%eax
> if (folio == ERR_PTR(-ESTALE)) {
> 267b: 48 83 fd 8c cmp $0xffffffffffffff8c,%rbp
> 267f: 0f 85 b7 fc ff ff jne 233c <iomap_write_begin+0x15c>
> iter->iomap.flags |= IOMAP_F_STALE;
> 2685: 66 81 4b 42 00 02 orw $0x200,0x42(%rbx)
> return 0;
> 268b: e9 aa fc ff ff jmp 233a <iomap_write_begin+0x15a>
> ------------------------------------
>
> While the "after" code is this:
>
> ------------------------------------
> if (folio == ERR_PTR(-ESTALE)) {
> 22ad: 48 83 fd 8c cmp $0xffffffffffffff8c,%rbp
> 22b1: 0f 84 bc 00 00 00 je 2373 <iomap_write_begin+0x193>
> iter->iomap.flags |= IOMAP_F_STALE;
> return 0;
> }
> if (IS_ERR(folio))
> return PTR_ERR(folio);
> 22b7: 89 e8 mov %ebp,%eax
> if (IS_ERR(folio))
> 22b9: 48 81 fd 00 f0 ff ff cmp $0xfffffffffffff000,%rbp
> 22c0: 0f 87 82 00 00 00 ja 2348 <iomap_write_begin+0x168>
> ------------------------------------
>
> The compiler isn't smart enough to re-nest the ifs by recognizing that
> folio == ERR_PTR(-ESTALE) is a subset of IS_ERR(folio).
>
> So do you still insist on that un-nesting even though it produces worse code?

Me? Not anymore. :)

--D

> Thanks,
> Andreas
>