It was decided to remove the page_endio() as per the previous RFC
discussion[1] of this series and move that functionality into the caller
itself. One of the side benefits of doing that is that the callers have been
modified to directly work on folios as page_endio() already worked on
folios.
mpage changes were tested with simple boot testing. orangefs was
tested by Mike Marshall (No code changes since he tested). Zram was
only build tested. No functional changes were introduced as a part of
this AFAIK.
Changes since RFC 2[2]:
- Call bio_put in zram bio end io handler (Still not Acked by hch[3])
- Call folio_set_error in mpage read endio error path (Willy)
- Directly call folio->mapping in mpage write endio error path (Willy)
[1] https://lore.kernel.org/linux-mm/[email protected]/
[2] https://lore.kernel.org/linux-mm/[email protected]/
[3] https://lore.kernel.org/linux-mm/[email protected]/
Pankaj Raghav (5):
zram: remove the call to page_endio in the bio end_io handler
orangefs: use folios in orangefs_readahead
mpage: split bi_end_io callback for reads and writes
mpage: use folios in bio end_io handler
filemap: remove page_endio()
drivers/block/zram/zram_drv.c | 8 ++------
fs/mpage.c | 38 +++++++++++++++++++++++++++--------
fs/orangefs/inode.c | 9 +++++----
include/linux/pagemap.h | 2 --
mm/filemap.c | 30 ---------------------------
5 files changed, 37 insertions(+), 50 deletions(-)
--
2.34.1
Convert orangefs_readahead() from using struct page to struct folio.
This conversion removes the call to page_endio() which is soon to be
removed, and simplifies the final page handling.
The page error flags are not required to be set in the error case as
orangefs doesn't depend on them.
Signed-off-by: Pankaj Raghav <[email protected]>
---
fs/orangefs/inode.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index aefdf1d3be7c..9014bbcc8031 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -244,7 +244,7 @@ static void orangefs_readahead(struct readahead_control *rac)
struct iov_iter iter;
struct inode *inode = rac->mapping->host;
struct xarray *i_pages;
- struct page *page;
+ struct folio *folio;
loff_t new_start = readahead_pos(rac);
int ret;
size_t new_len = 0;
@@ -275,9 +275,10 @@ static void orangefs_readahead(struct readahead_control *rac)
ret = 0;
/* clean up. */
- while ((page = readahead_page(rac))) {
- page_endio(page, false, ret);
- put_page(page);
+ while ((folio = readahead_folio(rac))) {
+ if (!ret)
+ folio_mark_uptodate(folio);
+ folio_unlock(folio);
}
}
--
2.34.1
zram_page_end_io function is called when alloc_page is used (for
partial IO) to trigger writeback from the user space. The pages used for
this operation are never locked or have the writeback set. So, it is safe
to remove the call to page_endio() function that unlocks or marks
writeback end on the page.
Rename the endio handler from zram_page_end_io to zram_read_end_io as
the call to page_endio() is removed and to associate the callback to the
operation it is used in.
Signed-off-by: Pankaj Raghav <[email protected]>
---
drivers/block/zram/zram_drv.c | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index b7bb52f8dfbd..3300e7eda2f6 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -606,12 +606,8 @@ static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
atomic64_dec(&zram->stats.bd_count);
}
-static void zram_page_end_io(struct bio *bio)
+static void zram_read_end_io(struct bio *bio)
{
- struct page *page = bio_first_page_all(bio);
-
- page_endio(page, op_is_write(bio_op(bio)),
- blk_status_to_errno(bio->bi_status));
bio_put(bio);
}
@@ -635,7 +631,7 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
}
if (!parent)
- bio->bi_end_io = zram_page_end_io;
+ bio->bi_end_io = zram_read_end_io;
else
bio_chain(bio, parent);
--
2.34.1
Use folios in the bio end_io handler. This conversion does the appropriate
handling on the folios in the respective end_io callback and removes the
call to page_endio(), which is soon to be removed.
Signed-off-by: Pankaj Raghav <[email protected]>
---
fs/mpage.c | 28 ++++++++++++++++------------
1 file changed, 16 insertions(+), 12 deletions(-)
diff --git a/fs/mpage.c b/fs/mpage.c
index 3a545bf0f184..6f43b7c9d4de 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -45,13 +45,15 @@
*/
static void mpage_read_end_io(struct bio *bio)
{
- struct bio_vec *bv;
- struct bvec_iter_all iter_all;
+ struct folio_iter fi;
+ int err = blk_status_to_errno(bio->bi_status);
- bio_for_each_segment_all(bv, bio, iter_all) {
- struct page *page = bv->bv_page;
- page_endio(page, REQ_OP_READ,
- blk_status_to_errno(bio->bi_status));
+ bio_for_each_folio_all(fi, bio) {
+ if (!err)
+ folio_mark_uptodate(fi.folio);
+ else
+ folio_set_error(fi.folio);
+ folio_unlock(fi.folio);
}
bio_put(bio);
@@ -59,13 +61,15 @@ static void mpage_read_end_io(struct bio *bio)
static void mpage_write_end_io(struct bio *bio)
{
- struct bio_vec *bv;
- struct bvec_iter_all iter_all;
+ struct folio_iter fi;
+ int err = blk_status_to_errno(bio->bi_status);
- bio_for_each_segment_all(bv, bio, iter_all) {
- struct page *page = bv->bv_page;
- page_endio(page, REQ_OP_WRITE,
- blk_status_to_errno(bio->bi_status));
+ bio_for_each_folio_all(fi, bio) {
+ if (err) {
+ folio_set_error(fi.folio);
+ mapping_set_error(fi.folio->mapping, err);
+ }
+ folio_end_writeback(fi.folio);
}
bio_put(bio);
--
2.34.1
On Tue, Mar 28, 2023 at 01:27:12PM +0200, Pankaj Raghav wrote:
> -static void zram_page_end_io(struct bio *bio)
> +static void zram_read_end_io(struct bio *bio)
> {
> - struct page *page = bio_first_page_all(bio);
> -
> - page_endio(page, op_is_write(bio_op(bio)),
> - blk_status_to_errno(bio->bi_status));
> bio_put(bio);
> }
>
> @@ -635,7 +631,7 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
> }
>
> if (!parent)
> - bio->bi_end_io = zram_page_end_io;
> + bio->bi_end_io = zram_read_end_io;
Can we just do:
if (!parent)
bio->bi_end_io = bio_put;
drivers/nvme/target/passthru.c does this, so it wouldn't be the first.
On Tue, Mar 28, 2023 at 01:27:13PM +0200, Pankaj Raghav wrote:
> Convert orangefs_readahead() from using struct page to struct folio.
> This conversion removes the call to page_endio() which is soon to be
> removed, and simplifies the final page handling.
>
> The page error flags is not required to be set in the error case as
> orangefs doesn't depend on them.
>
> Signed-off-by: Pankaj Raghav <[email protected]>
Reviewed-by: Matthew Wilcox (Oracle) <[email protected]>
Shouldn't Mike's tested-by be in here?
On 2023-03-28 17:21, Matthew Wilcox wrote:
> On Tue, Mar 28, 2023 at 01:27:13PM +0200, Pankaj Raghav wrote:
>> Convert orangefs_readahead() from using struct page to struct folio.
>> This conversion removes the call to page_endio() which is soon to be
>> removed, and simplifies the final page handling.
>>
>> The page error flags is not required to be set in the error case as
>> orangefs doesn't depend on them.
>>
>> Signed-off-by: Pankaj Raghav <[email protected]>
>
> Reviewed-by: Matthew Wilcox (Oracle) <[email protected]>
>
> Shouldn't Mike's tested-by be in here?
I mentioned that he tested it in my cover letter as I didn't receive a Tested-by
tag from him.
On 2023-03-28 17:19, Matthew Wilcox wrote:
> On Tue, Mar 28, 2023 at 01:27:12PM +0200, Pankaj Raghav wrote:
>> -static void zram_page_end_io(struct bio *bio)
>> +static void zram_read_end_io(struct bio *bio)
>> {
>> - struct page *page = bio_first_page_all(bio);
>> -
>> - page_endio(page, op_is_write(bio_op(bio)),
>> - blk_status_to_errno(bio->bi_status));
>> bio_put(bio);
>> }
>>
>> @@ -635,7 +631,7 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
>> }
>>
>> if (!parent)
>> - bio->bi_end_io = zram_page_end_io;
>> + bio->bi_end_io = zram_read_end_io;
>
> Can we just do:
>
> if (!parent)
> bio->bi_end_io = bio_put;
>
Looks neat. I will wait for Christoph to comment whether just a bio_put() call
is enough in this case for non-chained bios before making this change for the
next version.
Thanks.
I thought telling you that I tested it was good :-) ...
Please do put a tested-by me in there...
-Mike
On Tue, Mar 28, 2023 at 12:02 PM Pankaj Raghav <[email protected]> wrote:
>
> On 2023-03-28 17:21, Matthew Wilcox wrote:
> > On Tue, Mar 28, 2023 at 01:27:13PM +0200, Pankaj Raghav wrote:
> >> Convert orangefs_readahead() from using struct page to struct folio.
> >> This conversion removes the call to page_endio() which is soon to be
> >> removed, and simplifies the final page handling.
> >>
> >> The page error flags is not required to be set in the error case as
> >> orangefs doesn't depend on them.
> >>
> >> Signed-off-by: Pankaj Raghav <[email protected]>
> >
> > Reviewed-by: Matthew Wilcox (Oracle) <[email protected]>
> >
> > Shouldn't Mike's tested-by be in here?
>
> I mentioned that he tested in my cover letter as I didn't receive a Tested-by
> tag from him.
On Tue, Mar 28, 2023 at 06:17:11PM +0200, Pankaj Raghav wrote:
> >> if (!parent)
> >> - bio->bi_end_io = zram_page_end_io;
> >> + bio->bi_end_io = zram_read_end_io;
> >
> > Can we just do:
> >
> > if (!parent)
> > bio->bi_end_io = bio_put;
> >
>
> Looks neat. I will wait for Christoph to comment whether just a bio_put() call
> is enough in this case for non-chained bios before making this change for the
> next version.
It is enough in the sense of keeping the previous behavior there.
It is not enough in the sense that the code is still broken as the
callers is never notified of the read completion. So I think for the
purpose of your series we're fine and can go ahead with this version
for now.
>
> Thanks.
---end quoted text---
On Tue, Mar 28, 2023 at 01:27:12PM +0200, Pankaj Raghav wrote:
> zram_page_end_io function is called when alloc_page is used (for
> partial IO) to trigger writeback from the user space. The pages used for
No, it was used with zram_rw_page since rw_page didn't carry the bio.
> this operation is never locked or have the writeback set. So, it is safe
VM had the page lock and wait to unlock.
> to remove the call to page_endio() function that unlocks or marks
> writeback end on the page.
>
> Rename the endio handler from zram_page_end_io to zram_read_end_io as
> the call to page_endio() is removed and to associate the callback to the
> operation it is used in.
Since zram removed the rw_page and IO comes with bio from now on,
IIUC, we are fine since every IO will go with chained-IO. Right?
>
> Signed-off-by: Pankaj Raghav <[email protected]>
> ---
> drivers/block/zram/zram_drv.c | 8 ++------
> 1 file changed, 2 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> index b7bb52f8dfbd..3300e7eda2f6 100644
> --- a/drivers/block/zram/zram_drv.c
> +++ b/drivers/block/zram/zram_drv.c
> @@ -606,12 +606,8 @@ static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
> atomic64_dec(&zram->stats.bd_count);
> }
>
> -static void zram_page_end_io(struct bio *bio)
> +static void zram_read_end_io(struct bio *bio)
> {
> - struct page *page = bio_first_page_all(bio);
> -
> - page_endio(page, op_is_write(bio_op(bio)),
> - blk_status_to_errno(bio->bi_status));
> bio_put(bio);
> }
>
> @@ -635,7 +631,7 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
> }
>
> if (!parent)
> - bio->bi_end_io = zram_page_end_io;
> + bio->bi_end_io = zram_read_end_io;
> else
> bio_chain(bio, parent);
>
> --
> 2.34.1
>
On Thu, Mar 30, 2023 at 03:51:54PM -0700, Minchan Kim wrote:
> > to remove the call to page_endio() function that unlocks or marks
> > writeback end on the page.
> >
> > Rename the endio handler from zram_page_end_io to zram_read_end_io as
> > the call to page_endio() is removed and to associate the callback to the
> > operation it is used in.
>
> Since zram removed the rw_page and IO comes with bio from now on,
> IIUC, we are fine since every IO will go with chained-IO. Right?
writeback_store calls zram_bvec_read with a NULL bio, that is it just
fires off an async read without any synchronization.
On Thu, Mar 30, 2023 at 04:16:25PM -0700, Christoph Hellwig wrote:
> On Thu, Mar 30, 2023 at 03:51:54PM -0700, Minchan Kim wrote:
> > > to remove the call to page_endio() function that unlocks or marks
> > > writeback end on the page.
> > >
> > > Rename the endio handler from zram_page_end_io to zram_read_end_io as
> > > the call to page_endio() is removed and to associate the callback to the
> > > operation it is used in.
> >
> > Since zram removed the rw_page and IO comes with bio from now on,
> > IIUC, we are fine since every IO will go with chained-IO. Right?
>
> writeback_store calls zram_bvec_read with a NULL bio, that is it just
> fires off an async read without any synchronization.
It should go under zram_read_from_zspool, not zram_bvec_read_from_bdev.
The ZRAM_UNDER_WB and ZRAM_WB under zram_slot_lock should synchronize.
On 2023-03-31 00:51, Minchan Kim wrote:
> On Tue, Mar 28, 2023 at 01:27:12PM +0200, Pankaj Raghav wrote:
>> zram_page_end_io function is called when alloc_page is used (for
>> partial IO) to trigger writeback from the user space. The pages used for
>
> No, it was used with zram_rw_page since rw_page didn't carry the bio.
>
>> this operation is never locked or have the writeback set. So, it is safe
>
> VM had the page lock and wait to unlock.
>
>> to remove the call to page_endio() function that unlocks or marks
>> writeback end on the page.
>>
>> Rename the endio handler from zram_page_end_io to zram_read_end_io as
>> the call to page_endio() is removed and to associate the callback to the
>> operation it is used in.
>
I revisited the code again. Let me know if I got it right.
When we trigger writeback, we will always call zram_bvec_read() only if
ZRAM_WB is not set. That means we will only call zram_read_from_zspool() in
__zram_bvec_read when parent bio set to NULL.
static ssize_t writeback_store(struct device *dev, ...
{
if (zram_test_flag(zram, index, ZRAM_WB) ||
zram_test_flag(zram, index, ZRAM_SAME) ||
zram_test_flag(zram, index, ZRAM_UNDER_WB))
goto next;
...
if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
...
}
static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
struct bio *bio, bool partial_io)
{
....
if (!zram_test_flag(zram, index, ZRAM_WB)) {
/* Slot should be locked through out the function call */
ret = zram_read_from_zspool(zram, page, index);
zram_slot_unlock(zram, index);
} else {
/* Slot should be unlocked before the function call */
zram_slot_unlock(zram, index);
ret = zram_bvec_read_from_bdev(zram, page, index, bio,
partial_io);
}
....
}
> Since zram removed the rw_page and IO comes with bio from now on,
> IIUC, we are fine since every IO will go with chained-IO. Right?
>
We will never call zram_bvec_read_from_bdev() with parent bio set to NULL. IOW, we will always
only hit the bio_chain case in read_from_bdev_async. So we could do the following?:
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index b7bb52f8dfbd..2341f4009b0f 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -606,15 +606,6 @@ static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
atomic64_dec(&zram->stats.bd_count);
}
-static void zram_page_end_io(struct bio *bio)
-{
- struct page *page = bio_first_page_all(bio);
-
- page_endio(page, op_is_write(bio_op(bio)),
- blk_status_to_errno(bio->bi_status));
- bio_put(bio);
-}
-
/*
* Returns 1 if the submission is successful.
*/
@@ -634,9 +625,7 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
return -EIO;
}
- if (!parent)
- bio->bi_end_io = zram_page_end_io;
- else
+ if (parent)
bio_chain(bio, parent);
submit_bio(bio);