From: ZhangPeng <[email protected]>
This patch series converts several functions in page_io.c to use a
folio, which removes several implicit calls to compound_head().
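For context, the page-based flag helpers resolve the head page on every
call, while the folio helpers operate on an already-resolved folio. A
simplified sketch of the difference (hypothetical helper names, not the
exact upstream definitions):

    /* Page API: every call hides a compound_head() lookup. */
    static inline void set_page_flag_bit(struct page *page, int bit)
    {
            set_bit(bit, &compound_head(page)->flags);
    }

    /* Folio API: page_folio() was done once by the caller. */
    static inline void set_folio_flag_bit(struct folio *folio, int bit)
    {
            set_bit(bit, &folio->flags);
    }

Calling page_folio() once and passing the folio around is what saves
the repeated lookups in the patches below.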
ZhangPeng (6):
mm/page_io: use a folio in __end_swap_bio_read()
mm/page_io: use a folio in sio_read_complete()
mm/page_io: use a folio in swap_writepage_bdev_sync()
mm/page_io: use a folio in swap_writepage_bdev_async()
mm/page_io: convert count_swpout_vm_event() to take in a folio
mm/page_io: convert bio_associate_blkg_from_page() to take in a folio
mm/page_io.c | 56 +++++++++++++++++++++++++++-------------------------
1 file changed, 29 insertions(+), 27 deletions(-)
--
2.25.1
From: ZhangPeng <[email protected]>
Saves one implicit call to compound_head().
Signed-off-by: ZhangPeng <[email protected]>
---
mm/page_io.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/mm/page_io.c b/mm/page_io.c
index f24f814b8dca..84cc9e652451 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -355,6 +355,7 @@ static void swap_writepage_bdev_async(struct page *page,
struct writeback_control *wbc, struct swap_info_struct *sis)
{
struct bio *bio;
+ struct folio *folio = page_folio(page);
bio = bio_alloc(sis->bdev, 1,
REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc),
@@ -365,8 +366,8 @@ static void swap_writepage_bdev_async(struct page *page,
bio_associate_blkg_from_page(bio, page);
count_swpout_vm_event(page);
- set_page_writeback(page);
- unlock_page(page);
+ folio_start_writeback(folio);
+ folio_unlock(folio);
submit_bio(bio);
}
--
2.25.1
From: ZhangPeng <[email protected]>
Saves three implicit calls to compound_head().
Signed-off-by: ZhangPeng <[email protected]>
---
mm/page_io.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/mm/page_io.c b/mm/page_io.c
index 684cd3c7b59b..ebf431e5f538 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -58,18 +58,18 @@ static void end_swap_bio_write(struct bio *bio)
static void __end_swap_bio_read(struct bio *bio)
{
- struct page *page = bio_first_page_all(bio);
+ struct folio *folio = page_folio(bio_first_page_all(bio));
if (bio->bi_status) {
- SetPageError(page);
- ClearPageUptodate(page);
+ folio_set_error(folio);
+ folio_clear_uptodate(folio);
pr_alert_ratelimited("Read-error on swap-device (%u:%u:%llu)\n",
MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
(unsigned long long)bio->bi_iter.bi_sector);
} else {
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
}
- unlock_page(page);
+ folio_unlock(folio);
}
static void end_swap_bio_read(struct bio *bio)
--
2.25.1
From: ZhangPeng <[email protected]>
Convert bio_associate_blkg_from_page() to take in a folio. We can remove
two implicit calls to compound_head() by taking in a folio.
Signed-off-by: ZhangPeng <[email protected]>
---
mm/page_io.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/mm/page_io.c b/mm/page_io.c
index 66e8403bf96e..3c1fede46bd9 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -218,12 +218,12 @@ static inline void count_swpout_vm_event(struct folio *folio)
}
#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
-static void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
+static void bio_associate_blkg_from_page(struct bio *bio, struct folio *folio)
{
struct cgroup_subsys_state *css;
struct mem_cgroup *memcg;
- memcg = page_memcg(page);
+ memcg = folio_memcg(folio);
if (!memcg)
return;
@@ -233,7 +233,7 @@ static void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
rcu_read_unlock();
}
#else
-#define bio_associate_blkg_from_page(bio, page) do { } while (0)
+#define bio_associate_blkg_from_page(bio, folio) do { } while (0)
#endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */
struct swap_iocb {
@@ -341,7 +341,7 @@ static void swap_writepage_bdev_sync(struct page *page,
bio.bi_iter.bi_sector = swap_page_sector(page);
__bio_add_page(&bio, page, thp_size(page), 0);
- bio_associate_blkg_from_page(&bio, page);
+ bio_associate_blkg_from_page(&bio, folio);
count_swpout_vm_event(folio);
folio_start_writeback(folio);
@@ -364,7 +364,7 @@ static void swap_writepage_bdev_async(struct page *page,
bio->bi_end_io = end_swap_bio_write;
__bio_add_page(bio, page, thp_size(page), 0);
- bio_associate_blkg_from_page(bio, page);
+ bio_associate_blkg_from_page(bio, folio);
count_swpout_vm_event(folio);
folio_start_writeback(folio);
folio_unlock(folio);
--
2.25.1
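Taken together with patches 4 and 5, swap_writepage_bdev_async() ends
up reading roughly as follows (reconstructed from the hunks above;
details such as the GFP flags are from memory and may differ):

    static void swap_writepage_bdev_async(struct page *page,
                    struct writeback_control *wbc, struct swap_info_struct *sis)
    {
            struct bio *bio;
            struct folio *folio = page_folio(page);

            bio = bio_alloc(sis->bdev, 1,
                            REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc),
                            GFP_NOIO);
            bio->bi_iter.bi_sector = swap_page_sector(page);
            bio->bi_end_io = end_swap_bio_write;
            __bio_add_page(bio, page, thp_size(page), 0);

            bio_associate_blkg_from_page(bio, folio);
            count_swpout_vm_event(folio);
            folio_start_writeback(folio);
            folio_unlock(folio);
            submit_bio(bio);
    }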
On Mon, Jul 17, 2023 at 09:26:00PM +0800, Peng Zhang wrote:
> From: ZhangPeng <[email protected]>
>
> Saves one implicit call to compound_head().
>
> Signed-off-by: ZhangPeng <[email protected]>
Reviewed-by: Matthew Wilcox (Oracle) <[email protected]>
From: ZhangPeng <[email protected]>
Saves three implicit calls to compound_head().
Signed-off-by: ZhangPeng <[email protected]>
---
mm/page_io.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/mm/page_io.c b/mm/page_io.c
index ebf431e5f538..438d0c7c2194 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -406,19 +406,19 @@ static void sio_read_complete(struct kiocb *iocb, long ret)
if (ret == sio->len) {
for (p = 0; p < sio->pages; p++) {
- struct page *page = sio->bvec[p].bv_page;
+ struct folio *folio = page_folio(sio->bvec[p].bv_page);
- SetPageUptodate(page);
- unlock_page(page);
+ folio_mark_uptodate(folio);
+ folio_unlock(folio);
}
count_vm_events(PSWPIN, sio->pages);
} else {
for (p = 0; p < sio->pages; p++) {
- struct page *page = sio->bvec[p].bv_page;
+ struct folio *folio = page_folio(sio->bvec[p].bv_page);
- SetPageError(page);
- ClearPageUptodate(page);
- unlock_page(page);
+ folio_set_error(folio);
+ folio_clear_uptodate(folio);
+ folio_unlock(folio);
}
pr_alert_ratelimited("Read-error on swap-device\n");
}
--
2.25.1
On Mon, Jul 17, 2023 at 09:25:57PM +0800, Peng Zhang wrote:
> +++ b/mm/page_io.c
> @@ -58,18 +58,18 @@ static void end_swap_bio_write(struct bio *bio)
>
> static void __end_swap_bio_read(struct bio *bio)
> {
> - struct page *page = bio_first_page_all(bio);
> + struct folio *folio = page_folio(bio_first_page_all(bio));
Should we have a bio_first_folio_all()? It wouldn't save any calls to
compound_head(), but it's slightly easier to use.
> if (bio->bi_status) {
> - SetPageError(page);
> - ClearPageUptodate(page);
> + folio_set_error(folio);
I appreciate this is a 1:1 conversion, but maybe we could think about
this a bit. Is there anybody who checks the
PageError()/folio_test_error() for this page/folio?
> + folio_clear_uptodate(folio);
Similarly ... surely the folio is already !uptodate, so we don't need to
clear the flag?
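A bio_first_folio_all() along the lines suggested above could be as
small as this (sketch, assuming it sits next to bio_first_page_all()
in include/linux/bio.h):

    static inline struct folio *bio_first_folio_all(struct bio *bio)
    {
            return page_folio(bio_first_page_all(bio));
    }

It doesn't save a compound_head() call, but it keeps the page_folio()
noise out of the completion handler.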
On Mon, Jul 17, 2023 at 09:26:02PM +0800, Peng Zhang wrote:
> Convert bio_associate_blkg_from_page() to take in a folio. We can remove
> two implicit calls to compound_head() by taking in a folio.
>
> Signed-off-by: ZhangPeng <[email protected]>
Reviewed-by: Matthew Wilcox (Oracle) <[email protected]>
On Mon, Jul 17, 2023 at 09:25:58PM +0800, Peng Zhang wrote:
> +++ b/mm/page_io.c
> @@ -406,19 +406,19 @@ static void sio_read_complete(struct kiocb *iocb, long ret)
>
> if (ret == sio->len) {
> for (p = 0; p < sio->pages; p++) {
> - struct page *page = sio->bvec[p].bv_page;
> + struct folio *folio = page_folio(sio->bvec[p].bv_page);
>
> - SetPageUptodate(page);
> - unlock_page(page);
> + folio_mark_uptodate(folio);
> + folio_unlock(folio);
> }
I'm kind of shocked this works today. Usually bvecs coalesce adjacent
pages into a single entry, so you need to use a real iterator like
bio_for_each_folio_all() to extract individual pages from a bvec.
Maybe the sio bvec is constructed inefficiently.
I think Kent had some bvec folio iterators in progress?
> count_vm_events(PSWPIN, sio->pages);
> } else {
> for (p = 0; p < sio->pages; p++) {
> - struct page *page = sio->bvec[p].bv_page;
> + struct folio *folio = page_folio(sio->bvec[p].bv_page);
>
> - SetPageError(page);
> - ClearPageUptodate(page);
> - unlock_page(page);
> + folio_set_error(folio);
> + folio_clear_uptodate(folio);
> + folio_unlock(folio);
Similar questions to the last patch -- who checks the error flag on this
page/folio, and isn't the folio already !uptodate?
> }
> pr_alert_ratelimited("Read-error on swap-device\n");
> }
> --
> 2.25.1
>
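For bios proper, the folio-aware iteration mentioned above would look
roughly like this (sketch; the sio completion is a kiocb path, not a
bio, so this does not directly apply to sio_read_complete()):

    struct folio_iter fi;

    bio_for_each_folio_all(fi, bio) {
            /* fi.folio, fi.offset and fi.length describe each folio's range */
            folio_mark_uptodate(fi.folio);
            folio_unlock(fi.folio);
    }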
From: ZhangPeng <[email protected]>
Convert count_swpout_vm_event() to take in a folio. We can remove five
implicit calls to compound_head() by taking in a folio.
Signed-off-by: ZhangPeng <[email protected]>
---
mm/page_io.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/mm/page_io.c b/mm/page_io.c
index 84cc9e652451..66e8403bf96e 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -208,13 +208,13 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
return 0;
}
-static inline void count_swpout_vm_event(struct page *page)
+static inline void count_swpout_vm_event(struct folio *folio)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (unlikely(PageTransHuge(page)))
+ if (unlikely(folio_test_large(folio)))
count_vm_event(THP_SWPOUT);
#endif
- count_vm_events(PSWPOUT, thp_nr_pages(page));
+ count_vm_events(PSWPOUT, folio_nr_pages(folio));
}
#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
@@ -283,7 +283,7 @@ static void sio_write_complete(struct kiocb *iocb, long ret)
}
} else {
for (p = 0; p < sio->pages; p++)
- count_swpout_vm_event(sio->bvec[p].bv_page);
+ count_swpout_vm_event(page_folio(sio->bvec[p].bv_page));
}
for (p = 0; p < sio->pages; p++)
@@ -342,7 +342,7 @@ static void swap_writepage_bdev_sync(struct page *page,
__bio_add_page(&bio, page, thp_size(page), 0);
bio_associate_blkg_from_page(&bio, page);
- count_swpout_vm_event(page);
+ count_swpout_vm_event(folio);
folio_start_writeback(folio);
folio_unlock(folio);
@@ -365,7 +365,7 @@ static void swap_writepage_bdev_async(struct page *page,
__bio_add_page(bio, page, thp_size(page), 0);
bio_associate_blkg_from_page(bio, page);
- count_swpout_vm_event(page);
+ count_swpout_vm_event(folio);
folio_start_writeback(folio);
folio_unlock(folio);
submit_bio(bio);
--
2.25.1
On Mon, Jul 17, 2023 at 09:26:01PM +0800, Peng Zhang wrote:
> -static inline void count_swpout_vm_event(struct page *page)
> +static inline void count_swpout_vm_event(struct folio *folio)
> {
> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> - if (unlikely(PageTransHuge(page)))
> + if (unlikely(folio_test_large(folio)))
> count_vm_event(THP_SWPOUT);
> #endif
Since this is a THP_SWPOUT event, we should do this as:
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (unlikely(PageTransHuge(page)))
+ if (folio_test_pmd_mappable(folio))
count_vm_event(THP_SWPOUT);
-#endif
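With that folded in, the helper might read as below (sketch; whether
the #ifdef can really go away depends on folio_test_pmd_mappable()
being usable without CONFIG_TRANSPARENT_HUGEPAGE):

    static inline void count_swpout_vm_event(struct folio *folio)
    {
            if (folio_test_pmd_mappable(folio))
                    count_vm_event(THP_SWPOUT);
            count_vm_events(PSWPOUT, folio_nr_pages(folio));
    }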
On 2023/7/17 21:34, Matthew Wilcox wrote:
> On Mon, Jul 17, 2023 at 09:25:57PM +0800, Peng Zhang wrote:
>> +++ b/mm/page_io.c
>> @@ -58,18 +58,18 @@ static void end_swap_bio_write(struct bio *bio)
>>
>> static void __end_swap_bio_read(struct bio *bio)
>> {
>> - struct page *page = bio_first_page_all(bio);
>> + struct folio *folio = page_folio(bio_first_page_all(bio));
> Should we have a bio_first_folio_all()? It wouldn't save any calls to
> compound_head(), but it's slightly easier to use.
Yes, I'll convert bio_first_page_all() to bio_first_folio_all() in a v2.
Thanks.
>> if (bio->bi_status) {
>> - SetPageError(page);
>> - ClearPageUptodate(page);
>> + folio_set_error(folio);
> I appreciate this is a 1:1 conversion, but maybe we could think about
> this a bit. Is there anybody who checks the
> PageError()/folio_test_error() for this page/folio?
Maybe wait_dev_supers() checks the PageError() after write_dev_supers()
in fs/btrfs/disk-io.c?
>> + folio_clear_uptodate(folio);
> Similarly ... surely the folio is already !uptodate, so we don't need to
> clear the flag?
Indeed, the folio is already !uptodate. I'll drop this line in a v2.
Thanks for your feedback.
--
Best Regards,
Peng
On 2023/7/17 21:48, Matthew Wilcox wrote:
> On Mon, Jul 17, 2023 at 09:26:01PM +0800, Peng Zhang wrote:
>> -static inline void count_swpout_vm_event(struct page *page)
>> +static inline void count_swpout_vm_event(struct folio *folio)
>> {
>> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> - if (unlikely(PageTransHuge(page)))
>> + if (unlikely(folio_test_large(folio)))
>> count_vm_event(THP_SWPOUT);
>> #endif
> Since this is a THP_SWPOUT event, we should do this as:
>
> -#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> - if (unlikely(PageTransHuge(page)))
> + if (folio_test_pmd_mappable(folio))
> count_vm_event(THP_SWPOUT);
> -#endif
Agreed. I'll convert PageTransHuge to folio_test_pmd_mappable.
--
Best Regards,
Peng
On Tue, Jul 18, 2023 at 08:56:16PM +0800, zhangpeng (AS) wrote:
> > > if (bio->bi_status) {
> > > - SetPageError(page);
> > > - ClearPageUptodate(page);
> > > + folio_set_error(folio);
> > I appreciate this is a 1:1 conversion, but maybe we could think about
> > this a bit. Is there anybody who checks the
> > PageError()/folio_test_error() for this page/folio?
>
> Maybe wait_dev_supers() checks the PageError() after write_dev_supers()
> in fs/btrfs/disk-io.c?
How does _this_ folio end up in btrfs's write_dev_supers()? This is a
swap read. The only folios which are swapped are anonymous and tmpfs.
btrfs takes care of doing its own I/O. wait_dev_supers() is looking
for the error set in btrfs_end_super_write() which is the completion
routine for write_dev_supers(). The pages involved there are attached
to a btrfs address_space, not shmem or anon.
On Tue, Jul 18, 2023 at 08:58:17PM +0800, zhangpeng (AS) wrote:
> On 2023/7/17 21:40, Matthew Wilcox wrote:
>
> > On Mon, Jul 17, 2023 at 09:25:58PM +0800, Peng Zhang wrote:
> > > +++ b/mm/page_io.c
> > > @@ -406,19 +406,19 @@ static void sio_read_complete(struct kiocb *iocb, long ret)
> > > if (ret == sio->len) {
> > > for (p = 0; p < sio->pages; p++) {
> > > - struct page *page = sio->bvec[p].bv_page;
> > > + struct folio *folio = page_folio(sio->bvec[p].bv_page);
> > > - SetPageUptodate(page);
> > > - unlock_page(page);
> > > + folio_mark_uptodate(folio);
> > > + folio_unlock(folio);
> > > }
> > I'm kind of shocked this works today. Usually bvecs coalesce adjacent
> > pages into a single entry, so you need to use a real iterator like
> > bio_for_each_folio_all() to extract individual pages from a bvec.
> > Maybe the sio bvec is constructed inefficiently.
> >
> > I think Kent had some bvec folio iterators in progress?
>
> I'll convert bio_first_page_all() to bio_first_folio_all() in a v2.
That isn't my point at all. What I'm saying is that when you call a
function like bio_add_folio(), if @folio is physically adjacent to the
immediately prior folio already in the bio, it will extend the bv_len
instead of adding the new folio to the bvec. Maybe there's nothing like
that for sio.
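Concretely (illustrative sketch, assuming folio_a and folio_b are
physically contiguous and the bool return values are checked elsewhere):

    /* Two folios, two calls ... */
    bio_add_folio(bio, folio_a, folio_size(folio_a), 0);
    bio_add_folio(bio, folio_b, folio_size(folio_b), 0);
    /*
     * ... but possibly one bvec: if folio_b starts at the page
     * physically following folio_a, the second call extends bv_len
     * of the existing entry instead of adding a new one, so
     * bvec[p].bv_page no longer enumerates one folio per entry.
     */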
On 2023/7/19 0:16, Matthew Wilcox wrote:
> On Tue, Jul 18, 2023 at 08:56:16PM +0800, zhangpeng (AS) wrote:
>>>> if (bio->bi_status) {
>>>> - SetPageError(page);
>>>> - ClearPageUptodate(page);
>>>> + folio_set_error(folio);
>>> I appreciate this is a 1:1 conversion, but maybe we could think about
>>> this a bit. Is there anybody who checks the
>>> PageError()/folio_test_error() for this page/folio?
>> Maybe wait_dev_supers() checks the PageError() after write_dev_supers()
>> in fs/btrfs/disk-io.c?
> How does _this_ folio end up in btrfs's write_dev_supers()? This is a
> swap read. The only folios which are swapped are anonymous and tmpfs.
> btrfs takes care of doing its own I/O. wait_dev_supers() is looking
> for the error set in btrfs_end_super_write() which is the completion
> routine for write_dev_supers(). The pages involved there are attached
> to a btrfs address_space, not shmem or anon.
Thanks for your explanation!
Then I think nobody checks the PageError()/folio_test_error() for the page
in patch 1 and patch 2. I'll delete SetPageError() in a v2.
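Folding the feedback together (bio_first_folio_all(), no
folio_set_error(), no folio_clear_uptodate()), the v2 might look
roughly like this sketch:

    static void __end_swap_bio_read(struct bio *bio)
    {
            struct folio *folio = bio_first_folio_all(bio);

            if (bio->bi_status) {
                    pr_alert_ratelimited("Read-error on swap-device (%u:%u:%llu)\n",
                                         MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
                                         (unsigned long long)bio->bi_iter.bi_sector);
            } else {
                    folio_mark_uptodate(folio);
            }
            folio_unlock(folio);
    }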
--
Best Regards,
Peng
On Tue, Jul 18, 2023 at 05:16:15PM +0100, Matthew Wilcox wrote:
> How does _this_ folio end up in btrfs's write_dev_supers()? This is a
> swap read. The only folios which are swapped are anonymous and tmpfs.
> btrfs takes care of doing its own I/O. wait_dev_supers() is looking
> for the error set in btrfs_end_super_write() which is the completion
> routine for write_dev_supers(). The pages involved there are attached
> to a btrfs address_space, not shmem or anon.
It actually operates on the block_device inode. That does not matter
for this series, but complicates things in other ways.
On Mon, Jul 17, 2023 at 02:40:24PM +0100, Matthew Wilcox wrote:
> > for (p = 0; p < sio->pages; p++) {
> > - struct page *page = sio->bvec[p].bv_page;
> > + struct folio *folio = page_folio(sio->bvec[p].bv_page);
> >
> > - SetPageUptodate(page);
> > - unlock_page(page);
> > + folio_mark_uptodate(folio);
> > + folio_unlock(folio);
> > }
>
> I'm kind of shocked this works today. Usually bvecs coalesce adjacent
> pages into a single entry, so you need to use a real iterator like
> bio_for_each_folio_all() to extract individual pages from a bvec.
> Maybe the sio bvec is constructed inefficiently.
sio_read_complete is a kiocb.ki_complete handler. There is no
coalescing going on for ITER_BVEC iov_iters, which share nothing
but the underlying data structure with the block I/O path.
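For reference, the container these bvecs live in is built directly in
mm/page_io.c, one page per entry (sketched from memory; the exact field
layout may differ):

    struct swap_iocb {
            struct kiocb            iocb;
            struct bio_vec          bvec[SWAP_CLUSTER_MAX];
            int                     pages;  /* one bvec entry per page */
            int                     len;    /* total bytes in this sio */
    };

which is why the per-index walk in sio_read_complete() is safe today.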