From: Luis Chamberlain <[email protected]>
filemap_create_folio() and do_read_cache_folio() were always allocating
folio of order 0. __filemap_get_folio was trying to allocate higher
order folios when fgp_flags had higher order hint set but it will default
to order 0 folio if higher order memory allocation fails.
Supporting mapping_min_order implies that we guarantee each folio in the
page cache has at least an order of mapping_min_order. When adding new
folios to the page cache we must also ensure the index used is aligned to
the mapping_min_order as the page cache requires the index to be aligned
to the order of the folio.
Signed-off-by: Luis Chamberlain <[email protected]>
Co-developed-by: Pankaj Raghav <[email protected]>
Signed-off-by: Pankaj Raghav <[email protected]>
---
mm/filemap.c | 24 +++++++++++++++++-------
1 file changed, 17 insertions(+), 7 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 30de18c4fd28..f0c0cfbbd134 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -858,6 +858,8 @@ noinline int __filemap_add_folio(struct address_space *mapping,
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
+ VM_BUG_ON_FOLIO(folio_order(folio) < mapping_min_folio_order(mapping),
+ folio);
mapping_set_update(&xas, mapping);
if (!huge) {
@@ -1895,8 +1897,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
folio_wait_stable(folio);
no_page:
if (!folio && (fgp_flags & FGP_CREAT)) {
- unsigned order = FGF_GET_ORDER(fgp_flags);
+ unsigned int min_order = mapping_min_folio_order(mapping);
+ unsigned int order = max(min_order, FGF_GET_ORDER(fgp_flags));
int err;
+ index = mapping_align_start_index(mapping, index);
if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
gfp |= __GFP_WRITE;
@@ -1936,7 +1940,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
break;
folio_put(folio);
folio = NULL;
- } while (order-- > 0);
+ } while (order-- > min_order);
if (err == -EEXIST)
goto repeat;
@@ -2425,13 +2429,16 @@ static int filemap_update_page(struct kiocb *iocb,
}
static int filemap_create_folio(struct file *file,
- struct address_space *mapping, pgoff_t index,
+ struct address_space *mapping, loff_t pos,
struct folio_batch *fbatch)
{
struct folio *folio;
int error;
+ unsigned int min_order = mapping_min_folio_order(mapping);
+ pgoff_t index;
- folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0);
+ folio = filemap_alloc_folio(mapping_gfp_mask(mapping),
+ min_order);
if (!folio)
return -ENOMEM;
@@ -2449,6 +2456,8 @@ static int filemap_create_folio(struct file *file,
* well to keep locking rules simple.
*/
filemap_invalidate_lock_shared(mapping);
+ /* index in PAGE units but aligned to min_order number of pages. */
+ index = (pos >> (PAGE_SHIFT + min_order)) << min_order;
error = filemap_add_folio(mapping, folio, index,
mapping_gfp_constraint(mapping, GFP_KERNEL));
if (error == -EEXIST)
@@ -2509,8 +2518,7 @@ static int filemap_get_pages(struct kiocb *iocb, size_t count,
if (!folio_batch_count(fbatch)) {
if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
return -EAGAIN;
- err = filemap_create_folio(filp, mapping,
- iocb->ki_pos >> PAGE_SHIFT, fbatch);
+ err = filemap_create_folio(filp, mapping, iocb->ki_pos, fbatch);
if (err == AOP_TRUNCATED_PAGE)
goto retry;
return err;
@@ -3708,9 +3716,11 @@ static struct folio *do_read_cache_folio(struct address_space *mapping,
repeat:
folio = filemap_get_folio(mapping, index);
if (IS_ERR(folio)) {
- folio = filemap_alloc_folio(gfp, 0);
+ folio = filemap_alloc_folio(gfp,
+ mapping_min_folio_order(mapping));
if (!folio)
return ERR_PTR(-ENOMEM);
+ index = mapping_align_start_index(mapping, index);
err = filemap_add_folio(mapping, folio, index, gfp);
if (unlikely(err)) {
folio_put(folio);
--
2.34.1
On 4/25/24 13:37, Pankaj Raghav (Samsung) wrote:
> From: Luis Chamberlain <[email protected]>
>
> filemap_create_folio() and do_read_cache_folio() were always allocating
> folio of order 0. __filemap_get_folio was trying to allocate higher
> order folios when fgp_flags had higher order hint set but it will default
> to order 0 folio if higher order memory allocation fails.
>
> Supporting mapping_min_order implies that we guarantee each folio in the
> page cache has at least an order of mapping_min_order. When adding new
> folios to the page cache we must also ensure the index used is aligned to
> the mapping_min_order as the page cache requires the index to be aligned
> to the order of the folio.
>
> Signed-off-by: Luis Chamberlain <[email protected]>
> Co-developed-by: Pankaj Raghav <[email protected]>
> Signed-off-by: Pankaj Raghav <[email protected]>
> ---
> mm/filemap.c | 24 +++++++++++++++++-------
> 1 file changed, 17 insertions(+), 7 deletions(-)
>
Reviewed-by: Hannes Reinecke <[email protected]>
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
[email protected] +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
On Thu, Apr 25, 2024 at 01:37:38PM +0200, Pankaj Raghav (Samsung) wrote:
> From: Luis Chamberlain <[email protected]>
>
> filemap_create_folio() and do_read_cache_folio() were always allocating
> folio of order 0. __filemap_get_folio was trying to allocate higher
> order folios when fgp_flags had higher order hint set but it will default
> to order 0 folio if higher order memory allocation fails.
>
> Supporting mapping_min_order implies that we guarantee each folio in the
> page cache has at least an order of mapping_min_order. When adding new
> folios to the page cache we must also ensure the index used is aligned to
> the mapping_min_order as the page cache requires the index to be aligned
> to the order of the folio.
If we cannot find a folio of at least min_order size, what error is sent
back?
If the answer is "the same error that you get if we cannot allocate a
base page today (aka ENOMEM)", then I think I understand this enough to
say
Reviewed-by: Darrick J. Wong <[email protected]>
--D
> Signed-off-by: Luis Chamberlain <[email protected]>
> Co-developed-by: Pankaj Raghav <[email protected]>
> Signed-off-by: Pankaj Raghav <[email protected]>
> ---
> mm/filemap.c | 24 +++++++++++++++++-------
> 1 file changed, 17 insertions(+), 7 deletions(-)
>
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 30de18c4fd28..f0c0cfbbd134 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -858,6 +858,8 @@ noinline int __filemap_add_folio(struct address_space *mapping,
>
> VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
> VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
> + VM_BUG_ON_FOLIO(folio_order(folio) < mapping_min_folio_order(mapping),
> + folio);
> mapping_set_update(&xas, mapping);
>
> if (!huge) {
> @@ -1895,8 +1897,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> folio_wait_stable(folio);
> no_page:
> if (!folio && (fgp_flags & FGP_CREAT)) {
> - unsigned order = FGF_GET_ORDER(fgp_flags);
> + unsigned int min_order = mapping_min_folio_order(mapping);
> + unsigned int order = max(min_order, FGF_GET_ORDER(fgp_flags));
> int err;
> + index = mapping_align_start_index(mapping, index);
>
> if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
> gfp |= __GFP_WRITE;
> @@ -1936,7 +1940,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> break;
> folio_put(folio);
> folio = NULL;
> - } while (order-- > 0);
> + } while (order-- > min_order);
>
> if (err == -EEXIST)
> goto repeat;
> @@ -2425,13 +2429,16 @@ static int filemap_update_page(struct kiocb *iocb,
> }
>
> static int filemap_create_folio(struct file *file,
> - struct address_space *mapping, pgoff_t index,
> + struct address_space *mapping, loff_t pos,
> struct folio_batch *fbatch)
> {
> struct folio *folio;
> int error;
> + unsigned int min_order = mapping_min_folio_order(mapping);
> + pgoff_t index;
>
> - folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0);
> + folio = filemap_alloc_folio(mapping_gfp_mask(mapping),
> + min_order);
> if (!folio)
> return -ENOMEM;
>
> @@ -2449,6 +2456,8 @@ static int filemap_create_folio(struct file *file,
> * well to keep locking rules simple.
> */
> filemap_invalidate_lock_shared(mapping);
> + /* index in PAGE units but aligned to min_order number of pages. */
> + index = (pos >> (PAGE_SHIFT + min_order)) << min_order;
> error = filemap_add_folio(mapping, folio, index,
> mapping_gfp_constraint(mapping, GFP_KERNEL));
> if (error == -EEXIST)
> @@ -2509,8 +2518,7 @@ static int filemap_get_pages(struct kiocb *iocb, size_t count,
> if (!folio_batch_count(fbatch)) {
> if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
> return -EAGAIN;
> - err = filemap_create_folio(filp, mapping,
> - iocb->ki_pos >> PAGE_SHIFT, fbatch);
> + err = filemap_create_folio(filp, mapping, iocb->ki_pos, fbatch);
> if (err == AOP_TRUNCATED_PAGE)
> goto retry;
> return err;
> @@ -3708,9 +3716,11 @@ static struct folio *do_read_cache_folio(struct address_space *mapping,
> repeat:
> folio = filemap_get_folio(mapping, index);
> if (IS_ERR(folio)) {
> - folio = filemap_alloc_folio(gfp, 0);
> + folio = filemap_alloc_folio(gfp,
> + mapping_min_folio_order(mapping));
> if (!folio)
> return ERR_PTR(-ENOMEM);
> + index = mapping_align_start_index(mapping, index);
> err = filemap_add_folio(mapping, folio, index, gfp);
> if (unlikely(err)) {
> folio_put(folio);
> --
> 2.34.1
>
>
On Fri, Apr 26, 2024 at 08:12:43AM -0700, Darrick J. Wong wrote:
> On Thu, Apr 25, 2024 at 01:37:38PM +0200, Pankaj Raghav (Samsung) wrote:
> > From: Luis Chamberlain <[email protected]>
> >
> > filemap_create_folio() and do_read_cache_folio() were always allocating
> > folio of order 0. __filemap_get_folio was trying to allocate higher
> > order folios when fgp_flags had higher order hint set but it will default
> > to order 0 folio if higher order memory allocation fails.
> >
> > Supporting mapping_min_order implies that we guarantee each folio in the
> > page cache has at least an order of mapping_min_order. When adding new
> > folios to the page cache we must also ensure the index used is aligned to
> > the mapping_min_order as the page cache requires the index to be aligned
> > to the order of the folio.
>
> If we cannot find a folio of at least min_order size, what error is sent
> back?
>
> If the answer is "the same error that you get if we cannot allocate a
> base page today (aka ENOMEM)", then I think I understand this enough to
> say
>
> Reviewed-by: Darrick J. Wong <[email protected]>
Yes. We will get a ENOMEM if we cannot allocate min_order size folio. :)
Thanks!
>
> --D
>
> > Signed-off-by: Luis Chamberlain <[email protected]>
> > Co-developed-by: Pankaj Raghav <[email protected]>
> > Signed-off-by: Pankaj Raghav <[email protected]>
> > ---
> > mm/filemap.c | 24 +++++++++++++++++-------
> > 1 file changed, 17 insertions(+), 7 deletions(-)
> >
> > diff --git a/mm/filemap.c b/mm/filemap.c
> > index 30de18c4fd28..f0c0cfbbd134 100644
> > --- a/mm/filemap.c
> > +++ b/mm/filemap.c
> > @@ -858,6 +858,8 @@ noinline int __filemap_add_folio(struct address_space *mapping,
> >
> > VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
> > VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
> > + VM_BUG_ON_FOLIO(folio_order(folio) < mapping_min_folio_order(mapping),
> > + folio);
> > mapping_set_update(&xas, mapping);
> >
> > if (!huge) {
> > @@ -1895,8 +1897,10 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> > folio_wait_stable(folio);
> > no_page:
> > if (!folio && (fgp_flags & FGP_CREAT)) {
> > - unsigned order = FGF_GET_ORDER(fgp_flags);
> > + unsigned int min_order = mapping_min_folio_order(mapping);
> > + unsigned int order = max(min_order, FGF_GET_ORDER(fgp_flags));
> > int err;
> > + index = mapping_align_start_index(mapping, index);
> >
> > if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
> > gfp |= __GFP_WRITE;
> > @@ -1936,7 +1940,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> > break;
> > folio_put(folio);
> > folio = NULL;
> > - } while (order-- > 0);
> > + } while (order-- > min_order);
> >
> > if (err == -EEXIST)
> > goto repeat;
> > @@ -2425,13 +2429,16 @@ static int filemap_update_page(struct kiocb *iocb,
> > }
> >
> > static int filemap_create_folio(struct file *file,
> > - struct address_space *mapping, pgoff_t index,
> > + struct address_space *mapping, loff_t pos,
> > struct folio_batch *fbatch)
> > {
> > struct folio *folio;
> > int error;
> > + unsigned int min_order = mapping_min_folio_order(mapping);
> > + pgoff_t index;
> >
> > - folio = filemap_alloc_folio(mapping_gfp_mask(mapping), 0);
> > + folio = filemap_alloc_folio(mapping_gfp_mask(mapping),
> > + min_order);
> > if (!folio)
> > return -ENOMEM;
> >
> > @@ -2449,6 +2456,8 @@ static int filemap_create_folio(struct file *file,
> > * well to keep locking rules simple.
> > */
> > filemap_invalidate_lock_shared(mapping);
> > + /* index in PAGE units but aligned to min_order number of pages. */
> > + index = (pos >> (PAGE_SHIFT + min_order)) << min_order;
> > error = filemap_add_folio(mapping, folio, index,
> > mapping_gfp_constraint(mapping, GFP_KERNEL));
> > if (error == -EEXIST)
> > @@ -2509,8 +2518,7 @@ static int filemap_get_pages(struct kiocb *iocb, size_t count,
> > if (!folio_batch_count(fbatch)) {
> > if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
> > return -EAGAIN;
> > - err = filemap_create_folio(filp, mapping,
> > - iocb->ki_pos >> PAGE_SHIFT, fbatch);
> > + err = filemap_create_folio(filp, mapping, iocb->ki_pos, fbatch);
> > if (err == AOP_TRUNCATED_PAGE)
> > goto retry;
> > return err;
> > @@ -3708,9 +3716,11 @@ static struct folio *do_read_cache_folio(struct address_space *mapping,
> > repeat:
> > folio = filemap_get_folio(mapping, index);
> > if (IS_ERR(folio)) {
> > - folio = filemap_alloc_folio(gfp, 0);
> > + folio = filemap_alloc_folio(gfp,
> > + mapping_min_folio_order(mapping));
> > if (!folio)
> > return ERR_PTR(-ENOMEM);
> > + index = mapping_align_start_index(mapping, index);
> > err = filemap_add_folio(mapping, folio, index, gfp);
> > if (unlikely(err)) {
> > folio_put(folio);
> > --
> > 2.34.1
> >
> >