LinuxLists.cc - [PATCH 5/6] mm/page_alloc: Explicitly define how __GFP_HIGH non-blocking allocations accesses reserves

2023-01-13 11:53:40

Subject: [PATCH 5/6] mm/page_alloc: Explicitly define how __GFP_HIGH non-blocking allocations accesses reserves

GFP_ATOMIC allocations get flagged ALLOC_HARDER, which is a vague
description. In preparation for the removal of __GFP_ATOMIC, redefine
__GFP_ATOMIC to simply mean non-blocking and rename ALLOC_HARDER to
ALLOC_NON_BLOCK accordingly. __GFP_HIGH is required for access to reserves,
but a non-blocking __GFP_HIGH allocation is granted more access. For
example, GFP_NOWAIT is non-blocking but has no special access to reserves.
A __GFP_NOFAIL blocking allocation is granted access similar to __GFP_HIGH
if the only alternative is an OOM kill.

Signed-off-by: Mel Gorman <[email protected]>
---
mm/internal.h | 7 +++++--
mm/page_alloc.c | 44 ++++++++++++++++++++++++--------------------
2 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 8706d46863df..23a37588073a 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -735,7 +735,10 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
#define ALLOC_OOM ALLOC_NO_WATERMARKS
#endif

-#define ALLOC_HARDER 0x10 /* try to alloc harder */
+#define ALLOC_NON_BLOCK 0x10 /* Caller cannot block. Allow access
+ * to 25% of the min watermark or
+ * 62.5% if __GFP_HIGH is set.
+ */
#define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50%
* of the min watermark.
*/
@@ -750,7 +753,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
#define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */

/* Flags that allow allocations below the min watermark. */
-#define ALLOC_RESERVES (ALLOC_HARDER|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
+#define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)

enum ttu_flags;
struct tlbflush_unmap_batch;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6f41b84a97ac..b9ae0ba0a2ab 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3989,18 +3989,19 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
* __GFP_HIGH allows access to 50% of the min reserve as well
* as OOM.
*/
- if (alloc_flags & ALLOC_MIN_RESERVE)
+ if (alloc_flags & ALLOC_MIN_RESERVE) {
min -= min / 2;

- /*
- * Non-blocking allocations can access some of the reserve
- * with more access if also __GFP_HIGH. The reasoning is that
- * a non-blocking caller may incur a more severe penalty
- * if it cannot get memory quickly, particularly if it's
- * also __GFP_HIGH.
- */
- if (alloc_flags & ALLOC_HARDER)
- min -= min / 4;
+ /*
+ * Non-blocking allocations (e.g. GFP_ATOMIC) can
+ * access more reserves than just __GFP_HIGH. Other
+ * non-blocking allocations requests such as GFP_NOWAIT
+ * or (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) do not get
+ * access to the min reserve.
+ */
+ if (alloc_flags & ALLOC_NON_BLOCK)
+ min -= min / 4;
+ }

/*
* OOM victims can try even harder than the normal reserve
@@ -4851,28 +4852,30 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
* The caller may dip into page reserves a bit more if the caller
* cannot run direct reclaim, or if the caller has realtime scheduling
* policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
- * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_MIN_RESERVE(__GFP_HIGH).
+ * set both ALLOC_NON_BLOCK and ALLOC_MIN_RESERVE(__GFP_HIGH).
*/
alloc_flags |= (__force int)
(gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM));

- if (gfp_mask & __GFP_ATOMIC) {
+ if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
/*
* Not worth trying to allocate harder for __GFP_NOMEMALLOC even
* if it can't schedule.
*/
if (!(gfp_mask & __GFP_NOMEMALLOC)) {
- alloc_flags |= ALLOC_HARDER;
+ alloc_flags |= ALLOC_NON_BLOCK;

if (order > 0)
alloc_flags |= ALLOC_HIGHATOMIC;
}

/*
- * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
- * comment for __cpuset_node_allowed().
+ * Ignore cpuset mems for non-blocking __GFP_HIGH (probably
+ * GFP_ATOMIC) rather than fail, see the comment for
+ * __cpuset_node_allowed().
*/
- alloc_flags &= ~ALLOC_CPUSET;
+ if (alloc_flags & ALLOC_MIN_RESERVE)
+ alloc_flags &= ~ALLOC_CPUSET;
} else if (unlikely(rt_task(current)) && in_task())
alloc_flags |= ALLOC_MIN_RESERVE;

@@ -5303,12 +5306,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
WARN_ON_ONCE_GFP(costly_order, gfp_mask);

/*
- * Help non-failing allocations by giving them access to memory
- * reserves but do not use ALLOC_NO_WATERMARKS because this
+ * Help non-failing allocations by giving some access to memory
+ * reserves normally used for high priority non-blocking
+ * allocations but do not use ALLOC_NO_WATERMARKS because this
* could deplete whole memory reserves which would just make
- * the situation worse
+ * the situation worse.
*/
- page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac);
+ page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_MIN_RESERVE, ac);
if (page)
goto got_pg;

--
2.35.3

2023-01-13 13:26:40

by Michal Hocko

[permalink] [raw]

Subject: Re: [PATCH 5/6] mm/page_alloc: Explicitly define how __GFP_HIGH non-blocking allocations accesses reserves

On Fri 13-01-23 11:12:16, Mel Gorman wrote:
> GFP_ATOMIC allocations get flagged ALLOC_HARDER which is a vague
> description. In preparation for the removal of GFP_ATOMIC redefine
> __GFP_ATOMIC to simply mean non-blocking and renaming ALLOC_HARDER to
> ALLOC_NON_BLOCK accordingly. __GFP_HIGH is required for access to reserves
> but non-blocking is granted more access. For example, GFP_NOWAIT is
> non-blocking but has no special access to reserves. A __GFP_NOFAIL
> blocking allocation is granted access similar to __GFP_HIGH if the
> only alternative is an OOM kill.
>
> Signed-off-by: Mel Gorman <[email protected]>

Acked-by: Michal Hocko <[email protected]>

Thanks!

> ---
> mm/internal.h | 7 +++++--
> mm/page_alloc.c | 44 ++++++++++++++++++++++++--------------------
> 2 files changed, 29 insertions(+), 22 deletions(-)
>
> diff --git a/mm/internal.h b/mm/internal.h
> index 8706d46863df..23a37588073a 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -735,7 +735,10 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
> #define ALLOC_OOM ALLOC_NO_WATERMARKS
> #endif
>
> -#define ALLOC_HARDER 0x10 /* try to alloc harder */
> +#define ALLOC_NON_BLOCK 0x10 /* Caller cannot block. Allow access
> + * to 25% of the min watermark or
> + * 62.5% if __GFP_HIGH is set.
> + */
> #define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50%
> * of the min watermark.
> */
> @@ -750,7 +753,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
> #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
>
> /* Flags that allow allocations below the min watermark. */
> -#define ALLOC_RESERVES (ALLOC_HARDER|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
> +#define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
>
> enum ttu_flags;
> struct tlbflush_unmap_batch;
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 6f41b84a97ac..b9ae0ba0a2ab 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -3989,18 +3989,19 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
> * __GFP_HIGH allows access to 50% of the min reserve as well
> * as OOM.
> */
> - if (alloc_flags & ALLOC_MIN_RESERVE)
> + if (alloc_flags & ALLOC_MIN_RESERVE) {
> min -= min / 2;
>
> - /*
> - * Non-blocking allocations can access some of the reserve
> - * with more access if also __GFP_HIGH. The reasoning is that
> - * a non-blocking caller may incur a more severe penalty
> - * if it cannot get memory quickly, particularly if it's
> - * also __GFP_HIGH.
> - */
> - if (alloc_flags & ALLOC_HARDER)
> - min -= min / 4;
> + /*
> + * Non-blocking allocations (e.g. GFP_ATOMIC) can
> + * access more reserves than just __GFP_HIGH. Other
> + * non-blocking allocations requests such as GFP_NOWAIT
> + * or (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) do not get
> + * access to the min reserve.
> + */
> + if (alloc_flags & ALLOC_NON_BLOCK)
> + min -= min / 4;
> + }
>
> /*
> * OOM victims can try even harder than the normal reserve
> @@ -4851,28 +4852,30 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
> * The caller may dip into page reserves a bit more if the caller
> * cannot run direct reclaim, or if the caller has realtime scheduling
> * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
> - * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_MIN_RESERVE(__GFP_HIGH).
> + * set both ALLOC_NON_BLOCK and ALLOC_MIN_RESERVE(__GFP_HIGH).
> */
> alloc_flags |= (__force int)
> (gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM));
>
> - if (gfp_mask & __GFP_ATOMIC) {
> + if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
> /*
> * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
> * if it can't schedule.
> */
> if (!(gfp_mask & __GFP_NOMEMALLOC)) {
> - alloc_flags |= ALLOC_HARDER;
> + alloc_flags |= ALLOC_NON_BLOCK;
>
> if (order > 0)
> alloc_flags |= ALLOC_HIGHATOMIC;
> }
>
> /*
> - * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
> - * comment for __cpuset_node_allowed().
> + * Ignore cpuset mems for non-blocking __GFP_HIGH (probably
> + * GFP_ATOMIC) rather than fail, see the comment for
> + * __cpuset_node_allowed().
> */
> - alloc_flags &= ~ALLOC_CPUSET;
> + if (alloc_flags & ALLOC_MIN_RESERVE)
> + alloc_flags &= ~ALLOC_CPUSET;
> } else if (unlikely(rt_task(current)) && in_task())
> alloc_flags |= ALLOC_MIN_RESERVE;
>
> @@ -5303,12 +5306,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> WARN_ON_ONCE_GFP(costly_order, gfp_mask);
>
> /*
> - * Help non-failing allocations by giving them access to memory
> - * reserves but do not use ALLOC_NO_WATERMARKS because this
> + * Help non-failing allocations by giving some access to memory
> + * reserves normally used for high priority non-blocking
> + * allocations but do not use ALLOC_NO_WATERMARKS because this
> * could deplete whole memory reserves which would just make
> - * the situation worse
> + * the situation worse.
> */
> - page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac);
> + page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_MIN_RESERVE, ac);
> if (page)
> goto got_pg;
>
> --
> 2.35.3

--
Michal Hocko
SUSE Labs

2023-02-07 13:32:47

by Vlastimil Babka

[permalink] [raw]

Subject: Re: [PATCH 5/6] mm/page_alloc: Explicitly define how __GFP_HIGH non-blocking allocations accesses reserves

On 1/13/23 12:12, Mel Gorman wrote:
> GFP_ATOMIC allocations get flagged ALLOC_HARDER which is a vague
> description. In preparation for the removal of GFP_ATOMIC redefine

^ __GFP_ATOMIC

> __GFP_ATOMIC to simply mean non-blocking and renaming ALLOC_HARDER to
> ALLOC_NON_BLOCK accordingly. __GFP_HIGH is required for access to reserves
> but non-blocking is granted more access. For example, GFP_NOWAIT is
> non-blocking but has no special access to reserves. A __GFP_NOFAIL
> blocking allocation is granted access similar to __GFP_HIGH if the
> only alternative is an OOM kill.
>
> Signed-off-by: Mel Gorman <[email protected]>

Well just for the lore record (too late for git)

Acked-by: Vlastimil Babka <[email protected]>

Nit below:

> ---
> mm/internal.h | 7 +++++--
> mm/page_alloc.c | 44 ++++++++++++++++++++++++--------------------
> 2 files changed, 29 insertions(+), 22 deletions(-)
>
> diff --git a/mm/internal.h b/mm/internal.h
> index 8706d46863df..23a37588073a 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -735,7 +735,10 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
> #define ALLOC_OOM ALLOC_NO_WATERMARKS
> #endif
>
> -#define ALLOC_HARDER 0x10 /* try to alloc harder */
> +#define ALLOC_NON_BLOCK 0x10 /* Caller cannot block. Allow access
> + * to 25% of the min watermark or
> + * 62.5% if __GFP_HIGH is set.

This is now (as of v3) inaccurate (the 25% part), right?

> + */
> #define ALLOC_MIN_RESERVE 0x20 /* __GFP_HIGH set. Allow access to 50%
> * of the min watermark.
> */
> @@ -750,7 +753,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
> #define ALLOC_KSWAPD 0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
>
> /* Flags that allow allocations below the min watermark. */
> -#define ALLOC_RESERVES (ALLOC_HARDER|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
> +#define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
>
> enum ttu_flags;
> struct tlbflush_unmap_batch;
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 6f41b84a97ac..b9ae0ba0a2ab 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -3989,18 +3989,19 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
> * __GFP_HIGH allows access to 50% of the min reserve as well
> * as OOM.
> */
> - if (alloc_flags & ALLOC_MIN_RESERVE)
> + if (alloc_flags & ALLOC_MIN_RESERVE) {
> min -= min / 2;
>
> - /*
> - * Non-blocking allocations can access some of the reserve
> - * with more access if also __GFP_HIGH. The reasoning is that
> - * a non-blocking caller may incur a more severe penalty
> - * if it cannot get memory quickly, particularly if it's
> - * also __GFP_HIGH.
> - */
> - if (alloc_flags & ALLOC_HARDER)
> - min -= min / 4;
> + /*
> + * Non-blocking allocations (e.g. GFP_ATOMIC) can
> + * access more reserves than just __GFP_HIGH. Other
> + * non-blocking allocations requests such as GFP_NOWAIT
> + * or (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) do not get
> + * access to the min reserve.
> + */
> + if (alloc_flags & ALLOC_NON_BLOCK)
> + min -= min / 4;
> + }
>
> /*
> * OOM victims can try even harder than the normal reserve
> @@ -4851,28 +4852,30 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
> * The caller may dip into page reserves a bit more if the caller
> * cannot run direct reclaim, or if the caller has realtime scheduling
> * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
> - * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_MIN_RESERVE(__GFP_HIGH).
> + * set both ALLOC_NON_BLOCK and ALLOC_MIN_RESERVE(__GFP_HIGH).
> */
> alloc_flags |= (__force int)
> (gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM));
>
> - if (gfp_mask & __GFP_ATOMIC) {
> + if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
> /*
> * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
> * if it can't schedule.
> */
> if (!(gfp_mask & __GFP_NOMEMALLOC)) {
> - alloc_flags |= ALLOC_HARDER;
> + alloc_flags |= ALLOC_NON_BLOCK;
>
> if (order > 0)
> alloc_flags |= ALLOC_HIGHATOMIC;
> }
>
> /*
> - * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
> - * comment for __cpuset_node_allowed().
> + * Ignore cpuset mems for non-blocking __GFP_HIGH (probably
> + * GFP_ATOMIC) rather than fail, see the comment for
> + * __cpuset_node_allowed().
> */
> - alloc_flags &= ~ALLOC_CPUSET;
> + if (alloc_flags & ALLOC_MIN_RESERVE)
> + alloc_flags &= ~ALLOC_CPUSET;
> } else if (unlikely(rt_task(current)) && in_task())
> alloc_flags |= ALLOC_MIN_RESERVE;
>
> @@ -5303,12 +5306,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
> WARN_ON_ONCE_GFP(costly_order, gfp_mask);
>
> /*
> - * Help non-failing allocations by giving them access to memory
> - * reserves but do not use ALLOC_NO_WATERMARKS because this
> + * Help non-failing allocations by giving some access to memory
> + * reserves normally used for high priority non-blocking
> + * allocations but do not use ALLOC_NO_WATERMARKS because this
> * could deplete whole memory reserves which would just make
> - * the situation worse
> + * the situation worse.
> */
> - page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac);
> + page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_MIN_RESERVE, ac);
> if (page)
> goto got_pg;
>