--- ./mm/page_alloc.c.mmreb 2005-10-28 04:02:08.000000000 +0400
+++ ./mm/page_alloc.c 2005-11-08 16:32:36.000000000 +0300
@@ -867,6 +867,7 @@ zone_reclaim_retry:
/* This allocation should allow future memory freeing. */
+rebalance:
if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
&& !in_interrupt()) {
if (!(gfp_mask & __GFP_NOMEMALLOC)) {
@@ -879,14 +880,13 @@ zone_reclaim_retry:
goto got_pg;
}
}
- goto nopage;
+ goto empty;
}
/* Atomic allocations - we can't balance anything */
if (!wait)
goto nopage;
-rebalance:
cond_resched();
/* We now go into synchronous reclaim */
@@ -946,6 +946,7 @@ rebalance:
* In this implementation, __GFP_REPEAT means __GFP_NOFAIL for order
* <= 3, but that may not be true in other implementations.
*/
+empty:
do_retry = 0;
if (!(gfp_mask & __GFP_NORETRY)) {
if ((order <= 3) || (gfp_mask & __GFP_REPEAT))
--- ./mm/page_alloc.c.alpg 2005-11-09 21:42:50.000000000 +0300
+++ ./mm/page_alloc.c 2005-11-09 21:44:22.000000000 +0300
@@ -870,6 +870,7 @@ zone_reclaim_retry:
if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
&& !in_interrupt()) {
if (!(gfp_mask & __GFP_NOMEMALLOC)) {
+nofail_alloc:
/* go through the zonelist yet again, ignoring mins */
for (i = 0; (z = zones[i]) != NULL; i++) {
if (!cpuset_zone_allowed(z, gfp_mask))
@@ -878,6 +879,10 @@ zone_reclaim_retry:
if (page)
goto got_pg;
}
+ if (gfp_mask & __GFP_NOFAIL) {
+ blk_congestion_wait(WRITE, HZ/50);
+ goto nofail_alloc;
+ }
}
goto nopage;
}
Kirill Korotaev <[email protected]> wrote:
>
> as Andrey Savochkin pointed to me, the previous patch is incorrect since
> makes allocations with PF_MEMALLOC to be always success for order <= 3
> which is not what we usually want.
OK, thanks. This patch was still in my
things-i-need-to-spend-half-an-hour-thinking-about queue anyway.
Kirill Korotaev <[email protected]> wrote:
>
> So kswapd (which has PF_MEMALLOC flag) can fail to allocate memory even
> when it allocates it with __GFP_NOFAIL flag.
>
> --- ./mm/page_alloc.c.alpg 2005-11-09 21:42:50.000000000 +0300
> +++ ./mm/page_alloc.c 2005-11-09 21:44:22.000000000 +0300
> @@ -870,6 +870,7 @@ zone_reclaim_retry:
> if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
> && !in_interrupt()) {
> if (!(gfp_mask & __GFP_NOMEMALLOC)) {
> +nofail_alloc:
> /* go through the zonelist yet again, ignoring mins */
> for (i = 0; (z = zones[i]) != NULL; i++) {
> if (!cpuset_zone_allowed(z, gfp_mask))
> @@ -878,6 +879,10 @@ zone_reclaim_retry:
> if (page)
> goto got_pg;
> }
> + if (gfp_mask & __GFP_NOFAIL) {
> + blk_congestion_wait(WRITE, HZ/50);
> + goto nofail_alloc;
> + }
> }
> goto nopage;
> }
The problem here is that we'll loop if TIF_MEMDIE is set.
But given that the caller has specified __GFP_NOFAIL, I think that's
correct behaviour - __GFP_NOFAIL means "I am lame, and will oops if you
cannot allocate memory". So we just ignore TIF_MEMDIE..
That being said, why do we need another loop here? Would it not
be sufficient to do:
--- devel/mm/page_alloc.c~a 2005-11-09 19:15:03.000000000 -0800
+++ devel-akpm/mm/page_alloc.c 2005-11-09 19:15:32.000000000 -0800
@@ -907,7 +907,8 @@ zone_reclaim_retry:
goto got_pg;
}
}
- goto nopage;
+ if (!(gfp_mask & __GFP_NOFAIL))
+ goto nopage;
}
/* Atomic allocations - we can't balance anything */
_
Answer: because that way we'll go recursive if PF_MEMALLOC is set. Ho-hum.