LinuxLists.cc - [PATCH] Separate global/perzone inactive/free shortage

2001-07-14 06:51:21

Subject: [PATCH] Separate global/perzone inactive/free shortage

Hi,

As well known, the VM does not make a distiction between global and
per-zone shortages when trying to free memory. That means if only a given
memory zone is under shortage, the kernel will scan pages from all zones.

The following patch (against 2.4.6-ac2), changes the kernel behaviour to
avoid freeing pages from zones which do not have an inactive and/or
free shortage.

Now I'm able to run memory hogs allocating 4GB of memory (on 4GB machine)
without getting real long hangs on my ssh session. (which used to happen
on stock -ac2 due to exhaustion of DMA pages for networking).

Comments ?

Dirk, Can you please try the patch and tell us if it fixes your problem ?

diff --exclude-from=/home/marcelo/exclude -Nur linux.orig/include/linux/swap.h linux/include/linux/swap.h
--- linux.orig/include/linux/swap.h Sat Jul 14 02:47:14 2001
+++ linux/include/linux/swap.h Sat Jul 14 03:27:13 2001
@@ -123,9 +123,14 @@
extern wait_queue_head_t kreclaimd_wait;
extern int page_launder(int, int);
extern int free_shortage(void);
+extern int total_free_shortage(void);
extern int inactive_shortage(void);
+extern int total_inactive_shortage(void);
extern void wakeup_kswapd(void);
extern int try_to_free_pages(unsigned int gfp_mask);
+
+extern unsigned int zone_free_shortage(zone_t *zone);
+extern unsigned int zone_inactive_shortage(zone_t *zone);

/* linux/mm/page_io.c */
extern void rw_swap_page(int, struct page *);
diff --exclude-from=/home/marcelo/exclude -Nur linux.orig/mm/page_alloc.c linux/mm/page_alloc.c
--- linux.orig/mm/page_alloc.c Sat Jul 14 02:47:14 2001
+++ linux/mm/page_alloc.c Sat Jul 14 02:50:50 2001
@@ -451,7 +451,7 @@
* to give up than to deadlock the kernel looping here.
*/
if (gfp_mask & __GFP_WAIT) {
- if (!order || free_shortage()) {
+ if (!order || total_free_shortage()) {
int progress = try_to_free_pages(gfp_mask);
if (progress || (gfp_mask & __GFP_FS))
goto try_again;
@@ -689,6 +689,39 @@
return pages;
}
#endif
+
+unsigned int zone_free_shortage(zone_t *zone)
+{
+ int sum = 0;
+
+ if (!zone->size)
+ goto ret;
+
+ if (zone->inactive_clean_pages + zone->free_pages
+ < zone->pages_min) {
+ sum += zone->pages_min;
+ sum -= zone->free_pages;
+ sum -= zone->inactive_clean_pages;
+ }
+ret:
+ return sum;
+}
+
+unsigned int zone_inactive_shortage(zone_t *zone)
+{
+ int sum = 0;
+
+ if (!zone->size)
+ goto ret;
+
+ sum = zone->pages_high;
+ sum -= zone->inactive_dirty_pages;
+ sum -= zone->inactive_clean_pages;
+ sum -= zone->free_pages;
+
+ret:
+ return (sum > 0 ? sum : 0);
+}

/*
* Show free area list (used inside shift_scroll-lock stuff)
diff --exclude-from=/home/marcelo/exclude -Nur linux.orig/mm/vmscan.c linux/mm/vmscan.c
--- linux.orig/mm/vmscan.c Sat Jul 14 02:47:14 2001
+++ linux/mm/vmscan.c Sat Jul 14 03:22:19 2001
@@ -36,11 +36,19 @@
*/

/* mm->page_table_lock is held. mmap_sem is not held */
-static void try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page)
+static void try_to_swap_out(zone_t *zone, struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page)
{
pte_t pte;
swp_entry_t entry;

+ /*
+ * If we are doing a zone-specific scan, do not
+ * touch pages from zones which don't have a
+ * shortage.
+ */
+ if (zone && !zone_inactive_shortage(page->zone))
+ return;
+
/* Don't look at this pte if it's been accessed recently. */
if (ptep_test_and_clear_young(page_table)) {
page->age += PAGE_AGE_ADV;
@@ -131,7 +139,7 @@
}

/* mm->page_table_lock is held. mmap_sem is not held */
-static int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count)
+static int swap_out_pmd(zone_t *zone, struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count)
{
pte_t * pte;
unsigned long pmd_end;
@@ -155,7 +163,7 @@
struct page *page = pte_page(*pte);

if (VALID_PAGE(page) && !PageReserved(page)) {
- try_to_swap_out(mm, vma, address, pte, page);
+ try_to_swap_out(zone, mm, vma, address, pte, page);
if (!--count)
break;
}
@@ -168,7 +176,7 @@
}

/* mm->page_table_lock is held. mmap_sem is not held */
-static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int count)
+static inline int swap_out_pgd(zone_t *zone, struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int count)
{
pmd_t * pmd;
unsigned long pgd_end;
@@ -188,7 +196,7 @@
end = pgd_end;

do {
- count = swap_out_pmd(mm, vma, pmd, address, end, count);
+ count = swap_out_pmd(zone, mm, vma, pmd, address, end, count);
if (!count)
break;
address = (address + PMD_SIZE) & PMD_MASK;
@@ -198,7 +206,7 @@
}

/* mm->page_table_lock is held. mmap_sem is not held */
-static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int count)
+static int swap_out_vma(zone_t *zone, struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int count)
{
pgd_t *pgdir;
unsigned long end;
@@ -213,7 +221,7 @@
if (address >= end)
BUG();
do {
- count = swap_out_pgd(mm, vma, pgdir, address, end, count);
+ count = swap_out_pgd(zone, mm, vma, pgdir, address, end, count);
if (!count)
break;
address = (address + PGDIR_SIZE) & PGDIR_MASK;
@@ -225,7 +233,7 @@
/*
* Returns non-zero if we scanned all `count' pages
*/
-static int swap_out_mm(struct mm_struct * mm, int count)
+static int swap_out_mm(zone_t *zone, struct mm_struct * mm, int count)
{
unsigned long address;
struct vm_area_struct* vma;
@@ -248,7 +256,7 @@
address = vma->vm_start;

for (;;) {
- count = swap_out_vma(mm, vma, address, count);
+ count = swap_out_vma(zone, mm, vma, address, count);
if (!count)
goto out_unlock;
vma = vma->vm_next;
@@ -280,7 +288,7 @@
return nr;
}

-static void swap_out(unsigned int priority, int gfp_mask)
+static void swap_out(zone_t *zone, unsigned int priority, int gfp_mask)
{
int counter;
int retval = 0;
@@ -288,7 +296,7 @@

/* Always start by trying to penalize the process that is allocating memory */
if (mm)
- retval = swap_out_mm(mm, swap_amount(mm));
+ retval = swap_out_mm(zone, mm, swap_amount(mm));

/* Then, look at the other mm's */
counter = (mmlist_nr << SWAP_MM_SHIFT) >> priority;
@@ -310,7 +318,7 @@
spin_unlock(&mmlist_lock);

/* Walk about 6% of the address space each time */
- retval |= swap_out_mm(mm, swap_amount(mm));
+ retval |= swap_out_mm(zone, mm, swap_amount(mm));
mmput(mm);
} while (--counter >= 0);
return;
@@ -426,7 +434,7 @@
#define MAX_LAUNDER (4 * (1 << page_cluster))
#define CAN_DO_FS (gfp_mask & __GFP_FS)
#define CAN_DO_IO (gfp_mask & __GFP_IO)
-int page_launder(int gfp_mask, int sync)
+int do_page_launder(zone_t *zone, int gfp_mask, int sync)
{
int launder_loop, maxscan, cleaned_pages, maxlaunder;
struct list_head * page_lru;
@@ -461,6 +469,17 @@
continue;
}

+ /*
+ * If we are doing zone-specific laundering,
+ * avoid touching pages from zones which do
+ * not have a free shortage.
+ */
+ if (zone && !zone_free_shortage(page->zone)) {
+ list_del(page_lru);
+ list_add(page_lru, &inactive_dirty_list);
+ continue;
+ }
+
/*
* The page is locked. IO in progress?
* Move it to the back of the list.
@@ -574,8 +593,13 @@
* If we're freeing buffer cache pages, stop when
* we've got enough free memory.
*/
- if (freed_page && !free_shortage())
- break;
+ if (freed_page) {
+ if (zone) {
+ if (!zone_free_shortage(zone))
+ break;
+ } else if (free_shortage())
+ break;
+ }
continue;
} else if (page->mapping && !PageDirty(page)) {
/*
@@ -613,7 +637,8 @@
* loads, flush out the dirty pages before we have to wait on
* IO.
*/
- if (CAN_DO_IO && !launder_loop && free_shortage()) {
+ if (CAN_DO_IO && !launder_loop && (free_shortage()
+ || (zone && zone_free_shortage(zone)))) {
launder_loop = 1;
/* If we cleaned pages, never do synchronous IO. */
if (cleaned_pages)
@@ -629,6 +654,34 @@
return cleaned_pages;
}

+int page_launder(int gfp_mask, int sync)
+{
+ int type = 0;
+ int ret;
+ pg_data_t *pgdat = pgdat_list;
+ /*
+ * First do a global scan if there is a
+ * global shortage.
+ */
+ if (free_shortage())
+ ret += do_page_launder(NULL, gfp_mask, sync);
+
+ /*
+ * Then check if there is any specific zone
+ * needs laundering.
+ */
+ for (type = 0; type < MAX_NR_ZONES; type++) {
+ zone_t *zone = pgdat->node_zones + type;
+
+ if (zone_free_shortage(zone))
+ ret += do_page_launder(zone, gfp_mask, sync);
+ }
+
+ return ret;
+}
+
+
+
/**
* refill_inactive_scan - scan the active list and find pages to deactivate
* @priority: the priority at which to scan
@@ -637,7 +690,7 @@
* This function will scan a portion of the active list to find
* unused pages, those pages will then be moved to the inactive list.
*/
-int refill_inactive_scan(unsigned int priority, int target)
+int refill_inactive_scan(zone_t *zone, unsigned int priority, int target)
{
struct list_head * page_lru;
struct page * page;
@@ -665,6 +718,16 @@
continue;
}

+ /*
+ * If we are doing zone-specific scanning, ignore
+ * pages from zones without shortage.
+ */
+
+ if (zone && !zone_inactive_shortage(page->zone)) {
+ page_active = 1;
+ goto skip_page;
+ }
+
/* Do aging on the pages. */
if (PageTestandClearReferenced(page)) {
age_page_up_nolock(page);
@@ -694,6 +757,7 @@
* to the other end of the list. Otherwise we exit if
* we have done enough work.
*/
+skip_page:
if (page_active || PageActive(page)) {
list_del(page_lru);
list_add(page_lru, &active_list);
@@ -709,12 +773,10 @@
}

/*
- * Check if there are zones with a severe shortage of free pages,
- * or if all zones have a minor shortage.
+ * Check if we have are low on free pages globally.
*/
int free_shortage(void)
{
- pg_data_t *pgdat = pgdat_list;
int sum = 0;
int freeable = nr_free_pages() + nr_inactive_clean_pages();
int freetarget = freepages.high;
@@ -722,6 +784,22 @@
/* Are we low on free pages globally? */
if (freeable < freetarget)
return freetarget - freeable;
+ return 0;
+}
+
+/*
+ *
+ * Check if there are zones with a severe shortage of free pages,
+ * or if all zones have a minor shortage.
+ */
+int total_free_shortage(void)
+{
+ int sum = 0;
+ pg_data_t *pgdat = pgdat_list;
+
+ /* Do we have a global free shortage? */
+ if((sum = free_shortage()))
+ return sum;

/* If not, are we very low on any particular zone? */
do {
@@ -739,15 +817,15 @@
} while (pgdat);

return sum;
+
}

/*
- * How many inactive pages are we short?
+ * How many inactive pages are we short globally?
*/
int inactive_shortage(void)
{
int shortage = 0;
- pg_data_t *pgdat = pgdat_list;

/* Is the inactive dirty list too small? */

@@ -759,10 +837,20 @@

if (shortage > 0)
return shortage;
+ return 0;
+}
+/*
+ * Are we low on inactive pages globally or in any zone?
+ */
+int total_inactive_shortage(void)
+{
+ int shortage = 0;
+ pg_data_t *pgdat = pgdat_list;

- /* If not, do we have enough per-zone pages on the inactive list? */
+ if((shortage = inactive_shortage()))
+ return shortage;

- shortage = 0;
+ shortage = 0;

do {
int i;
@@ -802,7 +890,7 @@
* when called from a user process.
*/
#define DEF_PRIORITY (6)
-static int refill_inactive(unsigned int gfp_mask, int user)
+static int refill_inactive_global(unsigned int gfp_mask, int user)
{
int count, start_count, maxtry;

@@ -824,9 +912,9 @@
}

/* Walk the VM space for a bit.. */
- swap_out(DEF_PRIORITY, gfp_mask);
+ swap_out(NULL, DEF_PRIORITY, gfp_mask);

- count -= refill_inactive_scan(DEF_PRIORITY, count);
+ count -= refill_inactive_scan(NULL, DEF_PRIORITY, count);
if (count <= 0)
goto done;

@@ -839,6 +927,60 @@
return (count < start_count);
}

+static int refill_inactive_zone(zone_t *zone, unsigned int gfp_mask, int user)
+{
+ int count, start_count, maxtry;
+
+ count = start_count = zone_inactive_shortage(zone);
+
+ maxtry = (1 << DEF_PRIORITY);
+
+ do {
+ swap_out(zone, DEF_PRIORITY, gfp_mask);
+
+ count -= refill_inactive_scan(zone, DEF_PRIORITY, count);
+
+ if (count <= 0)
+ goto done;
+
+ if (--maxtry <= 0)
+ return 0;
+
+ } while(zone_inactive_shortage(zone));
+done:
+ return (count < start_count);
+}
+
+
+static int refill_inactive(unsigned int gfp_mask, int user)
+{
+ int type = 0;
+ int ret;
+ pg_data_t *pgdat = pgdat_list;
+ /*
+ * First do a global scan if there is a
+ * global shortage.
+ */
+ if (inactive_shortage())
+ ret += refill_inactive_global(gfp_mask, user);
+
+ /*
+ * Then check if there is any specific zone
+ * with a shortage and try to refill it if
+ * so.
+ */
+ for (type = 0; type < MAX_NR_ZONES; type++) {
+ zone_t *zone = pgdat->node_zones + type;
+
+ if (zone_inactive_shortage(zone))
+ ret += refill_inactive_zone(zone, gfp_mask, user);
+ }
+
+ return ret;
+}
+
+#define DEF_PRIORITY (6)
+
static int do_try_to_free_pages(unsigned int gfp_mask, int user)
{
int ret = 0;
@@ -851,8 +993,10 @@
* before we get around to moving them to the other
* list, so this is a relatively cheap operation.
*/
- if (free_shortage()) {
- ret += page_launder(gfp_mask, user);
+
+ ret += page_launder(gfp_mask, user);
+
+ if (total_free_shortage()) {
shrink_dcache_memory(DEF_PRIORITY, gfp_mask);
shrink_icache_memory(DEF_PRIORITY, gfp_mask);
}
@@ -861,8 +1005,7 @@
* If needed, we move pages from the active list
* to the inactive list.
*/
- if (inactive_shortage())
- ret += refill_inactive(gfp_mask, user);
+ ret += refill_inactive(gfp_mask, user);

/*
* Reclaim unused slab cache if memory is low.
@@ -917,7 +1060,7 @@
static long recalc = 0;

/* If needed, try to free some memory. */
- if (inactive_shortage() || free_shortage())
+ if (total_inactive_shortage() || total_free_shortage())
do_try_to_free_pages(GFP_KSWAPD, 0);

/* Once a second ... */
@@ -928,7 +1071,7 @@
recalculate_vm_stats();

/* Do background page aging. */
- refill_inactive_scan(DEF_PRIORITY, 0);
+ refill_inactive_scan(NULL, DEF_PRIORITY, 0);
}

run_task_queue(&tq_disk);
@@ -944,7 +1087,7 @@
* We go to sleep for one second, but if it's needed
* we'll be woken up earlier...
*/
- if (!free_shortage() || !inactive_shortage()) {
+ if (!total_free_shortage() || !total_inactive_shortage()) {
interruptible_sleep_on_timeout(&kswapd_wait, HZ);
/*
* If we couldn't free enough memory, we see if it was

2001-07-14 08:43:12

by Marcelo Tosatti

[permalink] [raw]

Subject: Re: [PATCH] Separate global/perzone inactive/free shortage

There is a silly typo on the patch.

On Sat, 14 Jul 2001, Marcelo Tosatti wrote:

>
> Hi,
>
> As well known, the VM does not make a distiction between global and
> per-zone shortages when trying to free memory. That means if only a given
> memory zone is under shortage, the kernel will scan pages from all zones.
>
> The following patch (against 2.4.6-ac2), changes the kernel behaviour to
> avoid freeing pages from zones which do not have an inactive and/or
> free shortage.
>
> Now I'm able to run memory hogs allocating 4GB of memory (on 4GB machine)
> without getting real long hangs on my ssh session. (which used to happen
> on stock -ac2 due to exhaustion of DMA pages for networking).
>
> Comments ?
>
> Dirk, Can you please try the patch and tell us if it fixes your problem ?
>
>

mm/vmscan.c diff

> @@ -574,8 +593,13 @@
> * If we're freeing buffer cache pages, stop when
> * we've got enough free memory.
> */
> - if (freed_page && !free_shortage())
> - break;
> + if (freed_page) {
> + if (zone) {
> + if (!zone_free_shortage(zone))
> + break;
> + } else if (free_shortage())
^^^^^^^^ ^^^^^^
Should be
} else if (!free_shortage())

> + break;
> + }
> continue;

Well, updated patch at
http://bazar.conectiva.com.br/~marcelo/patches/v2.4/2.4.6ac2/zoned.patch

2001-07-14 20:12:46

by Dirk Wetter

[permalink] [raw]

Subject: Re: [PATCH] Separate global/perzone inactive/free shortage

Marcelo Tosatti wrote:

> Hi,
>
> As well known, the VM does not make a distiction between global and
> per-zone shortages when trying to free memory. That means if only a given
> memory zone is under shortage, the kernel will scan pages from all zones.
>
> The following patch (against 2.4.6-ac2), changes the kernel behaviour to
> avoid freeing pages from zones which do not have an inactive and/or
> free shortage.
>
> Now I'm able to run memory hogs allocating 4GB of memory (on 4GB machine)
> without getting real long hangs on my ssh session. (which used to happen
> on stock -ac2 due to exhaustion of DMA pages for networking).
>
> Comments ?
>
> Dirk, Can you please try the patch and tell us if it fixes your problem ?
>

great!! that is definitely better, the machine talks to me again. there are some
small "but"s. however. i write them up and let you know.

~dirkw

2001-07-16 15:51:40

by Kanoj Sarcar

[permalink] [raw]

Subject: Re: [PATCH] Separate global/perzone inactive/free shortage

--- Marcelo Tosatti <[email protected]> wrote:
> Hi,
>
> As well known, the VM does not make a distiction
> between global and
> per-zone shortages when trying to free memory. That
> means if only a given
> memory zone is under shortage, the kernel will scan
> pages from all zones.
>
> The following patch (against 2.4.6-ac2), changes the
> kernel behaviour to
> avoid freeing pages from zones which do not have an
> inactive and/or
> free shortage.
>
> Now I'm able to run memory hogs allocating 4GB of
> memory (on 4GB machine)
> without getting real long hangs on my ssh session.
> (which used to happen
> on stock -ac2 due to exhaustion of DMA pages for
> networking).
>
> Comments ?
>
> Dirk, Can you please try the patch and tell us if it
> fixes your problem ?
>
>

Just a quick note. A per-zone page reclamation
method like this was what I had advocated and sent
patches to Linus for in the 2.3.43 time frame or so.
I think later performance work ripped out that work.
I guess the problem is that a lot of the different
page reclamation schemes first of all do not know
how to reclaim pages for a specific zone, and secondly
have to go thru a lot of work before they discover the
page they are trying to reclaim does not belong to the
shortage zone, hence wasting a lot of work/cputime.
try_to_swap_out is a good example, which can be solved
by rmaps.

Kanoj

__________________________________________________
Do You Yahoo!?
Get personalized email addresses from Yahoo! Mail
http://personal.mail.yahoo.com/

2001-07-16 19:00:47

by Rik van Riel

[permalink] [raw]

Subject: Re: [PATCH] Separate global/perzone inactive/free shortage

On Mon, 16 Jul 2001, Kanoj Sarcar wrote:

> Just a quick note. A per-zone page reclamation
> method like this was what I had advocated and sent
> patches to Linus for in the 2.3.43 time frame or so.
> I think later performance work ripped out that work.

Yes, the system ended up swapping as soon as the first zone
was filled up and after that would fill up the other zones;
the way the system stabilised was cycling through the pages
of one zone and leaving the lower zones alone.

This reduced the amount of available VM of a 1GB system
to 128MB, which is somewhat suboptimal ;)

What we learned from that is that we need to have some
way to auto-balance the reclaiming, keeping the objective
of evicting the least used page from RAM in mind.

> I guess the problem is that a lot of the different
> page reclamation schemes first of all do not know
> how to reclaim pages for a specific zone,

> try_to_swap_out is a good example, which can be solved
> by rmaps.

Indeed. Most of the time things go right, but the current
system cannot cope at all when things go wrong. I think we
really want things like rmaps and more sturdy reclaiming
mechanisms to cope with these worst cases (and also to make
the common case easier to get right).

regards,

Rik
--
Virtual memory is like a game you can't win;
However, without VM there's truly nothing to lose...

http://www.surriel.com/ http://distro.conectiva.com/

Send all your spam to [email protected] (spam digging piggy)

2001-07-17 01:33:39

by Marcelo Tosatti

[permalink] [raw]

Subject: Re: [PATCH] Separate global/perzone inactive/free shortage

On Mon, 16 Jul 2001, Kanoj Sarcar wrote:

>
> --- Marcelo Tosatti <[email protected]> wrote:
> > Hi,
> >
> > As well known, the VM does not make a distiction
> > between global and
> > per-zone shortages when trying to free memory. That
> > means if only a given
> > memory zone is under shortage, the kernel will scan
> > pages from all zones.
> >
> > The following patch (against 2.4.6-ac2), changes the
> > kernel behaviour to
> > avoid freeing pages from zones which do not have an
> > inactive and/or
> > free shortage.
> >
> > Now I'm able to run memory hogs allocating 4GB of
> > memory (on 4GB machine)
> > without getting real long hangs on my ssh session.
> > (which used to happen
> > on stock -ac2 due to exhaustion of DMA pages for
> > networking).
> >
> > Comments ?
> >
> > Dirk, Can you please try the patch and tell us if it
> > fixes your problem ?
> >
> >
>
> Just a quick note. A per-zone page reclamation
> method like this was what I had advocated and sent
> patches to Linus for in the 2.3.43 time frame or so.
> I think later performance work ripped out that work.
> I guess the problem is that a lot of the different
> page reclamation schemes first of all do not know
> how to reclaim pages for a specific zone, and secondly
> have to go thru a lot of work before they discover the
> page they are trying to reclaim does not belong to the
> shortage zone, hence wasting a lot of work/cputime.
> try_to_swap_out is a good example, which can be solved
> by rmaps.

Oh sure, rmaps would fix the performance problem caused by this. But I we
dont have rmaps right now, and I doubt we want rmaps for 2.4.

Besides, the performance degradation of doing the perzone
aging/deactivation this way is nothing compared to _not_ doing the thing
on a perzone basis at all, IMHO.

2001-07-17 01:58:51

by Marcelo Tosatti

[permalink] [raw]

Subject: Re: [PATCH] Separate global/perzone inactive/free shortage

On Mon, 16 Jul 2001, Rik van Riel wrote:

> On Mon, 16 Jul 2001, Kanoj Sarcar wrote:
>
> > Just a quick note. A per-zone page reclamation
> > method like this was what I had advocated and sent
> > patches to Linus for in the 2.3.43 time frame or so.
> > I think later performance work ripped out that work.
>
> Yes, the system ended up swapping as soon as the first zone
> was filled up and after that would fill up the other zones;
> the way the system stabilised was cycling through the pages
> of one zone and leaving the lower zones alone.
>
> This reduced the amount of available VM of a 1GB system
> to 128MB, which is somewhat suboptimal ;)
>
> What we learned from that is that we need to have some
> way to auto-balance the reclaiming, keeping the objective
> of evicting the least used page from RAM in mind.
>
> > I guess the problem is that a lot of the different
> > page reclamation schemes first of all do not know
> > how to reclaim pages for a specific zone,
>
> > try_to_swap_out is a good example, which can be solved
> > by rmaps.
>
> Indeed. Most of the time things go right, but the current
> system cannot cope at all when things go wrong. I think we
> really want things like rmaps and more sturdy reclaiming
> mechanisms to cope with these worst cases (and also to make
> the common case easier to get right).

As I said to Kanoj, I agree that we really want rmaps to fix that thing
right.

Now I don't see any other way for fixing that on _2.4_ except something
similar to the patch I posted. That patch can still have problems in
practice, but fundamentally _it is the right thing_, IMO.

2001-07-17 02:07:21

by Kanoj Sarcar

[permalink] [raw]

Subject: Re: [PATCH] Separate global/perzone inactive/free shortage

--- Marcelo Tosatti <[email protected]> wrote:
>
> On Mon, 16 Jul 2001, Rik van Riel wrote:
>
> > On Mon, 16 Jul 2001, Kanoj Sarcar wrote:
> >
> > > Just a quick note. A per-zone page reclamation
> > > method like this was what I had advocated and
> sent
> > > patches to Linus for in the 2.3.43 time frame or
> so.
> > > I think later performance work ripped out that
> work.
> >
> > Yes, the system ended up swapping as soon as the
> first zone
> > was filled up and after that would fill up the
> other zones;
> > the way the system stabilised was cycling through
> the pages
> > of one zone and leaving the lower zones alone.
> >
> > This reduced the amount of available VM of a 1GB
> system
> > to 128MB, which is somewhat suboptimal ;)
> >
> > What we learned from that is that we need to have
> some
> > way to auto-balance the reclaiming, keeping the
> objective
> > of evicting the least used page from RAM in mind.
> >
> > > I guess the problem is that a lot of the
> different
> > > page reclamation schemes first of all do not
> know
> > > how to reclaim pages for a specific zone,
> >
> > > try_to_swap_out is a good example, which can be
> solved
> > > by rmaps.
> >
> > Indeed. Most of the time things go right, but the
> current
> > system cannot cope at all when things go wrong. I
> think we
> > really want things like rmaps and more sturdy
> reclaiming
> > mechanisms to cope with these worst cases (and
> also to make
> > the common case easier to get right).
>
> As I said to Kanoj, I agree that we really want
> rmaps to fix that thing
> right.
>
> Now I don't see any other way for fixing that on
> _2.4_ except something
> similar to the patch I posted. That patch can still
> have problems in
> practice, but fundamentally _it is the right thing_,
> IMO.

Yes, I agree with you, and that is why I had sent the
patch to Linus during 2.3 in the first place.

What I am trying to point out is that you should talk
to Rik, and understand why it was removed previously.
Rik obviously had his reasons at that point, but some
of those might not apply anymore, given that 2.4 is
quite different from 2.3.43.

Kanoj

>
>
> --
> To unsubscribe, send a message with 'unsubscribe
> linux-mm' in
> the body to [email protected]. For more info on
> Linux MM,
> see: http://www.linux-mm.org/

__________________________________________________
Do You Yahoo!?
Get personalized email addresses from Yahoo! Mail
http://personal.mail.yahoo.com/

2001-07-18 16:09:36

by Rik van Riel

[permalink] [raw]

Subject: Re: [PATCH] Separate global/perzone inactive/free shortage

On Wed, 18 Jul 2001, Dave McCracken wrote:
> --On Wednesday, July 18, 2001 10:54:52 +0200 Mike Galbraith
> <[email protected]> wrote:
>
> > Possible solution:
> >
> > Effectively reserving the last ~meg (pick a number, scaled by ramsize
> > would be better) of ZONE_DMA for real GFP_DMA allocations would cure
> > Dirk's problem I bet, and also cure most of the others too, simply by
>
> Couldn't something similar to this be accomplished by tweaking the
> pages_{min,low,high} values to ZONE_DMA based on the total memory in the
> machine?

I bet we can do this in a much simpler way with less
reliance on magic numbers. My theory goes as follows:

The problem with the current code is that the global
free target (freepages.high) is the same as the sum
of the per-zone free targets.

Because of this, we will always run into the local
free shortages and the VM has to eat free pages from
all zones and has no chance to properly balance usage
bettween the zones depending on VM activity in the
zone and desireability of allocating from this zone.

We could try increasing the _global_ free target to
something like 2 or 3 times the sum of the per-zone
free targets.

By doing that the system would have a much better
chance of leaving eg. the DMA zone alone for allocations
because kswapd doesn't just free the amount of pages
required to bring each zone to the edge, it would free
a whole bunch more pages, to whatever zone they happen
to be in. That way the VM would do the bulk of the
allocations from the least loaded zone and leave the
DMA zone (at the end of the fallback chain) alone.

I'm not sure if this would work, but just increasing
the global free target to something significantly
higher than the sum of the per-zone free targets
is an easy to test change ;)

regards,

Rik
--
Virtual memory is like a game you can't win;
However, without VM there's truly nothing to lose...

http://www.surriel.com/ http://distro.conectiva.com/

Send all your spam to [email protected] (spam digging piggy)