2002-07-20 20:21:12

by Robert Love

[permalink] [raw]
Subject: [PATCH] for_each_pgdat

This patch implements for_each_pgdat(pg_data_t *) which is a helper
macro to cleanup code that does a loop of the form:

pgdat = pgdat_list;
while(pgdat) {
/* ... */
pgdat = pgdat->node_next;
}

and replace it with:

for_each_pgdat(pgdat) {
/* ... */
}

This code is from Rik's 2.4-rmap patch and is by William Irwin.

Patch is against 2.5.27, please apply.

Robert Love

diff -urN linux-2.5.27/arch/ia64/mm/init.c linux/arch/ia64/mm/init.c
--- linux-2.5.27/arch/ia64/mm/init.c Sat Jul 20 12:11:15 2002
+++ linux/arch/ia64/mm/init.c Sat Jul 20 12:56:57 2002
@@ -167,10 +167,10 @@

#ifdef CONFIG_DISCONTIGMEM
{
- pg_data_t *pgdat = pgdat_list;
+ pg_data_t *pgdat;

printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
- do {
+ for_each_pgdat(pgdat) {
printk("Node ID: %d\n", pgdat->node_id);
for(i = 0; i < pgdat->node_size; i++) {
if (PageReserved(pgdat->node_mem_map+i))
@@ -184,8 +184,7 @@
printk("\t%d reserved pages\n", reserved);
printk("\t%d pages shared\n", shared);
printk("\t%d pages swap cached\n", cached);
- pgdat = pgdat->node_next;
- } while (pgdat);
+ }
printk("Total of %ld pages in page table cache\n", pgtable_cache_size);
printk("%d free buffer pages\n", nr_free_buffer_pages());
}
diff -urN linux-2.5.27/include/linux/mmzone.h linux/include/linux/mmzone.h
--- linux-2.5.27/include/linux/mmzone.h Sat Jul 20 12:11:05 2002
+++ linux/include/linux/mmzone.h Sat Jul 20 12:56:57 2002
@@ -163,6 +163,20 @@

extern pg_data_t contig_page_data;

+/**
+ * for_each_pgdat - helper macro to iterate over all nodes
+ * @pgdat - pointer to a pg_data_t variable
+ *
+ * Meant to help with common loops of the form
+ * pgdat = pgdat_list;
+ * while(pgdat) {
+ * ...
+ * pgdat = pgdat->node_next;
+ * }
+ */
+#define for_each_pgdat(pgdat) \
+ for (pgdat = pgdat_list; pgdat; pgdat = pgdat->node_next)
+
#ifndef CONFIG_DISCONTIGMEM

#define NODE_DATA(nid) (&contig_page_data)
diff -urN linux-2.5.27/mm/bootmem.c linux/mm/bootmem.c
--- linux-2.5.27/mm/bootmem.c Sat Jul 20 12:11:12 2002
+++ linux/mm/bootmem.c Sat Jul 20 12:56:57 2002
@@ -339,12 +339,11 @@
pg_data_t *pgdat = pgdat_list;
void *ptr;

- while (pgdat) {
+ for_each_pgdat(pgdat)
if ((ptr = __alloc_bootmem_core(pgdat->bdata, size,
align, goal)))
return(ptr);
- pgdat = pgdat->node_next;
- }
+
/*
* Whoops, we cannot satisfy the allocation request.
*/
diff -urN linux-2.5.27/mm/page_alloc.c linux/mm/page_alloc.c
--- linux-2.5.27/mm/page_alloc.c Sat Jul 20 12:11:07 2002
+++ linux/mm/page_alloc.c Sat Jul 20 12:56:57 2002
@@ -489,10 +489,10 @@

static unsigned int nr_free_zone_pages(int offset)
{
- pg_data_t *pgdat = pgdat_list;
+ pg_data_t *pgdat;
unsigned int sum = 0;

- do {
+ for_each_pgdat(pgdat) {
zonelist_t *zonelist = pgdat->node_zonelists + offset;
zone_t **zonep = zonelist->zones;
zone_t *zone;
@@ -503,9 +503,7 @@
if (size > high)
sum += size - high;
}
-
- pgdat = pgdat->node_next;
- } while (pgdat);
+ }

return sum;
}
@@ -529,13 +527,12 @@
#if CONFIG_HIGHMEM
unsigned int nr_free_highpages (void)
{
- pg_data_t *pgdat = pgdat_list;
+ pg_data_t *pgdat;
unsigned int pages = 0;

- while (pgdat) {
+ for_each_pgdat(pgdat)
pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
- pgdat = pgdat->node_next;
- }
+
return pages;
}
#endif


2002-07-20 20:41:14

by Martin J. Bligh

[permalink] [raw]
Subject: Re: [PATCH] for_each_pgdat

> This patch implements for_each_pgdat(pg_data_t *) which is a helper
> macro to cleanup code that does a loop of the form:
>
> pgdat = pgdat_list;
> while(pgdat) {
> /* ... */
> pgdat = pgdat->node_next;
> }
>
> and replace it with:
>
> for_each_pgdat(pgdat) {
> /* ... */
> }

If you're going to do that (which I think is a good idea) can you
rename node_next to pgdat_next, as it often has nothing to do with
nodes whatsoever (discontigmem on a non-NUMA machine, or even more
confusingly a NUMA machine which is discontig within a node)? I'll
attatch a patch below, but it conflicts what what you're doing
horribly, and it's even easier to do after your abtraction ...


diff -urN virgin-2.5.25/arch/ia64/mm/init.c 2.5.25-A02-pgdat_next/arch/ia64/mm/init.c
--- virgin-2.5.25/arch/ia64/mm/init.c Fri Jul 5 16:42:22 2002
+++ 2.5.25-A02-pgdat_next/arch/ia64/mm/init.c Fri Jul 19 10:33:48 2002
@@ -184,7 +184,7 @@
printk("\t%d reserved pages\n", reserved);
printk("\t%d pages shared\n", shared);
printk("\t%d pages swap cached\n", cached);
- pgdat = pgdat->node_next;
+ pgdat = pgdat->pgdat_next;
} while (pgdat);
printk("Total of %ld pages in page table cache\n", pgtable_cache_size);
printk("%d free buffer pages\n", nr_free_buffer_pages());
diff -urN virgin-2.5.25/include/linux/mmzone.h 2.5.25-A02-pgdat_next/include/linux/mmzone.h
--- virgin-2.5.25/include/linux/mmzone.h Fri Jul 5 16:42:02 2002
+++ 2.5.25-A02-pgdat_next/include/linux/mmzone.h Fri Jul 19 10:34:12 2002
@@ -136,11 +136,11 @@
unsigned long node_start_mapnr;
unsigned long node_size;
int node_id;
- struct pglist_data *node_next;
+ struct pglist_data *pgdat_next; /* global chain of pg_data_t's */
} pg_data_t;

extern int numnodes;
-extern pg_data_t *pgdat_list;
+extern pg_data_t *pgdat_list; /* head of global chain of pg_data_t's */

static inline int memclass(zone_t *pgzone, zone_t *classzone)
{
diff -urN virgin-2.5.25/mm/bootmem.c 2.5.25-A02-pgdat_next/mm/bootmem.c
--- virgin-2.5.25/mm/bootmem.c Fri Jul 5 16:42:20 2002
+++ 2.5.25-A02-pgdat_next/mm/bootmem.c Thu Jul 18 16:04:54 2002
@@ -49,7 +49,7 @@
bootmem_data_t *bdata = pgdat->bdata;
unsigned long mapsize = ((end - start)+7)/8;

- pgdat->node_next = pgdat_list;
+ pgdat->pgdat_next = pgdat_list;
pgdat_list = pgdat;

mapsize = (mapsize + (sizeof(long) - 1UL)) & ~(sizeof(long) - 1UL);
@@ -343,7 +343,7 @@
if ((ptr = __alloc_bootmem_core(pgdat->bdata, size,
align, goal)))
return(ptr);
- pgdat = pgdat->node_next;
+ pgdat = pgdat->pgdat_next;
}
/*
* Whoops, we cannot satisfy the allocation request.
diff -urN virgin-2.5.25/mm/numa.c 2.5.25-A02-pgdat_next/mm/numa.c
--- virgin-2.5.25/mm/numa.c Fri Jul 5 16:42:20 2002
+++ 2.5.25-A02-pgdat_next/mm/numa.c Thu Jul 18 15:21:16 2002
@@ -109,20 +109,20 @@
spin_lock_irqsave(&node_lock, flags);
if (!next) next = pgdat_list;
temp = next;
- next = next->node_next;
+ next = next->pgdat_next;
spin_unlock_irqrestore(&node_lock, flags);
#endif
start = temp;
while (temp) {
if ((ret = alloc_pages_pgdat(temp, gfp_mask, order)))
return(ret);
- temp = temp->node_next;
+ temp = temp->pgdat_next;
}
temp = pgdat_list;
while (temp != start) {
if ((ret = alloc_pages_pgdat(temp, gfp_mask, order)))
return(ret);
- temp = temp->node_next;
+ temp = temp->pgdat_next;
}
return(0);
}
diff -urN virgin-2.5.25/mm/page_alloc.c 2.5.25-A02-pgdat_next/mm/page_alloc.c
--- virgin-2.5.25/mm/page_alloc.c Fri Jul 5 16:42:03 2002
+++ 2.5.25-A02-pgdat_next/mm/page_alloc.c Thu Jul 18 15:25:35 2002
@@ -477,7 +477,7 @@
unsigned int i, sum = 0;
pg_data_t *pgdat;

- for (pgdat = pgdat_list; pgdat; pgdat = pgdat->node_next)
+ for (pgdat = pgdat_list; pgdat; pgdat = pgdat->pgdat_next)
for (i = 0; i < MAX_NR_ZONES; ++i)
sum += pgdat->node_zones[i].free_pages;

@@ -501,7 +501,7 @@
sum += size - high;
}

- pgdat = pgdat->node_next;
+ pgdat = pgdat->pgdat_next;
} while (pgdat);

return sum;
@@ -531,7 +531,7 @@

while (pgdat) {
pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
- pgdat = pgdat->node_next;
+ pgdat = pgdat->pgdat_next;
}
return pages;
}
@@ -621,7 +621,7 @@
K(zone->pages_low),
K(zone->pages_high));

- tmpdat = tmpdat->node_next;
+ tmpdat = tmpdat->pgdat_next;
}

printk("( Active:%lu inactive:%lu dirty:%lu writeback:%lu free:%u )\n",
diff -urN virgin-2.5.25/mm/vmscan.c 2.5.25-A02-pgdat_next/mm/vmscan.c
--- virgin-2.5.25/mm/vmscan.c Fri Jul 5 16:42:05 2002
+++ 2.5.25-A02-pgdat_next/mm/vmscan.c Thu Jul 18 15:28:03 2002
@@ -747,7 +747,7 @@
pgdat = pgdat_list;
do
need_more_balance |= kswapd_balance_pgdat(pgdat);
- while ((pgdat = pgdat->node_next));
+ while ((pgdat = pgdat->pgdat_next));
} while (need_more_balance);
}

@@ -775,7 +775,7 @@
if (kswapd_can_sleep_pgdat(pgdat))
continue;
return 0;
- } while ((pgdat = pgdat->node_next));
+ } while ((pgdat = pgdat->pgdat_next));

return 1;
}

2002-07-20 20:52:03

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [PATCH] for_each_pgdat

At some point in the past, Robert Love wrote:
>> This patch implements for_each_pgdat(pg_data_t *) which is a helper
>> macro to cleanup code that does a loop of the form:

On Sat, Jul 20, 2002 at 01:43:00PM -0700, Martin J. Bligh wrote:
> If you're going to do that (which I think is a good idea) can you
> rename node_next to pgdat_next, as it often has nothing to do with
> nodes whatsoever (discontigmem on a non-NUMA machine, or even more
> confusingly a NUMA machine which is discontig within a node)? I'll
> attatch a patch below, but it conflicts what what you're doing
> horribly, and it's even easier to do after your abtraction ...

Another option would be to convert pgdats to using list.h, which would
make things even prettier IMHO. After wrapping the list iterations it's
actually not difficult to swizzle the list linkage out from underneath.

And yes, s/pgdat/physcontig_region/ or whatever would make the name
match the intended usage.



Cheers,
Bill

2002-07-20 20:57:01

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH] for_each_pgdat



On Sat, 20 Jul 2002, William Lee Irwin III wrote:
>
> Another option would be to convert pgdats to using list.h [ .. ]

Ok guys, you three (and whoever else wants to play? ;) fight it out amonst
yourselves, I'll wait for the end result (iow: I'll just ignore both
patches for now).

Linus

2002-07-20 21:34:26

by Robert Love

[permalink] [raw]
Subject: Re: [PATCH] for_each_pgdat

On Sat, 2002-07-20 at 14:00, Linus Torvalds wrote:

> Ok guys, you three (and whoever else wants to play? ;) fight it out amonst
> yourselves, I'll wait for the end result (iow: I'll just ignore both
> patches for now).

No no... the issues are fairly orthogonal.

Attached is a patch with the for_each_pgdat implementation and
s/node_next/pgdat_next/ per Martin.

If Bill wants to convert pgdats to lists that is fine but is another
step. Let's get in this first batch and that can be done off this.

Patch is against 2.5.27, please apply.

Robert Love

diff -urN linux-2.5.27/arch/ia64/mm/init.c linux/arch/ia64/mm/init.c
--- linux-2.5.27/arch/ia64/mm/init.c Sat Jul 20 12:11:15 2002
+++ linux/arch/ia64/mm/init.c Sat Jul 20 14:27:15 2002
@@ -167,10 +167,10 @@

#ifdef CONFIG_DISCONTIGMEM
{
- pg_data_t *pgdat = pgdat_list;
+ pg_data_t *pgdat;

printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
- do {
+ for_each_pgdat(pgdat) {
printk("Node ID: %d\n", pgdat->node_id);
for(i = 0; i < pgdat->node_size; i++) {
if (PageReserved(pgdat->node_mem_map+i))
@@ -184,8 +184,7 @@
printk("\t%d reserved pages\n", reserved);
printk("\t%d pages shared\n", shared);
printk("\t%d pages swap cached\n", cached);
- pgdat = pgdat->node_next;
- } while (pgdat);
+ }
printk("Total of %ld pages in page table cache\n", pgtable_cache_size);
printk("%d free buffer pages\n", nr_free_buffer_pages());
}
diff -urN linux-2.5.27/include/linux/mmzone.h linux/include/linux/mmzone.h
--- linux-2.5.27/include/linux/mmzone.h Sat Jul 20 12:11:05 2002
+++ linux/include/linux/mmzone.h Sat Jul 20 14:28:57 2002
@@ -136,7 +136,7 @@
unsigned long node_start_mapnr;
unsigned long node_size;
int node_id;
- struct pglist_data *node_next;
+ struct pglist_data *pgdat_next;
} pg_data_t;

extern int numnodes;
@@ -163,6 +163,20 @@

extern pg_data_t contig_page_data;

+/**
+ * for_each_pgdat - helper macro to iterate over all nodes
+ * @pgdat - pointer to a pg_data_t variable
+ *
+ * Meant to help with common loops of the form
+ * pgdat = pgdat_list;
+ * while(pgdat) {
+ * ...
+ * pgdat = pgdat->pgdat_next;
+ * }
+ */
+#define for_each_pgdat(pgdat) \
+ for (pgdat = pgdat_list; pgdat; pgdat = pgdat->pgdat_next)
+
#ifndef CONFIG_DISCONTIGMEM

#define NODE_DATA(nid) (&contig_page_data)
diff -urN linux-2.5.27/mm/bootmem.c linux/mm/bootmem.c
--- linux-2.5.27/mm/bootmem.c Sat Jul 20 12:11:12 2002
+++ linux/mm/bootmem.c Sat Jul 20 14:28:52 2002
@@ -49,7 +49,7 @@
bootmem_data_t *bdata = pgdat->bdata;
unsigned long mapsize = ((end - start)+7)/8;

- pgdat->node_next = pgdat_list;
+ pgdat->pgdat_next = pgdat_list;
pgdat_list = pgdat;

mapsize = (mapsize + (sizeof(long) - 1UL)) & ~(sizeof(long) - 1UL);
@@ -339,12 +339,11 @@
pg_data_t *pgdat = pgdat_list;
void *ptr;

- while (pgdat) {
+ for_each_pgdat(pgdat)
if ((ptr = __alloc_bootmem_core(pgdat->bdata, size,
align, goal)))
return(ptr);
- pgdat = pgdat->node_next;
- }
+
/*
* Whoops, we cannot satisfy the allocation request.
*/
diff -urN linux-2.5.27/mm/numa.c linux/mm/numa.c
--- linux-2.5.27/mm/numa.c Sat Jul 20 12:11:12 2002
+++ linux/mm/numa.c Sat Jul 20 14:29:11 2002
@@ -109,20 +109,20 @@
spin_lock_irqsave(&node_lock, flags);
if (!next) next = pgdat_list;
temp = next;
- next = next->node_next;
+ next = next->pgdat_next;
spin_unlock_irqrestore(&node_lock, flags);
#endif
start = temp;
while (temp) {
if ((ret = alloc_pages_pgdat(temp, gfp_mask, order)))
return(ret);
- temp = temp->node_next;
+ temp = temp->pgdat_next;
}
temp = pgdat_list;
while (temp != start) {
if ((ret = alloc_pages_pgdat(temp, gfp_mask, order)))
return(ret);
- temp = temp->node_next;
+ temp = temp->pgdat_next;
}
return(0);
}
diff -urN linux-2.5.27/mm/page_alloc.c linux/mm/page_alloc.c
--- linux-2.5.27/mm/page_alloc.c Sat Jul 20 12:11:07 2002
+++ linux/mm/page_alloc.c Sat Jul 20 14:29:25 2002
@@ -480,7 +480,7 @@
unsigned int i, sum = 0;
pg_data_t *pgdat;

- for (pgdat = pgdat_list; pgdat; pgdat = pgdat->node_next)
+ for (pgdat = pgdat_list; pgdat; pgdat = pgdat->pgdat_next)
for (i = 0; i < MAX_NR_ZONES; ++i)
sum += pgdat->node_zones[i].free_pages;

@@ -489,10 +489,10 @@

static unsigned int nr_free_zone_pages(int offset)
{
- pg_data_t *pgdat = pgdat_list;
+ pg_data_t *pgdat;
unsigned int sum = 0;

- do {
+ for_each_pgdat(pgdat) {
zonelist_t *zonelist = pgdat->node_zonelists + offset;
zone_t **zonep = zonelist->zones;
zone_t *zone;
@@ -503,9 +503,7 @@
if (size > high)
sum += size - high;
}
-
- pgdat = pgdat->node_next;
- } while (pgdat);
+ }

return sum;
}
@@ -529,13 +527,12 @@
#if CONFIG_HIGHMEM
unsigned int nr_free_highpages (void)
{
- pg_data_t *pgdat = pgdat_list;
+ pg_data_t *pgdat;
unsigned int pages = 0;

- while (pgdat) {
+ for_each_pgdat(pgdat)
pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
- pgdat = pgdat->node_next;
- }
+
return pages;
}
#endif
@@ -627,7 +624,7 @@
K(zone->pages_low),
K(zone->pages_high));

- tmpdat = tmpdat->node_next;
+ tmpdat = tmpdat->pgdat_next;
}

printk("( Active:%lu inactive:%lu dirty:%lu writeback:%lu free:%u )\n",
diff -urN linux-2.5.27/mm/vmscan.c linux/mm/vmscan.c
--- linux-2.5.27/mm/vmscan.c Sat Jul 20 12:11:08 2002
+++ linux/mm/vmscan.c Sat Jul 20 14:29:35 2002
@@ -471,7 +471,7 @@
pgdat = pgdat_list;
do
need_more_balance |= kswapd_balance_pgdat(pgdat);
- while ((pgdat = pgdat->node_next));
+ while ((pgdat = pgdat->pgdat_next));
} while (need_more_balance);
}

@@ -499,7 +499,7 @@
if (kswapd_can_sleep_pgdat(pgdat))
continue;
return 0;
- } while ((pgdat = pgdat->node_next));
+ } while ((pgdat = pgdat->pgdat_next));

return 1;
}


2002-07-20 22:50:18

by Martin J. Bligh

[permalink] [raw]
Subject: Re: [PATCH] for_each_pgdat

>> Ok guys, you three (and whoever else wants to play? ;) fight it out amonst
>> yourselves, I'll wait for the end result (iow: I'll just ignore both
>> patches for now).
>
> No no... the issues are fairly orthogonal.
>
> Attached is a patch with the for_each_pgdat implementation and
> s/node_next/pgdat_next/ per Martin.

I'm happy with this (obviously ;-))

> If Bill wants to convert pgdats to lists that is fine but is another
> step. Let's get in this first batch and that can be done off this.

As we now reference them in only two places (the macro defn and
numa.c:_alloc_pages) it hardly seems worth converting to lists ... ?
(I'm going to take an axe to NUMA _alloc_pages in a minute anyway ;-))

M.

2002-07-20 23:06:55

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [PATCH] for_each_pgdat

At some point in the past, Robert Love wrote:
>> If Bill wants to convert pgdats to lists that is fine but is another
>> step. Let's get in this first batch and that can be done off this.

On Sat, Jul 20, 2002 at 03:48:59PM -0700, Martin J. Bligh wrote:
> As we now reference them in only two places (the macro defn and
> numa.c:_alloc_pages) it hardly seems worth converting to lists ... ?
> (I'm going to take an axe to NUMA _alloc_pages in a minute anyway ;-))

I won't stand in the way of any of this, the list.h sort-of suggestion
was merely part of questioning the pgdat_next change. In fact, it was
meant more to point out what you just did, that the iterator takes the
field out of direct usage except for a couple of places.

I'll stand behind these changes as they are now.


Cheers,
Bill