Return-Path:
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1030472AbWHUNqV (ORCPT ); Mon, 21 Aug 2006 09:46:21 -0400
Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1030471AbWHUNqU (ORCPT ); Mon, 21 Aug 2006 09:46:20 -0400
Received: from calculon.skynet.ie ([193.1.99.88]:59601 "EHLO calculon.skynet.ie") by vger.kernel.org with ESMTP id S1030463AbWHUNqT (ORCPT ); Mon, 21 Aug 2006 09:46:19 -0400
From: Mel Gorman
To: akpm@osdl.org
Cc: Mel Gorman , tony.luck@intel.com, linuxppc-dev@ozlabs.org, linux-kernel@vger.kernel.org, bob.picco@hp.com, ak@suse.de, linux-mm@kvack.org
Message-Id: <20060821134618.22179.72400.sendpatchset@skynet.skynet.ie>
In-Reply-To: <20060821134518.22179.46355.sendpatchset@skynet.skynet.ie>
References: <20060821134518.22179.46355.sendpatchset@skynet.skynet.ie>
Subject: [PATCH 3/6] Have x86 use add_active_range() and free_area_init_nodes
Date: Mon, 21 Aug 2006 14:46:18 +0100 (IST)
Sender: linux-kernel-owner@vger.kernel.org
X-Mailing-List: linux-kernel@vger.kernel.org
Content-Length: 9977
Lines: 308

Size zones and holes in an architecture-independent manner for x86.
Kconfig | 8 +--- kernel/setup.c | 24 ++++-------- kernel/srat.c | 97 +--------------------------------------------------- mm/discontig.c | 69 +++++++++--------------------------- 4 files changed, 31 insertions(+), 167 deletions(-) Signed-off-by: Mel Gorman diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.18-rc4-mm2-102-powerpc_use_init_nodes/arch/i386/Kconfig linux-2.6.18-rc4-mm2-103-x86_use_init_nodes/arch/i386/Kconfig --- linux-2.6.18-rc4-mm2-102-powerpc_use_init_nodes/arch/i386/Kconfig 2006-08-21 09:23:50.000000000 +0100 +++ linux-2.6.18-rc4-mm2-103-x86_use_init_nodes/arch/i386/Kconfig 2006-08-21 10:14:43.000000000 +0100 @@ -602,12 +602,10 @@ config ARCH_SELECT_MEMORY_MODEL def_bool y depends on ARCH_SPARSEMEM_ENABLE -source "mm/Kconfig" +config ARCH_POPULATES_NODE_MAP + def_bool y -config HAVE_ARCH_EARLY_PFN_TO_NID - bool - default y - depends on NUMA +source "mm/Kconfig" config HIGHPTE bool "Allocate 3rd-level pagetables from highmem" diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.18-rc4-mm2-102-powerpc_use_init_nodes/arch/i386/kernel/setup.c linux-2.6.18-rc4-mm2-103-x86_use_init_nodes/arch/i386/kernel/setup.c --- linux-2.6.18-rc4-mm2-102-powerpc_use_init_nodes/arch/i386/kernel/setup.c 2006-08-21 09:23:50.000000000 +0100 +++ linux-2.6.18-rc4-mm2-103-x86_use_init_nodes/arch/i386/kernel/setup.c 2006-08-21 10:14:43.000000000 +0100 @@ -1107,22 +1107,16 @@ static unsigned long __init setup_memory void __init zone_sizes_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = { 0, }; - unsigned int max_dma, low; - - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - low = max_low_pfn; - - if (low < max_dma) - zones_size[ZONE_DMA] = low; - else { - zones_size[ZONE_DMA] = max_dma; - zones_size[ZONE_NORMAL] = low - max_dma; -#ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = highend_pfn - low; + unsigned long max_zone_pfns[MAX_NR_ZONES] = { + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT, + max_low_pfn, + highend_pfn}; +#ifndef 
CONFIG_HIGHMEM + unsigned long highend_pfn = max_low_pfn; #endif - } - free_area_init(zones_size); + + add_active_range(0, 0, highend_pfn); + free_area_init_nodes(max_zone_pfns); } #else extern unsigned long __init setup_memory(void); diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.18-rc4-mm2-102-powerpc_use_init_nodes/arch/i386/kernel/srat.c linux-2.6.18-rc4-mm2-103-x86_use_init_nodes/arch/i386/kernel/srat.c --- linux-2.6.18-rc4-mm2-102-powerpc_use_init_nodes/arch/i386/kernel/srat.c 2006-08-21 09:23:50.000000000 +0100 +++ linux-2.6.18-rc4-mm2-103-x86_use_init_nodes/arch/i386/kernel/srat.c 2006-08-21 10:14:43.000000000 +0100 @@ -55,8 +55,6 @@ struct node_memory_chunk_s { static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS]; static int num_memory_chunks; /* total number of memory chunks */ -static int zholes_size_init; -static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES]; static u8 __initdata apicid_to_pxm[MAX_APICID]; extern void * boot_ioremap(unsigned long, unsigned long); @@ -139,47 +137,6 @@ static void __init parse_memory_affinity "enabled and removable" : "enabled" ) ); } -/* Take a chunk of pages from page frame cstart to cend and count the number - * of pages in each zone, returned via zones[]. - */ -static __init void chunk_to_zones(unsigned long cstart, unsigned long cend, - unsigned long *zones) -{ - unsigned long max_dma; - extern unsigned long max_low_pfn; - - int z; - unsigned long rend; - - /* FIXME: MAX_DMA_ADDRESS and max_low_pfn are trying to provide - * similarly scoped information and should be handled in a consistant - * manner. - */ - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - - /* Split the hole into the zones in which it falls. Repeatedly - * take the segment in which the remaining hole starts, round it - * to the end of that zone. - */ - memset(zones, 0, MAX_NR_ZONES * sizeof(long)); - while (cstart < cend) { - if (cstart < max_dma) { - z = ZONE_DMA; - rend = (cend < max_dma)? 
cend : max_dma; - - } else if (cstart < max_low_pfn) { - z = ZONE_NORMAL; - rend = (cend < max_low_pfn)? cend : max_low_pfn; - - } else { - z = ZONE_HIGHMEM; - rend = cend; - } - zones[z] += rend - cstart; - cstart = rend; - } -} - /* * The SRAT table always lists ascending addresses, so can always * assume that the first "start" address that you see is the real @@ -224,7 +181,6 @@ static int __init acpi20_parse_srat(stru memset(pxm_bitmap, 0, sizeof(pxm_bitmap)); /* init proximity domain bitmap */ memset(node_memory_chunk, 0, sizeof(node_memory_chunk)); - memset(zholes_size, 0, sizeof(zholes_size)); num_memory_chunks = 0; while (p < end) { @@ -291,6 +247,7 @@ static int __init acpi20_parse_srat(stru printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", j, chunk->nid, chunk->start_pfn, chunk->end_pfn); node_read_chunk(chunk->nid, chunk); + add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn); } for_each_online_node(nid) { @@ -399,57 +356,7 @@ int __init get_memcfg_from_srat(void) return acpi20_parse_srat((struct acpi_table_srat *)header); } out_err: + remove_all_active_ranges(); printk("failed to get NUMA memory information from SRAT table\n"); return 0; } - -/* For each node run the memory list to determine whether there are - * any memory holes. For each hole determine which ZONE they fall - * into. - * - * NOTE#1: this requires knowledge of the zone boundries and so - * _cannot_ be performed before those are calculated in setup_memory. - * - * NOTE#2: we rely on the fact that the memory chunks are ordered by - * start pfn number during setup. 
- */ -static void __init get_zholes_init(void) -{ - int nid; - int c; - int first; - unsigned long end = 0; - - for_each_online_node(nid) { - first = 1; - for (c = 0; c < num_memory_chunks; c++){ - if (node_memory_chunk[c].nid == nid) { - if (first) { - end = node_memory_chunk[c].end_pfn; - first = 0; - - } else { - /* Record any gap between this chunk - * and the previous chunk on this node - * against the zones it spans. - */ - chunk_to_zones(end, - node_memory_chunk[c].start_pfn, - &zholes_size[nid * MAX_NR_ZONES]); - } - } - } - } -} - -unsigned long * __init get_zholes_size(int nid) -{ - if (!zholes_size_init) { - zholes_size_init++; - get_zholes_init(); - } - if (nid >= MAX_NUMNODES || !node_online(nid)) - printk("%s: nid = %d is invalid/offline. num_online_nodes = %d", - __FUNCTION__, nid, num_online_nodes()); - return &zholes_size[nid * MAX_NR_ZONES]; -} diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.18-rc4-mm2-102-powerpc_use_init_nodes/arch/i386/mm/discontig.c linux-2.6.18-rc4-mm2-103-x86_use_init_nodes/arch/i386/mm/discontig.c --- linux-2.6.18-rc4-mm2-102-powerpc_use_init_nodes/arch/i386/mm/discontig.c 2006-08-21 09:23:50.000000000 +0100 +++ linux-2.6.18-rc4-mm2-103-x86_use_init_nodes/arch/i386/mm/discontig.c 2006-08-21 10:14:43.000000000 +0100 @@ -157,21 +157,6 @@ static void __init find_max_pfn_node(int BUG(); } -/* Find the owning node for a pfn. */ -int early_pfn_to_nid(unsigned long pfn) -{ - int nid; - - for_each_node(nid) { - if (node_end_pfn[nid] == 0) - break; - if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn) - return nid; - } - - return 0; -} - /* * Allocate memory for the pg_data_t for this node via a crude pre-bootmem * method. 
For node zero take this from the bottom of memory, for @@ -227,6 +212,8 @@ static unsigned long calculate_numa_rema unsigned long pfn; for_each_online_node(nid) { + unsigned old_end_pfn = node_end_pfn[nid]; + /* * The acpi/srat node info can show hot-add memroy zones * where memory could be added but not currently present. @@ -276,6 +263,7 @@ static unsigned long calculate_numa_rema node_end_pfn[nid] -= size; node_remap_start_pfn[nid] = node_end_pfn[nid]; + shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]); } printk("Reserving total of %ld pages for numa KVA remap\n", reserve_pages); @@ -369,45 +357,22 @@ void __init numa_kva_reserve(void) void __init zone_sizes_init(void) { int nid; - - - for_each_online_node(nid) { - unsigned long zones_size[MAX_NR_ZONES] = {0, }; - unsigned long *zholes_size; - unsigned int max_dma; - - unsigned long low = max_low_pfn; - unsigned long start = node_start_pfn[nid]; - unsigned long high = node_end_pfn[nid]; - - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - - if (node_has_online_mem(nid)){ - if (start > low) { -#ifdef CONFIG_HIGHMEM - BUG_ON(start > high); - zones_size[ZONE_HIGHMEM] = high - start; -#endif - } else { - if (low < max_dma) - zones_size[ZONE_DMA] = low; - else { - BUG_ON(max_dma > low); - BUG_ON(low > high); - zones_size[ZONE_DMA] = max_dma; - zones_size[ZONE_NORMAL] = low - max_dma; -#ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = high - low; -#endif - } - } + unsigned long max_zone_pfns[MAX_NR_ZONES] = { + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT, + max_low_pfn, + highend_pfn + }; + + /* If SRAT has not registered memory, register it now */ + if (find_max_pfn_with_active_regions() == 0) { + for_each_online_node(nid) { + if (node_has_online_mem(nid)) + add_active_range(nid, node_start_pfn[nid], + node_end_pfn[nid]); } - - zholes_size = get_zholes_size(nid); - - free_area_init_node(nid, NODE_DATA(nid), zones_size, start, - zholes_size); } + + free_area_init_nodes(max_zone_pfns); 
return; }
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/