Date: Wed, 09 May 2007 12:10:38 +0900
From: Yasunori Goto
To: Linux Kernel ML, linux-mm
Subject: [RFC] memory hotremove patch take 2 [02/10] (make page unused)
Cc: Andrew Morton, Christoph Lameter, Mel Gorman
In-Reply-To: <20070509115506.B904.Y-GOTO@jp.fujitsu.com>
References: <20070509115506.B904.Y-GOTO@jp.fujitsu.com>
Message-Id: <20070509120248.B908.Y-GOTO@jp.fujitsu.com>

This patch adds support for making pages unused.

Pages are isolated by capturing freed pages before they are inserted
into the buddy allocator's free_area[].  If you have an idea for
avoiding the spin_lock(), please advise me.

Isolating pages that are already in free_area[] is implemented in
another patch.

Signed-off-by: KAMEZAWA Hiroyuki
Signed-off-by: Yasunori Goto

 include/linux/mmzone.h         |    8 +
 include/linux/page_isolation.h |   52 +++++++++++
 mm/Kconfig                     |    7 +
 mm/page_alloc.c                |  187 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 254 insertions(+)

Index: current_test/include/linux/mmzone.h
===================================================================
--- current_test.orig/include/linux/mmzone.h	2007-05-08 15:06:49.000000000 +0900
+++ current_test/include/linux/mmzone.h	2007-05-08 15:08:03.000000000 +0900
@@ -314,6 +314,14 @@ struct zone {
 	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
 	unsigned long		zone_start_pfn;
 
+#ifdef CONFIG_PAGE_ISOLATION
+	/*
+	 * For pages which are not used but not free.
+	 * See include/linux/page_isolation.h
+	 */
+	spinlock_t		isolation_lock;
+	struct list_head	isolation_list;
+#endif
 	/*
 	 * zone_start_pfn, spanned_pages and present_pages are all
 	 * protected by span_seqlock.  It is a seqlock because it has
Index: current_test/mm/page_alloc.c
===================================================================
--- current_test.orig/mm/page_alloc.c	2007-05-08 15:07:20.000000000 +0900
+++ current_test/mm/page_alloc.c	2007-05-08 15:08:34.000000000 +0900
@@ -41,6 +41,7 @@
 #include
 #include
 #include
+#include <linux/page_isolation.h>
 #include
 #include
@@ -448,6 +449,9 @@ static inline void __free_one_page(struc
 	if (unlikely(PageCompound(page)))
 		destroy_compound_page(page, order);
 
+	if (page_under_isolation(zone, page, order))
+		return;
+
 	page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
 
 	VM_BUG_ON(page_idx & (order_size - 1));
@@ -3259,6 +3263,10 @@ static void __meminit free_area_init_cor
 		zone->nr_scan_inactive = 0;
 		zap_zone_vm_stats(zone);
 		atomic_set(&zone->reclaim_in_progress, 0);
+#ifdef CONFIG_PAGE_ISOLATION
+		spin_lock_init(&zone->isolation_lock);
+		INIT_LIST_HEAD(&zone->isolation_list);
+#endif
 		if (!size)
 			continue;
 
@@ -4214,3 +4222,182 @@ void set_pageblock_flags_group(struct pa
 	else
 		__clear_bit(bitidx + start_bitidx, bitmap);
 }
+
+#ifdef CONFIG_PAGE_ISOLATION
+/*
+ * Page Isolation.
+ *
+ * If a page is removed from the usual free_list and will never be used,
+ * it is linked to a "struct isolation_info" and its Reserved and Private
+ * bits are set.  page->private points to the isolation_info it belongs
+ * to, and page_count(page) is 0.
+ *
+ * This can be used for creating a chunk of contiguous *unused* memory.
+ *
+ * The current user is memory hot-remove.
+ * Maybe moving this to some other file would be better.
+ */
+static void
+isolate_page_nolock(struct isolation_info *info, struct page *page, int order)
+{
+	int pagenum;
+	pagenum = 1 << order;
+	while (pagenum > 0) {
+		SetPageReserved(page);
+		SetPagePrivate(page);
+		page->private = (unsigned long)info;
+		list_add(&page->lru, &info->pages);
+		page++;
+		pagenum--;
+	}
+}
+
+/*
+ * This function is called from page_under_isolation().
+ */
+
+int __page_under_isolation(struct zone *zone, struct page *page, int order)
+{
+	struct isolation_info *info;
+	unsigned long pfn = page_to_pfn(page);
+	unsigned long flags;
+	int found = 0;
+
+	spin_lock_irqsave(&zone->isolation_lock, flags);
+	list_for_each_entry(info, &zone->isolation_list, list) {
+		if (info->start_pfn <= pfn && pfn < info->end_pfn) {
+			found = 1;
+			break;
+		}
+	}
+	if (found) {
+		isolate_page_nolock(info, page, order);
+	}
+	spin_unlock_irqrestore(&zone->isolation_lock, flags);
+	return found;
+}
+
+/*
+ * start and end must be in the same zone.
+ *
+ */
+struct isolation_info *
+register_isolation(unsigned long start, unsigned long end)
+{
+	struct zone *zone;
+	struct isolation_info *info = NULL, *tmp;
+	unsigned long flags;
+	unsigned long last_pfn = end - 1;
+
+	if (!pfn_valid(start) || !pfn_valid(last_pfn) || (start >= end))
+		return ERR_PTR(-EINVAL);
+	/* check that start and end are in the same zone */
+	zone = page_zone(pfn_to_page(start));
+
+	if (zone != page_zone(pfn_to_page(last_pfn)))
+		return ERR_PTR(-EINVAL);
+	/* the target range has to be MAX_ORDER aligned */
+	if ((start & (MAX_ORDER_NR_PAGES - 1)) ||
+	    (end & (MAX_ORDER_NR_PAGES - 1)))
+		return ERR_PTR(-EINVAL);
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return ERR_PTR(-ENOMEM);
+	spin_lock_irqsave(&zone->isolation_lock, flags);
+	/* we don't allow overlap among isolation areas */
+	if (!list_empty(&zone->isolation_list)) {
+		list_for_each_entry(tmp, &zone->isolation_list, list) {
+			if (start < tmp->end_pfn && end > tmp->start_pfn) {
+				goto out_free;
+			}
+		}
+	}
+	info->start_pfn = start;
+	info->end_pfn = end;
+	info->zone = zone;
+	INIT_LIST_HEAD(&info->list);
+	INIT_LIST_HEAD(&info->pages);
+	list_add(&info->list, &zone->isolation_list);
out_unlock:
+	spin_unlock_irqrestore(&zone->isolation_lock, flags);
+	return info;
out_free:
+	kfree(info);
+	info = ERR_PTR(-EBUSY);
+	goto out_unlock;
+}
+/*
+ * Remove the isolation_info from its zone.
+ * After this, the memory in info can be marked unused or
+ * freed back to the free list.
+ */
+
+void
+detach_isolation_info_zone(struct isolation_info *info)
+{
+	unsigned long flags;
+	struct zone *zone = info->zone;
+	spin_lock_irqsave(&zone->isolation_lock, flags);
+	list_del(&info->list);
+	info->zone = NULL;
+	spin_unlock_irqrestore(&zone->isolation_lock, flags);
+}
+
+/*
+ * All pages in info->pages should be removed before calling this,
+ * and info must already be detached from its zone.
+ */
+void
+free_isolation_info(struct isolation_info *info)
+{
+	BUG_ON(!list_empty(&info->pages));
+	BUG_ON(info->zone);
+	kfree(info);
+	return;
+}
+
+/*
+ * Mark all pages in the isolation_info Reserved.
+ * When onlining these pages again, the caller must check
+ * which pages are usable via IORESOURCE_RAM;
+ * please see online_pages() in memory_hotplug.c if this is unclear.
+ *
+ * info must be detached from its zone before calling this.
+ */
+void
+unuse_all_isolated_pages(struct isolation_info *info)
+{
+	struct page *page, *n;
+	BUG_ON(info->zone);
+	list_for_each_entry_safe(page, n, &info->pages, lru) {
+		SetPageReserved(page);
+		page->private = 0;
+		ClearPagePrivate(page);
+		list_del(&page->lru);
+	}
+}
+
+/*
+ * Free all pages linked on the isolation list.
+ * The pages are moved back to the free_list.
+ */
+void
+free_all_isolated_pages(struct isolation_info *info)
+{
+	struct page *page, *n;
+	BUG_ON(info->zone);
+	list_for_each_entry_safe(page, n, &info->pages, lru) {
+		ClearPagePrivate(page);
+		ClearPageReserved(page);
+		page->private = 0;
+		list_del(&page->lru);
+		set_page_count(page, 0);
+		set_page_refcounted(page);
+		/* This is safe because info is detached from the zone */
+		__free_page(page);
+	}
+}
+
+#endif /* CONFIG_PAGE_ISOLATION */
+
+
Index: current_test/mm/Kconfig
===================================================================
--- current_test.orig/mm/Kconfig	2007-05-08 15:06:50.000000000 +0900
+++ current_test/mm/Kconfig	2007-05-08 15:08:31.000000000 +0900
@@ -225,3 +225,10 @@ config DEBUG_READAHEAD
 
 	  Say N for production servers.
 
+config PAGE_ISOLATION
+	bool "Page Isolation Framework"
+	help
+	  This option adds the page isolation framework to mm.
+	  It is used to isolate an amount of contiguous pages from Linux
+	  memory management.
+	  Say N if unsure.
Index: current_test/include/linux/page_isolation.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ current_test/include/linux/page_isolation.h	2007-05-08 15:08:34.000000000 +0900
@@ -0,0 +1,52 @@
+#ifndef __LINUX_PAGE_ISOLATION_H
+#define __LINUX_PAGE_ISOLATION_H
+
+#ifdef CONFIG_PAGE_ISOLATION
+
+struct isolation_info {
+	struct list_head	list;
+	unsigned long		start_pfn;
+	unsigned long		end_pfn;
+	struct zone		*zone;
+	struct list_head	pages;
+};
+
+extern int
+__page_under_isolation(struct zone *zone, struct page *page, int order);
+
+static inline int
+page_under_isolation(struct zone *zone, struct page *page, int order)
+{
+	if (likely(list_empty(&zone->isolation_list)))
+		return 0;
+	return __page_under_isolation(zone, page, order);
+}
+
+static inline int
+is_page_isolated(struct isolation_info *info, struct page *page)
+{
+	if (PageReserved(page) && PagePrivate(page) &&
+	    page_count(page) == 0 &&
+	    page->private == (unsigned long)info)
+		return 1;
+	return 0;
+}
+
+extern struct isolation_info *
+register_isolation(unsigned long start, unsigned long end);
+
+extern void detach_isolation_info_zone(struct isolation_info *info);
+extern void free_isolation_info(struct isolation_info *info);
+extern void unuse_all_isolated_pages(struct isolation_info *info);
+extern void free_all_isolated_pages(struct isolation_info *info);
+
+#else
+
+static inline int
+page_under_isolation(struct zone *zone, struct page *page, int order)
+{
+	return 0;
+}
+
+#endif
+#endif

-- 
Yasunori Goto
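
P.S. For reference, below is a minimal sketch (not part of the patch) of how a
memory hot-remove caller might drive this interface, using only the functions
the patch adds.  The offline_and_drain_range() helper is hypothetical; it
stands in for whatever migrates or frees every page in [start_pfn, end_pfn)
so that freed pages are captured by page_under_isolation() while the
isolation_info is registered.

/*
 * Sketch only -- not part of this patch.
 * offline_and_drain_range() is a hypothetical placeholder for the step
 * that empties the pfn range; freed pages then land on info->pages.
 */
#include <linux/err.h>
#include <linux/page_isolation.h>

extern int offline_and_drain_range(unsigned long start_pfn,
				   unsigned long end_pfn); /* hypothetical */

static int try_unuse_range(unsigned long start_pfn, unsigned long end_pfn)
{
	struct isolation_info *info;
	int ret;

	info = register_isolation(start_pfn, end_pfn);
	if (IS_ERR(info))
		return PTR_ERR(info);

	/* Hypothetical: empty the range; freed pages are captured. */
	ret = offline_and_drain_range(start_pfn, end_pfn);

	/* Stop capturing further frees for this range. */
	detach_isolation_info_zone(info);

	if (ret)
		/* Could not empty the range; give the captured pages back. */
		free_all_isolated_pages(info);
	else
		/* Range is unused now; keep the captured pages Reserved. */
		unuse_all_isolated_pages(info);

	free_isolation_info(info);
	return ret;
}

The ordering matters: unuse_all_isolated_pages() and free_all_isolated_pages()
both BUG_ON(info->zone), so detach_isolation_info_zone() has to come first,
and free_isolation_info() requires info->pages to be empty, which either of
those helpers guarantees.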