From: KOSAKI Motohiro
To: LKML
Subject: [RFC][PATCH v2 4/8] Replace page_referenced() with wipe_page_reference()
Cc: kosaki.motohiro@jp.fujitsu.com, linux-mm, Rik van Riel, Andrea Arcangeli,
    Larry Woodman
In-Reply-To: <20091210154822.2550.A69D9226@jp.fujitsu.com>
References: <20091210154822.2550.A69D9226@jp.fujitsu.com>
Message-Id: <20091210163123.255C.A69D9226@jp.fujitsu.com>
Date: Thu, 10 Dec 2009 16:32:03 +0900 (JST)

page_referenced() implies "test whether the page was referenced", but
shrink_active_list() uses it to drop the pte young bits, so it should be
renamed.  In addition, the vm_flags argument is really ugly; instead,
introduce a new struct page_reference_context that collects the statistics.

This patch doesn't change any behavior.

Signed-off-by: KOSAKI Motohiro
Reviewed-by: Rik van Riel
---
(An illustrative before/after sketch of the new calling convention is
appended after the patch, for reference only.)

 include/linux/ksm.h  |   11 +++--
 include/linux/rmap.h |   16 +++++--
 mm/ksm.c             |   17 +++++---
 mm/rmap.c            |  112 ++++++++++++++++++++++++-------------------------
 mm/vmscan.c          |   46 ++++++++++++--------
 5 files changed, 112 insertions(+), 90 deletions(-)

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index bed5f16..e1a60d3 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -85,8 +85,8 @@ static inline struct page *ksm_might_need_to_copy(struct page *page,
 	return ksm_does_need_to_copy(page, vma, address);
 }
 
-int page_referenced_ksm(struct page *page,
-			struct mem_cgroup *memcg, unsigned long *vm_flags);
+int wipe_page_reference_ksm(struct page *page, struct mem_cgroup *memcg,
+			struct page_reference_context *refctx);
 int try_to_unmap_ksm(struct page *page, enum ttu_flags flags);
 int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
 		struct vm_area_struct *, unsigned long, void *), void *arg);
@@ -120,10 +120,11 @@ static inline struct page *ksm_might_need_to_copy(struct page *page,
 	return page;
 }
 
-static inline int page_referenced_ksm(struct page *page,
-			struct mem_cgroup *memcg, unsigned long *vm_flags)
+static inline int wipe_page_reference_ksm(struct page *page,
+			struct mem_cgroup *memcg,
+			struct page_reference_context *refctx)
 {
-	return 0;
+	return SWAP_SUCCESS;
 }
 
 static inline int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b019ae6..564d981 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -108,13 +108,21 @@ static inline void page_dup_rmap(struct page *page)
 	atomic_inc(&page->_mapcount);
 }
 
+struct page_reference_context {
+	int is_page_locked;
+	unsigned long referenced;
+	unsigned long exec_referenced;
+	int maybe_mlocked;	/* found VM_LOCKED, but it's unstable result */
+};
+
 /*
  * Called from mm/vmscan.c to handle paging out
  */
-int page_referenced(struct page *, int is_locked,
-			struct mem_cgroup *cnt, unsigned long *vm_flags);
-int page_referenced_one(struct page *, struct vm_area_struct *,
-	unsigned long address, unsigned int *mapcount, unsigned long *vm_flags);
+int wipe_page_reference(struct page *page, struct mem_cgroup *memcg,
+			struct page_reference_context *refctx);
+int wipe_page_reference_one(struct page *page,
+			struct page_reference_context *refctx,
+			struct vm_area_struct *vma, unsigned long address);
 
 enum ttu_flags {
 	TTU_UNMAP = 0,			/* unmap mode */
diff --git a/mm/ksm.c b/mm/ksm.c
index 56a0da1..19559ae 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1544,15 +1544,15 @@ struct page *ksm_does_need_to_copy(struct page *page,
 	return new_page;
 }
 
-int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
-			unsigned long *vm_flags)
+int wipe_page_reference_ksm(struct page *page, struct mem_cgroup *memcg,
+			struct page_reference_context *refctx)
 {
 	struct stable_node *stable_node;
 	struct rmap_item *rmap_item;
 	struct hlist_node *hlist;
 	unsigned int mapcount = page_mapcount(page);
-	int referenced = 0;
 	int search_new_forks = 0;
+	int ret = SWAP_SUCCESS;
 
 	VM_BUG_ON(!PageKsm(page));
 	VM_BUG_ON(!PageLocked(page));
@@ -1582,10 +1582,15 @@ again:
 			if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
 				continue;
 
-			referenced += page_referenced_one(page, vma,
-				rmap_item->address, &mapcount, vm_flags);
+			ret = wipe_page_reference_one(page, refctx, vma,
+				rmap_item->address);
+			if (ret != SWAP_SUCCESS)
+				goto out;
+			mapcount--;
 			if (!search_new_forks || !mapcount)
 				break;
+			if (refctx->maybe_mlocked)
+				goto out;
 		}
 		spin_unlock(&anon_vma->lock);
 		if (!mapcount)
@@ -1594,7 +1599,7 @@ again:
 	if (!search_new_forks++)
 		goto again;
 out:
-	return referenced;
+	return ret;
 }
 
 int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
diff --git a/mm/rmap.c b/mm/rmap.c
index fb0983a..2f4451b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -371,17 +371,16 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
 }
 
 /*
- * Subfunctions of page_referenced: page_referenced_one called
- * repeatedly from either page_referenced_anon or page_referenced_file.
+ * Subfunctions of wipe_page_reference: wipe_page_reference_one called
+ * repeatedly from either wipe_page_reference_anon or wipe_page_reference_file.
 */
-int page_referenced_one(struct page *page, struct vm_area_struct *vma,
-			unsigned long address, unsigned int *mapcount,
-			unsigned long *vm_flags)
+int wipe_page_reference_one(struct page *page,
+			struct page_reference_context *refctx,
+			struct vm_area_struct *vma, unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte;
 	spinlock_t *ptl;
-	int referenced = 0;
 
 	/*
 	 * Don't want to elevate referenced for mlocked page that gets this far,
@@ -389,8 +388,7 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 	 * unevictable list.
 	 */
 	if (vma->vm_flags & VM_LOCKED) {
-		*mapcount = 0;	/* break early from loop */
-		*vm_flags |= VM_LOCKED;
+		refctx->maybe_mlocked = 1;
 		goto out;
 	}
 
@@ -406,37 +404,38 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 		 * mapping is already gone, the unmap path will have
 		 * set PG_referenced or activated the page.
 		 */
-		if (likely(!VM_SequentialReadHint(vma)))
-			referenced++;
+		if (likely(!VM_SequentialReadHint(vma))) {
+			refctx->referenced++;
+			if (vma->vm_flags & VM_EXEC) {
+				refctx->exec_referenced++;
+			}
+		}
 	}
 
 	/* Pretend the page is referenced if the task has the
 	   swap token and is in the middle of a page fault. */
 	if (mm != current->mm && has_swap_token(mm) &&
 			rwsem_is_locked(&mm->mmap_sem))
-		referenced++;
+		refctx->referenced++;
 
-	(*mapcount)--;
 	pte_unmap_unlock(pte, ptl);
 
-	if (referenced)
-		*vm_flags |= vma->vm_flags;
 out:
-	return referenced;
+	return SWAP_SUCCESS;
 }
 
-static int page_referenced_anon(struct page *page,
-				struct mem_cgroup *mem_cont,
-				unsigned long *vm_flags)
+static int wipe_page_reference_anon(struct page *page,
+				struct mem_cgroup *memcg,
+				struct page_reference_context *refctx)
 {
 	unsigned int mapcount;
 	struct anon_vma *anon_vma;
 	struct vm_area_struct *vma;
-	int referenced = 0;
+	int ret = SWAP_SUCCESS;
 
 	anon_vma = page_lock_anon_vma(page);
 	if (!anon_vma)
-		return referenced;
+		return ret;
 
 	mapcount = page_mapcount(page);
 	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
@@ -448,20 +447,22 @@ static int page_referenced_anon(struct page *page,
 		 * counting on behalf of references from different
 		 * cgroups
 		 */
-		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
+		if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
 			continue;
-		referenced += page_referenced_one(page, vma, address,
-						  &mapcount, vm_flags);
-		if (!mapcount)
+		ret = wipe_page_reference_one(page, refctx, vma, address);
+		if (ret != SWAP_SUCCESS)
+			break;
+		mapcount--;
+		if (!mapcount || refctx->maybe_mlocked)
 			break;
 	}
 
 	page_unlock_anon_vma(anon_vma);
-	return referenced;
+	return ret;
 }
 
 /**
- * page_referenced_file - referenced check for object-based rmap
+ * wipe_page_reference_file - wipe page reference for object-based rmap
  * @page: the page we're checking references on.
  * @mem_cont: target memory controller
  * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
@@ -471,18 +472,18 @@ static int page_referenced_anon(struct page *page,
  * pointer, then walking the chain of vmas it holds. It returns the number
  * of references it found.
  *
- * This function is only called from page_referenced for object-based pages.
+ * This function is only called from wipe_page_reference for object-based pages.
 */
-static int page_referenced_file(struct page *page,
-				struct mem_cgroup *mem_cont,
-				unsigned long *vm_flags)
+static int wipe_page_reference_file(struct page *page,
+				struct mem_cgroup *memcg,
+				struct page_reference_context *refctx)
 {
 	unsigned int mapcount;
 	struct address_space *mapping = page->mapping;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	struct vm_area_struct *vma;
 	struct prio_tree_iter iter;
-	int referenced = 0;
+	int ret = SWAP_SUCCESS;
 
 	/*
 	 * The caller's checks on page->mapping and !PageAnon have made
@@ -516,65 +517,62 @@ static int page_referenced_file(struct page *page,
 		 * counting on behalf of references from different
 		 * cgroups
 		 */
-		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
+		if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
 			continue;
 
-		referenced += page_referenced_one(page, vma, address,
-						  &mapcount, vm_flags);
-		if (!mapcount)
+		ret = wipe_page_reference_one(page, refctx, vma, address);
+		if (ret != SWAP_SUCCESS)
+			break;
+		mapcount--;
+		if (!mapcount || refctx->maybe_mlocked)
 			break;
 	}
 
 	spin_unlock(&mapping->i_mmap_lock);
-	return referenced;
+	return ret;
 }
 
 /**
- * page_referenced - test if the page was referenced
+ * wipe_page_reference - clear and test the page reference and pte young bit
  * @page: the page to test
- * @is_locked: caller holds lock on the page
- * @mem_cont: target memory controller
- * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
+ * @memcg: target memory controller
+ * @refctx: context to collect some statistics
  *
 * Quick test_and_clear_referenced for all mappings to a page,
 * returns the number of ptes which referenced the page.
 */
-int page_referenced(struct page *page,
-		    int is_locked,
-		    struct mem_cgroup *mem_cont,
-		    unsigned long *vm_flags)
+int wipe_page_reference(struct page *page,
+			struct mem_cgroup *memcg,
+			struct page_reference_context *refctx)
 {
-	int referenced = 0;
 	int we_locked = 0;
+	int ret = SWAP_SUCCESS;
 
 	if (TestClearPageReferenced(page))
-		referenced++;
+		refctx->referenced++;
 
-	*vm_flags = 0;
 	if (page_mapped(page) && page_rmapping(page)) {
-		if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
+		if (!refctx->is_page_locked &&
+		    (!PageAnon(page) || PageKsm(page))) {
 			we_locked = trylock_page(page);
 			if (!we_locked) {
-				referenced++;
+				refctx->referenced++;
 				goto out;
 			}
 		}
 		if (unlikely(PageKsm(page)))
-			referenced += page_referenced_ksm(page, mem_cont,
-								vm_flags);
+			ret = wipe_page_reference_ksm(page, memcg, refctx);
 		else if (PageAnon(page))
-			referenced += page_referenced_anon(page, mem_cont,
-								vm_flags);
+			ret = wipe_page_reference_anon(page, memcg, refctx);
 		else if (page->mapping)
-			referenced += page_referenced_file(page, mem_cont,
-								vm_flags);
+			ret = wipe_page_reference_file(page, memcg, refctx);
 
 		if (we_locked)
 			unlock_page(page);
 	}
 out:
 	if (page_test_and_clear_young(page))
-		referenced++;
+		refctx->referenced++;
 
-	return referenced;
+	return ret;
 }
 
 static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3366bec..c59baa9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -569,7 +569,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 	struct pagevec freed_pvec;
 	int pgactivate = 0;
 	unsigned long nr_reclaimed = 0;
-	unsigned long vm_flags;
 
 	cond_resched();
 
@@ -578,7 +577,9 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		struct address_space *mapping;
 		struct page *page;
 		int may_enter_fs;
-		int referenced;
+		struct page_reference_context refctx = {
+			.is_page_locked = 1,
+		};
 
 		cond_resched();
 
@@ -620,16 +621,15 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			goto keep_locked;
 		}
 
-		referenced = page_referenced(page, 1,
-						sc->mem_cgroup, &vm_flags);
+		wipe_page_reference(page, sc->mem_cgroup, &refctx);
 		/*
 		 * In active use or really unfreeable? Activate it.
 		 * If page which have PG_mlocked lost isoltation race,
 		 * try_to_unmap moves it to unevictable list
 		 */
 		if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
-					referenced && page_mapped(page)
-					&& !(vm_flags & VM_LOCKED))
+			page_mapped(page) && refctx.referenced &&
+			!refctx.maybe_mlocked)
 			goto activate_locked;
 
 		/*
@@ -664,7 +664,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		}
 
 		if (PageDirty(page)) {
-			if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && referenced)
+			if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
+			    refctx.referenced)
 				goto keep_locked;
 			if (!may_enter_fs)
 				goto keep_locked;
@@ -1241,9 +1242,10 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)
 *
 * If the pages are mostly unmapped, the processing is fast and it is
 * appropriate to hold zone->lru_lock across the whole operation. But if
- * the pages are mapped, the processing is slow (page_referenced()) so we
- * should drop zone->lru_lock around each page. It's impossible to balance
- * this, so instead we remove the pages from the LRU while processing them.
+ * the pages are mapped, the processing is slow (because wipe_page_reference()
+ * walks each pte) so we should drop zone->lru_lock around each page. It's
+ * impossible to balance this, so instead we remove the pages from the LRU
+ * while processing them.
 * It is safe to rely on PG_active against the non-LRU pages in here because
 * nobody will play with that bit on a non-LRU page.
 *
@@ -1289,7 +1291,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 {
 	unsigned long nr_taken;
 	unsigned long pgscanned;
-	unsigned long vm_flags;
 	LIST_HEAD(l_hold);	/* The pages which were snipped off */
 	LIST_HEAD(l_active);
 	LIST_HEAD(l_inactive);
@@ -1320,6 +1321,10 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	spin_unlock_irq(&zone->lru_lock);
 
 	while (!list_empty(&l_hold)) {
+		struct page_reference_context refctx = {
+			.is_page_locked = 0,
+		};
+
 		cond_resched();
 		page = lru_to_page(&l_hold);
 		list_del(&page->lru);
@@ -1329,10 +1334,17 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			continue;
 		}
 
-		/* page_referenced clears PageReferenced */
-		if (page_mapped(page) &&
-		    page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
+		if (!page_mapped(page)) {
+			ClearPageActive(page);	/* we are de-activating */
+			list_add(&page->lru, &l_inactive);
+			continue;
+		}
+
+		wipe_page_reference(page, sc->mem_cgroup, &refctx);
+
+		if (refctx.referenced)
 			nr_rotated++;
+		if (refctx.exec_referenced && page_is_file_cache(page)) {
 			/*
 			 * Identify referenced, file-backed active pages and
 			 * give them one more trip around the active list. So
@@ -1342,10 +1354,8 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			 * IO, plus JVM can create lots of anon VM_EXEC pages,
 			 * so we ignore them here.
 			 */
-			if ((vm_flags & VM_EXEC) && page_is_file_cache(page)) {
-				list_add(&page->lru, &l_active);
-				continue;
-			}
+			list_add(&page->lru, &l_active);
+			continue;
 		}
 
 		ClearPageActive(page);	/* we are de-activating */
-- 
1.6.5.2
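
For reference only (not part of the patch): a minimal sketch of how a
reclaim-path caller migrates from the old interface to the new one.  It
simply restates the shrink_page_list() conversion above, using the names
introduced by this patch.

	/* Old interface: the result comes back through two channels,
	 * a reference count and a vm_flags mask. */
	unsigned long vm_flags;
	int referenced;

	referenced = page_referenced(page, 1, sc->mem_cgroup, &vm_flags);
	if (referenced && !(vm_flags & VM_LOCKED))
		goto activate_locked;	/* recently used and not mlocked */

	/* New interface: everything is collected in one context struct. */
	struct page_reference_context refctx = {
		.is_page_locked = 1,	/* caller already holds the page lock */
	};

	wipe_page_reference(page, sc->mem_cgroup, &refctx);
	if (refctx.referenced && !refctx.maybe_mlocked)
		goto activate_locked;	/* recently used and not mlocked */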