Here is a series of six KSM patches against 2.6.32-rc5-mm1, plus the six
earlier "mm: prepare for ksm swapping" patches (and the ksm cond_resched
patch in 2.6.32-rc6-git).
These are all internal housekeeping changes to ksm.c, to minimize the
actual, functional KSM-page swapping patches, coming in a few days.
include/linux/ksm.h | 24 +
mm/ksm.c | 540 +++++++++++++++++++-----------------------
2 files changed, 266 insertions(+), 298 deletions(-)
Hugh
1. remove_rmap_item_from_tree() is called as a precaution from
various places: don't dirty the rmap_item cacheline unnecessarily,
just mask the flags out of the address when they have been set.
2. First get_next_rmap_item() removes an unstable rmap_item from its tree,
then shortly afterwards cmp_and_merge_page() removes a stable rmap_item
from its tree: it's easier just to do both at once (but definitely keep
the BUG_ON(age > 1) which guards against a future omission).
3. When cmp_and_merge_page() moves an rmap_item from unstable to stable
tree, it does its own rb_erase() and accounting: that's better
expressed by remove_rmap_item_from_tree().
Signed-off-by: Hugh Dickins <[email protected]>
---
mm/ksm.c | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)
--- ksm0/mm/ksm.c 2009-11-10 09:06:23.000000000 +0000
+++ ksm1/mm/ksm.c 2009-11-12 15:28:36.000000000 +0000
@@ -453,6 +453,7 @@ static void remove_rmap_item_from_tree(s
}
rmap_item->next = NULL;
+ rmap_item->address &= PAGE_MASK;
} else if (rmap_item->address & NODE_FLAG) {
unsigned char age;
@@ -467,11 +468,11 @@ static void remove_rmap_item_from_tree(s
BUG_ON(age > 1);
if (!age)
rb_erase(&rmap_item->node, &root_unstable_tree);
+
ksm_pages_unshared--;
+ rmap_item->address &= PAGE_MASK;
}
- rmap_item->address &= PAGE_MASK;
-
cond_resched(); /* we're called from many long loops */
}
@@ -1086,8 +1087,7 @@ static void cmp_and_merge_page(struct pa
unsigned int checksum;
int err;
- if (in_stable_tree(rmap_item))
- remove_rmap_item_from_tree(rmap_item);
+ remove_rmap_item_from_tree(rmap_item);
/* We first start with searching the page inside the stable tree */
tree_rmap_item = stable_tree_search(page, page2, rmap_item);
@@ -1143,9 +1143,7 @@ static void cmp_and_merge_page(struct pa
* tree, and insert it instead as new node in the stable tree.
*/
if (!err) {
- rb_erase(&tree_rmap_item->node, &root_unstable_tree);
- tree_rmap_item->address &= ~NODE_FLAG;
- ksm_pages_unshared--;
+ remove_rmap_item_from_tree(tree_rmap_item);
/*
* If we fail to insert the page into the stable tree,
@@ -1174,11 +1172,8 @@ static struct rmap_item *get_next_rmap_i
while (cur != &mm_slot->rmap_list) {
rmap_item = list_entry(cur, struct rmap_item, link);
- if ((rmap_item->address & PAGE_MASK) == addr) {
- if (!in_stable_tree(rmap_item))
- remove_rmap_item_from_tree(rmap_item);
+ if ((rmap_item->address & PAGE_MASK) == addr)
return rmap_item;
- }
if (rmap_item->address > addr)
break;
cur = cur->next;
There is no need for replace_page() to calculate a write-protected prot
vm_page_prot must already be write-protected for an anonymous page (see
mm/memory.c do_anonymous_page() for similar reliance on vm_page_prot).
There is no need for try_to_merge_one_page() to get_page and put_page
on newpage and oldpage: in every case we already hold a reference to
each of them.
But some instinct makes me move try_to_merge_one_page()'s unlock_page
of oldpage down after replace_page(): that doesn't increase contention
on the ksm page, and makes thinking about the transition easier.
Signed-off-by: Hugh Dickins <[email protected]>
---
mm/ksm.c | 26 ++++++--------------------
1 file changed, 6 insertions(+), 20 deletions(-)
--- ksm1/mm/ksm.c 2009-11-12 15:28:36.000000000 +0000
+++ ksm2/mm/ksm.c 2009-11-12 15:28:42.000000000 +0000
@@ -647,7 +647,7 @@ static int write_protect_page(struct vm_
* Check that no O_DIRECT or similar I/O is in progress on the
* page
*/
- if ((page_mapcount(page) + 2 + swapped) != page_count(page)) {
+ if (page_mapcount(page) + 1 + swapped != page_count(page)) {
set_pte_at_notify(mm, addr, ptep, entry);
goto out_unlock;
}
@@ -682,11 +682,8 @@ static int replace_page(struct vm_area_s
pte_t *ptep;
spinlock_t *ptl;
unsigned long addr;
- pgprot_t prot;
int err = -EFAULT;
- prot = vm_get_page_prot(vma->vm_flags & ~VM_WRITE);
-
addr = page_address_in_vma(oldpage, vma);
if (addr == -EFAULT)
goto out;
@@ -714,7 +711,7 @@ static int replace_page(struct vm_area_s
flush_cache_page(vma, addr, pte_pfn(*ptep));
ptep_clear_flush(vma, addr, ptep);
- set_pte_at_notify(mm, addr, ptep, mk_pte(newpage, prot));
+ set_pte_at_notify(mm, addr, ptep, mk_pte(newpage, vma->vm_page_prot));
page_remove_rmap(oldpage);
put_page(oldpage);
@@ -746,13 +743,9 @@ static int try_to_merge_one_page(struct
if (!(vma->vm_flags & VM_MERGEABLE))
goto out;
-
if (!PageAnon(oldpage))
goto out;
- get_page(newpage);
- get_page(oldpage);
-
/*
* We need the page lock to read a stable PageSwapCache in
* write_protect_page(). We use trylock_page() instead of
@@ -761,25 +754,18 @@ static int try_to_merge_one_page(struct
* then come back to this page when it is unlocked.
*/
if (!trylock_page(oldpage))
- goto out_putpage;
+ goto out;
/*
* If this anonymous page is mapped only here, its pte may need
* to be write-protected. If it's mapped elsewhere, all of its
* ptes are necessarily already write-protected. But in either
* case, we need to lock and check page_count is not raised.
*/
- if (write_protect_page(vma, oldpage, &orig_pte)) {
- unlock_page(oldpage);
- goto out_putpage;
- }
- unlock_page(oldpage);
-
- if (pages_identical(oldpage, newpage))
+ if (write_protect_page(vma, oldpage, &orig_pte) == 0 &&
+ pages_identical(oldpage, newpage))
err = replace_page(vma, oldpage, newpage, orig_pte);
-out_putpage:
- put_page(oldpage);
- put_page(newpage);
+ unlock_page(oldpage);
out:
return err;
}
Cleanup: make argument names more consistent from cmp_and_merge_page()
down to replace_page(), so that it's easier to follow the rmap_item's
page and the matching tree_page and the merged kpage through that code.
In some places, e.g. break_cow(), pass rmap_item instead of separate
mm and address.
cmp_and_merge_page() initialize tree_page to NULL, to avoid a
"may be used uninitialized" warning seen in one config by Anil SB.
Signed-off-by: Hugh Dickins <[email protected]>
---
mm/ksm.c | 234 +++++++++++++++++++++++++----------------------------
1 file changed, 112 insertions(+), 122 deletions(-)
--- ksm2/mm/ksm.c 2009-11-12 15:28:42.000000000 +0000
+++ ksm3/mm/ksm.c 2009-11-12 15:28:47.000000000 +0000
@@ -356,8 +356,10 @@ static int break_ksm(struct vm_area_stru
return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
}
-static void break_cow(struct mm_struct *mm, unsigned long addr)
+static void break_cow(struct rmap_item *rmap_item)
{
+ struct mm_struct *mm = rmap_item->mm;
+ unsigned long addr = rmap_item->address;
struct vm_area_struct *vma;
down_read(&mm->mmap_sem);
@@ -665,15 +667,15 @@ out:
/**
* replace_page - replace page in vma by new ksm page
- * @vma: vma that holds the pte pointing to oldpage
- * @oldpage: the page we are replacing by newpage
- * @newpage: the ksm page we replace oldpage by
+ * @vma: vma that holds the pte pointing to page
+ * @page: the page we are replacing by kpage
+ * @kpage: the ksm page we replace page by
* @orig_pte: the original value of the pte
*
* Returns 0 on success, -EFAULT on failure.
*/
-static int replace_page(struct vm_area_struct *vma, struct page *oldpage,
- struct page *newpage, pte_t orig_pte)
+static int replace_page(struct vm_area_struct *vma, struct page *page,
+ struct page *kpage, pte_t orig_pte)
{
struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd;
@@ -684,7 +686,7 @@ static int replace_page(struct vm_area_s
unsigned long addr;
int err = -EFAULT;
- addr = page_address_in_vma(oldpage, vma);
+ addr = page_address_in_vma(page, vma);
if (addr == -EFAULT)
goto out;
@@ -706,15 +708,15 @@ static int replace_page(struct vm_area_s
goto out;
}
- get_page(newpage);
- page_add_ksm_rmap(newpage);
+ get_page(kpage);
+ page_add_ksm_rmap(kpage);
flush_cache_page(vma, addr, pte_pfn(*ptep));
ptep_clear_flush(vma, addr, ptep);
- set_pte_at_notify(mm, addr, ptep, mk_pte(newpage, vma->vm_page_prot));
+ set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
- page_remove_rmap(oldpage);
- put_page(oldpage);
+ page_remove_rmap(page);
+ put_page(page);
pte_unmap_unlock(ptep, ptl);
err = 0;
@@ -724,26 +726,22 @@ out:
/*
* try_to_merge_one_page - take two pages and merge them into one
- * @vma: the vma that hold the pte pointing into oldpage
- * @oldpage: the page that we want to replace with newpage
- * @newpage: the page that we want to map instead of oldpage
- *
- * Note:
- * oldpage should be a PageAnon page, while newpage should be a PageKsm page,
- * or a newly allocated kernel page which page_add_ksm_rmap will make PageKsm.
+ * @vma: the vma that holds the pte pointing to page
+ * @page: the PageAnon page that we want to replace with kpage
+ * @kpage: the PageKsm page (or newly allocated page which page_add_ksm_rmap
+ * will make PageKsm) that we want to map instead of page
*
* This function returns 0 if the pages were merged, -EFAULT otherwise.
*/
static int try_to_merge_one_page(struct vm_area_struct *vma,
- struct page *oldpage,
- struct page *newpage)
+ struct page *page, struct page *kpage)
{
pte_t orig_pte = __pte(0);
int err = -EFAULT;
if (!(vma->vm_flags & VM_MERGEABLE))
goto out;
- if (!PageAnon(oldpage))
+ if (!PageAnon(page))
goto out;
/*
@@ -753,7 +751,7 @@ static int try_to_merge_one_page(struct
* prefer to continue scanning and merging different pages,
* then come back to this page when it is unlocked.
*/
- if (!trylock_page(oldpage))
+ if (!trylock_page(page))
goto out;
/*
* If this anonymous page is mapped only here, its pte may need
@@ -761,11 +759,11 @@ static int try_to_merge_one_page(struct
* ptes are necessarily already write-protected. But in either
* case, we need to lock and check page_count is not raised.
*/
- if (write_protect_page(vma, oldpage, &orig_pte) == 0 &&
- pages_identical(oldpage, newpage))
- err = replace_page(vma, oldpage, newpage, orig_pte);
+ if (write_protect_page(vma, page, &orig_pte) == 0 &&
+ pages_identical(page, kpage))
+ err = replace_page(vma, page, kpage, orig_pte);
- unlock_page(oldpage);
+ unlock_page(page);
out:
return err;
}
@@ -773,26 +771,26 @@ out:
/*
* try_to_merge_with_ksm_page - like try_to_merge_two_pages,
* but no new kernel page is allocated: kpage must already be a ksm page.
+ *
+ * This function returns 0 if the pages were merged, -EFAULT otherwise.
*/
-static int try_to_merge_with_ksm_page(struct mm_struct *mm1,
- unsigned long addr1,
- struct page *page1,
- struct page *kpage)
+static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
+ struct page *page, struct page *kpage)
{
+ struct mm_struct *mm = rmap_item->mm;
struct vm_area_struct *vma;
int err = -EFAULT;
- down_read(&mm1->mmap_sem);
- if (ksm_test_exit(mm1))
+ down_read(&mm->mmap_sem);
+ if (ksm_test_exit(mm))
goto out;
-
- vma = find_vma(mm1, addr1);
- if (!vma || vma->vm_start > addr1)
+ vma = find_vma(mm, rmap_item->address);
+ if (!vma || vma->vm_start > rmap_item->address)
goto out;
- err = try_to_merge_one_page(vma, page1, kpage);
+ err = try_to_merge_one_page(vma, page, kpage);
out:
- up_read(&mm1->mmap_sem);
+ up_read(&mm->mmap_sem);
return err;
}
@@ -800,16 +798,18 @@ out:
* try_to_merge_two_pages - take two identical pages and prepare them
* to be merged into one page.
*
- * This function returns 0 if we successfully mapped two identical pages
- * into one page, -EFAULT otherwise.
+ * This function returns the kpage if we successfully merged two identical
+ * pages into one ksm page, NULL otherwise.
*
* Note that this function allocates a new kernel page: if one of the pages
* is already a ksm page, try_to_merge_with_ksm_page should be used.
*/
-static int try_to_merge_two_pages(struct mm_struct *mm1, unsigned long addr1,
- struct page *page1, struct mm_struct *mm2,
- unsigned long addr2, struct page *page2)
+static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
+ struct page *page,
+ struct rmap_item *tree_rmap_item,
+ struct page *tree_page)
{
+ struct mm_struct *mm = rmap_item->mm;
struct vm_area_struct *vma;
struct page *kpage;
int err = -EFAULT;
@@ -820,47 +820,43 @@ static int try_to_merge_two_pages(struct
*/
if (ksm_max_kernel_pages &&
ksm_max_kernel_pages <= ksm_pages_shared)
- return err;
+ return NULL;
kpage = alloc_page(GFP_HIGHUSER);
if (!kpage)
- return err;
-
- down_read(&mm1->mmap_sem);
- if (ksm_test_exit(mm1)) {
- up_read(&mm1->mmap_sem);
- goto out;
- }
- vma = find_vma(mm1, addr1);
- if (!vma || vma->vm_start > addr1) {
- up_read(&mm1->mmap_sem);
- goto out;
- }
+ return NULL;
- copy_user_highpage(kpage, page1, addr1, vma);
- err = try_to_merge_one_page(vma, page1, kpage);
- up_read(&mm1->mmap_sem);
+ down_read(&mm->mmap_sem);
+ if (ksm_test_exit(mm))
+ goto up;
+ vma = find_vma(mm, rmap_item->address);
+ if (!vma || vma->vm_start > rmap_item->address)
+ goto up;
+
+ copy_user_highpage(kpage, page, rmap_item->address, vma);
+ err = try_to_merge_one_page(vma, page, kpage);
+up:
+ up_read(&mm->mmap_sem);
if (!err) {
- err = try_to_merge_with_ksm_page(mm2, addr2, page2, kpage);
+ err = try_to_merge_with_ksm_page(tree_rmap_item,
+ tree_page, kpage);
/*
* If that fails, we have a ksm page with only one pte
* pointing to it: so break it.
*/
if (err)
- break_cow(mm1, addr1);
+ break_cow(rmap_item);
}
-out:
- put_page(kpage);
- return err;
+ if (err) {
+ put_page(kpage);
+ kpage = NULL;
+ }
+ return kpage;
}
/*
- * stable_tree_search - search page inside the stable tree
- * @page: the page that we are searching identical pages to.
- * @page2: pointer into identical page that we are holding inside the stable
- * tree that we have found.
- * @rmap_item: the reverse mapping item
+ * stable_tree_search - search for page inside the stable tree
*
* This function checks if there is a page inside the stable tree
* with identical content to the page that we are scanning right now.
@@ -869,21 +865,21 @@ out:
* NULL otherwise.
*/
static struct rmap_item *stable_tree_search(struct page *page,
- struct page **page2,
- struct rmap_item *rmap_item)
+ struct page **tree_pagep)
{
struct rb_node *node = root_stable_tree.rb_node;
while (node) {
struct rmap_item *tree_rmap_item, *next_rmap_item;
+ struct page *tree_page;
int ret;
tree_rmap_item = rb_entry(node, struct rmap_item, node);
while (tree_rmap_item) {
BUG_ON(!in_stable_tree(tree_rmap_item));
cond_resched();
- page2[0] = get_ksm_page(tree_rmap_item);
- if (page2[0])
+ tree_page = get_ksm_page(tree_rmap_item);
+ if (tree_page)
break;
next_rmap_item = tree_rmap_item->next;
remove_rmap_item_from_tree(tree_rmap_item);
@@ -892,15 +888,16 @@ static struct rmap_item *stable_tree_sea
if (!tree_rmap_item)
return NULL;
- ret = memcmp_pages(page, page2[0]);
+ ret = memcmp_pages(page, tree_page);
if (ret < 0) {
- put_page(page2[0]);
+ put_page(tree_page);
node = node->rb_left;
} else if (ret > 0) {
- put_page(page2[0]);
+ put_page(tree_page);
node = node->rb_right;
} else {
+ *tree_pagep = tree_page;
return tree_rmap_item;
}
}
@@ -912,13 +909,9 @@ static struct rmap_item *stable_tree_sea
* stable_tree_insert - insert rmap_item pointing to new ksm page
* into the stable tree.
*
- * @page: the page that we are searching identical page to inside the stable
- * tree.
- * @rmap_item: pointer to the reverse mapping item.
- *
* This function returns rmap_item if success, NULL otherwise.
*/
-static struct rmap_item *stable_tree_insert(struct page *page,
+static struct rmap_item *stable_tree_insert(struct page *kpage,
struct rmap_item *rmap_item)
{
struct rb_node **new = &root_stable_tree.rb_node;
@@ -943,7 +936,7 @@ static struct rmap_item *stable_tree_ins
if (!tree_rmap_item)
return NULL;
- ret = memcmp_pages(page, tree_page);
+ ret = memcmp_pages(kpage, tree_page);
put_page(tree_page);
parent = *new;
@@ -971,12 +964,8 @@ static struct rmap_item *stable_tree_ins
}
/*
- * unstable_tree_search_insert - search and insert items into the unstable tree.
- *
- * @page: the page that we are going to search for identical page or to insert
- * into the unstable tree
- * @page2: pointer into identical page that was found inside the unstable tree
- * @rmap_item: the reverse mapping item of page
+ * unstable_tree_search_insert - search for identical page,
+ * else insert rmap_item into the unstable tree.
*
* This function searches for a page in the unstable tree identical to the
* page currently being scanned; and if no identical page is found in the
@@ -988,42 +977,45 @@ static struct rmap_item *stable_tree_ins
* This function does both searching and inserting, because they share
* the same walking algorithm in an rbtree.
*/
-static struct rmap_item *unstable_tree_search_insert(struct page *page,
- struct page **page2,
- struct rmap_item *rmap_item)
+static
+struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
+ struct page *page,
+ struct page **tree_pagep)
+
{
struct rb_node **new = &root_unstable_tree.rb_node;
struct rb_node *parent = NULL;
while (*new) {
struct rmap_item *tree_rmap_item;
+ struct page *tree_page;
int ret;
cond_resched();
tree_rmap_item = rb_entry(*new, struct rmap_item, node);
- page2[0] = get_mergeable_page(tree_rmap_item);
- if (!page2[0])
+ tree_page = get_mergeable_page(tree_rmap_item);
+ if (!tree_page)
return NULL;
/*
- * Don't substitute an unswappable ksm page
- * just for one good swappable forked page.
+ * Don't substitute a ksm page for a forked page.
*/
- if (page == page2[0]) {
- put_page(page2[0]);
+ if (page == tree_page) {
+ put_page(tree_page);
return NULL;
}
- ret = memcmp_pages(page, page2[0]);
+ ret = memcmp_pages(page, tree_page);
parent = *new;
if (ret < 0) {
- put_page(page2[0]);
+ put_page(tree_page);
new = &parent->rb_left;
} else if (ret > 0) {
- put_page(page2[0]);
+ put_page(tree_page);
new = &parent->rb_right;
} else {
+ *tree_pagep = tree_page;
return tree_rmap_item;
}
}
@@ -1068,24 +1060,23 @@ static void stable_tree_append(struct rm
*/
static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
{
- struct page *page2[1];
struct rmap_item *tree_rmap_item;
+ struct page *tree_page = NULL;
+ struct page *kpage;
unsigned int checksum;
int err;
remove_rmap_item_from_tree(rmap_item);
/* We first start with searching the page inside the stable tree */
- tree_rmap_item = stable_tree_search(page, page2, rmap_item);
+ tree_rmap_item = stable_tree_search(page, &tree_page);
if (tree_rmap_item) {
- if (page == page2[0]) /* forked */
+ kpage = tree_page;
+ if (page == kpage) /* forked */
err = 0;
else
- err = try_to_merge_with_ksm_page(rmap_item->mm,
- rmap_item->address,
- page, page2[0]);
- put_page(page2[0]);
-
+ err = try_to_merge_with_ksm_page(rmap_item,
+ page, kpage);
if (!err) {
/*
* The page was successfully merged:
@@ -1093,6 +1084,7 @@ static void cmp_and_merge_page(struct pa
*/
stable_tree_append(rmap_item, tree_rmap_item);
}
+ put_page(kpage);
return;
}
@@ -1103,7 +1095,7 @@ static void cmp_and_merge_page(struct pa
* when the mem_cgroup had reached its limit: try again now.
*/
if (PageKsm(page))
- break_cow(rmap_item->mm, rmap_item->address);
+ break_cow(rmap_item);
/*
* In case the hash value of the page was changed from the last time we
@@ -1117,18 +1109,18 @@ static void cmp_and_merge_page(struct pa
return;
}
- tree_rmap_item = unstable_tree_search_insert(page, page2, rmap_item);
+ tree_rmap_item =
+ unstable_tree_search_insert(rmap_item, page, &tree_page);
if (tree_rmap_item) {
- err = try_to_merge_two_pages(rmap_item->mm,
- rmap_item->address, page,
- tree_rmap_item->mm,
- tree_rmap_item->address, page2[0]);
+ kpage = try_to_merge_two_pages(rmap_item, page,
+ tree_rmap_item, tree_page);
+ put_page(tree_page);
/*
* As soon as we merge this page, we want to remove the
* rmap_item of the page we have merged with from the unstable
* tree, and insert it instead as new node in the stable tree.
*/
- if (!err) {
+ if (kpage) {
remove_rmap_item_from_tree(tree_rmap_item);
/*
@@ -1137,16 +1129,14 @@ static void cmp_and_merge_page(struct pa
* to a ksm page left outside the stable tree,
* in which case we need to break_cow on both.
*/
- if (stable_tree_insert(page2[0], tree_rmap_item))
+ if (stable_tree_insert(kpage, tree_rmap_item))
stable_tree_append(rmap_item, tree_rmap_item);
else {
- break_cow(tree_rmap_item->mm,
- tree_rmap_item->address);
- break_cow(rmap_item->mm, rmap_item->address);
+ break_cow(tree_rmap_item);
+ break_cow(rmap_item);
}
+ put_page(kpage);
}
-
- put_page(page2[0]);
}
}
@@ -1308,7 +1298,7 @@ static void ksm_do_scan(unsigned int sca
/*
* Replace now-unshared ksm page by ordinary page.
*/
- break_cow(rmap_item->mm, rmap_item->address);
+ break_cow(rmap_item);
remove_rmap_item_from_tree(rmap_item);
rmap_item->oldchecksum = calc_checksum(page);
}
Free up a pointer in struct rmap_item, by making the mm_slot's rmap_list
a singly-linked list: we always traverse that list sequentially, and we
don't even lose any prefetches (but should consider adding a few later).
Name it rmap_list throughout.
Do we need to free up that pointer? Not immediately, and in the end, we
could continue to avoid it with a union; but having done the conversion,
let's keep it this way, since there's no downside, and maybe we'll want
more in future (struct rmap_item is a cache-friendly 32 bytes on 32-bit
and 64 bytes on 64-bit, so we shall want to avoid expanding it).
Signed-off-by: Hugh Dickins <[email protected]>
---
mm/ksm.c | 56 ++++++++++++++++++++++++-----------------------------
1 file changed, 26 insertions(+), 30 deletions(-)
--- ksm3/mm/ksm.c 2009-11-12 15:28:47.000000000 +0000
+++ ksm4/mm/ksm.c 2009-11-12 15:28:54.000000000 +0000
@@ -79,13 +79,13 @@
* struct mm_slot - ksm information per mm that is being scanned
* @link: link to the mm_slots hash list
* @mm_list: link into the mm_slots list, rooted in ksm_mm_head
- * @rmap_list: head for this mm_slot's list of rmap_items
+ * @rmap_list: head for this mm_slot's singly-linked list of rmap_items
* @mm: the mm that this information is valid for
*/
struct mm_slot {
struct hlist_node link;
struct list_head mm_list;
- struct list_head rmap_list;
+ struct rmap_item *rmap_list;
struct mm_struct *mm;
};
@@ -93,7 +93,7 @@ struct mm_slot {
* struct ksm_scan - cursor for scanning
* @mm_slot: the current mm_slot we are scanning
* @address: the next address inside that to be scanned
- * @rmap_item: the current rmap that we are scanning inside the rmap_list
+ * @rmap_list: link to the next rmap to be scanned in the rmap_list
* @seqnr: count of completed full scans (needed when removing unstable node)
*
* There is only the one ksm_scan instance of this cursor structure.
@@ -101,13 +101,14 @@ struct mm_slot {
struct ksm_scan {
struct mm_slot *mm_slot;
unsigned long address;
- struct rmap_item *rmap_item;
+ struct rmap_item **rmap_list;
unsigned long seqnr;
};
/**
* struct rmap_item - reverse mapping item for virtual addresses
- * @link: link into mm_slot's rmap_list (rmap_list is per mm)
+ * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
+ * @filler: unused space we're making available in this patch
* @mm: the memory structure this rmap_item is pointing into
* @address: the virtual address this rmap_item tracks (+ flags in low bits)
* @oldchecksum: previous checksum of the page at that virtual address
@@ -116,7 +117,8 @@ struct ksm_scan {
* @prev: previous rmap_item hanging off the same node of the stable tree
*/
struct rmap_item {
- struct list_head link;
+ struct rmap_item *rmap_list;
+ unsigned long filler;
struct mm_struct *mm;
unsigned long address; /* + low bits used for flags below */
union {
@@ -275,7 +277,6 @@ static void insert_to_mm_slots_hash(stru
bucket = &mm_slots_hash[((unsigned long)mm / sizeof(struct mm_struct))
% MM_SLOTS_HASH_HEADS];
mm_slot->mm = mm;
- INIT_LIST_HEAD(&mm_slot->rmap_list);
hlist_add_head(&mm_slot->link, bucket);
}
@@ -479,15 +480,12 @@ static void remove_rmap_item_from_tree(s
}
static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
- struct list_head *cur)
+ struct rmap_item **rmap_list)
{
- struct rmap_item *rmap_item;
-
- while (cur != &mm_slot->rmap_list) {
- rmap_item = list_entry(cur, struct rmap_item, link);
- cur = cur->next;
+ while (*rmap_list) {
+ struct rmap_item *rmap_item = *rmap_list;
+ *rmap_list = rmap_item->rmap_list;
remove_rmap_item_from_tree(rmap_item);
- list_del(&rmap_item->link);
free_rmap_item(rmap_item);
}
}
@@ -553,7 +551,7 @@ static int unmerge_and_remove_all_rmap_i
goto error;
}
- remove_trailing_rmap_items(mm_slot, mm_slot->rmap_list.next);
+ remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);
spin_lock(&ksm_mmlist_lock);
ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
@@ -1141,20 +1139,19 @@ static void cmp_and_merge_page(struct pa
}
static struct rmap_item *get_next_rmap_item(struct mm_slot *mm_slot,
- struct list_head *cur,
+ struct rmap_item **rmap_list,
unsigned long addr)
{
struct rmap_item *rmap_item;
- while (cur != &mm_slot->rmap_list) {
- rmap_item = list_entry(cur, struct rmap_item, link);
+ while (*rmap_list) {
+ rmap_item = *rmap_list;
if ((rmap_item->address & PAGE_MASK) == addr)
return rmap_item;
if (rmap_item->address > addr)
break;
- cur = cur->next;
+ *rmap_list = rmap_item->rmap_list;
remove_rmap_item_from_tree(rmap_item);
- list_del(&rmap_item->link);
free_rmap_item(rmap_item);
}
@@ -1163,7 +1160,8 @@ static struct rmap_item *get_next_rmap_i
/* It has already been zeroed */
rmap_item->mm = mm_slot->mm;
rmap_item->address = addr;
- list_add_tail(&rmap_item->link, cur);
+ rmap_item->rmap_list = *rmap_list;
+ *rmap_list = rmap_item;
}
return rmap_item;
}
@@ -1188,8 +1186,7 @@ static struct rmap_item *scan_get_next_r
spin_unlock(&ksm_mmlist_lock);
next_mm:
ksm_scan.address = 0;
- ksm_scan.rmap_item = list_entry(&slot->rmap_list,
- struct rmap_item, link);
+ ksm_scan.rmap_list = &slot->rmap_list;
}
mm = slot->mm;
@@ -1215,10 +1212,10 @@ next_mm:
flush_anon_page(vma, *page, ksm_scan.address);
flush_dcache_page(*page);
rmap_item = get_next_rmap_item(slot,
- ksm_scan.rmap_item->link.next,
- ksm_scan.address);
+ ksm_scan.rmap_list, ksm_scan.address);
if (rmap_item) {
- ksm_scan.rmap_item = rmap_item;
+ ksm_scan.rmap_list =
+ &rmap_item->rmap_list;
ksm_scan.address += PAGE_SIZE;
} else
put_page(*page);
@@ -1234,14 +1231,13 @@ next_mm:
if (ksm_test_exit(mm)) {
ksm_scan.address = 0;
- ksm_scan.rmap_item = list_entry(&slot->rmap_list,
- struct rmap_item, link);
+ ksm_scan.rmap_list = &slot->rmap_list;
}
/*
* Nuke all the rmap_items that are above this current rmap:
* because there were no VM_MERGEABLE vmas with such addresses.
*/
- remove_trailing_rmap_items(slot, ksm_scan.rmap_item->link.next);
+ remove_trailing_rmap_items(slot, ksm_scan.rmap_list);
spin_lock(&ksm_mmlist_lock);
ksm_scan.mm_slot = list_entry(slot->mm_list.next,
@@ -1423,7 +1419,7 @@ void __ksm_exit(struct mm_struct *mm)
spin_lock(&ksm_mmlist_lock);
mm_slot = get_mm_slot(mm);
if (mm_slot && ksm_scan.mm_slot != mm_slot) {
- if (list_empty(&mm_slot->rmap_list)) {
+ if (!mm_slot->rmap_list) {
hlist_del(&mm_slot->link);
list_del(&mm_slot->mm_list);
easy_to_free = 1;
Though we still do well to keep rmap_items in the unstable tree without
a separate tree_item at the node, for several reasons it becomes awkward
to keep rmap_items in the stable tree without a separate stable_node:
lack of space in the nicely-sized rmap_item, the need for an anchor as
rmap_items are removed, the need for a node even when temporarily no
rmap_items are attached to it.
So declare struct stable_node (rb_node to place it in the tree and
hlist_head for the rmap_items hanging off it), and convert stable tree
handling to use it: without yet taking advantage of it. Note how one
stable_tree_insert() of a node now has _two_ stable_tree_append()s of
the two rmap_items being merged.
Signed-off-by: Hugh Dickins <[email protected]>
---
mm/ksm.c | 180 +++++++++++++++++++++++++++++------------------------
1 file changed, 101 insertions(+), 79 deletions(-)
--- ksm4/mm/ksm.c 2009-11-12 15:28:54.000000000 +0000
+++ ksm5/mm/ksm.c 2009-11-12 15:29:00.000000000 +0000
@@ -106,34 +106,44 @@ struct ksm_scan {
};
/**
+ * struct stable_node - node of the stable rbtree
+ * @node: rb node of this ksm page in the stable tree
+ * @hlist: hlist head of rmap_items using this ksm page
+ */
+struct stable_node {
+ struct rb_node node;
+ struct hlist_head hlist;
+};
+
+/**
* struct rmap_item - reverse mapping item for virtual addresses
* @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list
* @filler: unused space we're making available in this patch
* @mm: the memory structure this rmap_item is pointing into
* @address: the virtual address this rmap_item tracks (+ flags in low bits)
* @oldchecksum: previous checksum of the page at that virtual address
- * @node: rb_node of this rmap_item in either unstable or stable tree
- * @next: next rmap_item hanging off the same node of the stable tree
- * @prev: previous rmap_item hanging off the same node of the stable tree
+ * @node: rb node of this rmap_item in the unstable tree
+ * @head: pointer to stable_node heading this list in the stable tree
+ * @hlist: link into hlist of rmap_items hanging off that stable_node
*/
struct rmap_item {
struct rmap_item *rmap_list;
unsigned long filler;
struct mm_struct *mm;
unsigned long address; /* + low bits used for flags below */
+ unsigned int oldchecksum; /* when unstable */
union {
- unsigned int oldchecksum; /* when unstable */
- struct rmap_item *next; /* when stable */
- };
- union {
- struct rb_node node; /* when tree node */
- struct rmap_item *prev; /* in stable list */
+ struct rb_node node; /* when node of unstable tree */
+ struct { /* when listed from stable tree */
+ struct stable_node *head;
+ struct hlist_node hlist;
+ };
};
};
#define SEQNR_MASK 0x0ff /* low bits of unstable tree seqnr */
-#define NODE_FLAG 0x100 /* is a node of unstable or stable tree */
-#define STABLE_FLAG 0x200 /* is a node or list item of stable tree */
+#define UNSTABLE_FLAG 0x100 /* is a node of the unstable tree */
+#define STABLE_FLAG 0x200 /* is listed from the stable tree */
/* The stable and unstable tree heads */
static struct rb_root root_stable_tree = RB_ROOT;
@@ -150,6 +160,7 @@ static struct ksm_scan ksm_scan = {
};
static struct kmem_cache *rmap_item_cache;
+static struct kmem_cache *stable_node_cache;
static struct kmem_cache *mm_slot_cache;
/* The number of nodes in the stable tree */
@@ -192,13 +203,19 @@ static int __init ksm_slab_init(void)
if (!rmap_item_cache)
goto out;
+ stable_node_cache = KSM_KMEM_CACHE(stable_node, 0);
+ if (!stable_node_cache)
+ goto out_free1;
+
mm_slot_cache = KSM_KMEM_CACHE(mm_slot, 0);
if (!mm_slot_cache)
- goto out_free;
+ goto out_free2;
return 0;
-out_free:
+out_free2:
+ kmem_cache_destroy(stable_node_cache);
+out_free1:
kmem_cache_destroy(rmap_item_cache);
out:
return -ENOMEM;
@@ -207,6 +224,7 @@ out:
static void __init ksm_slab_free(void)
{
kmem_cache_destroy(mm_slot_cache);
+ kmem_cache_destroy(stable_node_cache);
kmem_cache_destroy(rmap_item_cache);
mm_slot_cache = NULL;
}
@@ -228,6 +246,16 @@ static inline void free_rmap_item(struct
kmem_cache_free(rmap_item_cache, rmap_item);
}
+static inline struct stable_node *alloc_stable_node(void)
+{
+ return kmem_cache_alloc(stable_node_cache, GFP_KERNEL);
+}
+
+static inline void free_stable_node(struct stable_node *stable_node)
+{
+ kmem_cache_free(stable_node_cache, stable_node);
+}
+
static inline struct mm_slot *alloc_mm_slot(void)
{
if (!mm_slot_cache) /* initialization failed */
@@ -429,36 +457,22 @@ static struct page *get_ksm_page(struct
*/
static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
{
- if (in_stable_tree(rmap_item)) {
- struct rmap_item *next_item = rmap_item->next;
+ if (rmap_item->address & STABLE_FLAG) {
+ struct stable_node *stable_node;
- if (rmap_item->address & NODE_FLAG) {
- if (next_item) {
- rb_replace_node(&rmap_item->node,
- &next_item->node,
- &root_stable_tree);
- next_item->address |= NODE_FLAG;
- ksm_pages_sharing--;
- } else {
- rb_erase(&rmap_item->node, &root_stable_tree);
- ksm_pages_shared--;
- }
- } else {
- struct rmap_item *prev_item = rmap_item->prev;
-
- BUG_ON(prev_item->next != rmap_item);
- prev_item->next = next_item;
- if (next_item) {
- BUG_ON(next_item->prev != rmap_item);
- next_item->prev = rmap_item->prev;
- }
+ stable_node = rmap_item->head;
+ hlist_del(&rmap_item->hlist);
+ if (stable_node->hlist.first)
ksm_pages_sharing--;
+ else {
+ rb_erase(&stable_node->node, &root_stable_tree);
+ free_stable_node(stable_node);
+ ksm_pages_shared--;
}
- rmap_item->next = NULL;
rmap_item->address &= PAGE_MASK;
- } else if (rmap_item->address & NODE_FLAG) {
+ } else if (rmap_item->address & UNSTABLE_FLAG) {
unsigned char age;
/*
* Usually ksmd can and must skip the rb_erase, because
@@ -859,31 +873,32 @@ up:
* This function checks if there is a page inside the stable tree
* with identical content to the page that we are scanning right now.
*
- * This function return rmap_item pointer to the identical item if found,
+ * This function returns the stable tree node of identical content if found,
* NULL otherwise.
*/
-static struct rmap_item *stable_tree_search(struct page *page,
- struct page **tree_pagep)
+static struct stable_node *stable_tree_search(struct page *page,
+ struct page **tree_pagep)
{
struct rb_node *node = root_stable_tree.rb_node;
+ struct stable_node *stable_node;
while (node) {
- struct rmap_item *tree_rmap_item, *next_rmap_item;
+ struct hlist_node *hlist, *hnext;
+ struct rmap_item *tree_rmap_item;
struct page *tree_page;
int ret;
- tree_rmap_item = rb_entry(node, struct rmap_item, node);
- while (tree_rmap_item) {
+ stable_node = rb_entry(node, struct stable_node, node);
+ hlist_for_each_entry_safe(tree_rmap_item, hlist, hnext,
+ &stable_node->hlist, hlist) {
BUG_ON(!in_stable_tree(tree_rmap_item));
cond_resched();
tree_page = get_ksm_page(tree_rmap_item);
if (tree_page)
break;
- next_rmap_item = tree_rmap_item->next;
remove_rmap_item_from_tree(tree_rmap_item);
- tree_rmap_item = next_rmap_item;
}
- if (!tree_rmap_item)
+ if (!hlist)
return NULL;
ret = memcmp_pages(page, tree_page);
@@ -896,7 +911,7 @@ static struct rmap_item *stable_tree_sea
node = node->rb_right;
} else {
*tree_pagep = tree_page;
- return tree_rmap_item;
+ return stable_node;
}
}
@@ -907,31 +922,32 @@ static struct rmap_item *stable_tree_sea
* stable_tree_insert - insert rmap_item pointing to new ksm page
* into the stable tree.
*
- * This function returns rmap_item if success, NULL otherwise.
+ * This function returns the stable tree node just allocated on success,
+ * NULL otherwise.
*/
-static struct rmap_item *stable_tree_insert(struct page *kpage,
- struct rmap_item *rmap_item)
+static struct stable_node *stable_tree_insert(struct page *kpage)
{
struct rb_node **new = &root_stable_tree.rb_node;
struct rb_node *parent = NULL;
+ struct stable_node *stable_node;
while (*new) {
- struct rmap_item *tree_rmap_item, *next_rmap_item;
+ struct hlist_node *hlist, *hnext;
+ struct rmap_item *tree_rmap_item;
struct page *tree_page;
int ret;
- tree_rmap_item = rb_entry(*new, struct rmap_item, node);
- while (tree_rmap_item) {
+ stable_node = rb_entry(*new, struct stable_node, node);
+ hlist_for_each_entry_safe(tree_rmap_item, hlist, hnext,
+ &stable_node->hlist, hlist) {
BUG_ON(!in_stable_tree(tree_rmap_item));
cond_resched();
tree_page = get_ksm_page(tree_rmap_item);
if (tree_page)
break;
- next_rmap_item = tree_rmap_item->next;
remove_rmap_item_from_tree(tree_rmap_item);
- tree_rmap_item = next_rmap_item;
}
- if (!tree_rmap_item)
+ if (!hlist)
return NULL;
ret = memcmp_pages(kpage, tree_page);
@@ -952,13 +968,16 @@ static struct rmap_item *stable_tree_ins
}
}
- rmap_item->address |= NODE_FLAG | STABLE_FLAG;
- rmap_item->next = NULL;
- rb_link_node(&rmap_item->node, parent, new);
- rb_insert_color(&rmap_item->node, &root_stable_tree);
+ stable_node = alloc_stable_node();
+ if (!stable_node)
+ return NULL;
- ksm_pages_shared++;
- return rmap_item;
+ rb_link_node(&stable_node->node, parent, new);
+ rb_insert_color(&stable_node->node, &root_stable_tree);
+
+ INIT_HLIST_HEAD(&stable_node->hlist);
+
+ return stable_node;
}
/*
@@ -1018,7 +1037,7 @@ struct rmap_item *unstable_tree_search_i
}
}
- rmap_item->address |= NODE_FLAG;
+ rmap_item->address |= UNSTABLE_FLAG;
rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK);
rb_link_node(&rmap_item->node, parent, new);
rb_insert_color(&rmap_item->node, &root_unstable_tree);
@@ -1033,18 +1052,16 @@ struct rmap_item *unstable_tree_search_i
* the same ksm page.
*/
static void stable_tree_append(struct rmap_item *rmap_item,
- struct rmap_item *tree_rmap_item)
+ struct stable_node *stable_node)
{
- rmap_item->next = tree_rmap_item->next;
- rmap_item->prev = tree_rmap_item;
-
- if (tree_rmap_item->next)
- tree_rmap_item->next->prev = rmap_item;
-
- tree_rmap_item->next = rmap_item;
+ rmap_item->head = stable_node;
rmap_item->address |= STABLE_FLAG;
+ hlist_add_head(&rmap_item->hlist, &stable_node->hlist);
- ksm_pages_sharing++;
+ if (rmap_item->hlist.next)
+ ksm_pages_sharing++;
+ else
+ ksm_pages_shared++;
}
/*
@@ -1060,6 +1077,7 @@ static void cmp_and_merge_page(struct pa
{
struct rmap_item *tree_rmap_item;
struct page *tree_page = NULL;
+ struct stable_node *stable_node;
struct page *kpage;
unsigned int checksum;
int err;
@@ -1067,8 +1085,8 @@ static void cmp_and_merge_page(struct pa
remove_rmap_item_from_tree(rmap_item);
/* We first start with searching the page inside the stable tree */
- tree_rmap_item = stable_tree_search(page, &tree_page);
- if (tree_rmap_item) {
+ stable_node = stable_tree_search(page, &tree_page);
+ if (stable_node) {
kpage = tree_page;
if (page == kpage) /* forked */
err = 0;
@@ -1080,7 +1098,7 @@ static void cmp_and_merge_page(struct pa
* The page was successfully merged:
* add its rmap_item to the stable tree.
*/
- stable_tree_append(rmap_item, tree_rmap_item);
+ stable_tree_append(rmap_item, stable_node);
}
put_page(kpage);
return;
@@ -1121,19 +1139,23 @@ static void cmp_and_merge_page(struct pa
if (kpage) {
remove_rmap_item_from_tree(tree_rmap_item);
+ stable_node = stable_tree_insert(kpage);
+ if (stable_node) {
+ stable_tree_append(tree_rmap_item, stable_node);
+ stable_tree_append(rmap_item, stable_node);
+ }
+ put_page(kpage);
+
/*
* If we fail to insert the page into the stable tree,
* we will have 2 virtual addresses that are pointing
* to a ksm page left outside the stable tree,
* in which case we need to break_cow on both.
*/
- if (stable_tree_insert(kpage, tree_rmap_item))
- stable_tree_append(rmap_item, tree_rmap_item);
- else {
+ if (!stable_node) {
break_cow(tree_rmap_item);
break_cow(rmap_item);
}
- put_page(kpage);
}
}
}
Add a pointer to the ksm page into struct stable_node, holding a
reference to the page while the node exists. Put a pointer to the
stable_node into the ksm page's ->mapping.
Then we don't need get_ksm_page() while traversing the stable tree:
the page to compare against is sure to be present and correct, even
if it's no longer visible through any of its existing rmap_items.
And we can handle the forked ksm page case more efficiently:
no need to memcmp our way through the tree to find its match.
Signed-off-by: Hugh Dickins <[email protected]>
---
include/linux/ksm.h | 24 +++++++---
mm/ksm.c | 99 ++++++++++++++----------------------------
2 files changed, 51 insertions(+), 72 deletions(-)
--- ksm5/include/linux/ksm.h 2009-11-04 10:52:45.000000000 +0000
+++ ksm6/include/linux/ksm.h 2009-11-12 15:29:06.000000000 +0000
@@ -12,6 +12,8 @@
#include <linux/sched.h>
#include <linux/vmstat.h>
+struct stable_node;
+
#ifdef CONFIG_KSM
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, unsigned long *vm_flags);
@@ -34,7 +36,8 @@ static inline void ksm_exit(struct mm_st
/*
* A KSM page is one of those write-protected "shared pages" or "merged pages"
* which KSM maps into multiple mms, wherever identical anonymous page content
- * is found in VM_MERGEABLE vmas. It's a PageAnon page, with NULL anon_vma.
+ * is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any
+ * anon_vma, but to that page's node of the stable tree.
*/
static inline int PageKsm(struct page *page)
{
@@ -42,15 +45,22 @@ static inline int PageKsm(struct page *p
(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
}
-/*
- * But we have to avoid the checking which page_add_anon_rmap() performs.
- */
+static inline struct stable_node *page_stable_node(struct page *page)
+{
+ return PageKsm(page) ? page_rmapping(page) : NULL;
+}
+
+static inline void set_page_stable_node(struct page *page,
+ struct stable_node *stable_node)
+{
+ page->mapping = (void *)stable_node +
+ (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
+}
+
static inline void page_add_ksm_rmap(struct page *page)
{
- if (atomic_inc_and_test(&page->_mapcount)) {
- page->mapping = (void *) (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
+ if (atomic_inc_and_test(&page->_mapcount))
__inc_zone_page_state(page, NR_ANON_PAGES);
- }
}
#else /* !CONFIG_KSM */
--- ksm5/mm/ksm.c 2009-11-12 15:29:00.000000000 +0000
+++ ksm6/mm/ksm.c 2009-11-12 15:29:06.000000000 +0000
@@ -107,10 +107,12 @@ struct ksm_scan {
/**
* struct stable_node - node of the stable rbtree
+ * @page: pointer to struct page of the ksm page
* @node: rb node of this ksm page in the stable tree
* @hlist: hlist head of rmap_items using this ksm page
*/
struct stable_node {
+ struct page *page;
struct rb_node node;
struct hlist_head hlist;
};
@@ -435,23 +437,6 @@ out: page = NULL;
}
/*
- * get_ksm_page: checks if the page at the virtual address in rmap_item
- * is still PageKsm, in which case we can trust the content of the page,
- * and it returns the gotten page; but NULL if the page has been zapped.
- */
-static struct page *get_ksm_page(struct rmap_item *rmap_item)
-{
- struct page *page;
-
- page = get_mergeable_page(rmap_item);
- if (page && !PageKsm(page)) {
- put_page(page);
- page = NULL;
- }
- return page;
-}
-
-/*
* Removing rmap_item from stable or unstable tree.
* This function will clean the information from the stable/unstable tree.
*/
@@ -465,6 +450,9 @@ static void remove_rmap_item_from_tree(s
if (stable_node->hlist.first)
ksm_pages_sharing--;
else {
+ set_page_stable_node(stable_node->page, NULL);
+ put_page(stable_node->page);
+
rb_erase(&stable_node->node, &root_stable_tree);
free_stable_node(stable_node);
ksm_pages_shared--;
@@ -740,8 +728,7 @@ out:
* try_to_merge_one_page - take two pages and merge them into one
* @vma: the vma that holds the pte pointing to page
* @page: the PageAnon page that we want to replace with kpage
- * @kpage: the PageKsm page (or newly allocated page which page_add_ksm_rmap
- * will make PageKsm) that we want to map instead of page
+ * @kpage: the PageKsm page that we want to map instead of page
*
* This function returns 0 if the pages were merged, -EFAULT otherwise.
*/
@@ -793,6 +780,9 @@ static int try_to_merge_with_ksm_page(st
struct vm_area_struct *vma;
int err = -EFAULT;
+ if (page == kpage) /* ksm page forked */
+ return 0;
+
down_read(&mm->mmap_sem);
if (ksm_test_exit(mm))
goto out;
@@ -846,6 +836,9 @@ static struct page *try_to_merge_two_pag
goto up;
copy_user_highpage(kpage, page, rmap_item->address, vma);
+
+ set_page_stable_node(kpage, NULL); /* mark it PageKsm */
+
err = try_to_merge_one_page(vma, page, kpage);
up:
up_read(&mm->mmap_sem);
@@ -876,41 +869,31 @@ up:
* This function returns the stable tree node of identical content if found,
* NULL otherwise.
*/
-static struct stable_node *stable_tree_search(struct page *page,
- struct page **tree_pagep)
+static struct stable_node *stable_tree_search(struct page *page)
{
struct rb_node *node = root_stable_tree.rb_node;
struct stable_node *stable_node;
+ stable_node = page_stable_node(page);
+ if (stable_node) { /* ksm page forked */
+ get_page(page);
+ return stable_node;
+ }
+
while (node) {
- struct hlist_node *hlist, *hnext;
- struct rmap_item *tree_rmap_item;
- struct page *tree_page;
int ret;
+ cond_resched();
stable_node = rb_entry(node, struct stable_node, node);
- hlist_for_each_entry_safe(tree_rmap_item, hlist, hnext,
- &stable_node->hlist, hlist) {
- BUG_ON(!in_stable_tree(tree_rmap_item));
- cond_resched();
- tree_page = get_ksm_page(tree_rmap_item);
- if (tree_page)
- break;
- remove_rmap_item_from_tree(tree_rmap_item);
- }
- if (!hlist)
- return NULL;
- ret = memcmp_pages(page, tree_page);
+ ret = memcmp_pages(page, stable_node->page);
- if (ret < 0) {
- put_page(tree_page);
+ if (ret < 0)
node = node->rb_left;
- } else if (ret > 0) {
- put_page(tree_page);
+ else if (ret > 0)
node = node->rb_right;
- } else {
- *tree_pagep = tree_page;
+ else {
+ get_page(stable_node->page);
return stable_node;
}
}
@@ -932,26 +915,12 @@ static struct stable_node *stable_tree_i
struct stable_node *stable_node;
while (*new) {
- struct hlist_node *hlist, *hnext;
- struct rmap_item *tree_rmap_item;
- struct page *tree_page;
int ret;
+ cond_resched();
stable_node = rb_entry(*new, struct stable_node, node);
- hlist_for_each_entry_safe(tree_rmap_item, hlist, hnext,
- &stable_node->hlist, hlist) {
- BUG_ON(!in_stable_tree(tree_rmap_item));
- cond_resched();
- tree_page = get_ksm_page(tree_rmap_item);
- if (tree_page)
- break;
- remove_rmap_item_from_tree(tree_rmap_item);
- }
- if (!hlist)
- return NULL;
- ret = memcmp_pages(kpage, tree_page);
- put_page(tree_page);
+ ret = memcmp_pages(kpage, stable_node->page);
parent = *new;
if (ret < 0)
@@ -977,6 +946,10 @@ static struct stable_node *stable_tree_i
INIT_HLIST_HEAD(&stable_node->hlist);
+ get_page(kpage);
+ stable_node->page = kpage;
+ set_page_stable_node(kpage, stable_node);
+
return stable_node;
}
@@ -1085,14 +1058,10 @@ static void cmp_and_merge_page(struct pa
remove_rmap_item_from_tree(rmap_item);
/* We first start with searching the page inside the stable tree */
- stable_node = stable_tree_search(page, &tree_page);
+ stable_node = stable_tree_search(page);
if (stable_node) {
- kpage = tree_page;
- if (page == kpage) /* forked */
- err = 0;
- else
- err = try_to_merge_with_ksm_page(rmap_item,
- page, kpage);
+ kpage = stable_node->page;
+ err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
if (!err) {
/*
* The page was successfully merged: