From: Martin Schwidefsky <[email protected]>
From: Hubertus Franke <[email protected]>
From: Himanshu Raj <[email protected]>
[patch 8/9] Guest page hinting: discarded page list.
The discarded page list is used to postpone the freeing of discarded
pages. The PG_discarded is set by either __remove_from_page_cache,
__delete_from_swap_cache or the discard fault handler for pages that
have been removed by the host. free_hot_cold_page tests for the bit
and puts the page on a per-cpu discarded page list if it is set.
try_to_free_pages does an smp_call_function to collect all the
partial discarded page lists and frees them.
There are two reasons why this is desirable. First, discarded pages are
really cold. Before the guest can reaccess the page frame the host
needs to provide a fresh page. It is faster to use only non-discarded
pages which do not require a host action as long as the working set
of the guest allows it.
The second reason has to do with the peculiarities of the s390 architecture.
The discard fault exception delivers the absolute address of the page
that caused the fault to the guest instead of the virtual address. With
the virtual address we could have used the page table entry of the
current process to safely get a reference to the discarded page. We can
get to the struct page from the absolute page address but it is rather
hard to get to a proper page reference. The page that caused the fault
could already have been freed and reused for a different purpose. None
of the fields in the struct page would be reliable to use. The discard
list and the call of smp_call_function before freeing discarded pages
make sure that the discard fault handler is reached only for pages that
have not been freed yet. A call to get_page_unless_zero can then be used
to get a proper page reference.
Signed-off-by: Martin Schwidefsky <[email protected]>
---
include/linux/page-states.h | 4 +++
mm/page_alloc.c | 56 ++++++++++++++++++++++++++++++++++++++++++++
mm/vmscan.c | 3 ++
3 files changed, 63 insertions(+)
diff -urpN linux-2.6/include/linux/page-states.h linux-2.6-patched/include/linux/page-states.h
--- linux-2.6/include/linux/page-states.h 2006-09-01 12:50:25.000000000 +0200
+++ linux-2.6-patched/include/linux/page-states.h 2006-09-01 12:50:25.000000000 +0200
@@ -69,9 +69,13 @@
* - page_reset_writable:
* Resets the page state after the last writable page table entry
* refering to the page has been removed.
+ * - page_shrink_discards:
+ * Frees all pages that free_hot_cold_page has put on the list of
+ * discarded pages.
*/
extern void page_unmap_all(struct page *page);
extern void page_discard(struct page *page);
+extern unsigned long page_shrink_discards(void);
static inline int page_make_stable(struct page *page)
{
diff -urpN linux-2.6/mm/page_alloc.c linux-2.6-patched/mm/page_alloc.c
--- linux-2.6/mm/page_alloc.c 2006-09-01 12:50:25.000000000 +0200
+++ linux-2.6-patched/mm/page_alloc.c 2006-09-01 12:50:25.000000000 +0200
@@ -786,6 +786,42 @@ void drain_local_pages(void)
}
#endif /* CONFIG_PM */
+#if defined(CONFIG_PAGE_DISCARD_LIST)
+DEFINE_PER_CPU(struct list_head, page_discard_list);
+
+static void __page_shrink_discards(void *info) /* per-cpu callback: moves this cpu's discard list onto the list passed in info */
+{
+ static DEFINE_SPINLOCK(splice_lock); /* one lock shared by all cpus splicing into the same target list */
+ struct list_head *discard_list = info; /* target list supplied by the caller */
+ struct list_head *cpu_list = &__get_cpu_var(page_discard_list); /* the executing cpu's own list */
+
+ if (list_empty(cpu_list)) /* nothing queued on this cpu: return without taking the lock */
+ return;
+ spin_lock(&splice_lock);
+ list_splice_init(cpu_list, discard_list); /* move all entries and reinitialize cpu_list as empty */
+ spin_unlock(&splice_lock);
+}
+
+unsigned long page_shrink_discards(void) /* frees the pages queued on the per-cpu discard lists; returns how many were freed */
+{
+ struct list_head pages_to_free = LIST_HEAD_INIT(pages_to_free); /* local list that collects every cpu's entries */
+ struct page *page, *next;
+ unsigned long freed = 0;
+
+ if (!page_host_discards()) /* nothing to do; report zero pages freed */
+ return 0;
+
+ on_each_cpu(__page_shrink_discards, &pages_to_free, 0, 1); /* have each cpu splice its list into pages_to_free */
+
+ list_for_each_entry_safe(page, next, &pages_to_free, lru) { /* _safe variant: the entry is given away inside the loop */
+ ClearPageDiscarded(page); /* clear first so the PageDiscarded test in the free path cannot queue the page again */
+ free_cold_page(page);
+ freed++;
+ }
+ return freed;
+}
+#endif
+
/*
* Free a 0-order page
*/
@@ -795,6 +831,16 @@ static void fastcall free_hot_cold_page(
struct per_cpu_pages *pcp;
unsigned long flags;
+#if defined(CONFIG_PAGE_DISCARD_LIST)
+ if (page_host_discards() && unlikely(PageDiscarded(page))) {
+ local_irq_disable();
+ list_add_tail(&page->lru,
+ &__get_cpu_var(page_discard_list));
+ local_irq_enable();
+ return;
+ }
+#endif
+
arch_free_page(page, 0);
if (PageAnon(page))
@@ -2810,6 +2856,10 @@ static int page_alloc_cpu_notify(struct
local_irq_disable();
__drain_pages(cpu);
vm_events_fold_cpu(cpu);
+#if defined(CONFIG_PAGE_DISCARD_LIST)
+ list_splice_init(&per_cpu(page_discard_list, cpu),
+ &__get_cpu_var(page_discard_list));
+#endif
local_irq_enable();
refresh_cpu_vm_stats(cpu);
}
@@ -2819,6 +2869,12 @@ static int page_alloc_cpu_notify(struct
void __init page_alloc_init(void)
{
+#if defined(CONFIG_PAGE_DISCARD_LIST)
+ int i;
+
+ for_each_possible_cpu(i) /* initialize the discard list head of every possible cpu as empty */
+ INIT_LIST_HEAD(&per_cpu(page_discard_list, i));
+#endif
hotcpu_notifier(page_alloc_cpu_notify, 0);
}
diff -urpN linux-2.6/mm/vmscan.c linux-2.6-patched/mm/vmscan.c
--- linux-2.6/mm/vmscan.c 2006-09-01 12:50:23.000000000 +0200
+++ linux-2.6-patched/mm/vmscan.c 2006-09-01 12:50:25.000000000 +0200
@@ -1034,6 +1034,9 @@ unsigned long try_to_free_pages(struct z
sc.nr_scanned = 0;
if (!priority)
disable_swap_token();
+#ifdef CONFIG_PAGE_DISCARD_LIST
+ nr_reclaimed += page_shrink_discards();
+#endif
nr_reclaimed += shrink_zones(priority, zones, &sc);
shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
if (reclaim_state) {
On Fri, 2006-09-01 at 13:11 +0200, Martin Schwidefsky wrote:
>
> +#if defined(CONFIG_PAGE_DISCARD_LIST)
> + if (page_host_discards() && unlikely(PageDiscarded(page))) {
> + local_irq_disable();
> + list_add_tail(&page->lru,
> + &__get_cpu_var(page_discard_list));
> + local_irq_enable();
> + return;
> + }
> +#endif
If PageDiscarded() was #ifdef'd in the header, you wouldn't need this in
the .c file.
-- Dave
On Fri, 2006-09-01 at 08:17 -0700, Dave Hansen wrote:
> > +#if defined(CONFIG_PAGE_DISCARD_LIST)
> > + if (page_host_discards() && unlikely(PageDiscarded(page))) {
> > + local_irq_disable();
> > + list_add_tail(&page->lru,
> > + &__get_cpu_var(page_discard_list));
> > + local_irq_enable();
> > + return;
> > + }
> > +#endif
>
> If PageDiscarded() was #ifdef'd in the header, you wouldn't need this in
> the .c file.
No, unfortunately not. There is a new variable page_discard_list that is
only defined if CONFIG_PAGE_DISCARD_LIST is set. The compiler will
complain about the absence of the variable, even if the code is never
reached because PageDiscarded always returns 0.
--
blue skies,
Martin.
Martin Schwidefsky
Linux for zSeries Development & Services
IBM Deutschland Entwicklung GmbH
"Reality continues to ruin my life." - Calvin.
On Fri, 2006-09-01 at 17:40 +0200, Martin Schwidefsky wrote:
> No, unfortunately not. There is a new variable page_discard_list that is
> only defined if CONFIG_PAGE_DISCARD_LIST is set. The compiler will
> complain about the absence of the variable, even if the code is never
> reached because PageDiscarded always returns 0.
Ahh. I see that now. How about a nice inlined helper function
instead? ;)
-- Dave