2006-03-20 13:36:38

by Stone Wang

[permalink] [raw]
Subject: [PATCH][1/8] 2.6.15 mlock: make_pages_wired/unwired

1. Add make_pages_unwired routine.
2. Replace make_pages_present with make_pages_wired, support rollback.
3. Pass 1 more param ("wire") to get_user_pages.

Signed-off-by: Shaoping Wang <[email protected]>


--
include/linux/mm.h | 8 ++++--
mm/memory.c | 65 +++++++++++++++++++++++++++++++++++++++++++++--------
2 files changed, 62 insertions(+), 11 deletions(-)

diff -urN linux-2.6.15.orig/include/linux/mm.h linux-2.6.15/include/linux/mm.h
--- linux-2.6.15.orig/include/linux/mm.h 2006-01-02 22:21:10.000000000 -0500
+++ linux-2.6.15/include/linux/mm.h 2006-03-07 01:49:12.000000000 -0500
@@ -59,6 +59,9 @@
unsigned long vm_start; /* Our start address within vm_mm. */
unsigned long vm_end; /* The first byte after our end address
within vm_mm. */
+ int vm_wire_change; /* VM_LOCKED bit of vm_flags was just changed.
+ * For rollback support of sys_mlock series system calls.
+ */

/* linked list of VM areas per task, sorted by address */
struct vm_area_struct *vm_next;
@@ -699,12 +706,13 @@
return __handle_mm_fault(mm, vma, address, write_access) & (~VM_FAULT_WRITE);
}

-extern int make_pages_present(unsigned long addr, unsigned long end);
+extern int make_pages_wired(unsigned long addr, unsigned long end);
+void make_pages_unwired(struct mm_struct *mm, unsigned long addr,
unsigned long end);
extern int access_process_vm(struct task_struct *tsk, unsigned long
addr, void *buf, int len, int write);
void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);

int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start,
- int len, int write, int force, struct page **pages, struct
vm_area_struct **vmas);
+ int len, int write, int force, int wire, struct page **pages,
struct vm_area_struct **vmas);
void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long);

int __set_page_dirty_buffers(struct page *page);
diff -urN linux-2.6.15.orig/mm/memory.c linux-2.6.15/mm/memory.c
--- linux-2.6.15.orig/mm/memory.c 2006-01-02 22:21:10.000000000 -0500
+++ linux-2.6.15/mm/memory.c 2006-03-07 11:14:59.000000000 -0500
@@ -950,8 +950,30 @@
return page;
}

+void make_pages_unwired(struct mm_struct *mm,
+ unsigned long start,unsigned long end)
+{
+ struct vm_area_struct *vma;
+ struct page *page;
+ unsigned int foll_flags;
+
+ foll_flags =0;
+
+ vma=find_vma(mm,start);
+ if(!vma)
+ BUG();
+ if(is_vm_hugetlb_page(vma))
+ return;
+
+ for(; start<end ; start+=PAGE_SIZE) {
+ page=follow_page(vma,start,foll_flags);
+ if(page)
+ unwire_page(page);
+ }
+}
+
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
- unsigned long start, int len, int write, int force,
+ unsigned long start, int len, int write,int force, int wire,
struct page **pages, struct vm_area_struct **vmas)
{
int i;
@@ -973,6 +995,7 @@
if (!vma && in_gate_area(tsk, start)) {
unsigned long pg = start & PAGE_MASK;
struct vm_area_struct *gate_vma = get_gate_vma(tsk);
+ struct page *page;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
@@ -994,6 +1017,7 @@
pte_unmap(pte);
return i ? : -EFAULT;
}
+ page = vm_normal_page(gate_vma, start, *pte);
if (pages) {
struct page *page = vm_normal_page(gate_vma, start, *pte);
pages[i] = page;
@@ -1003,9 +1027,12 @@
pte_unmap(pte);
if (vmas)
vmas[i] = gate_vma;
+ if(wire)
+ wire_page(page);
i++;
start += PAGE_SIZE;
len--;
+
continue;
}

@@ -1013,6 +1040,7 @@
|| !(vm_flags & vma->vm_flags))
return i ? : -EFAULT;

+ /* We dont account wired HugeTLB pages */
if (is_vm_hugetlb_page(vma)) {
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &len, i);
@@ -1067,17 +1095,20 @@
}
if (vmas)
vmas[i] = vma;
+ if(wire)
+ wire_page(page);
i++;
start += PAGE_SIZE;
len--;
} while (len && start < vma->vm_end);
} while (len);
+
return i;
}
EXPORT_SYMBOL(get_user_pages);

-static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
- unsigned long addr, unsigned long end, pgprot_t prot)
+static int zeromap_pte_range(struct mm_struct *mm, struct vm_area_struct *vma,
+ pmd_t *pmd, unsigned long addr, unsigned long end, pgprot_t prot)
{
pte_t *pte;
spinlock_t *ptl;

@@ -2306,10 +2338,13 @@
}
#endif /* __PAGETABLE_PMD_FOLDED */

-int make_pages_present(unsigned long addr, unsigned long end)
+int make_pages_wired(unsigned long addr, unsigned long end)
{
int ret, len, write;
+ struct page *page;
struct vm_area_struct * vma;
+ struct mm_struct *mm=current->mm;
+ int wire_change;

vma = find_vma(current->mm, addr);
if (!vma)
@@ -2320,13 +2355,26 @@
if (end > vma->vm_end)
BUG();
len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE;
- ret = get_user_pages(current, current->mm, addr,
- len, write, 0, NULL, NULL);
- if (ret < 0)
- return ret;
- return ret == len ? 0 : -1;
+ wire_change = vma->vm_wire_change;
+ vma->vm_wire_change = 1;
+ ret = get_user_pages(current, mm, addr,
+ len, write, 1, 1, NULL, NULL); /* write,set_wire */
+ vma->vm_wire_change = wire_change;
+ if(ret < len) {
+ for(; addr< end ; addr += PAGE_SIZE) {
+ page=follow_page(vma,addr,0);
+ if(page)
+ unwire_page(page);
+ else
+ BUG();
+ }
+ return -1;
+ }
+ else
+ return 0;
}

+
/*
* Map a vmalloc()-space virtual address to the physical page.
*/


2006-03-20 13:42:18

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [PATCH][1/8] 2.6.15 mlock: make_pages_wired/unwired

On Mon, 2006-03-20 at 08:36 -0500, Stone Wang wrote:
> 1. Add make_pages_unwired routine.
> 2. Replace make_pages_present with make_pages_wired, support rollback.
> 3. Pass 1 more param ("wire") to get_user_pages.

hmm again "wire" is a meaningless name
also.. get_user_pages ALWAYS pins the page ... so might as well make
that automatic (with an unpin when the pinning is released)



2006-03-21 13:06:48

by Nick Piggin

[permalink] [raw]
Subject: Re: [PATCH][1/8] 2.6.15 mlock: make_pages_wired/unwired

Stone Wang wrote:
> 1. Add make_pages_unwired routine.

Unfortunately you forgot wire_page and unwire_page, so this patch will
not even compile.

> 2. Replace make_pages_present with make_pages_wired, support rollback.

What does support rollback mean?

> 3. Pass 1 more param ("wire") to get_user_pages.
>

As others have pointed out, wire may be a BSD / other unix thing, but
it does not feature in Linux memory management terminology. If you
want to introduce it, you need to do a better job of specifying it.

> Signed-off-by: Shaoping Wang <[email protected]>
>

> +void make_pages_unwired(struct mm_struct *mm,
> + unsigned long start,unsigned long end)
> +{
> + struct vm_area_struct *vma;
> + struct page *page;
> + unsigned int foll_flags;
> +
> + foll_flags =0;
> +
> + vma=find_vma(mm,start);
> + if(!vma)
> + BUG();
> + if(is_vm_hugetlb_page(vma))
> + return;
> +
> + for(; start<end ; start+=PAGE_SIZE) {
> + page=follow_page(vma,start,foll_flags);
> + if(page)
> + unwire_page(page);
> + }
> +}
> +

What happens when start goes past vma->vm_end?

> int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
> - unsigned long start, int len, int write, int force,
> + unsigned long start, int len, int write,int force, int wire,
> struct page **pages, struct vm_area_struct **vmas)
> {
> int i;
> @@ -973,6 +995,7 @@
> if (!vma && in_gate_area(tsk, start)) {
> unsigned long pg = start & PAGE_MASK;
> struct vm_area_struct *gate_vma = get_gate_vma(tsk);
> + struct page *page;
> pgd_t *pgd;
> pud_t *pud;
> pmd_t *pmd;
> @@ -994,6 +1017,7 @@
> pte_unmap(pte);
> return i ? : -EFAULT;
> }
> + page = vm_normal_page(gate_vma, start, *pte);

You wire gate_vma pages? But it doesn't look like you can unwire them with
make_pages_unwired.

> if (pages) {
> struct page *page = vm_normal_page(gate_vma, start, *pte);

This can go now?

> pages[i] = page;
> @@ -1003,9 +1027,12 @@
> pte_unmap(pte);
> if (vmas)
> vmas[i] = gate_vma;
> + if(wire)
> + wire_page(page);
> i++;
> start += PAGE_SIZE;
> len--;
> +
> continue;
> }
>
> @@ -1013,6 +1040,7 @@
> || !(vm_flags & vma->vm_flags))
> return i ? : -EFAULT;
>
> + /* We dont account wired HugeTLB pages */

You don't account wired HugeTLB pages? If you can wire them you should be able
to unwire them as well shouldn't you?

--
SUSE Labs, Novell Inc.

Send instant messages to your online friends http://au.messenger.yahoo.com

2006-03-21 16:03:08

by Stone Wang

[permalink] [raw]
Subject: Re: [PATCH][1/8] 2.6.15 mlock: make_pages_wired/unwired

We don't account HugeTLB pages, for two reasons:

1. HugeTLB pages themselves are not reclaimable.

2. If we counted HugeTLB pages in "Wired", we would have no idea
how many of the "Wired" pages are HugeTLB pages and how many are
normal-size pages.
Thus it is hard to get a clear map of physical memory use, for example:
how many pages are reclaimable?
If we must count HugeTLB pages, more fields should be added to
"/proc/meminfo",
for example: "Wired HugeTLB:", "Wired Normal:".

Shaoping Wang

2006/3/21, Nick Piggin <[email protected]>:
> Stone Wang wrote:
> > 1. Add make_pages_unwired routine.
>
> Unfortunately you forgot wire_page and unwire_page, so this patch will
> not even compile.
>
> > 2. Replace make_pages_present with make_pages_wired, support rollback.
>
> What does support rollback mean?
>
> > 3. Pass 1 more param ("wire") to get_user_pages.
> >
>
> As others have pointed out, wire may be a BSD / other unix thing, but
> it does not feature in Linux memory management terminology. If you
> want to introduce it, you need to do a better job of specifying it.
>
> > Signed-off-by: Shaoping Wang <[email protected]>
> >
>
> > +void make_pages_unwired(struct mm_struct *mm,
> > + unsigned long start,unsigned long end)
> > +{
> > + struct vm_area_struct *vma;
> > + struct page *page;
> > + unsigned int foll_flags;
> > +
> > + foll_flags =0;
> > +
> > + vma=find_vma(mm,start);
> > + if(!vma)
> > + BUG();
> > + if(is_vm_hugetlb_page(vma))
> > + return;
> > +
> > + for(; start<end ; start+=PAGE_SIZE) {
> > + page=follow_page(vma,start,foll_flags);
> > + if(page)
> > + unwire_page(page);
> > + }
> > +}
> > +
>
> What happens when start goes past vma->vm_end?
>
> > int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
> > - unsigned long start, int len, int write, int force,
> > + unsigned long start, int len, int write,int force, int wire,
> > struct page **pages, struct vm_area_struct **vmas)
> > {
> > int i;
> > @@ -973,6 +995,7 @@
> > if (!vma && in_gate_area(tsk, start)) {
> > unsigned long pg = start & PAGE_MASK;
> > struct vm_area_struct *gate_vma = get_gate_vma(tsk);
> > + struct page *page;
> > pgd_t *pgd;
> > pud_t *pud;
> > pmd_t *pmd;
> > @@ -994,6 +1017,7 @@
> > pte_unmap(pte);
> > return i ? : -EFAULT;
> > }
> > + page = vm_normal_page(gate_vma, start, *pte);
>
> You wire gate_vma pages? But it doesn't look like you can unwire them with
> make_pages_unwired.
>
> > if (pages) {
> > struct page *page = vm_normal_page(gate_vma, start, *pte);
>
> This can go now?
>
> > pages[i] = page;
> > @@ -1003,9 +1027,12 @@
> > pte_unmap(pte);
> > if (vmas)
> > vmas[i] = gate_vma;
> > + if(wire)
> > + wire_page(page);
> > i++;
> > start += PAGE_SIZE;
> > len--;
> > +
> > continue;
> > }
> >
> > @@ -1013,6 +1040,7 @@
> > || !(vm_flags & vma->vm_flags))
> > return i ? : -EFAULT;
> >
> > + /* We dont account wired HugeTLB pages */
>
> You don't account wired HugeTLB pages? If you can wire them you should be able
> to unwire them as well shouldn't you?
>
> --
> SUSE Labs, Novell Inc.
>
> Send instant messages to your online friends http://au.messenger.yahoo.com
>
>

2006-03-22 00:28:27

by Nick Piggin

[permalink] [raw]
Subject: Re: [PATCH][1/8] 2.6.15 mlock: make_pages_wired/unwired

Stone Wang wrote:
> We don't account HugeTLB pages, for two reasons:
>
> 1. HugeTLB pages themselves are not reclaimable.
>
> 2. If we counted HugeTLB pages in "Wired", we would have no idea
> how many of the "Wired" pages are HugeTLB pages and how many are
> normal-size pages.
> Thus it is hard to get a clear map of physical memory use, for example:
> how many pages are reclaimable?
> If we must count HugeTLB pages, more fields should be added to
> "/proc/meminfo",
> for example: "Wired HugeTLB:", "Wired Normal:".
>

Then why do you wire them at all? Your unwire function does not appear
to be able to unwire them.

--
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com

2006-03-22 08:59:36

by Stone Wang

[permalink] [raw]
Subject: Re: [PATCH][1/8] 2.6.15 mlock: make_pages_wired/unwired

2006/3/21, Nick Piggin <[email protected]>:
> Stone Wang wrote:
> > We don't account HugeTLB pages, for two reasons:
> >
> > 1. HugeTLB pages themselves are not reclaimable.
> >
> > 2. If we counted HugeTLB pages in "Wired", we would have no idea
> > how many of the "Wired" pages are HugeTLB pages and how many are
> > normal-size pages.
> > Thus it is hard to get a clear map of physical memory use, for example:
> > how many pages are reclaimable?
> > If we must count HugeTLB pages, more fields should be added to
> > "/proc/meminfo",
> > for example: "Wired HugeTLB:", "Wired Normal:".
> >
>
> Then why do you wire them at all? Your unwire function does not appear
> to be able to unwire them.

We didn't wire them.

Check get_user_pages():

/* We dont account wired HugeTLB pages */
if (is_vm_hugetlb_page(vma)) {
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &len, i);
continue;
}


Shaoping Wang

>
> --
> SUSE Labs, Novell Inc.
> Send instant messages to your online friends http://au.messenger.yahoo.com
>
>

2006-03-22 11:11:48

by Nick Piggin

[permalink] [raw]
Subject: Re: [PATCH][1/8] 2.6.15 mlock: make_pages_wired/unwired

Stone Wang wrote:
> 2006/3/21, Nick Piggin <[email protected]>:

>
> We didnt wire them.
>

But your comment said they were wired.

--
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com

2006-03-22 11:59:49

by Stone Wang

[permalink] [raw]
Subject: Re: [PATCH][1/8] 2.6.15 mlock: make_pages_wired/unwired

Right, it caused confusion.

I will correct it.

2006/3/22, Nick Piggin <[email protected]>:
> Stone Wang wrote:
> > 2006/3/21, Nick Piggin <[email protected]>:
>
> >
> > We didnt wire them.
> >
>
> But your comment said they were wired.
>
> --
> SUSE Labs, Novell Inc.
> Send instant messages to your online friends http://au.messenger.yahoo.com
>
>