2005-02-25 00:46:24

by Darren Hart

[permalink] [raw]
Subject: [PATCH] vm: mlock superfluous variable

diff -purN -X /home/dvhart/.diff.exclude /home/linux/views/linux-2.6.11-rc5/mm/mlock.c 2.6.11-rc5-mlock/mm/mlock.c
--- /home/linux/views/linux-2.6.11-rc5/mm/mlock.c 2004-12-24 15:26:12.000000000 -0800
+++ 2.6.11-rc5-mlock/mm/mlock.c 2005-02-24 13:57:38.000000000 -0800
@@ -58,8 +58,8 @@ out:

static int do_mlock(unsigned long start, size_t len, int on)
{
- unsigned long nstart, end, tmp;
- struct vm_area_struct * vma, * next;
+ unsigned long nstart, end;
+ struct vm_area_struct * vma;
int error;

len = PAGE_ALIGN(len);
@@ -86,13 +86,11 @@ static int do_mlock(unsigned long start,
break;
}

- tmp = vma->vm_end;
- next = vma->vm_next;
- error = mlock_fixup(vma, nstart, tmp, newflags);
+ error = mlock_fixup(vma, nstart, vma->vm_end, newflags);
if (error)
break;
- nstart = tmp;
- vma = next;
+ nstart = vma->vm_end;
+ vma = vma->vm_next;
if (!vma || vma->vm_start != nstart) {
error = -ENOMEM;
break;


Attachments:
mlock (948.00 B)

2005-02-25 17:11:44

by Chris Wright

[permalink] [raw]
Subject: Re: [PATCH] vm: mlock superfluous variable

* Darren Hart ([email protected]) wrote:
> There were a couple of long-standing (since at least 2.4.21) superfluous
> variables and two unnecessary assignments in do_mlock(). The intent of
> the resulting code is also more obvious.
>
> Tested on a 4 way x86 box running a simple mlock test program. No
> problems detected.

Did you test with multiple page ranges, and locking subsets of vmas?
Seems that splitting could cause a problem since you now sample vm_end
before and after fixup, where the vma could be changed in the middle.

thanks,
-chris
--
Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net

2005-02-25 22:06:26

by Chris Wright

[permalink] [raw]
Subject: [PATCH] allow vma merging with mlock et. al.

* Chris Wright ([email protected]) wrote:
> * Darren Hart ([email protected]) wrote:
> > There were a couple of long-standing (since at least 2.4.21) superfluous
> > variables and two unnecessary assignments in do_mlock(). The intent of
> > the resulting code is also more obvious.
> >
> > Tested on a 4 way x86 box running a simple mlock test program. No
> > problems detected.
>
> Did you test with multiple page ranges, and locking subsets of vmas?
> Seems that splitting could cause a problem since you now sample vm_end
> before and after fixup, where the vma could be changed in the middle.

Actually I think it winds up being fine since we don't do merging with
mlock. But why not? Patch below remedies that.

thanks,
-chris
--

Successive mlock/munlock calls can leave fragmented vmas because they can
be split but not merged. Give mlock et. al. full vma merging support.

Signed-off-by: Chris Wright <[email protected]>

===== mm/mlock.c 1.19 vs edited =====
--- 1.19/mm/mlock.c 2005-02-11 11:07:35 -08:00
+++ edited/mm/mlock.c 2005-02-24 23:53:10 -08:00
@@ -7,18 +7,32 @@

#include <linux/mman.h>
#include <linux/mm.h>
+#include <linux/mempolicy.h>
#include <linux/syscalls.h>


-static int mlock_fixup(struct vm_area_struct * vma,
+static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
unsigned long start, unsigned long end, unsigned int newflags)
{
struct mm_struct * mm = vma->vm_mm;
+ pgoff_t pgoff;
int pages;
int ret = 0;

- if (newflags == vma->vm_flags)
+ if (newflags == vma->vm_flags) {
+ *prev = vma;
goto out;
+ }
+
+ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+ *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
+ vma->vm_file, pgoff, vma_policy(vma));
+ if (*prev) {
+ vma = *prev;
+ goto success;
+ }
+
+ *prev = vma;

if (start != vma->vm_start) {
ret = split_vma(mm, vma, start, 1);
@@ -32,6 +46,7 @@ static int mlock_fixup(struct vm_area_st
goto out;
}

+success:
/*
* vm_flags is protected by the mmap_sem held in write mode.
* It's okay if try_to_unmap_one unmaps a page just after we
@@ -59,7 +74,7 @@ out:
static int do_mlock(unsigned long start, size_t len, int on)
{
unsigned long nstart, end, tmp;
- struct vm_area_struct * vma, * next;
+ struct vm_area_struct * vma, * prev;
int error;

len = PAGE_ALIGN(len);
@@ -68,7 +83,7 @@ static int do_mlock(unsigned long start,
return -EINVAL;
if (end == start)
return 0;
- vma = find_vma(current->mm, start);
+ vma = find_vma_prev(current->mm, start, &prev);
if (!vma || vma->vm_start > start)
return -ENOMEM;

@@ -81,18 +96,19 @@ static int do_mlock(unsigned long start,
if (!on)
newflags &= ~VM_LOCKED;

- if (vma->vm_end >= end) {
- error = mlock_fixup(vma, nstart, end, newflags);
- break;
- }
-
tmp = vma->vm_end;
- next = vma->vm_next;
- error = mlock_fixup(vma, nstart, tmp, newflags);
+ if (tmp > end)
+ tmp = end;
+ error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
if (error)
break;
nstart = tmp;
- vma = next;
+ if (nstart < prev->vm_end)
+ nstart = prev->vm_end;
+ if (nstart >= end)
+ break;
+
+ vma = prev->vm_next;
if (!vma || vma->vm_start != nstart) {
error = -ENOMEM;
break;
@@ -141,7 +157,7 @@ asmlinkage long sys_munlock(unsigned lon

static int do_mlockall(int flags)
{
- struct vm_area_struct * vma;
+ struct vm_area_struct * vma, * prev;
unsigned int def_flags = 0;

if (flags & MCL_FUTURE)
@@ -150,7 +166,7 @@ static int do_mlockall(int flags)
if (flags == MCL_FUTURE)
goto out;

- for (vma = current->mm->mmap; vma ; vma = vma->vm_next) {
+ for (prev = vma = current->mm->mmap; vma ; vma = vma->vm_next) {
unsigned int newflags;

newflags = vma->vm_flags | VM_LOCKED;
@@ -158,7 +174,8 @@ static int do_mlockall(int flags)
newflags &= ~VM_LOCKED;

/* Ignore errors */
- mlock_fixup(vma, vma->vm_start, vma->vm_end, newflags);
+ mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
+ vma = prev;
}
out:
return 0;

2005-02-25 22:19:21

by Darren Hart

[permalink] [raw]
Subject: Re: [PATCH] vm: mlock superfluous variable

Chris Wright wrote:
> * Darren Hart ([email protected]) wrote:
>
>>There were a couple of long-standing (since at least 2.4.21) superfluous
>>variables and two unnecessary assignments in do_mlock(). The intent of
>>the resulting code is also more obvious.
>>
>>Tested on a 4 way x86 box running a simple mlock test program. No
>>problems detected.
>
>
> Did you test with multiple page ranges, and locking subsets of vmas?
> Seems that splitting could cause a problem since you now sample vm_end
> before and after fixup, where the vma could be changed in the middle.

Thanks for catching that Chris. Both the tmp variable and the next
variable are indeed needed since mlock_fixup could modify both. Please
disregard this patch.

--Darren

2005-02-25 22:26:42

by Darren Hart

[permalink] [raw]
Subject: Re: [PATCH] allow vma merging with mlock et. al.

Chris Wright wrote:
> * Chris Wright ([email protected]) wrote:
>
>>* Darren Hart ([email protected]) wrote:
>>
>>>There were a couple of long-standing (since at least 2.4.21) superfluous
>>>variables and two unnecessary assignments in do_mlock(). The intent of
>>>the resulting code is also more obvious.
>>>
>>>Tested on a 4 way x86 box running a simple mlock test program. No
>>>problems detected.
>>
>>Did you test with multiple page ranges, and locking subsets of vmas?
>>Seems that splitting could cause a problem since you now sample vm_end
>>before and after fixup, where the vma could be changed in the middle.
>
>
> Actually I think it winds up being fine since we don't do merging with
> mlock. But why not? Patch below remedies that.

We don't merge, but we do split if necessary, so the temp variables are
still needed. As I understand it, the reason we don't merge is because
it is expected that a task will lock and unlock the same memory range
more than once and we don't want to waste our time merging and splitting
the VMAs.

Thanks,

--Darren

2005-02-25 23:39:58

by Chris Wright

[permalink] [raw]
Subject: Re: [PATCH] allow vma merging with mlock et. al.

* Darren Hart ([email protected]) wrote:
> As I understand it, the reason we don't merge is because
> it is expected that a task will lock and unlock the same memory range
> more than once and we don't want to waste our time merging and splitting
> the VMAs.

I don't have a good sampling of applications. The ones I've used are
temporal like gpg, or they mlockall the whole thing and never look back.
But I did a quick benchmark since I was curious, a simple loop of a
million lock/unlock cycles of a page that could trigger a merge:

vanilla
(no merge): 659706 usecs

patched
(merge): 3567020 usecs

Heh, I was surprised to see it that much slower.

cheers,
-chris
--
Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net

2005-02-26 00:56:59

by Andrea Arcangeli

[permalink] [raw]
Subject: Re: [PATCH] allow vma merging with mlock et. al.

On Fri, Feb 25, 2005 at 03:38:06PM -0800, Chris Wright wrote:
> I don't have a good sampling of applications. The ones I've used are
> temporal like gpg, or they mlockall the whole thing and never look back.
> But I did a quick benchmark since I was curious, a simple loop of a
> million lock/unlock cycles of a page that could trigger a merge:
>
> vanilla
> (no merge): 659706 usecs
>
> patched
> (merge): 3567020 usecs
>
> Heh, I was surprised to see it that much slower.

The object of the merge is to save memory, and to reduce the size of the
rbtree that might payoff during other operations (with a smaller tree,
lookups will be faster too). If you only measure the time of creating
and removing a mapping then it should be normal that you see a slowdown
since merging involves more work than non-merging. The payoff is
supposed to be in the other operations.

The reason mlock doesn't merge is that nobody asked for it yet, but it
was originally supposed to merge too (I stopped at mremap since mlock
wasn't high prio to fixup). But the long term plan was to eventually add
merging to mlock too and it's good that you're optimizing it now.

2005-02-26 01:17:20

by Chris Wright

[permalink] [raw]
Subject: Re: [PATCH] allow vma merging with mlock et. al.

* Andrea Arcangeli ([email protected]) wrote:
> The object of the merge is to save memory, and to reduce the size of the
> rbtree that might payoff during other operations (with a smaller tree,
> lookups will be faster too). If you only measure the time of creating
> and removing a mapping then it should be normal that you see a slowdown
> since merging involves more work than non-merging. The payoff is
> supposed to be in the other operations.

I agree, that test is pathological worst case.

> The reason mlock doesn't merge is that nobody asked for it yet, but it
> was originally supposed to merge too (I stopped at mremap since mlock
> wasn't high prio to fixup). But the long term plan was to eventually add
> merging to mlock too and it's good that you're optimizing it now.

Do you support merging this patch? Or did you mean further optimizations?

thanks,
-chris
--
Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net

2005-02-26 17:22:00

by Hugh Dickins

[permalink] [raw]
Subject: Re: [PATCH] allow vma merging with mlock et. al.

On Fri, 25 Feb 2005, Chris Wright wrote:
>
> Actually I think it winds up being fine since we don't do merging with
> mlock. But why not? Patch below remedies that.

I shared Darren's assumption, that mlock merging had been found too
expensive. But Andrea says it's just that nobody asked for it, so
now you've done the work, let's give it a try in -mm. We can always
back it out if somebody perceives a regression.

Do madvise and mempolicy too? I've no strong feelings about them.

> Successive mlock/munlock calls can leave fragmented vmas because they can
> be split but not merged. Give mlock et. al. full vma merging support.

Phew, you followed mprotect, saving me from having to think too deeply
about the correctness of this (I'm assuming mprotect is perfect ;))
Some remarks then on the three places where you departed from mprotect.

> ===== mm/mlock.c 1.19 vs edited =====
> --- 1.19/mm/mlock.c 2005-02-11 11:07:35 -08:00
> +++ edited/mm/mlock.c 2005-02-24 23:53:10 -08:00
> @@ -7,18 +7,32 @@
>
> #include <linux/mman.h>
> #include <linux/mm.h>
> +#include <linux/mempolicy.h>
> #include <linux/syscalls.h>
>
>
> -static int mlock_fixup(struct vm_area_struct * vma,
> +static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
> unsigned long start, unsigned long end, unsigned int newflags)
> {
> struct mm_struct * mm = vma->vm_mm;
> + pgoff_t pgoff;
> int pages;
> int ret = 0;
>
> - if (newflags == vma->vm_flags)
> + if (newflags == vma->vm_flags) {
> + *prev = vma;
> goto out;
> + }
> +
> + pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
> + *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
> + vma->vm_file, pgoff, vma_policy(vma));
> + if (*prev) {
> + vma = *prev;
> + goto success;
> + }
> +
> + *prev = vma;

You've raised that line higher (so do_mlockall's "Ignore errors" works):
that's an improvement because it saves special casing errors, I'd like
you to make the same adjustment to mprotect_fixup, even though it's not
required there (and delete "Unless it returns an error, " from comment).
Let's keep the two in step.

> if (start != vma->vm_start) {
> ret = split_vma(mm, vma, start, 1);
> @@ -32,6 +46,7 @@ static int mlock_fixup(struct vm_area_st
> goto out;
> }
>
> +success:
> /*
> * vm_flags is protected by the mmap_sem held in write mode.
> * It's okay if try_to_unmap_one unmaps a page just after we
> @@ -59,7 +74,7 @@ out:
> static int do_mlock(unsigned long start, size_t len, int on)
> {
> unsigned long nstart, end, tmp;
> - struct vm_area_struct * vma, * next;
> + struct vm_area_struct * vma, * prev;
> int error;
>
> len = PAGE_ALIGN(len);
> @@ -68,7 +83,7 @@ static int do_mlock(unsigned long start,
> return -EINVAL;
> if (end == start)
> return 0;
> - vma = find_vma(current->mm, start);
> + vma = find_vma_prev(current->mm, start, &prev);
> if (!vma || vma->vm_start > start)
> return -ENOMEM;

But here sys_mprotect also says:

if (start > vma->vm_start)
prev = vma;

Perhaps you've worked your way through vma_merge and convinced yourself
this is never necessary, that's quite possible; but I'd still be happier
if you were to add it into your do_mlock: it limits the cases vma_merge
has to worry about. Or if you feel strongly about it, explain why I'm
just being silly, and delete it from mprotect too.

> @@ -81,18 +96,19 @@ static int do_mlock(unsigned long start,
> if (!on)
> newflags &= ~VM_LOCKED;
>
> - if (vma->vm_end >= end) {
> - error = mlock_fixup(vma, nstart, end, newflags);
> - break;
> - }
> -
> tmp = vma->vm_end;
> - next = vma->vm_next;
> - error = mlock_fixup(vma, nstart, tmp, newflags);
> + if (tmp > end)
> + tmp = end;
> + error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
> if (error)
> break;
> nstart = tmp;
> - vma = next;
> + if (nstart < prev->vm_end)
> + nstart = prev->vm_end;
> + if (nstart >= end)
> + break;
> +
> + vma = prev->vm_next;
> if (!vma || vma->vm_start != nstart) {
> error = -ENOMEM;
> break;
> @@ -141,7 +157,7 @@ asmlinkage long sys_munlock(unsigned lon
>
> static int do_mlockall(int flags)
> {
> - struct vm_area_struct * vma;
> + struct vm_area_struct * vma, * prev;
> unsigned int def_flags = 0;
>
> if (flags & MCL_FUTURE)
> @@ -150,7 +166,7 @@ static int do_mlockall(int flags)
> if (flags == MCL_FUTURE)
> goto out;
>
> - for (vma = current->mm->mmap; vma ; vma = vma->vm_next) {
> + for (prev = vma = current->mm->mmap; vma ; vma = vma->vm_next) {

Here prev should be initialized to NULL, rather than the first vma.
Again, you've probably worked out that it's safe as you've written it,
but vma_merge does expect prev NULL at the beginning.

> unsigned int newflags;
>
> newflags = vma->vm_flags | VM_LOCKED;
> @@ -158,7 +174,8 @@ static int do_mlockall(int flags)
> newflags &= ~VM_LOCKED;
>
> /* Ignore errors */
> - mlock_fixup(vma, vma->vm_start, vma->vm_end, newflags);
> + mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
> + vma = prev;

Scrap that "vma = prev;" line, just say "vma = prev->vm_next" in the loop?

> }
> out:
> return 0;

Hugh

2005-02-28 20:34:08

by Chris Wright

[permalink] [raw]
Subject: Re: [PATCH] allow vma merging with mlock et. al.

* Hugh Dickins ([email protected]) wrote:
> On Fri, 25 Feb 2005, Chris Wright wrote:
> >
> > Actually I think it winds up being fine since we don't do merging with
> > mlock. But why not? Patch below remedies that.
>
> > Successive mlock/munlock calls can leave fragmented vmas because they can
> > be split but not merged. Give mlock et. al. full vma merging support.
>
> Phew, you followed mprotect, saving me from having to think too deeply
> about the correctness of this (I'm assuming mprotect is perfect ;))

Heh, same assumption here ;-)

> Some remarks then on the three places where you departed from mprotect.
>
> > ===== mm/mlock.c 1.19 vs edited =====
> > --- 1.19/mm/mlock.c 2005-02-11 11:07:35 -08:00
> > +++ edited/mm/mlock.c 2005-02-24 23:53:10 -08:00
> > @@ -7,18 +7,32 @@
> >
> > #include <linux/mman.h>
> > #include <linux/mm.h>
> > +#include <linux/mempolicy.h>
> > #include <linux/syscalls.h>
> >
> >
> > -static int mlock_fixup(struct vm_area_struct * vma,
> > +static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
> > unsigned long start, unsigned long end, unsigned int newflags)
> > {
> > struct mm_struct * mm = vma->vm_mm;
> > + pgoff_t pgoff;
> > int pages;
> > int ret = 0;
> >
> > - if (newflags == vma->vm_flags)
> > + if (newflags == vma->vm_flags) {
> > + *prev = vma;
> > goto out;
> > + }
> > +
> > + pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
> > + *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
> > + vma->vm_file, pgoff, vma_policy(vma));
> > + if (*prev) {
> > + vma = *prev;
> > + goto success;
> > + }
> > +
> > + *prev = vma;
>
> You've raised that line higher (so do_mlockall's "Ignore errors" works):
> that's an improvement because it saves special casing errors, I'd like
> you to make the same adjustment to mprotect_fixup, even though it's not
> required there (and delete "Unless it returns an error, " from comment).
> Let's keep the two in step.

Sure, makes sense.

> > if (start != vma->vm_start) {
> > ret = split_vma(mm, vma, start, 1);
> > @@ -32,6 +46,7 @@ static int mlock_fixup(struct vm_area_st
> > goto out;
> > }
> >
> > +success:
> > /*
> > * vm_flags is protected by the mmap_sem held in write mode.
> > * It's okay if try_to_unmap_one unmaps a page just after we
> > @@ -59,7 +74,7 @@ out:
> > static int do_mlock(unsigned long start, size_t len, int on)
> > {
> > unsigned long nstart, end, tmp;
> > - struct vm_area_struct * vma, * next;
> > + struct vm_area_struct * vma, * prev;
> > int error;
> >
> > len = PAGE_ALIGN(len);
> > @@ -68,7 +83,7 @@ static int do_mlock(unsigned long start,
> > return -EINVAL;
> > if (end == start)
> > return 0;
> > - vma = find_vma(current->mm, start);
> > + vma = find_vma_prev(current->mm, start, &prev);
> > if (!vma || vma->vm_start > start)
> > return -ENOMEM;
>
> But here sys_mprotect also says:
>
> if (start > vma->vm_start)
> prev = vma;
>
> Perhaps you've worked your way through vma_merge and convinced yourself
> this is never necessary, that's quite possible; but I'd still be happier
> if you were to add it into your do_mlock: it limits the cases vma_merge
> has to worry about. Or if you feel strongly about it, explain why I'm
> just being silly, and delete it from mprotect too.

No, I think you're right, and it measurably improves (~5%) the worst
case benchmark I was doing, thanks.

> > @@ -81,18 +96,19 @@ static int do_mlock(unsigned long start,
> > if (!on)
> > newflags &= ~VM_LOCKED;
> >
> > - if (vma->vm_end >= end) {
> > - error = mlock_fixup(vma, nstart, end, newflags);
> > - break;
> > - }
> > -
> > tmp = vma->vm_end;
> > - next = vma->vm_next;
> > - error = mlock_fixup(vma, nstart, tmp, newflags);
> > + if (tmp > end)
> > + tmp = end;
> > + error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
> > if (error)
> > break;
> > nstart = tmp;
> > - vma = next;
> > + if (nstart < prev->vm_end)
> > + nstart = prev->vm_end;
> > + if (nstart >= end)
> > + break;
> > +
> > + vma = prev->vm_next;
> > if (!vma || vma->vm_start != nstart) {
> > error = -ENOMEM;
> > break;
> > @@ -141,7 +157,7 @@ asmlinkage long sys_munlock(unsigned lon
> >
> > static int do_mlockall(int flags)
> > {
> > - struct vm_area_struct * vma;
> > + struct vm_area_struct * vma, * prev;
> > unsigned int def_flags = 0;
> >
> > if (flags & MCL_FUTURE)
> > @@ -150,7 +166,7 @@ static int do_mlockall(int flags)
> > if (flags == MCL_FUTURE)
> > goto out;
> >
> > - for (vma = current->mm->mmap; vma ; vma = vma->vm_next) {
> > + for (prev = vma = current->mm->mmap; vma ; vma = vma->vm_next) {
>
> Here prev should be initialized to NULL, rather than the first vma.
> Again, you've probably worked out that it's safe as you've written it,
> but vma_merge does expect prev NULL at the beginning.

No, it was a bug.

> > unsigned int newflags;
> >
> > newflags = vma->vm_flags | VM_LOCKED;
> > @@ -158,7 +174,8 @@ static int do_mlockall(int flags)
> > newflags &= ~VM_LOCKED;
> >
> > /* Ignore errors */
> > - mlock_fixup(vma, vma->vm_start, vma->vm_end, newflags);
> > + mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
> > + vma = prev;
>
> Scrap that "vma = prev;" line, just say "vma = prev->vm_next" in the loop?

Yup, thanks for reviewing. Updated patch below.

thanks,
-chris
--

Successive mlock/munlock calls can leave fragmented vmas because they can
be split but not merged. Give mlock et. al. full vma merging support.
While we're at it, move *pprev assignment above first split_vma in
mprotect_fixup to keep it in step with mlock_fixup (which for mlockall
ignores errors yet still needs a valid prev pointer).

Signed-off-by: Chris Wright <[email protected]>

===== mm/mlock.c 1.19 vs edited =====
--- 1.19/mm/mlock.c 2005-02-11 11:07:35 -08:00
+++ edited/mm/mlock.c 2005-02-28 11:08:23 -08:00
@@ -7,18 +7,32 @@

#include <linux/mman.h>
#include <linux/mm.h>
+#include <linux/mempolicy.h>
#include <linux/syscalls.h>


-static int mlock_fixup(struct vm_area_struct * vma,
+static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
unsigned long start, unsigned long end, unsigned int newflags)
{
struct mm_struct * mm = vma->vm_mm;
+ pgoff_t pgoff;
int pages;
int ret = 0;

- if (newflags == vma->vm_flags)
+ if (newflags == vma->vm_flags) {
+ *prev = vma;
goto out;
+ }
+
+ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+ *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
+ vma->vm_file, pgoff, vma_policy(vma));
+ if (*prev) {
+ vma = *prev;
+ goto success;
+ }
+
+ *prev = vma;

if (start != vma->vm_start) {
ret = split_vma(mm, vma, start, 1);
@@ -32,6 +46,7 @@ static int mlock_fixup(struct vm_area_st
goto out;
}

+success:
/*
* vm_flags is protected by the mmap_sem held in write mode.
* It's okay if try_to_unmap_one unmaps a page just after we
@@ -59,7 +74,7 @@ out:
static int do_mlock(unsigned long start, size_t len, int on)
{
unsigned long nstart, end, tmp;
- struct vm_area_struct * vma, * next;
+ struct vm_area_struct * vma, * prev;
int error;

len = PAGE_ALIGN(len);
@@ -68,10 +83,13 @@ static int do_mlock(unsigned long start,
return -EINVAL;
if (end == start)
return 0;
- vma = find_vma(current->mm, start);
+ vma = find_vma_prev(current->mm, start, &prev);
if (!vma || vma->vm_start > start)
return -ENOMEM;

+ if (start > vma->vm_start)
+ prev = vma;
+
for (nstart = start ; ; ) {
unsigned int newflags;

@@ -81,18 +99,19 @@ static int do_mlock(unsigned long start,
if (!on)
newflags &= ~VM_LOCKED;

- if (vma->vm_end >= end) {
- error = mlock_fixup(vma, nstart, end, newflags);
- break;
- }
-
tmp = vma->vm_end;
- next = vma->vm_next;
- error = mlock_fixup(vma, nstart, tmp, newflags);
+ if (tmp > end)
+ tmp = end;
+ error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
if (error)
break;
nstart = tmp;
- vma = next;
+ if (nstart < prev->vm_end)
+ nstart = prev->vm_end;
+ if (nstart >= end)
+ break;
+
+ vma = prev->vm_next;
if (!vma || vma->vm_start != nstart) {
error = -ENOMEM;
break;
@@ -141,7 +160,7 @@ asmlinkage long sys_munlock(unsigned lon

static int do_mlockall(int flags)
{
- struct vm_area_struct * vma;
+ struct vm_area_struct * vma, * prev = NULL;
unsigned int def_flags = 0;

if (flags & MCL_FUTURE)
@@ -150,7 +169,7 @@ static int do_mlockall(int flags)
if (flags == MCL_FUTURE)
goto out;

- for (vma = current->mm->mmap; vma ; vma = vma->vm_next) {
+ for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
unsigned int newflags;

newflags = vma->vm_flags | VM_LOCKED;
@@ -158,7 +177,7 @@ static int do_mlockall(int flags)
newflags &= ~VM_LOCKED;

/* Ignore errors */
- mlock_fixup(vma, vma->vm_start, vma->vm_end, newflags);
+ mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
}
out:
return 0;
===== mm/mprotect.c 1.40 vs edited =====
--- 1.40/mm/mprotect.c 2004-12-22 01:31:46 -08:00
+++ edited/mm/mprotect.c 2005-02-28 11:09:39 -08:00
@@ -185,16 +185,13 @@ mprotect_fixup(struct vm_area_struct *vm
goto success;
}

+ *pprev = vma;
+
if (start != vma->vm_start) {
error = split_vma(mm, vma, start, 1);
if (error)
goto fail;
}
- /*
- * Unless it returns an error, this function always sets *pprev to
- * the first vma for which vma->vm_end >= end.
- */
- *pprev = vma;

if (end != vma->vm_end) {
error = split_vma(mm, vma, end, 0);

2005-02-28 20:55:23

by Hugh Dickins

[permalink] [raw]
Subject: Re: [PATCH] allow vma merging with mlock et. al.

On Mon, 28 Feb 2005, Chris Wright wrote:
>
> Successive mlock/munlock calls can leave fragmented vmas because they can
> be split but not merged. Give mlock et. al. full vma merging support.
> While we're at it, move *pprev assignment above first split_vma in
> mprotect_fixup to keep it in step with mlock_fixup (which for mlockall
> ignores errors yet still needs a valid prev pointer).
>
> Signed-off-by: Chris Wright <[email protected]>

Acked-by: Hugh Dickins <[email protected]>