Hello,
write_opcode() cleanups resend + new minor fix.
Changes:
- document the new argument in 2/5.
- drop the buggy 5/5, thanks Anton for your quick nack.
Probably I'll return to this later, I have another reason
for this change.
- so this 5/5 is new.
Srikar, please add your acks unless you have some objections.
Oleg.
write_opcode() rechecks valid_vma() and ->f_mapping, this is pointless.
The caller, register_for_each_vma() or uprobe_mmap(), has already done
these checks under mmap_sem.
To clarify, uprobe_mmap() checks valid_vma() only, but we can rely on
build_probe_list(vm_file->f_mapping->host).
Signed-off-by: Oleg Nesterov <[email protected]>
---
kernel/events/uprobes.c | 19 +------------------
1 files changed, 1 insertions(+), 18 deletions(-)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index f935327..a2b32a5 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -206,33 +206,16 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
unsigned long vaddr, uprobe_opcode_t opcode)
{
struct page *old_page, *new_page;
- struct address_space *mapping;
void *vaddr_old, *vaddr_new;
struct vm_area_struct *vma;
- struct uprobe *uprobe;
int ret;
+
retry:
/* Read the page with vaddr into memory */
ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma);
if (ret <= 0)
return ret;
- ret = -EINVAL;
-
- /*
- * We are interested in text pages only. Our pages of interest
- * should be mapped for read and execute only. We desist from
- * adding probes in write mapped pages since the breakpoints
- * might end up in the file copy.
- */
- if (!valid_vma(vma, is_swbp_insn(&opcode)))
- goto put_out;
-
- uprobe = container_of(auprobe, struct uprobe, arch);
- mapping = uprobe->inode->i_mapping;
- if (mapping != vma->vm_file->f_mapping)
- goto put_out;
-
ret = -ENOMEM;
new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
if (!new_page)
--
1.5.5.1
page_address_in_vma(old_page) in __replace_page() is ugly and wrong.
The caller already knows the correct virtual address, this page was
found by get_user_pages(vaddr).
However, page_address_in_vma() can actually fail if page->mapping was
cleared by __delete_from_page_cache() after get_user_pages() returns.
But this means the race with page reclaim, write_opcode() should not
fail, it should retry and read this page again. Not sure this race is
really possible though, page_freeze_refs() logic should prevent it.
We could change __replace_page() to return -EAGAIN in this case, but
it would be better to simply use the caller's vaddr and rely on
page_check_address().
Signed-off-by: Oleg Nesterov <[email protected]>
---
kernel/events/uprobes.c | 11 ++++-------
1 files changed, 4 insertions(+), 7 deletions(-)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index a2b32a5..6fda799 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -127,22 +127,19 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)
* based on replace_page in mm/ksm.c
*
* @vma: vma that holds the pte pointing to page
+ * @addr: address the old @page is mapped at
* @page: the cowed page we are replacing by kpage
* @kpage: the modified page we replace page by
*
* Returns 0 on success, -EFAULT on failure.
*/
-static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage)
+static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
+ struct page *page, struct page *kpage)
{
struct mm_struct *mm = vma->vm_mm;
- unsigned long addr;
spinlock_t *ptl;
pte_t *ptep;
- addr = page_address_in_vma(page, vma);
- if (addr == -EFAULT)
- return -EFAULT;
-
ptep = page_check_address(page, mm, addr, &ptl, 0);
if (!ptep)
return -EAGAIN;
@@ -243,7 +240,7 @@ retry:
goto unlock_out;
lock_page(new_page);
- ret = __replace_page(vma, old_page, new_page);
+ ret = __replace_page(vma, vaddr, old_page, new_page);
unlock_page(new_page);
unlock_out:
--
1.5.5.1
write_opcode() does lock_page(new_page) for no reason. Nobody can
see this page until __replace_page() exposes it under ptl lock, and
we do nothing with this page after pte_unmap_unlock().
If nothing else, the similar code in do_wp_page() doesn't lock the
new page for page_add_new_anon_rmap/set_pte_at_notify.
Signed-off-by: Oleg Nesterov <[email protected]>
---
kernel/events/uprobes.c | 2 --
1 files changed, 0 insertions(+), 2 deletions(-)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6fda799..23c562b 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -239,9 +239,7 @@ retry:
if (ret)
goto unlock_out;
- lock_page(new_page);
ret = __replace_page(vma, vaddr, old_page, new_page);
- unlock_page(new_page);
unlock_out:
unlock_page(old_page);
--
1.5.5.1
The comment above write_opcode()->lock_page(old_page) tells about
the race with do_wp_page(). I don't really understand which exactly
race it means, but afaics this lock_page() was not enough to close
all races with do_wp_page().
Anyway, since 77fc4af1 this code is always called with ->mmap_sem
hold for writing so we can forget about do_wp_page().
However, we can't simply remove this lock_page(), and the only
(afaics) reason is __replace_page()->try_to_free_swap().
Nothing in write_opcode() needs it, move it into __replace_page()
and fix the comment.
Signed-off-by: Oleg Nesterov <[email protected]>
---
kernel/events/uprobes.c | 27 ++++++++++++++-------------
1 files changed, 14 insertions(+), 13 deletions(-)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 23c562b..5db150b 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -139,10 +139,15 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
struct mm_struct *mm = vma->vm_mm;
spinlock_t *ptl;
pte_t *ptep;
+ int err;
+ /* freeze PageSwapCache() for try_to_free_swap() below */
+ lock_page(page);
+
+ err = -EAGAIN;
ptep = page_check_address(page, mm, addr, &ptl, 0);
if (!ptep)
- return -EAGAIN;
+ goto unlock;
get_page(kpage);
page_add_new_anon_rmap(kpage, vma, addr);
@@ -162,7 +167,10 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
put_page(page);
pte_unmap_unlock(ptep, ptl);
- return 0;
+ err = 0;
+ unlock:
+ unlock_page(page);
+ return err;
}
/**
@@ -216,15 +224,10 @@ retry:
ret = -ENOMEM;
new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
if (!new_page)
- goto put_out;
+ goto put_old;
__SetPageUptodate(new_page);
- /*
- * lock page will serialize against do_wp_page()'s
- * PageAnon() handling
- */
- lock_page(old_page);
/* copy the page now that we've got it stable */
vaddr_old = kmap_atomic(old_page);
vaddr_new = kmap_atomic(new_page);
@@ -237,15 +240,13 @@ retry:
ret = anon_vma_prepare(vma);
if (ret)
- goto unlock_out;
+ goto put_new;
ret = __replace_page(vma, vaddr, old_page, new_page);
-unlock_out:
- unlock_page(old_page);
+put_new:
page_cache_release(new_page);
-
-put_out:
+put_old:
put_page(old_page);
if (unlikely(ret == -EAGAIN))
--
1.5.5.1
Like do_wp_page(), __replace_page() should do munlock_vma_page()
for the case when the old page still has other !VM_LOCKED mappings.
Unfortunately this needs mm/internal.h.
Also, move put_page() outside of ptl lock. This doesn't really
matter but looks better.
Signed-off-by: Oleg Nesterov <[email protected]>
---
kernel/events/uprobes.c | 8 ++++++--
1 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 5db150b..889c62b 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -32,6 +32,7 @@
#include <linux/swap.h> /* try_to_free_swap */
#include <linux/ptrace.h> /* user_enable_single_step */
#include <linux/kdebug.h> /* notifier mechanism */
+#include "../../mm/internal.h" /* munlock_vma_page */
#include <linux/uprobes.h>
@@ -141,7 +142,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
pte_t *ptep;
int err;
- /* freeze PageSwapCache() for try_to_free_swap() below */
+ /* For try_to_free_swap() and munlock_vma_page() below */
lock_page(page);
err = -EAGAIN;
@@ -164,9 +165,12 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
page_remove_rmap(page);
if (!page_mapped(page))
try_to_free_swap(page);
- put_page(page);
pte_unmap_unlock(ptep, ptl);
+ if (vma->vm_flags & VM_LOCKED)
+ munlock_vma_page(page);
+ put_page(page);
+
err = 0;
unlock:
unlock_page(page);
--
1.5.5.1
On Sun, Jun 24, 2012 at 05:01:11PM +0200, Oleg Nesterov wrote:
> Like do_wp_page(), __replace_page() should do munlock_vma_page()
> for the case when the old page still has other !VM_LOCKED mappings.
> Unfortunately this needs mm/internal.h.
>
> Also, move put_page() outside of ptl lock. This doesn't really
> matter but looks better.
>
> Signed-off-by: Oleg Nesterov <[email protected]>
> ---
> kernel/events/uprobes.c | 8 ++++++--
> 1 files changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index 5db150b..889c62b 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -32,6 +32,7 @@
> #include <linux/swap.h> /* try_to_free_swap */
> #include <linux/ptrace.h> /* user_enable_single_step */
> #include <linux/kdebug.h> /* notifier mechanism */
> +#include "../../mm/internal.h" /* munlock_vma_page */
We have vma_adress() defined in internal.h, under #ifdef CONFIG_TRANSPARENT_HUGEPAGE .
NAK. :-)
Anton.
>
> #include <linux/uprobes.h>
>
> @@ -141,7 +142,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
> pte_t *ptep;
> int err;
>
> - /* freeze PageSwapCache() for try_to_free_swap() below */
> + /* For try_to_free_swap() and munlock_vma_page() below */
> lock_page(page);
>
> err = -EAGAIN;
> @@ -164,9 +165,12 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
> page_remove_rmap(page);
> if (!page_mapped(page))
> try_to_free_swap(page);
> - put_page(page);
> pte_unmap_unlock(ptep, ptl);
>
> + if (vma->vm_flags & VM_LOCKED)
> + munlock_vma_page(page);
> + put_page(page);
> +
> err = 0;
> unlock:
> unlock_page(page);
> --
> 1.5.5.1
>
>
On 06/26, Anton Arapov wrote:
>
> On Sun, Jun 24, 2012 at 05:01:11PM +0200, Oleg Nesterov wrote:
> > Like do_wp_page(), __replace_page() should do munlock_vma_page()
> > for the case when the old page still has other !VM_LOCKED mappings.
> > Unfortunately this needs mm/internal.h.
> >
> > --- a/kernel/events/uprobes.c
> > +++ b/kernel/events/uprobes.c
> > @@ -32,6 +32,7 @@
> > #include <linux/swap.h> /* try_to_free_swap */
> > #include <linux/ptrace.h> /* user_enable_single_step */
> > #include <linux/kdebug.h> /* notifier mechanism */
> > +#include "../../mm/internal.h" /* munlock_vma_page */
>
> We have vma_adress() defined in internal.h, under #ifdef CONFIG_TRANSPARENT_HUGEPAGE .
>
> NAK. :-)
Damn you Anton ;) You seem to dislike number 5...
OK, please ignore this patch.
Can't resist. I swear, I specially checked mm/internal.h to ensure
it doesn't export mm/rmap.c:vma_address(), can't understand how I
didn't notice this declaration.
Oleg.