VMAs with the VM_GROWSDOWN or VM_GROWSUP flag set can change their size
under mmap_read_lock(). This can lead to a race with __do_munmap():
  Thread A                      Thread B
  __do_munmap()
    detach_vmas_to_be_unmapped()
    mmap_write_downgrade()
                                expand_downwards()
                                  vma->vm_start = address;
                                  // The VMA now overlaps with
                                  // VMAs detached by Thread A
                                // page fault populates expanded part
                                // of the VMA
    unmap_region()
      // Zaps pagetables partly
      // populated by Thread B
A similar race exists for expand_upwards().
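To make the window concrete, here is a hypothetical userspace sketch of
the racing pattern (an illustration, not the reported reproducer; the
names and the 4096-byte page size are assumptions): one thread munmap()s
the range directly below a MAP_GROWSDOWN mapping while another thread
touches an address just under its vm_start, asking the kernel to expand
the mapping downwards into the range being unmapped. Actually hitting
the window depends on kernel-internal timing; if the fault lands before
munmap() detaches the VMAs below, the write simply hits the still-mapped
page and nothing interesting happens, so a real reproducer would have to
loop.

#define _GNU_SOURCE
#include <pthread.h>
#include <sys/mman.h>

#define PAGE 4096UL	/* assumed page size for this sketch */

static char *grows;	/* bottom of the MAP_GROWSDOWN mapping */

static void *fault_below(void *arg)
{
	(void)arg;
	/* Thread B: fault below vm_start, triggering expand_downwards() */
	grows[-1] = 1;
	return NULL;
}

int main(void)
{
	pthread_t t;
	char *base;

	/* Reserve a window, then overlay a MAP_GROWSDOWN page at its top */
	base = mmap(NULL, 16 * PAGE, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (base == MAP_FAILED)
		return 1;
	grows = mmap(base + 15 * PAGE, PAGE, PROT_READ | PROT_WRITE,
		     MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_GROWSDOWN,
		     -1, 0);
	if (grows == MAP_FAILED)
		return 1;

	if (pthread_create(&t, NULL, fault_below, NULL))
		return 1;
	/* Thread A: unmap the range the VMA above may expand into */
	munmap(base, 15 * PAGE);
	pthread_join(t, NULL);
	return 0;
}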
The fix is to avoid downgrading mmap_lock in __do_munmap() if the
detached VMAs are next to a VM_GROWSDOWN or VM_GROWSUP VMA. After
detach_vmas_to_be_unmapped(), 'prev' is the last VMA below the hole and
'vma' the first one above it, so either neighbour could still expand
into the range about to be zapped.
Signed-off-by: Kirill A. Shutemov <[email protected]>
Reported-by: Jann Horn <[email protected]>
Fixes: dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap")
Cc: <[email protected]> # 4.20
Cc: Yang Shi <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: Oleg Nesterov <[email protected]>
Cc: Matthew Wilcox <[email protected]>
---
mm/mmap.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index 59a4682ebf3f..71df4b36b42a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2620,7 +2620,7 @@ static void unmap_region(struct mm_struct *mm,
* Create a list of vma's touched by the unmap, removing them from the mm's
* vma list as we go..
*/
-static void
+static bool
detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
struct vm_area_struct *prev, unsigned long end)
{
@@ -2645,6 +2645,17 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
/* Kill the cache */
vmacache_invalidate(mm);
+
+ /*
+ * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
+ * VM_GROWSUP VMA. Such VMAs can change their size under
+ * mmap_read_lock() and collide with the VMA we are about to unmap.
+ */
+ if (vma && (vma->vm_flags & VM_GROWSDOWN))
+ return false;
+ if (prev && (prev->vm_flags & VM_GROWSUP))
+ return false;
+ return true;
}
/*
@@ -2825,7 +2836,8 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
}
/* Detach vmas from rbtree */
- detach_vmas_to_be_unmapped(mm, vma, prev, end);
+ if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
+ downgrade = false;
if (downgrade)
mmap_write_downgrade(mm);
--
2.26.2
On 7/9/20 12:53 PM, Kirill A. Shutemov wrote:
> VMAs with the VM_GROWSDOWN or VM_GROWSUP flag set can change their size
> under mmap_read_lock(). This can lead to a race with __do_munmap():
[...]
Acked-by: Vlastimil Babka <[email protected]>
Thanks!
On 7/9/20 3:53 AM, Kirill A. Shutemov wrote:
> VMAs with the VM_GROWSDOWN or VM_GROWSUP flag set can change their size
> under mmap_read_lock(). This can lead to a race with __do_munmap():
[...]
> The fix is to avoid downgrading mmap_lock in __do_munmap() if the
> detached VMAs are next to a VM_GROWSDOWN or VM_GROWSUP VMA.
Thanks for catching this. The fix makes sense to me.

Reviewed-by: Yang Shi <[email protected]>