set_mempolicy_home_node() iterates over a list of VMAs and calls
mbind_range() on each VMA, which in turn iterates over the single-VMA
range passed in and potentially splits the VMA. Since the VMA
iterator is not passed through, the iterator in set_mempolicy_home_node()
may now point to a stale node in the VMA tree. This can result in a UAF
as reported by syzbot.
Avoid the stale maple tree node by passing the VMA iterator through to
the underlying call to split_vma().
mbind_range() is also overly complicated, since there are two calling
functions and one already handles iterating over the VMAs. Simplify
mbind_range() to only handle merging and splitting of the VMAs.
Align the new loop in do_mbind() and existing loop in
set_mempolicy_home_node() to use the reduced mbind_range() function.
This allows for a single location of the range calculation and avoids
constantly looking up the previous VMA (since this is a loop over the
VMAs).
Link: https://lore.kernel.org/linux-mm/[email protected]/
Reported-and-tested-by: [email protected]
Fixes: 66850be55e8e ("mm/mempolicy: use vma iterator & maple state instead of vma linked list")
Cc: <[email protected]>
Signed-off-by: Liam R. Howlett <[email protected]>
---
mm/mempolicy.c | 104 +++++++++++++++++++++++--------------------------
1 file changed, 49 insertions(+), 55 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a256a241fd1d..2068b594dc88 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -790,61 +790,50 @@ static int vma_replace_policy(struct vm_area_struct *vma,
return err;
}
-/* Step 2: apply policy to a range and do splits. */
-static int mbind_range(struct mm_struct *mm, unsigned long start,
- unsigned long end, struct mempolicy *new_pol)
+/* Split or merge the VMA (if required) and apply the new policy */
+static int mbind_range(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ struct vm_area_struct **prev, unsigned long start,
+ unsigned long end, struct mempolicy *new_pol)
{
- VMA_ITERATOR(vmi, mm, start);
- struct vm_area_struct *prev;
- struct vm_area_struct *vma;
- int err = 0;
+ struct vm_area_struct *merged;
+ unsigned long vmstart, vmend;
pgoff_t pgoff;
+ int err;
- prev = vma_prev(&vmi);
- vma = vma_find(&vmi, end);
- if (WARN_ON(!vma))
+ vmend = min(end, vma->vm_end);
+ if (start > vma->vm_start) {
+ *prev = vma;
+ vmstart = start;
+ } else {
+ vmstart = vma->vm_start;
+ }
+
+ if (mpol_equal(vma_policy(vma), new_pol))
return 0;
- if (start > vma->vm_start)
- prev = vma;
-
- do {
- unsigned long vmstart = max(start, vma->vm_start);
- unsigned long vmend = min(end, vma->vm_end);
-
- if (mpol_equal(vma_policy(vma), new_pol))
- goto next;
-
- pgoff = vma->vm_pgoff +
- ((vmstart - vma->vm_start) >> PAGE_SHIFT);
- prev = vma_merge(&vmi, mm, prev, vmstart, vmend, vma->vm_flags,
- vma->anon_vma, vma->vm_file, pgoff,
- new_pol, vma->vm_userfaultfd_ctx,
- anon_vma_name(vma));
- if (prev) {
- vma = prev;
- goto replace;
- }
- if (vma->vm_start != vmstart) {
- err = split_vma(&vmi, vma, vmstart, 1);
- if (err)
- goto out;
- }
- if (vma->vm_end != vmend) {
- err = split_vma(&vmi, vma, vmend, 0);
- if (err)
- goto out;
- }
-replace:
- err = vma_replace_policy(vma, new_pol);
+ pgoff = vma->vm_pgoff + ((vmstart - vma->vm_start) >> PAGE_SHIFT);
+ merged = vma_merge(vmi, vma->vm_mm, *prev, vmstart, vmend, vma->vm_flags,
+ vma->anon_vma, vma->vm_file, pgoff, new_pol,
+ vma->vm_userfaultfd_ctx, anon_vma_name(vma));
+ if (merged) {
+ *prev = merged;
+ return vma_replace_policy(merged, new_pol);
+ }
+
+ if (vma->vm_start != vmstart) {
+ err = split_vma(vmi, vma, vmstart, 1);
if (err)
- goto out;
-next:
- prev = vma;
- } for_each_vma_range(vmi, vma, end);
+ return err;
+ }
-out:
- return err;
+ if (vma->vm_end != vmend) {
+ err = split_vma(vmi, vma, vmend, 0);
+ if (err)
+ return err;
+ }
+
+ *prev = vma;
+ return vma_replace_policy(vma, new_pol);
}
/* Set the process memory policy */
@@ -1259,6 +1248,8 @@ static long do_mbind(unsigned long start, unsigned long len,
nodemask_t *nmask, unsigned long flags)
{
struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma, *prev;
+ struct vma_iterator vmi;
struct mempolicy *new;
unsigned long end;
int err;
@@ -1328,7 +1319,13 @@ static long do_mbind(unsigned long start, unsigned long len,
goto up_out;
}
- err = mbind_range(mm, start, end, new);
+ vma_iter_init(&vmi, mm, start);
+ prev = vma_prev(&vmi);
+ for_each_vma_range(vmi, vma, end) {
+ err = mbind_range(&vmi, vma, &prev, start, end, new);
+ if (err)
+ break;
+ }
if (!err) {
int nr_failed = 0;
@@ -1489,10 +1486,8 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
unsigned long, home_node, unsigned long, flags)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma, *prev;
struct mempolicy *new, *old;
- unsigned long vmstart;
- unsigned long vmend;
unsigned long end;
int err = -ENOENT;
VMA_ITERATOR(vmi, mm, start);
@@ -1521,6 +1516,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
if (end == start)
return 0;
mmap_write_lock(mm);
+ prev = vma_prev(&vmi);
for_each_vma_range(vmi, vma, end) {
/*
* If any vma in the range got policy other than MPOL_BIND
@@ -1541,9 +1537,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
}
new->home_node = home_node;
- vmstart = max(start, vma->vm_start);
- vmend = min(end, vma->vm_end);
- err = mbind_range(mm, vmstart, vmend, new);
+ err = mbind_range(&vmi, vma, &prev, start, end, new);
mpol_put(new);
if (err)
break;
--
2.39.2
"Liam R. Howlett" <[email protected]> writes:
> set_mempolicy_home_node() iterates over a list of VMAs and calls
> mbind_range() on each VMA, which also iterates over the singular list of
> the VMA passed in and potentially splits the VMA. Since the VMA
> iterator is not passed through, set_mempolicy_home_node() may now point
> to a stale node in the VMA tree. This can result in a UAF as reported
> by syzbot.
>
> Avoid the stale maple tree node by passing the VMA iterator through to
> the underlying call to split_vma().
>
> mbind_range() is also overly complicated, since there are two calling
> functions and one already handles iterating over the VMAs. Simplify
> mbind_range() to only handle merging and splitting of the VMAs.
>
> Align the new loop in do_mbind() and existing loop in
> set_mempolicy_home_node() to use the reduced mbind_range() function.
> This allows for a single location of the range calculation and avoids
> constantly looking up the previous VMA (since this is a loop over the
> VMAs).
>
> Link: https://lore.kernel.org/linux-mm/[email protected]/
> Reported-and-tested-by: [email protected]
> Fixes: 66850be55e8e ("mm/mempolicy: use vma iterator & maple state instead of vma linked list")
> Cc: <[email protected]>
> Signed-off-by: Liam R. Howlett <[email protected]>
> ---
This breaks the vma02 testcase from ltp on s390:
~ # ./vma02
vma02 0 TINFO : pid = 617 addr = 0x3ff8f673000
vma02 0 TINFO : start = 0x3ff8f673000, end = 0x3ff8f674000
vma02 0 TINFO : start = 0x3ff8f674000, end = 0x3ff8f675000
vma02 0 TINFO : start = 0x3ff8f675000, end = 0x3ff8f676000
vma02 1 TFAIL : vma02.c:144: >1 unmerged VMAs.
When this happens the following VM_WARN_ON() is triggered:
[ 25.628747] ------------[ cut here ]------------
[ 25.628821] WARNING: CPU: 0 PID: 617 at mm/mmap.c:922 vma_merge+0x7ca/0x970
[ 25.628834] Modules linked in:
[ 25.628841] CPU: 0 PID: 617 Comm: vma02 Not tainted 6.3.0-09574-g285e1dccc348 #108
[ 25.628846] Hardware name: IBM 3906 M04 704 (z/VM 7.1.0)
[ 25.628850] Krnl PSW : 0704e00180000000 00000000004317b6 (vma_merge+0x7ce/0x970)
[ 25.628859] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3
[ 25.628866] Krnl GPRS: 000003ff8f62d000 000003ff8f674000 000000008e7cb908 000000000131f0f4
[ 25.628871] 0000000000000001 0000038000bdba1c 000003ff8f675000 000000008e7cb908
[ 25.628876] 000000008e7cb908 000003ff8f674000 000003ff8f675000 000000008e7ca308
[ 25.628881] 0000000000000001 000000003ff8f674 0000000000431048 0000038000bdbb08
[ 25.628891] Krnl Code: 00000000004317aa: a7f4fcea brc 15,000000000043117e
[ 25.628891] 00000000004317ae: b9040087 lgr %r8,%r7
[ 25.628891] #00000000004317b2: af000000 mc 0,0
[ 25.628891] >00000000004317b6: a7f4fc5f brc 15,0000000000431074
[ 25.628891] 00000000004317ba: ec38fd30007c cgij %r3,0,8,000000000043121a
[ 25.628891] 00000000004317c0: e31031d80004 lg %r1,472(%r3)
[ 25.628891] 00000000004317c6: e310f0f80024 stg %r1,248(%r15)
[ 25.628891] 00000000004317cc: a7f4fd27 brc 15,000000000043121a
[ 25.628970] Call Trace:
[ 25.628974] [<00000000004317b6>] vma_merge+0x7ce/0x970
[ 25.628979] ([<0000000000431048>] vma_merge+0x60/0x970)
[ 25.628985] [<0000000000482e5a>] mbind_range+0x13a/0x1c8
[ 25.628990] [<0000000000483448>] do_mbind+0x2c8/0x448
[ 25.629013] [<0000000000483662>] kernel_mbind+0x9a/0xb8
[ 25.629018] [<0000000000483724>] __s390x_sys_mbind+0x4c/0x58
[ 25.629023] [<0000000000f38c3a>] __do_syscall+0x1da/0x208
[ 25.629030] [<0000000000f4ed40>] system_call+0x70/0x98
[ 25.629037] 1 lock held by vma02/617:
[ 25.629042] #0: 000000008c570ca8 (&mm->mmap_lock){++++}-{3:3}, at: do_mbind+0x102/0x448
[ 25.629055] Last Breaking-Event-Address:
[ 25.629058] [<0000000000431070>] vma_merge+0x88/0x970
[ 25.629066] irq event stamp: 12559
[ 25.629070] hardirqs last enabled at (12567): [<00000000001fcc6c>] __up_console_sem+0x8c/0xc0
[ 25.629080] hardirqs last disabled at (12574): [<00000000001fcc4e>] __up_console_sem+0x6e/0xc0
[ 25.629086] softirqs last enabled at (10076): [<0000000000f516e2>] __do_softirq+0x512/0x618
[ 25.629123] softirqs last disabled at (10053): [<0000000000162bbe>] __irq_exit_rcu+0x13e/0x170
[ 25.629132] ---[ end trace 0000000000000000 ]---
which is:
/* verify some invariant that must be enforced by the caller */
VM_WARN_ON(prev && addr <= prev->vm_start);
--> VM_WARN_ON(mid && end > mid->vm_end);
VM_WARN_ON(addr >= end);
Any thoughts?
Thanks
Sven
* Sven Schnelle <[email protected]> [230427 02:53]:
> "Liam R. Howlett" <[email protected]> writes:
>
> > set_mempolicy_home_node() iterates over a list of VMAs and calls
> > mbind_range() on each VMA, which also iterates over the singular list of
> > the VMA passed in and potentially splits the VMA. Since the VMA
> > iterator is not passed through, set_mempolicy_home_node() may now point
> > to a stale node in the VMA tree. This can result in a UAF as reported
> > by syzbot.
> >
> > Avoid the stale maple tree node by passing the VMA iterator through to
> > the underlying call to split_vma().
> >
> > mbind_range() is also overly complicated, since there are two calling
> > functions and one already handles iterating over the VMAs. Simplify
> > mbind_range() to only handle merging and splitting of the VMAs.
> >
> > Align the new loop in do_mbind() and existing loop in
> > set_mempolicy_home_node() to use the reduced mbind_range() function.
> > This allows for a single location of the range calculation and avoids
> > constantly looking up the previous VMA (since this is a loop over the
> > VMAs).
> >
> > Link: https://lore.kernel.org/linux-mm/[email protected]/
> > Reported-and-tested-by: [email protected]
> > Fixes: 66850be55e8e ("mm/mempolicy: use vma iterator & maple state instead of vma linked list")
> > Cc: <[email protected]>
> > Signed-off-by: Liam R. Howlett <[email protected]>
> > ---
>
> This breaks the vma02 testcase from ltp on s390:
>
> ~ # ./vma02
> vma02 0 TINFO : pid = 617 addr = 0x3ff8f673000
> vma02 0 TINFO : start = 0x3ff8f673000, end = 0x3ff8f674000
> vma02 0 TINFO : start = 0x3ff8f674000, end = 0x3ff8f675000
> vma02 0 TINFO : start = 0x3ff8f675000, end = 0x3ff8f676000
> vma02 1 TFAIL : vma02.c:144: >1 unmerged VMAs.
>
> When this happens the following VM_WARN_ON() is triggered:
>
> [ 25.628747] ------------[ cut here ]------------
> [ 25.628821] WARNING: CPU: 0 PID: 617 at mm/mmap.c:922 vma_merge+0x7ca/0x970
> [ 25.628834] Modules linked in:
> [ 25.628841] CPU: 0 PID: 617 Comm: vma02 Not tainted 6.3.0-09574-g285e1dccc348 #108
> [ 25.628846] Hardware name: IBM 3906 M04 704 (z/VM 7.1.0)
> [ 25.628850] Krnl PSW : 0704e00180000000 00000000004317b6 (vma_merge+0x7ce/0x970)
> [ 25.628859] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3
> [ 25.628866] Krnl GPRS: 000003ff8f62d000 000003ff8f674000 000000008e7cb908 000000000131f0f4
> [ 25.628871] 0000000000000001 0000038000bdba1c 000003ff8f675000 000000008e7cb908
> [ 25.628876] 000000008e7cb908 000003ff8f674000 000003ff8f675000 000000008e7ca308
> [ 25.628881] 0000000000000001 000000003ff8f674 0000000000431048 0000038000bdbb08
> [ 25.628891] Krnl Code: 00000000004317aa: a7f4fcea brc 15,000000000043117e
> [ 25.628891] 00000000004317ae: b9040087 lgr %r8,%r7
> [ 25.628891] #00000000004317b2: af000000 mc 0,0
> [ 25.628891] >00000000004317b6: a7f4fc5f brc 15,0000000000431074
> [ 25.628891] 00000000004317ba: ec38fd30007c cgij %r3,0,8,000000000043121a
> [ 25.628891] 00000000004317c0: e31031d80004 lg %r1,472(%r3)
> [ 25.628891] 00000000004317c6: e310f0f80024 stg %r1,248(%r15)
> [ 25.628891] 00000000004317cc: a7f4fd27 brc 15,000000000043121a
> [ 25.628970] Call Trace:
> [ 25.628974] [<00000000004317b6>] vma_merge+0x7ce/0x970
> [ 25.628979] ([<0000000000431048>] vma_merge+0x60/0x970)
> [ 25.628985] [<0000000000482e5a>] mbind_range+0x13a/0x1c8
> [ 25.628990] [<0000000000483448>] do_mbind+0x2c8/0x448
> [ 25.629013] [<0000000000483662>] kernel_mbind+0x9a/0xb8
> [ 25.629018] [<0000000000483724>] __s390x_sys_mbind+0x4c/0x58
> [ 25.629023] [<0000000000f38c3a>] __do_syscall+0x1da/0x208
> [ 25.629030] [<0000000000f4ed40>] system_call+0x70/0x98
> [ 25.629037] 1 lock held by vma02/617:
> [ 25.629042] #0: 000000008c570ca8 (&mm->mmap_lock){++++}-{3:3}, at: do_mbind+0x102/0x448
> [ 25.629055] Last Breaking-Event-Address:
> [ 25.629058] [<0000000000431070>] vma_merge+0x88/0x970
> [ 25.629066] irq event stamp: 12559
> [ 25.629070] hardirqs last enabled at (12567): [<00000000001fcc6c>] __up_console_sem+0x8c/0xc0
> [ 25.629080] hardirqs last disabled at (12574): [<00000000001fcc4e>] __up_console_sem+0x6e/0xc0
> [ 25.629086] softirqs last enabled at (10076): [<0000000000f516e2>] __do_softirq+0x512/0x618
> [ 25.629123] softirqs last disabled at (10053): [<0000000000162bbe>] __irq_exit_rcu+0x13e/0x170
> [ 25.629132] ---[ end trace 0000000000000000 ]---
>
> which is:
>
> /* verify some invariant that must be enforced by the caller */
> VM_WARN_ON(prev && addr <= prev->vm_start);
> --> VM_WARN_ON(mid && end > mid->vm_end);
> VM_WARN_ON(addr >= end);
>
> Any thoughts?
No thoughts that I should share.
I will have to boot my s390 (vm) and have a look.
Thanks for letting me know.
Regards,
Liam
On Thu, Apr 27, 2023 at 01:32:47PM -0400, Liam R. Howlett wrote:
> * Sven Schnelle <[email protected]> [230427 02:53]:
> > "Liam R. Howlett" <[email protected]> writes:
> >
> > > set_mempolicy_home_node() iterates over a list of VMAs and calls
> > > mbind_range() on each VMA, which also iterates over the singular list of
> > > the VMA passed in and potentially splits the VMA. Since the VMA
> > > iterator is not passed through, set_mempolicy_home_node() may now point
> > > to a stale node in the VMA tree. This can result in a UAF as reported
> > > by syzbot.
> > >
> > > Avoid the stale maple tree node by passing the VMA iterator through to
> > > the underlying call to split_vma().
> > >
> > > mbind_range() is also overly complicated, since there are two calling
> > > functions and one already handles iterating over the VMAs. Simplify
> > > mbind_range() to only handle merging and splitting of the VMAs.
> > >
> > > Align the new loop in do_mbind() and existing loop in
> > > set_mempolicy_home_node() to use the reduced mbind_range() function.
> > > This allows for a single location of the range calculation and avoids
> > > constantly looking up the previous VMA (since this is a loop over the
> > > VMAs).
> > >
> > > Link: https://lore.kernel.org/linux-mm/[email protected]/
> > > Reported-and-tested-by: [email protected]
> > > Fixes: 66850be55e8e ("mm/mempolicy: use vma iterator & maple state instead of vma linked list")
> > > Cc: <[email protected]>
> > > Signed-off-by: Liam R. Howlett <[email protected]>
> > > ---
> >
> > This breaks the vma02 testcase from ltp on s390:
> >
> > ~ # ./vma02
> > vma02 0 TINFO : pid = 617 addr = 0x3ff8f673000
> > vma02 0 TINFO : start = 0x3ff8f673000, end = 0x3ff8f674000
> > vma02 0 TINFO : start = 0x3ff8f674000, end = 0x3ff8f675000
> > vma02 0 TINFO : start = 0x3ff8f675000, end = 0x3ff8f676000
> > vma02 1 TFAIL : vma02.c:144: >1 unmerged VMAs.
> >
> > When this happens the following VM_WARN_ON() is triggered:
> >
> > [ 25.628747] ------------[ cut here ]------------
> > [ 25.628821] WARNING: CPU: 0 PID: 617 at mm/mmap.c:922 vma_merge+0x7ca/0x970
> > [ 25.628834] Modules linked in:
> > [ 25.628841] CPU: 0 PID: 617 Comm: vma02 Not tainted 6.3.0-09574-g285e1dccc348 #108
> > [ 25.628846] Hardware name: IBM 3906 M04 704 (z/VM 7.1.0)
> > [ 25.628850] Krnl PSW : 0704e00180000000 00000000004317b6 (vma_merge+0x7ce/0x970)
> > [ 25.628859] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3
> > [ 25.628866] Krnl GPRS: 000003ff8f62d000 000003ff8f674000 000000008e7cb908 000000000131f0f4
> > [ 25.628871] 0000000000000001 0000038000bdba1c 000003ff8f675000 000000008e7cb908
> > [ 25.628876] 000000008e7cb908 000003ff8f674000 000003ff8f675000 000000008e7ca308
> > [ 25.628881] 0000000000000001 000000003ff8f674 0000000000431048 0000038000bdbb08
> > [ 25.628891] Krnl Code: 00000000004317aa: a7f4fcea brc 15,000000000043117e
> > [ 25.628891] 00000000004317ae: b9040087 lgr %r8,%r7
> > [ 25.628891] #00000000004317b2: af000000 mc 0,0
> > [ 25.628891] >00000000004317b6: a7f4fc5f brc 15,0000000000431074
> > [ 25.628891] 00000000004317ba: ec38fd30007c cgij %r3,0,8,000000000043121a
> > [ 25.628891] 00000000004317c0: e31031d80004 lg %r1,472(%r3)
> > [ 25.628891] 00000000004317c6: e310f0f80024 stg %r1,248(%r15)
> > [ 25.628891] 00000000004317cc: a7f4fd27 brc 15,000000000043121a
> > [ 25.628970] Call Trace:
> > [ 25.628974] [<00000000004317b6>] vma_merge+0x7ce/0x970
> > [ 25.628979] ([<0000000000431048>] vma_merge+0x60/0x970)
> > [ 25.628985] [<0000000000482e5a>] mbind_range+0x13a/0x1c8
> > [ 25.628990] [<0000000000483448>] do_mbind+0x2c8/0x448
> > [ 25.629013] [<0000000000483662>] kernel_mbind+0x9a/0xb8
> > [ 25.629018] [<0000000000483724>] __s390x_sys_mbind+0x4c/0x58
> > [ 25.629023] [<0000000000f38c3a>] __do_syscall+0x1da/0x208
> > [ 25.629030] [<0000000000f4ed40>] system_call+0x70/0x98
> > [ 25.629037] 1 lock held by vma02/617:
> > [ 25.629042] #0: 000000008c570ca8 (&mm->mmap_lock){++++}-{3:3}, at: do_mbind+0x102/0x448
> > [ 25.629055] Last Breaking-Event-Address:
> > [ 25.629058] [<0000000000431070>] vma_merge+0x88/0x970
> > [ 25.629066] irq event stamp: 12559
> > [ 25.629070] hardirqs last enabled at (12567): [<00000000001fcc6c>] __up_console_sem+0x8c/0xc0
> > [ 25.629080] hardirqs last disabled at (12574): [<00000000001fcc4e>] __up_console_sem+0x6e/0xc0
> > [ 25.629086] softirqs last enabled at (10076): [<0000000000f516e2>] __do_softirq+0x512/0x618
> > [ 25.629123] softirqs last disabled at (10053): [<0000000000162bbe>] __irq_exit_rcu+0x13e/0x170
> > [ 25.629132] ---[ end trace 0000000000000000 ]---
> >
> > which is:
> >
> > /* verify some invariant that must be enforced by the caller */
> > VM_WARN_ON(prev && addr <= prev->vm_start);
> > --> VM_WARN_ON(mid && end > mid->vm_end);
> > VM_WARN_ON(addr >= end);
> >
> > Any thoughts?
>
> No thoughts that I should share.
>
> I will have to boot my s390 (vm) and have a look.
>
> Thanks for letting me know.
>
> Regards,
> Liam
I tracked down what this (almost certainly) was and posted a fix in [1], as
it popped up as a 6.2.y stable bug. It doesn't seem to be arch-specific, so
you can put that s390 down :)
[1]:https://lore.kernel.org/all/db42467a692d78c654ec5c1953329401bd8a9c34.1682859234.git.lstoakes@gmail.com/
Lorenzo Stoakes <[email protected]> writes:
> On Thu, Apr 27, 2023 at 01:32:47PM -0400, Liam R. Howlett wrote:
>> * Sven Schnelle <[email protected]> [230427 02:53]:
>> > "Liam R. Howlett" <[email protected]> writes:
>> >
>> > > set_mempolicy_home_node() iterates over a list of VMAs and calls
>> > > mbind_range() on each VMA, which also iterates over the singular list of
>> > > the VMA passed in and potentially splits the VMA. Since the VMA
>> > > iterator is not passed through, set_mempolicy_home_node() may now point
>> > > to a stale node in the VMA tree. This can result in a UAF as reported
>> > > by syzbot.
>> > >
>> > > Avoid the stale maple tree node by passing the VMA iterator through to
>> > > the underlying call to split_vma().
>> > >
>> > > mbind_range() is also overly complicated, since there are two calling
>> > > functions and one already handles iterating over the VMAs. Simplify
>> > > mbind_range() to only handle merging and splitting of the VMAs.
>> > >
>> > > Align the new loop in do_mbind() and existing loop in
>> > > set_mempolicy_home_node() to use the reduced mbind_range() function.
>> > > This allows for a single location of the range calculation and avoids
>> > > constantly looking up the previous VMA (since this is a loop over the
>> > > VMAs).
>> > >
>> > > Link: https://lore.kernel.org/linux-mm/[email protected]/
>> > > Reported-and-tested-by: [email protected]
>> > > Fixes: 66850be55e8e ("mm/mempolicy: use vma iterator & maple state instead of vma linked list")
>> > > Cc: <[email protected]>
>> > > Signed-off-by: Liam R. Howlett <[email protected]>
>> > > ---
>> >
>> > This breaks the vma02 testcase from ltp on s390:
>> >
>> > ~ # ./vma02
>> > vma02 0 TINFO : pid = 617 addr = 0x3ff8f673000
>> > vma02 0 TINFO : start = 0x3ff8f673000, end = 0x3ff8f674000
>> > vma02 0 TINFO : start = 0x3ff8f674000, end = 0x3ff8f675000
>> > vma02 0 TINFO : start = 0x3ff8f675000, end = 0x3ff8f676000
>> > vma02 1 TFAIL : vma02.c:144: >1 unmerged VMAs.
>> > Any thoughts?
>>
>> No thoughts that I should share.
>>
>> I will have to boot my s390 (vm) and have a look.
>>
>> Thanks for letting me know.
>>
>> Regards,
>> Liam
>
> I tracked down what this (almost certainly) was + added fix in [1] as it
> popped up as a 6.2.y stable bug. It doesn't seem arch-specific so you can
> put that s390 down :)
>
> [1]:https://lore.kernel.org/all/db42467a692d78c654ec5c1953329401bd8a9c34.1682859234.git.lstoakes@gmail.com/
Thanks, just tested, and it solves the issue for me.