The two users of mbind_range() are expecting that mbind_range() will
update the pointer to the previous VMA, or return an error. However,
set_mempolicy_home_node() does not call mbind_range() if there is no VMA
policy. The fix is to update the pointer to the previous VMA prior to
continuing iterating the VMAs when there is no policy.
Users may experience a WARN_ON() during VMA policy updates when updating
a range of VMAs on the home node.
Reported-by: Yikebaer Aizezi <[email protected]>
Closes: https://lore.kernel.org/linux-mm/CALcu4rbT+fMVNaO_F2izaCT+e7jzcAciFkOvk21HGJsmLcUuwQ@mail.gmail.com/
Link: https://lore.kernel.org/linux-mm/CALcu4rbT+fMVNaO_F2izaCT+e7jzcAciFkOvk21HGJsmLcUuwQ@mail.gmail.com/
Fixes: f4e9e0e69468 ("mm/mempolicy: fix use-after-free of VMA iterator")
Cc: [email protected]
Cc: Lorenzo Stoakes <[email protected]>
Signed-off-by: Liam R. Howlett <[email protected]>
---
mm/mempolicy.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
For completeness, here is the syzbot reproducer so that it is available
from the mailing list:
#define _GNU_SOURCE
#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#ifndef __NR_set_mempolicy_home_node
#define __NR_set_mempolicy_home_node 450
#endif
int main(void)
{
syscall(__NR_mmap, /*addr=*/0x1ffff000ul, /*len=*/0x1000ul, /*prot=*/0ul, /*flags=*/0x32ul, /*fd=*/-1, /*offset=*/0ul);
syscall(__NR_mmap, /*addr=*/0x20000000ul, /*len=*/0x1000000ul, /*prot=*/7ul, /*flags=*/0x32ul, /*fd=*/-1, /*offset=*/0ul);
syscall(__NR_mmap, /*addr=*/0x21000000ul, /*len=*/0x1000ul, /*prot=*/0ul, /*flags=*/0x32ul, /*fd=*/-1, /*offset=*/0ul);
*(uint64_t*)0x20000000 = 0xffffffffffffff81;
syscall(__NR_mbind, /*addr=*/0x20ffa000ul, /*len=*/0x4000ul, /*mode=*/2ul, /*nodemask=*/0x20000000ul, /*maxnode=*/7ul, /*flags=*/0ul);
syscall(__NR_mbind, /*addr=*/0x20ff9000ul, /*len=*/0x3000ul, /*mode=*/0ul, /*nodemask=*/0ul, /*maxnode=*/0ul, /*flags=*/0ul);
syscall(__NR_set_mempolicy_home_node, /*addr=*/0x20ffa000ul, /*len=*/0x4000ul, /*home_node=*/0ul, /*flags=*/0ul);
return 0;
}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 42b5567e3773..717d93c175f2 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1544,8 +1544,10 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
* the home node for vmas we already updated before.
*/
old = vma_policy(vma);
- if (!old)
+ if (!old) {
+ prev = vma;
continue;
+ }
if (old->mode != MPOL_BIND && old->mode != MPOL_PREFERRED_MANY) {
err = -EOPNOTSUPP;
break;
--
2.40.1
On Thu, Sep 28, 2023 at 01:24:32PM -0400, Liam R. Howlett wrote:
> The two users of mbind_range() are expecting that mbind_range() will
> update the pointer to the previous VMA, or return an error. However,
> set_mempolicy_home_node() does not call mbind_range() if there is no VMA
> policy. The fix is to update the pointer to the previous VMA prior to
> continuing iterating the VMAs when there is no policy.
>
> Users may experience a WARN_ON() during VMA policy updates when updating
> a range of VMAs on the home node.
>
> Reported-by: Yikebaer Aizezi <[email protected]>
> Closes: https://lore.kernel.org/linux-mm/CALcu4rbT+fMVNaO_F2izaCT+e7jzcAciFkOvk21HGJsmLcUuwQ@mail.gmail.com/
> Link: https://lore.kernel.org/linux-mm/CALcu4rbT+fMVNaO_F2izaCT+e7jzcAciFkOvk21HGJsmLcUuwQ@mail.gmail.com/
> Fixes: f4e9e0e69468 ("mm/mempolicy: fix use-after-free of VMA iterator")
> Cc: [email protected]
> Cc: Lorenzo Stoakes <[email protected]>
> Signed-off-by: Liam R. Howlett <[email protected]>
> ---
> mm/mempolicy.c | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
>
>
> For completeness, here is the syzbot reproducer so that it is available
> from the mailing list:
>
> #define _GNU_SOURCE
>
> #include <endian.h>
> #include <stdint.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> #include <sys/syscall.h>
> #include <sys/types.h>
> #include <unistd.h>
>
> #ifndef __NR_set_mempolicy_home_node
> #define __NR_set_mempolicy_home_node 450
> #endif
>
> int main(void)
> {
> syscall(__NR_mmap, /*addr=*/0x1ffff000ul, /*len=*/0x1000ul, /*prot=*/0ul, /*flags=*/0x32ul, /*fd=*/-1, /*offset=*/0ul);
> syscall(__NR_mmap, /*addr=*/0x20000000ul, /*len=*/0x1000000ul, /*prot=*/7ul, /*flags=*/0x32ul, /*fd=*/-1, /*offset=*/0ul);
> syscall(__NR_mmap, /*addr=*/0x21000000ul, /*len=*/0x1000ul, /*prot=*/0ul, /*flags=*/0x32ul, /*fd=*/-1, /*offset=*/0ul);
>
> *(uint64_t*)0x20000000 = 0xffffffffffffff81;
> syscall(__NR_mbind, /*addr=*/0x20ffa000ul, /*len=*/0x4000ul, /*mode=*/2ul, /*nodemask=*/0x20000000ul, /*maxnode=*/7ul, /*flags=*/0ul);
> syscall(__NR_mbind, /*addr=*/0x20ff9000ul, /*len=*/0x3000ul, /*mode=*/0ul, /*nodemask=*/0ul, /*maxnode=*/0ul, /*flags=*/0ul);
> syscall(__NR_set_mempolicy_home_node, /*addr=*/0x20ffa000ul, /*len=*/0x4000ul, /*home_node=*/0ul, /*flags=*/0ul);
> return 0;
> }
>
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 42b5567e3773..717d93c175f2 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -1544,8 +1544,10 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
> * the home node for vmas we already updated before.
> */
> old = vma_policy(vma);
> - if (!old)
> + if (!old) {
> + prev = vma;
> continue;
> + }
> if (old->mode != MPOL_BIND && old->mode != MPOL_PREFERRED_MANY) {
> err = -EOPNOTSUPP;
> break;
> --
> 2.40.1
>
It feels a bit like the prev assignment is in the wrong place, however
looking at mbind_range() it's because of the possible merge that this is so
I guess. Just a pity the two bits get separated, as obviously it is at this
upper loop where the assignment of prev is most meaningful.
But definitely looks correct,
Reviewed-by: Lorenzo Stoakes <[email protected]>