2019-11-29 09:06:42

by Vlastimil Babka

[permalink] [raw]
Subject: [PATCH STABLE 4.9 1/1] mm, gup: add missing refcount overflow checks on x86 and s390

The mainline commit 8fde12ca79af ("mm: prevent get_user_pages() from
overflowing page refcount") was backported to 4.9.y stable as commit
2ed768cfd895. The backport however missed that in 4.9, there are several
arch-specific gup.c versions with fast gup implementations, so these do not
prevent refcount overflow.

This is partially fixed for x86 in stable-only commit d73af79742e7 ("x86, mm,
gup: prevent get_page() race with munmap in paravirt guest"). The present
stable-only commit adds the missing parts to the x86 version, as well as to the
s390 version, both taken from the SUSE SLES/openSUSE 4.12-based kernels.

The remaining architectures with their own gup.c are sparc, mips, and sh. It's
unlikely that the known overflow scenario based on FUSE, which needs 140GB of
RAM, is a problem for those architectures, and I don't feel confident enough to
patch them.

Signed-off-by: Vlastimil Babka <[email protected]>
---
arch/s390/mm/gup.c | 9 ++++++---
arch/x86/mm/gup.c | 10 ++++++++--
2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 97fc449a7470..33a940389a6d 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -38,7 +38,8 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
head = compound_head(page);
- if (!page_cache_get_speculative(head))
+ if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0)
+ || !page_cache_get_speculative(head)))
return 0;
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
put_page(head);
@@ -76,7 +77,8 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
refs++;
} while (addr += PAGE_SIZE, addr != end);

- if (!page_cache_add_speculative(head, refs)) {
+ if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0)
+ || !page_cache_add_speculative(head, refs))) {
*nr -= refs;
return 0;
}
@@ -150,7 +152,8 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
refs++;
} while (addr += PAGE_SIZE, addr != end);

- if (!page_cache_add_speculative(head, refs)) {
+ if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0)
+ || !page_cache_add_speculative(head, refs))) {
*nr -= refs;
return 0;
}
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index d7db45bdfb3b..551fc7fea046 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -202,10 +202,12 @@ static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
undo_dev_pagemap(nr, nr_start, pages);
return 0;
}
+ if (unlikely(!try_get_page(page))) {
+ put_dev_pagemap(pgmap);
+ return 0;
+ }
SetPageReferenced(page);
pages[*nr] = page;
- get_page(page);
- put_dev_pagemap(pgmap);
(*nr)++;
pfn++;
} while (addr += PAGE_SIZE, addr != end);
@@ -230,6 +232,8 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,

refs = 0;
head = pmd_page(pmd);
+ if (WARN_ON_ONCE(page_ref_count(head) <= 0))
+ return 0;
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
@@ -289,6 +293,8 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,

refs = 0;
head = pud_page(pud);
+ if (WARN_ON_ONCE(page_ref_count(head) <= 0))
+ return 0;
page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
do {
VM_BUG_ON_PAGE(compound_head(page) != head, page);
--
2.24.0


2019-12-03 12:24:54

by Ben Hutchings

[permalink] [raw]
Subject: Re: [PATCH STABLE 4.9 1/1] mm, gup: add missing refcount overflow checks on x86 and s390

On Fri, 2019-11-29 at 10:03 +0100, Vlastimil Babka wrote:
> The mainline commit 8fde12ca79af ("mm: prevent get_user_pages() from
> overflowing page refcount") was backported to 4.9.y stable as commit
> 2ed768cfd895. The backport however missed that in 4.9, there are several
> arch-specific gup.c versions with fast gup implementations, so these do not
> prevent refcount overflow.
>
> This is partially fixed for x86 in stable-only commit d73af79742e7 ("x86, mm,
> gup: prevent get_page() race with munmap in paravirt guest"). This stable-only
> commit adds missing parts to x86 version, as well as s390 version, both taken
> from the SUSE SLES/openSUSE 4.12-based kernels.
>
> The remaining architectures with own gup.c are sparc, mips, sh. It's unlikely
> the known overflow scenario based on FUSE, which needs 140GB of RAM, is a
> problem for those architectures, and I don't feel confident enough to patch
> them.
>
> Signed-off-by: Vlastimil Babka <[email protected]>
> ---
> arch/s390/mm/gup.c | 9 ++++++---
> arch/x86/mm/gup.c | 10 ++++++++--
> 2 files changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
> index 97fc449a7470..33a940389a6d 100644
> --- a/arch/s390/mm/gup.c
> +++ b/arch/s390/mm/gup.c
> @@ -38,7 +38,8 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
> VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
> page = pte_page(pte);
> head = compound_head(page);
> - if (!page_cache_get_speculative(head))
> + if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0)

No need for unlikely(); WARN_ON() includes that.

> + || !page_cache_get_speculative(head)))
> return 0;
> if (unlikely(pte_val(pte) != pte_val(*ptep))) {
> put_page(head);
[...]
> --- a/arch/x86/mm/gup.c
> +++ b/arch/x86/mm/gup.c
> @@ -202,10 +202,12 @@ static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
> undo_dev_pagemap(nr, nr_start, pages);
> return 0;
> }
> + if (unlikely(!try_get_page(page))) {
> + put_dev_pagemap(pgmap);
> + return 0;
> + }
> SetPageReferenced(page);
> pages[*nr] = page;
> - get_page(page);
> - put_dev_pagemap(pgmap);

This leaks a pgmap reference on success!

> (*nr)++;
> pfn++;
> } while (addr += PAGE_SIZE, addr != end);
> @@ -230,6 +232,8 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
>
> refs = 0;
> head = pmd_page(pmd);
> + if (WARN_ON_ONCE(page_ref_count(head) <= 0))

Why <= 0, given we use < 0 elsewhere?

> + return 0;
> page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
> do {
> VM_BUG_ON_PAGE(compound_head(page) != head, page);
> @@ -289,6 +293,8 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
>
> refs = 0;
> head = pud_page(pud);
> + if (WARN_ON_ONCE(page_ref_count(head) <= 0))

Same question here.

Ben.

> + return 0;
> page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
> do {
> VM_BUG_ON_PAGE(compound_head(page) != head, page);
--
Ben Hutchings, Software Developer Codethink Ltd
https://www.codethink.co.uk/ Dale House, 35 Dale Street
Manchester, M1 2HF, United Kingdom

2019-12-03 12:47:48

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [PATCH STABLE 4.9 1/1] mm, gup: add missing refcount overflow checks on x86 and s390

On 12/3/19 1:22 PM, Ben Hutchings wrote:
>> + || !page_cache_get_speculative(head)))
>> return 0;
>> if (unlikely(pte_val(pte) != pte_val(*ptep))) {
>> put_page(head);
> [...]
>> --- a/arch/x86/mm/gup.c
>> +++ b/arch/x86/mm/gup.c
>> @@ -202,10 +202,12 @@ static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
>> undo_dev_pagemap(nr, nr_start, pages);
>> return 0;
>> }
>> + if (unlikely(!try_get_page(page))) {
>> + put_dev_pagemap(pgmap);
>> + return 0;
>> + }
>> SetPageReferenced(page);
>> pages[*nr] = page;
>> - get_page(page);
>> - put_dev_pagemap(pgmap);
>
> This leaks a pgmap reference on success!

Good catch, deleted one line too many!

>> (*nr)++;
>> pfn++;
>> } while (addr += PAGE_SIZE, addr != end);
>> @@ -230,6 +232,8 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
>>
>> refs = 0;
>> head = pmd_page(pmd);
>> + if (WARN_ON_ONCE(page_ref_count(head) <= 0))
>
> Why <= 0, given we use < 0 elsewhere?

The code uses get_head_page_multiple(), which boils down to atomic_add()
rather than add_unless_zero(), so it assumes a pre-existing pin that must
not go away; if it does, that's a bug (one that I've been hunting recently
in this area). The <= 0 check makes that assumption explicit.

>
>> + return 0;
>> page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
>> do {
>> VM_BUG_ON_PAGE(compound_head(page) != head, page);
>> @@ -289,6 +293,8 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
>>
>> refs = 0;
>> head = pud_page(pud);
>> + if (WARN_ON_ONCE(page_ref_count(head) <= 0))
>
> Same question here.

Same as above.

> Ben.
>
>> + return 0;
>> page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
>> do {
>> VM_BUG_ON_PAGE(compound_head(page) != head, page);