2020-08-21 12:41:05

by Joerg Roedel

[permalink] [raw]
Subject: [PATCH v2] mm: Track page table modifications in __apply_to_page_range()

From: Joerg Roedel <[email protected]>

The __apply_to_page_range() function is also used to change and/or
allocate page-table pages in the vmalloc area of the address space.
Make sure these changes get synchronized to other page-tables in the
system by calling arch_sync_kernel_mappings() when necessary.

Tested-by: Chris Wilson <[email protected]> #x86-32
Cc: <[email protected]> # v5.8+
Signed-off-by: Joerg Roedel <[email protected]>
---
mm/memory.c | 36 +++++++++++++++++++++++-------------
1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 3a7779d9891d..1b7d846f6992 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -83,6 +83,7 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>

+#include "pgalloc-track.h"
#include "internal.h"

#if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST)
@@ -2206,7 +2207,8 @@ EXPORT_SYMBOL(vm_iomap_memory);

static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data, bool create)
+ pte_fn_t fn, void *data, bool create,
+ pgtbl_mod_mask *mask)
{
pte_t *pte;
int err = 0;
@@ -2214,7 +2216,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,

if (create) {
pte = (mm == &init_mm) ?
- pte_alloc_kernel(pmd, addr) :
+ pte_alloc_kernel_track(pmd, addr, mask) :
pte_alloc_map_lock(mm, pmd, addr, &ptl);
if (!pte)
return -ENOMEM;
@@ -2235,6 +2237,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
break;
}
} while (addr += PAGE_SIZE, addr != end);
+ *mask |= PGTBL_PTE_MODIFIED;

arch_leave_lazy_mmu_mode();

@@ -2245,7 +2248,8 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,

static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data, bool create)
+ pte_fn_t fn, void *data, bool create,
+ pgtbl_mod_mask *mask)
{
pmd_t *pmd;
unsigned long next;
@@ -2254,7 +2258,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
BUG_ON(pud_huge(*pud));

if (create) {
- pmd = pmd_alloc(mm, pud, addr);
+ pmd = pmd_alloc_track(mm, pud, addr, mask);
if (!pmd)
return -ENOMEM;
} else {
@@ -2264,7 +2268,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
next = pmd_addr_end(addr, end);
if (create || !pmd_none_or_clear_bad(pmd)) {
err = apply_to_pte_range(mm, pmd, addr, next, fn, data,
- create);
+ create, mask);
if (err)
break;
}
@@ -2274,14 +2278,15 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,

static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data, bool create)
+ pte_fn_t fn, void *data, bool create,
+ pgtbl_mod_mask *mask)
{
pud_t *pud;
unsigned long next;
int err = 0;

if (create) {
- pud = pud_alloc(mm, p4d, addr);
+ pud = pud_alloc_track(mm, p4d, addr, mask);
if (!pud)
return -ENOMEM;
} else {
@@ -2291,7 +2296,7 @@ static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
next = pud_addr_end(addr, end);
if (create || !pud_none_or_clear_bad(pud)) {
err = apply_to_pmd_range(mm, pud, addr, next, fn, data,
- create);
+ create, mask);
if (err)
break;
}
@@ -2301,14 +2306,15 @@ static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,

static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data, bool create)
+ pte_fn_t fn, void *data, bool create,
+ pgtbl_mod_mask *mask)
{
p4d_t *p4d;
unsigned long next;
int err = 0;

if (create) {
- p4d = p4d_alloc(mm, pgd, addr);
+ p4d = p4d_alloc_track(mm, pgd, addr, mask);
if (!p4d)
return -ENOMEM;
} else {
@@ -2318,7 +2324,7 @@ static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
next = p4d_addr_end(addr, end);
if (create || !p4d_none_or_clear_bad(p4d)) {
err = apply_to_pud_range(mm, p4d, addr, next, fn, data,
- create);
+ create, mask);
if (err)
break;
}
@@ -2331,8 +2337,9 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr,
void *data, bool create)
{
pgd_t *pgd;
- unsigned long next;
+ unsigned long start = addr, next;
unsigned long end = addr + size;
+ pgtbl_mod_mask mask = 0;
int err = 0;

if (WARN_ON(addr >= end))
@@ -2343,11 +2350,14 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr,
next = pgd_addr_end(addr, end);
if (!create && pgd_none_or_clear_bad(pgd))
continue;
- err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create);
+ err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create, &mask);
if (err)
break;
} while (pgd++, addr = next, addr != end);

+ if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
+ arch_sync_kernel_mappings(start, start + size);
+
return err;
}

--
2.28.0


2020-08-21 18:52:59

by Chris Wilson

[permalink] [raw]
Subject: Re: [PATCH v2] mm: Track page table modifications in __apply_to_page_range()

Quoting Joerg Roedel (2020-08-21 13:37:46)
> From: Joerg Roedel <[email protected]>
>
> The __apply_to_page_range() function is also used to change and/or
> allocate page-table pages in the vmalloc area of the address space.
> Make sure these changes get synchronized to other page-tables in the
> system by calling arch_sync_kernel_mappings() when necessary.
>
> Tested-by: Chris Wilson <[email protected]> #x86-32
> Cc: <[email protected]> # v5.8+
> Signed-off-by: Joerg Roedel <[email protected]>

I've double-checked that this patch by itself fixes our x86-32 vmapping
issue. Thanks,
-Chris

2020-08-21 19:22:11

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH v2] mm: Track page table modifications in __apply_to_page_range()

On Fri, Aug 21, 2020 at 5:38 AM Joerg Roedel <[email protected]> wrote:
>
> From: Joerg Roedel <[email protected]>
>
> The __apply_to_page_range() function is also used to change and/or
> allocate page-table pages in the vmalloc area of the address space.
> Make sure these changes get synchronized to other page-tables in the
> system by calling arch_sync_kernel_mappings() when necessary.

I get the strong feeling that these functions should be using a
"struct apply_details *" or something like that (the way the
zap_page_range() code has that "zap_details" thing).

Because adding more and more arguments gets pretty painful after a
while. But maybe the compiler inlining it all makes it a non-issue.

It also strikes me that I think the only architecture that uses the
whole arch_sync_kernel_mappings() thing is now just x86-32.

[ Well, x86-64 still has it, but that's because we undid the 64-bit
removal, but it's on the verge of going away and x86-64 shouldn't
actually _need_ it any more ]

So all of this seems to be purely for 32-bit x86. Which kind of makes
this all fail the smell test.

But the patch does seem to be the minimal fix for a real issue - I'm
just pointing out ugly details, not actual problems with the patch.

IOW, a somewhat reluctant Ack, hoping that this will be cleaned up
some day. Possibly/hopefully because arch_sync_kernel_mappings() just
goes away entirely.

Linus

2020-08-21 20:39:20

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH v2] mm: Track page table modifications in __apply_to_page_range()

On Fri, 21 Aug 2020 14:37:46 +0200 Joerg Roedel <[email protected]> wrote:

> The __apply_to_page_range() function is also used to change and/or
> allocate page-table pages in the vmalloc area of the address space.
> Make sure these changes get synchronized to other page-tables in the
> system by calling arch_sync_kernel_mappings() when necessary.

There's no description here of the user-visible effects of the bug.
Please always provide this, especially when proposing a -stable
backport. Take pity upon all the downstream kernel maintainers who are
staring at this wondering whether they should risk adding it to their
kernels.


2020-08-21 20:54:14

by Chris Wilson

[permalink] [raw]
Subject: Re: [PATCH v2] mm: Track page table modifications in __apply_to_page_range()

Quoting Andrew Morton (2020-08-21 21:35:48)
> On Fri, 21 Aug 2020 14:37:46 +0200 Joerg Roedel <[email protected]> wrote:
>
> > The __apply_to_page_range() function is also used to change and/or
> > allocate page-table pages in the vmalloc area of the address space.
> > Make sure these changes get synchronized to other page-tables in the
> > system by calling arch_sync_kernel_mappings() when necessary.
>
> There's no description here of the user-visible effects of the bug.
> Please always provide this, especially when proposing a -stable
> backport. Take pity upon all the downstream kernel maintainers who are
> staring at this wondering whether they should risk adding it to their
> kernels.

The impact appears limited to x86-32, where apply_to_page_range may miss
updating the PMD. That leads to explosions in drivers like

[ 24.227844] BUG: unable to handle page fault for address: fe036000
[ 24.228076] #PF: supervisor write access in kernel mode
[ 24.228294] #PF: error_code(0x0002) - not-present page
[ 24.228494] *pde = 00000000
[ 24.228640] Oops: 0002 [#1] SMP
[ 24.228788] CPU: 3 PID: 1300 Comm: gem_concurrent_ Not tainted 5.9.0-rc1+ #16
[ 24.228957] Hardware name: /NUC6i3SYB, BIOS SYSKLi35.86A.0024.2015.1027.2142 10/27/2015
[ 24.229297] EIP: __execlists_context_alloc+0x132/0x2d0 [i915]
[ 24.229462] Code: 31 d2 89 f0 e8 2f 55 02 00 89 45 e8 3d 00 f0 ff ff 0f 87 11 01 00 00 8b 4d e8 03 4b 30 b8 5a 5a 5a 5a ba 01 00 00 00 8d 79 04 <c7> 01 5a 5a 5a 5a c7 81 fc 0f 00 00 5a 5a 5a 5a 83 e7 fc 29 f9 81
[ 24.229759] EAX: 5a5a5a5a EBX: f60ca000 ECX: fe036000 EDX: 00000001
[ 24.229915] ESI: f43b7340 EDI: fe036004 EBP: f6389cb8 ESP: f6389c9c
[ 24.230072] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010286
[ 24.230229] CR0: 80050033 CR2: fe036000 CR3: 2d361000 CR4: 001506d0
[ 24.230385] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
[ 24.230539] DR6: fffe0ff0 DR7: 00000400
[ 24.230675] Call Trace:
[ 24.230957] execlists_context_alloc+0x10/0x20 [i915]
[ 24.231266] intel_context_alloc_state+0x3f/0x70 [i915]
[ 24.231547] __intel_context_do_pin+0x117/0x170 [i915]
[ 24.231850] i915_gem_do_execbuffer+0xcc7/0x2500 [i915]
[ 24.232024] ? __kmalloc_track_caller+0x54/0x230
[ 24.232181] ? ktime_get+0x3e/0x120
[ 24.232333] ? dma_fence_signal+0x34/0x50
[ 24.232617] i915_gem_execbuffer2_ioctl+0xcd/0x1f0 [i915]
[ 24.232912] ? i915_gem_execbuffer_ioctl+0x2e0/0x2e0 [i915]
[ 24.233084] drm_ioctl_kernel+0x8f/0xd0
[ 24.233236] drm_ioctl+0x223/0x3d0
[ 24.233505] ? i915_gem_execbuffer_ioctl+0x2e0/0x2e0 [i915]
[ 24.233684] ? pick_next_task_fair+0x1b5/0x3d0
[ 24.233873] ? __switch_to_asm+0x36/0x50
[ 24.234021] ? drm_ioctl_kernel+0xd0/0xd0
[ 24.234167] __ia32_sys_ioctl+0x1ab/0x760
[ 24.234313] ? exit_to_user_mode_prepare+0xe5/0x110
[ 24.234453] ? syscall_exit_to_user_mode+0x23/0x130
[ 24.234601] __do_fast_syscall_32+0x3f/0x70
[ 24.234744] do_fast_syscall_32+0x29/0x60
[ 24.234885] do_SYSENTER_32+0x15/0x20
[ 24.235021] entry_SYSENTER_32+0x9f/0xf2
[ 24.235157] EIP: 0xb7f28559
[ 24.235288] Code: 03 74 c0 01 10 05 03 74 b8 01 10 06 03 74 b4 01 10 07 03 74 b0 01 10 08 03 74 d8 01 00 00 00 00 00 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76
[ 24.235576] EAX: ffffffda EBX: 00000005 ECX: c0406469 EDX: bf95556c
[ 24.235722] ESI: b7e68000 EDI: c0406469 EBP: 00000005 ESP: bf9554d8
[ 24.235869] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000296
[ 24.236018] Modules linked in: i915 x86_pkg_temp_thermal intel_powerclamp crc32_pclmul crc32c_intel intel_cstate intel_uncore intel_gtt drm_kms_helper intel_pch_thermal video button autofs4 i2c_i801 i2c_smbus fan
[ 24.236336] CR2: 00000000fe036000

It looks like kasan, xen and i915 are vulnerable.
-Chris

2020-08-21 21:32:55

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH v2] mm: Track page table modifications in __apply_to_page_range()

Hi!

> > > The __apply_to_page_range() function is also used to change and/or
> > > allocate page-table pages in the vmalloc area of the address space.
> > > Make sure these changes get synchronized to other page-tables in the
> > > system by calling arch_sync_kernel_mappings() when necessary.
> >
> > There's no description here of the user-visible effects of the bug.
> > Please always provide this, especially when proposing a -stable
> > backport. Take pity upon all the downstream kernel maintainers who are
> > staring at this wondering whether they should risk adding it to their
> > kernels.
>
> The impact appears limited to x86-32, where apply_to_page_range may miss
> updating the PMD. That leads to explosions in drivers like
>
> [ 24.227844] BUG: unable to handle page fault for address: fe036000
> [ 24.228076] #PF: supervisor write access in kernel mode
> [ 24.228294] #PF: error_code(0x0002) - not-present page
> [ 24.228494] *pde = 00000000
> [ 24.228640] Oops: 0002 [#1] SMP
> [ 24.228788] CPU: 3 PID: 1300 Comm: gem_concurrent_ Not tainted 5.9.0-rc1+ #16
> [ 24.228957] Hardware name: /NUC6i3SYB, BIOS SYSKLi35.86A.0024.2015.1027.2142 10/27/2015
> [ 24.229297] EIP: __execlists_context_alloc+0x132/0x2d0 [i915]
> [ 24.229462] Code: 31 d2 89 f0 e8 2f 55 02 00 89 45 e8 3d 00 f0 ff ff 0f 87 11 01 00 00 8b 4d e8 03 4b 30 b8 5a 5a 5a 5a ba 01 00 00 00 8d 79 04 <c7> 01 5a 5a 5a 5a c7 81 fc 0f 00 00 5a 5a 5a 5a 83 e7 fc 29 f9 81
> [ 24.229759] EAX: 5a5a5a5a EBX: f60ca000 ECX: fe036000 EDX: 00000001
> [ 24.229915] ESI: f43b7340 EDI: fe036004 EBP: f6389cb8 ESP: f6389c9c
> [ 24.230072] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010286
> [ 24.230229] CR0: 80050033 CR2: fe036000 CR3: 2d361000 CR4: 001506d0
> [ 24.230385] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
> [ 24.230539] DR6: fffe0ff0 DR7: 00000400
> [ 24.230675] Call Trace:
> [ 24.230957] execlists_context_alloc+0x10/0x20 [i915]
> [ 24.231266] intel_context_alloc_state+0x3f/0x70 [i915]
> [ 24.231547] __intel_context_do_pin+0x117/0x170 [i915]
> [ 24.231850] i915_gem_do_execbuffer+0xcc7/0x2500 [i915]
> [ 24.232024] ? __kmalloc_track_caller+0x54/0x230
> [ 24.232181] ? ktime_get+0x3e/0x120
> [ 24.232333] ? dma_fence_signal+0x34/0x50
> [ 24.232617] i915_gem_execbuffer2_ioctl+0xcd/0x1f0 [i915]
> [ 24.232912] ? i915_gem_execbuffer_ioctl+0x2e0/0x2e0 [i915]
> [ 24.233084] drm_ioctl_kernel+0x8f/0xd0
> [ 24.233236] drm_ioctl+0x223/0x3d0
> [ 24.233505] ? i915_gem_execbuffer_ioctl+0x2e0/0x2e0 [i915]
> [ 24.233684] ? pick_next_task_fair+0x1b5/0x3d0
> [ 24.233873] ? __switch_to_asm+0x36/0x50
> [ 24.234021] ? drm_ioctl_kernel+0xd0/0xd0
> [ 24.234167] __ia32_sys_ioctl+0x1ab/0x760
> [ 24.234313] ? exit_to_user_mode_prepare+0xe5/0x110
> [ 24.234453] ? syscall_exit_to_user_mode+0x23/0x130
> [ 24.234601] __do_fast_syscall_32+0x3f/0x70
> [ 24.234744] do_fast_syscall_32+0x29/0x60
> [ 24.234885] do_SYSENTER_32+0x15/0x20
> [ 24.235021] entry_SYSENTER_32+0x9f/0xf2
> [ 24.235157] EIP: 0xb7f28559
> [ 24.235288] Code: 03 74 c0 01 10 05 03 74 b8 01 10 06 03 74 b4 01 10 07 03 74 b0 01 10 08 03 74 d8 01 00 00 00 00 00 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76
> [ 24.235576] EAX: ffffffda EBX: 00000005 ECX: c0406469 EDX: bf95556c
> [ 24.235722] ESI: b7e68000 EDI: c0406469 EBP: 00000005 ESP: bf9554d8
> [ 24.235869] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000296
> [ 24.236018] Modules linked in: i915 x86_pkg_temp_thermal intel_powerclamp crc32_pclmul crc32c_intel intel_cstate intel_uncore intel_gtt drm_kms_helper intel_pch_thermal video button autofs4 i2c_i801 i2c_smbus fan
> [ 24.236336] CR2: 00000000fe036000
>
> It looks like kasan, xen and i915 are vulnerable.

And the actual impact is "on thinkpad X60 in 5.9-rc1, screen starts
blinking after 30-or-so minutes, and machine is unusable"... that is
assuming we are talking about the same bug.

Best regards,
Pavel

--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html


Attachments:
(No filename) (4.16 kB)
signature.asc (188.00 B)
Digital signature
Download all attachments

2020-08-21 22:35:19

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH v2] mm: Track page table modifications in __apply_to_page_range()

On Fri, 21 Aug 2020 14:37:46 +0200 Joerg Roedel <[email protected]> wrote:

> The __apply_to_page_range() function is also used to change and/or
> allocate page-table pages in the vmalloc area of the address space.
> Make sure these changes get synchronized to other page-tables in the
> system by calling arch_sync_kernel_mappings() when necessary.
>
> Tested-by: Chris Wilson <[email protected]> #x86-32
> Cc: <[email protected]> # v5.8+

I'm trying to figure out how you figured out that this is 5.8+. Has a
particular misbehaving commit been identified?

2020-08-21 23:44:04

by Chris Wilson

[permalink] [raw]
Subject: Re: [PATCH v2] mm: Track page table modifications in __apply_to_page_range()

Quoting Andrew Morton (2020-08-21 23:34:12)
> On Fri, 21 Aug 2020 14:37:46 +0200 Joerg Roedel <[email protected]> wrote:
>
> > The __apply_to_page_range() function is also used to change and/or
> > allocate page-table pages in the vmalloc area of the address space.
> > Make sure these changes get synchronized to other page-tables in the
> > system by calling arch_sync_kernel_mappings() when necessary.
> >
> > Tested-by: Chris Wilson <[email protected]> #x86-32
> > Cc: <[email protected]> # v5.8+
>
> I'm trying to figure out how you figured out that this is 5.8+. Has a
> particular misbehaving commit been identified?

The two commits of relevance, in my eyes, were

2ba3e6947aed ("mm/vmalloc: track which page-table levels were modified")
86cf69f1d893 ("x86/mm/32: implement arch_sync_kernel_mappings()")

I can reproduce the failure on v5.8, but not on v5.7. A bisect would
seem to be plausible.
-Chris

2020-08-22 11:33:30

by Chris Wilson

[permalink] [raw]
Subject: Re: [PATCH v2] mm: Track page table modifications in __apply_to_page_range()

Quoting Chris Wilson (2020-08-22 00:39:09)
> Quoting Andrew Morton (2020-08-21 23:34:12)
> > On Fri, 21 Aug 2020 14:37:46 +0200 Joerg Roedel <[email protected]> wrote:
> >
> > > The __apply_to_page_range() function is also used to change and/or
> > > allocate page-table pages in the vmalloc area of the address space.
> > > Make sure these changes get synchronized to other page-tables in the
> > > system by calling arch_sync_kernel_mappings() when necessary.
> > >
> > > Tested-by: Chris Wilson <[email protected]> #x86-32
> > > Cc: <[email protected]> # v5.8+
> >
> > I'm trying to figure out how you figured out that this is 5.8+. Has a
> > particular misbehaving commit been identified?
>
> The two commits of relevance, in my eyes, were
>
> 2ba3e6947aed ("mm/vmalloc: track which page-table levels were modified")
> 86cf69f1d893 ("x86/mm/32: implement arch_sync_kernel_mappings()")
>
> I can reproduce the failure on v5.8, but not on v5.7. A bisect would
> seem to be plausible.

The active ingredient was

7f0a002b5a21 ("x86/mm: remove vmalloc faulting")

which explains a lot.
-Chris

2020-08-22 16:17:15

by Jörg Rödel

[permalink] [raw]
Subject: Re: [PATCH v2] mm: Track page table modifications in __apply_to_page_range()

On Fri, Aug 21, 2020 at 12:18:41PM -0700, Linus Torvalds wrote:
> It also strikes me that I think the only architecture that uses the
> whole arch_sync_kernel_mappings() thing is now just x86-32.
>
> [ Well, x86-64 still has it, but that's because we undid the 64-bit
> removal, but it's on the verge of going away and x86-64 shouldn't
> actually _need_ it any more ]
>
> So all of this seems to be purely for 32-bit x86. Which kind of makes
> this all fail the smell test.

Yeah, it is certainly not the nicest thing to have in generic mm code,
but at least it is an improvement over the vmalloc_sync_all() interface we
had before, where the function had to be called at random undefined
places.

And x86-32 needs it, as long as we have the !SHARED_KERNEL_PMD cases
(which include legacy paging). Or we also pre-allocate the PMDs on
x86-32 and forbid large ioremap mappings. But since the vmalloc area
gets larger with less RAM on x86-32, this would penalize low memory
machines by using more pages for the pre-allocations.

Not sure if making the vmalloc area on x86-32 a fixed 128MB range of
address space independent of RAM size is doable or if it will break some
machines. But with that pre-allocating PMDs would make more sense and we
could get rid of the p?d_alloc_track() stuff.

Regards,

Joerg

2020-08-22 17:01:17

by Jörg Rödel

[permalink] [raw]
Subject: Re: [PATCH v2] mm: Track page table modifications in __apply_to_page_range()

On Sat, Aug 22, 2020 at 12:31:55PM +0100, Chris Wilson wrote:
> The active ingredient was
>
> 7f0a002b5a21 ("x86/mm: remove vmalloc faulting")

Right, that is what bisection will point to. Thanks for collecting all
the info and updating the commit message!

Regards,

Joerg

2020-08-22 22:42:12

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH v2] mm: Track page table modifications in __apply_to_page_range()

Hi!
> > > The __apply_to_page_range() function is also used to change and/or
> > > allocate page-table pages in the vmalloc area of the address space.
> > > Make sure these changes get synchronized to other page-tables in the
> > > system by calling arch_sync_kernel_mappings() when necessary.
> >
> > There's no description here of the user-visible effects of the bug.
> > Please always provide this, especially when proposing a -stable
> > backport. Take pity upon all the downstream kernel maintainers who are
> > staring at this wondering whether they should risk adding it to their
> > kernels.
>
> The impact appears limited to x86-32, where apply_to_page_range may miss
> updating the PMD. That leads to explosions in drivers like

Is this alone supposed to fix my problems with graphics on Thinkpad
X60? Let me try...

Best regards,
Pavel

--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html


Attachments:
(No filename) (1.00 kB)
signature.asc (188.00 B)
Digital signature
Download all attachments

2020-08-23 10:57:42

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH v2] mm: Track page table modifications in __apply_to_page_range()

Hi!

> The __apply_to_page_range() function is also used to change and/or
> allocate page-table pages in the vmalloc area of the address space.
> Make sure these changes get synchronized to other page-tables in the
> system by calling arch_sync_kernel_mappings() when necessary.
>
> Tested-by: Chris Wilson <[email protected]> #x86-32
> Cc: <[email protected]> # v5.8+
> Signed-off-by: Joerg Roedel <[email protected]>

This seems to solve screen blinking problems on Thinkpad X60. (It
already survived a few unison runs, which would usually kill it.)

Tested-by: Pavel Machek <[email protected]>

Thanks and best regards,
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html


Attachments:
(No filename) (800.00 B)
signature.asc (201.00 B)
Download all attachments