2022-09-11 10:08:10

by Pasha Tatashin

[permalink] [raw]
Subject: [PATCH 0/3] page table check default to warn instead of panic

From: Pasha Tatashin <[email protected]>

Page table check when detects errors panics the kernel. Let instead,
print a warning, and panic only when specifically requested via kernel
parameter:

page_table_check=panic

The discussion about using panic vs. warn is here:
https://lore.kernel.org/linux-mm/[email protected]

Pasha Tatashin (2):
mm/page_table_check: Do WARN_ON instead of BUG_ON by default
doc/vm: add information about page_table_check=panic

Rick Edgecombe (1):
mm/page_table_check: Check writable zero page in page table check

Documentation/mm/page_table_check.rst | 16 ++++----
mm/page_table_check.c | 53 ++++++++++++++++++++-------
2 files changed, 49 insertions(+), 20 deletions(-)

--
2.37.2.789.g6183377224-goog


2022-09-11 10:20:38

by Pasha Tatashin

[permalink] [raw]
Subject: [PATCH 2/3] mm/page_table_check: Do WARN_ON instead of BUG_ON by default

Currently, page_table_check when detects errors panics kernel. Instead,
print a warning, and panic only when specifically requested via kernel
parameter:

page_table_check=panic

Signed-off-by: Pasha Tatashin <[email protected]>
---
mm/page_table_check.c | 53 +++++++++++++++++++++++++++++++------------
1 file changed, 39 insertions(+), 14 deletions(-)

diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index 665ece0d55d4..881f19d0714c 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -17,13 +17,37 @@ struct page_table_check {

static bool __page_table_check_enabled __initdata =
IS_ENABLED(CONFIG_PAGE_TABLE_CHECK_ENFORCED);
+static bool __page_table_check_panic;

DEFINE_STATIC_KEY_TRUE(page_table_check_disabled);
EXPORT_SYMBOL(page_table_check_disabled);

+#define PAGE_TABLE_CHECK_BUG(v) \
+ do { \
+ bool __bug = !!(v); \
+ \
+ if (__page_table_check_panic) \
+ BUG_ON(__bug); \
+ else if (WARN_ON_ONCE(__bug)) \
+ static_branch_enable(&page_table_check_disabled); \
+ } while (false)
+
static int __init early_page_table_check_param(char *buf)
{
- return strtobool(buf, &__page_table_check_enabled);
+ int rc = strtobool(buf, &__page_table_check_enabled);
+
+ if (rc) {
+ if (!strcmp(buf, "panic")) {
+ __page_table_check_enabled = true;
+ __page_table_check_panic = true;
+ rc = 0;
+ }
+ }
+
+ if (rc)
+ pr_warn("Invalid option string: '%s'\n", buf);
+
+ return rc;
}

early_param("page_table_check", early_page_table_check_param);
@@ -48,7 +72,8 @@ struct page_ext_operations page_table_check_ops = {

static struct page_table_check *get_page_table_check(struct page_ext *page_ext)
{
- BUG_ON(!page_ext);
+ PAGE_TABLE_CHECK_BUG(!page_ext);
+
return (void *)(page_ext) + page_table_check_ops.offset;
}

@@ -75,11 +100,11 @@ static void page_table_check_clear(struct mm_struct *mm, unsigned long addr,
struct page_table_check *ptc = get_page_table_check(page_ext);

if (anon) {
- BUG_ON(atomic_read(&ptc->file_map_count));
- BUG_ON(atomic_dec_return(&ptc->anon_map_count) < 0);
+ PAGE_TABLE_CHECK_BUG(atomic_read(&ptc->file_map_count));
+ PAGE_TABLE_CHECK_BUG(atomic_dec_return(&ptc->anon_map_count) < 0);
} else {
- BUG_ON(atomic_read(&ptc->anon_map_count));
- BUG_ON(atomic_dec_return(&ptc->file_map_count) < 0);
+ PAGE_TABLE_CHECK_BUG(atomic_read(&ptc->anon_map_count));
+ PAGE_TABLE_CHECK_BUG(atomic_dec_return(&ptc->file_map_count) < 0);
}
page_ext = page_ext_next(page_ext);
}
@@ -102,7 +127,7 @@ static void page_table_check_set(struct mm_struct *mm, unsigned long addr,
if (!pfn_valid(pfn))
return;

- BUG_ON(is_zero_pfn(pfn) && rw);
+ PAGE_TABLE_CHECK_BUG(!is_zero_pfn(pfn) && rw);

page = pfn_to_page(pfn);
page_ext = lookup_page_ext(page);
@@ -112,11 +137,11 @@ static void page_table_check_set(struct mm_struct *mm, unsigned long addr,
struct page_table_check *ptc = get_page_table_check(page_ext);

if (anon) {
- BUG_ON(atomic_read(&ptc->file_map_count));
- BUG_ON(atomic_inc_return(&ptc->anon_map_count) > 1 && rw);
+ PAGE_TABLE_CHECK_BUG(atomic_read(&ptc->file_map_count));
+ PAGE_TABLE_CHECK_BUG(atomic_inc_return(&ptc->anon_map_count) > 1 && rw);
} else {
- BUG_ON(atomic_read(&ptc->anon_map_count));
- BUG_ON(atomic_inc_return(&ptc->file_map_count) < 0);
+ PAGE_TABLE_CHECK_BUG(atomic_read(&ptc->anon_map_count));
+ PAGE_TABLE_CHECK_BUG(atomic_inc_return(&ptc->file_map_count) < 0);
}
page_ext = page_ext_next(page_ext);
}
@@ -131,12 +156,12 @@ void __page_table_check_zero(struct page *page, unsigned int order)
struct page_ext *page_ext = lookup_page_ext(page);
unsigned long i;

- BUG_ON(!page_ext);
+ PAGE_TABLE_CHECK_BUG(!page_ext);
for (i = 0; i < (1ul << order); i++) {
struct page_table_check *ptc = get_page_table_check(page_ext);

- BUG_ON(atomic_read(&ptc->anon_map_count));
- BUG_ON(atomic_read(&ptc->file_map_count));
+ PAGE_TABLE_CHECK_BUG(atomic_read(&ptc->anon_map_count));
+ PAGE_TABLE_CHECK_BUG(atomic_read(&ptc->file_map_count));
page_ext = page_ext_next(page_ext);
}
}
--
2.37.2.789.g6183377224-goog

2022-09-11 10:45:28

by Pasha Tatashin

[permalink] [raw]
Subject: [PATCH 1/3] mm/page_table_check: Check writable zero page in page table check

From: Rick Edgecombe <[email protected]>

The zero page should remain all zero, so that it can be mapped as
read-only for read faults of memory that should be zeroed. If it is ever
mapped writable to userspace, it could become non-zero and so other apps
would unexpectedly get non-zero data. So the zero page should never be
mapped writable to userspace. Check for this condition in
page_table_check_set().

Signed-off-by: Rick Edgecombe <[email protected]>
Signed-off-by: Pasha Tatashin <[email protected]>
---
mm/page_table_check.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index e2062748791a..665ece0d55d4 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -102,6 +102,8 @@ static void page_table_check_set(struct mm_struct *mm, unsigned long addr,
if (!pfn_valid(pfn))
return;

+ BUG_ON(is_zero_pfn(pfn) && rw);
+
page = pfn_to_page(pfn);
page_ext = lookup_page_ext(page);
anon = PageAnon(page);
--
2.37.2.789.g6183377224-goog

2022-09-11 16:15:23

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH 2/3] mm/page_table_check: Do WARN_ON instead of BUG_ON by default

On Sun, Sep 11, 2022 at 09:59:22AM +0000, Pasha Tatashin wrote:
> Currently, page_table_check when detects errors panics kernel. Instead,
> print a warning, and panic only when specifically requested via kernel
> parameter:
>
> page_table_check=panic

Why are the page table checks so special that they deserve their own
command line parameter? Why shouldn't this be controlled by the usual
panic_on_warn option?

2022-09-11 20:49:08

by Pasha Tatashin

[permalink] [raw]
Subject: Re: [PATCH 2/3] mm/page_table_check: Do WARN_ON instead of BUG_ON by default

On Sun, Sep 11, 2022 at 12:08 PM Matthew Wilcox <[email protected]> wrote:
>
> On Sun, Sep 11, 2022 at 09:59:22AM +0000, Pasha Tatashin wrote:
> > Currently, page_table_check when detects errors panics kernel. Instead,
> > print a warning, and panic only when specifically requested via kernel
> > parameter:
> >
> > page_table_check=panic
>
> Why are the page table checks so special that they deserve their own
> command line parameter? Why shouldn't this be controlled by the usual
> panic_on_warn option?

page_table_check can be used as a security feature preventing false
page sharing between address spaces. For example, at Google we want it
to keep enabled on production systems, yet we do not want to enable
panic_on_warn as it would cause panics for many other reasons which
are security unrelated.

Pasha

2022-09-12 16:18:01

by Edgecombe, Rick P

[permalink] [raw]
Subject: Re: [PATCH 1/3] mm/page_table_check: Check writable zero page in page table check

On Sun, 2022-09-11 at 09:59 +0000, Pasha Tatashin wrote:
> From: Rick Edgecombe <[email protected]>
>
> The zero page should remain all zero, so that it can be mapped as
> read-only for read faults of memory that should be zeroed. If it is
> ever
> mapped writable to userspace, it could become non-zero and so other
> apps
> would unexpectedly get non-zero data. So the zero page should never
> be
> mapped writable to userspace. Check for this condition in
> page_table_check_set().
>
> Signed-off-by: Rick Edgecombe <[email protected]>
> Signed-off-by: Pasha Tatashin <[email protected]>
> ---
> mm/page_table_check.c | 2 ++
> 1 file changed, 2 insertions(+)
>
Thanks. Should we put this at the end, in order to not add any more
BUG_ON()'s to the kernel? Or I can just send a follow up and add the
docs you asked for.

2022-09-12 21:02:49

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 0/3] page table check default to warn instead of panic

On Sun, 11 Sep 2022 09:59:20 +0000 Pasha Tatashin <[email protected]> wrote:

> From: Pasha Tatashin <[email protected]>
>
> Page table check when detects errors panics the kernel. Let instead,
> print a warning, and panic only when specifically requested via kernel
> parameter:
>
> page_table_check=panic
>
> The discussion about using panic vs. warn is here:
> https://lore.kernel.org/linux-mm/[email protected]

The changelog doesn't actually describe the reason for making this
change. Somebody obviously wants pagetable check errors to no longer
panic the kernel, but why?? (The same can be said of the [2/3]
changelog).

Also, should we be changing the default? People who like the panic
will get a big surprise when they find out that they should have added
a kernel parameter to get the old behaviour back. It would be less
disruptive to default to panic unless page_table_check=warn was added.

If there's a solid reason for changing the default, it should be
changelogged. And if that reason is generally agreed to, perhaps the
kernel should print a warning at boot if neither page_table_check=panic
nor page_table_check=warn were provided. To tell people that the
default has been changed.


2022-09-20 18:28:15

by Pasha Tatashin

[permalink] [raw]
Subject: Re: [PATCH 0/3] page table check default to warn instead of panic

On Mon, Sep 12, 2022 at 4:23 PM Andrew Morton <[email protected]> wrote:
>
> On Sun, 11 Sep 2022 09:59:20 +0000 Pasha Tatashin <[email protected]> wrote:
>
> > From: Pasha Tatashin <[email protected]>
> >
> > Page table check when detects errors panics the kernel. Let instead,
> > print a warning, and panic only when specifically requested via kernel
> > parameter:
> >
> > page_table_check=panic
> >
> > The discussion about using panic vs. warn is here:
> > https://lore.kernel.org/linux-mm/[email protected]
>
> The changelog doesn't actually describe the reason for making this
> change. Somebody obviously wants pagetable check errors to no longer
> panic the kernel, but why?? (The same can be said of the [2/3]
> changelog).

This came from the discussion listed above. There seems to be a
consensus that we should reduce the number of BUG_ON() in the kernel,
and replace them with WARN_ON_ONCE() when possible to recover. In the
case of page_table_check we can recover, but for some it may be unsafe
because of security implications. Therefore, I would like to keep an
option of being able to panic only because of page table check errors,
but not keeping it enabled by default.

I will add more info to the commit message.

>
> Also, should we be changing the default? People who like the panic
> will get a big surprise when they find out that they should have added
> a kernel parameter to get the old behaviour back. It would be less
> disruptive to default to panic unless page_table_check=warn was added.

I was thinking about this as well. I decided to change the default:
the old users will still get a warning, but going forward we will be
inline with the rest of the kernel: warn on by default, and optionally
panic.

>
> If there's a solid reason for changing the default, it should be
> changelogged. And if that reason is generally agreed to, perhaps the
> kernel should print a warning at boot if neither page_table_check=panic
> nor page_table_check=warn were provided. To tell people that the
> default has been changed.

I am not sure that is needed, and when do we remove that extra boot
message? This is a relatively new feature, and existing users would
still get an ugly warning about incorrect page table mappings.

Thank you,
Pasha

>
>

2022-09-26 02:10:17

by kernel test robot

[permalink] [raw]
Subject: [mm/page_table_check] 6e807506f4: WARNING:at_mm/page_table_check.c:#page_table_check_set

Greeting,

FYI, we noticed the following commit (built with gcc-11):

commit: 6e807506f443c197c1ec2e0037af36f6abc19ca3 ("[PATCH 2/3] mm/page_table_check: Do WARN_ON instead of BUG_ON by default")
url: https://github.com/intel-lab-lkp/linux/commits/Pasha-Tatashin/page-table-check-default-to-warn-instead-of-panic/20220911-180036
base: git://git.lwn.net/linux-2.6 docs-next
patch link: https://lore.kernel.org/linux-mm/[email protected]

in testcase: boot

on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):


[ 32.343753][ T45] ------------[ cut here ]------------
[ 32.344340][ T45] WARNING: CPU: 0 PID: 45 at mm/page_table_check.c:130 page_table_check_set+0x4eb/0x5c0
[ 32.345851][ T45] Modules linked in:
[ 32.346542][ T45] CPU: 0 PID: 45 Comm: kworker/u2:1 Tainted: G T 6.0.0-rc1-00038-g6e807506f443 #1
[ 32.347656][ T45] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-4 04/01/2014
[ 32.349131][ T45] RIP: page_table_check_set+0x4eb/0x5c0
[ 32.349812][ T45] Code: ff ff e8 38 a7 c0 ff 4c 8d 63 ff e9 35 fc ff ff e8 2a a7 c0 ff 31 ff 44 89 ee e8 a0 a1 c0 ff 45 84 ed 75 58 e8 16 a7 c0 ff 90 <0f> 0b 90 48 c7 c7 80 4b 89 84 e8 46 ed e3 ff e9 7f fb ff ff e8 fc
All code
========
0: ff (bad)
1: ff (bad)
2: e8 38 a7 c0 ff callq 0xffffffffffc0a73f
7: 4c 8d 63 ff lea -0x1(%rbx),%r12
b: e9 35 fc ff ff jmpq 0xfffffffffffffc45
10: e8 2a a7 c0 ff callq 0xffffffffffc0a73f
15: 31 ff xor %edi,%edi
17: 44 89 ee mov %r13d,%esi
1a: e8 a0 a1 c0 ff callq 0xffffffffffc0a1bf
1f: 45 84 ed test %r13b,%r13b
22: 75 58 jne 0x7c
24: e8 16 a7 c0 ff callq 0xffffffffffc0a73f
29: 90 nop
2a:* 0f 0b ud2 <-- trapping instruction
2c: 90 nop
2d: 48 c7 c7 80 4b 89 84 mov $0xffffffff84894b80,%rdi
34: e8 46 ed e3 ff callq 0xffffffffffe3ed7f
39: e9 7f fb ff ff jmpq 0xfffffffffffffbbd
3e: e8 .byte 0xe8
3f: fc cld

Code starting with the faulting instruction
===========================================
0: 0f 0b ud2
2: 90 nop
3: 48 c7 c7 80 4b 89 84 mov $0xffffffff84894b80,%rdi
a: e8 46 ed e3 ff callq 0xffffffffffe3ed55
f: e9 7f fb ff ff jmpq 0xfffffffffffffb93
14: e8 .byte 0xe8
15: fc cld
[ 32.351854][ T45] RSP: 0000:ffff888151d2f868 EFLAGS: 00010293
[ 32.352741][ T45] RAX: 0000000000000000 RBX: 00000000003ad53c RCX: 0000000000000000
[ 32.353600][ T45] RDX: ffff888151821800 RSI: ffffffff81912dea RDI: 0000000000000003
[ 32.354622][ T45] RBP: ffff888151d2f8a8 R08: ffff888151d2f850 R09: ffff888151822894
[ 32.355421][ T45] R10: 0000000000000003 R11: 0000000000000001 R12: 0000000000000001
[ 32.356513][ T45] R13: 0000000000000000 R14: 000000000000512b R15: 0000000000000001
[ 32.357677][ T45] FS: 0000000000000000(0000) GS:ffffffff8432c000(0000) knlGS:0000000000000000
[ 32.358950][ T45] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 32.359618][ T45] CR2: ffff88843ffff000 CR3: 000000000423a000 CR4: 00000000000406f0
[ 32.360505][ T45] Call Trace:
[ 32.360866][ T45] <TASK>
[ 32.361477][ T45] __page_table_check_pte_set (??:?)
[ 32.362463][ T45] ? __page_table_check_pte_clear (??:?)
[ 32.363101][ T45] ? lru_cache_add (??:?)
[ 32.363589][ T45] do_anonymous_page (memory.c:?)
[ 32.364135][ T45] handle_pte_fault (memory.c:?)
[ 32.365057][ T45] ? write_comp_data (kcov.c:?)
[ 32.365971][ T45] __handle_mm_fault (memory.c:?)
[ 32.366860][ T45] ? __pmd_alloc (memory.c:?)
[ 32.367567][ T45] ? write_comp_data (kcov.c:?)
[ 32.368308][ T45] ? __raw_spin_unlock_irqrestore (spinlock.c:?)
[ 32.369018][ T45] ? _raw_spin_unlock_irqrestore (??:?)
[ 32.369624][ T45] ? write_comp_data (kcov.c:?)
[ 32.370276][ T45] handle_mm_fault (??:?)
[ 32.371123][ T45] ? write_comp_data (kcov.c:?)
[ 32.371799][ T45] faultin_page (gup.c:?)
[ 32.372591][ T45] __get_user_pages (gup.c:?)
[ 32.373397][ T45] ? get_gate_page (gup.c:?)
[ 32.374221][ T45] ? lock_acquire (??:?)
[ 32.375194][ T45] ? __bprm_mm_init (exec.c:?)
[ 32.376070][ T45] ? write_comp_data (kcov.c:?)
[ 32.377012][ T45] __get_user_pages_remote (gup.c:?)
[ 32.378062][ T45] ? ikconfig_read_current (configs.c:?)
[ 32.379058][ T45] get_user_pages_remote (??:?)
[ 32.379934][ T45] get_arg_page (exec.c:?)
[ 32.380468][ T45] ? count_strings_kernel+0x1c0/0x1c0
[ 32.381190][ T45] ? write_comp_data (kcov.c:?)
[ 32.382005][ T45] copy_string_kernel (??:?)
[ 32.382701][ T45] ? count_strings_kernel+0x15f/0x1c0
[ 32.383642][ T45] kernel_execve (??:?)
[ 32.384391][ T45] call_usermodehelper_exec_async (umh.c:?)
[ 32.385463][ T45] ? calculate_sigpending (??:?)
[ 32.386410][ T45] ? umh_complete (umh.c:?)
[ 32.387225][ T45] ret_from_fork (??:?)
[ 32.388078][ T45] </TASK>
[ 32.388637][ T45] irq event stamp: 521
[ 32.389376][ T45] hardirqs last enabled at (529): __up_console_sem (printk.c:?)
[ 32.391184][ T45] hardirqs last disabled at (550): __up_console_sem (printk.c:?)
[ 32.392684][ T45] softirqs last enabled at (548): __do_softirq (??:?)
[ 32.393662][ T45] softirqs last disabled at (537): __irq_exit_rcu (softirq.c:?)
[ 32.394619][ T45] ---[ end trace 0000000000000000 ]---


If you fix the issue, kindly add following tag
| Reported-by: kernel test robot <[email protected]>
| Link: https://lore.kernel.org/r/[email protected]


To reproduce:

# build kernel
cd linux
cp config-6.0.0-rc1-00038-g6e807506f443 .config
make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 olddefconfig prepare modules_prepare bzImage modules
make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 INSTALL_MOD_PATH=<mod-install-dir> modules_install
cd <mod-install-dir>
find lib/ | cpio -o -H newc --quiet | gzip > modules.cgz


git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
bin/lkp qemu -k <bzImage> -m modules.cgz job-script # job-script is attached in this email

# if come across any failure that blocks the test,
# please remove ~/.lkp and /lkp dir to run from a clean state.


--
0-DAY CI Kernel Test Service
https://01.org/lkp


Attachments:
(No filename) (6.56 kB)
config-6.0.0-rc1-00038-g6e807506f443 (141.93 kB)
job-script (4.98 kB)
dmesg.xz (30.92 kB)
Download all attachments

2022-09-26 08:53:45

by David Hildenbrand

[permalink] [raw]
Subject: Re: [PATCH 1/3] mm/page_table_check: Check writable zero page in page table check

On 11.09.22 11:59, Pasha Tatashin wrote:
> From: Rick Edgecombe <[email protected]>
>
> The zero page should remain all zero, so that it can be mapped as
> read-only for read faults of memory that should be zeroed. If it is ever
> mapped writable to userspace, it could become non-zero and so other apps
> would unexpectedly get non-zero data. So the zero page should never be
> mapped writable to userspace. Check for this condition in
> page_table_check_set().
>
> Signed-off-by: Rick Edgecombe <[email protected]>
> Signed-off-by: Pasha Tatashin <[email protected]>
> ---
> mm/page_table_check.c | 2 ++
> 1 file changed, 2 insertions(+)
>
> diff --git a/mm/page_table_check.c b/mm/page_table_check.c
> index e2062748791a..665ece0d55d4 100644
> --- a/mm/page_table_check.c
> +++ b/mm/page_table_check.c
> @@ -102,6 +102,8 @@ static void page_table_check_set(struct mm_struct *mm, unsigned long addr,
> if (!pfn_valid(pfn))
> return;
>
> + BUG_ON(is_zero_pfn(pfn) && rw);

We most probably don't want that:

https://lkml.kernel.org/r/[email protected]

--
Thanks,

David / dhildenb

2022-09-26 08:54:01

by David Hildenbrand

[permalink] [raw]
Subject: Re: [PATCH 2/3] mm/page_table_check: Do WARN_ON instead of BUG_ON by default

On 11.09.22 18:08, Matthew Wilcox wrote:
> On Sun, Sep 11, 2022 at 09:59:22AM +0000, Pasha Tatashin wrote:
>> Currently, page_table_check when detects errors panics kernel. Instead,
>> print a warning, and panic only when specifically requested via kernel
>> parameter:
>>
>> page_table_check=panic
>
> Why are the page table checks so special that they deserve their own
> command line parameter? Why shouldn't this be controlled by the usual
> panic_on_warn option?
>

I agree.

https://lkml.kernel.org/r/[email protected]

Use of WARN_ON_ONCE is the way to go nowadays.

--
Thanks,

David / dhildenb