Greeting,
FYI, we noticed WARNING:at_arch/x86/mm/pat/set_memory.c:#__change_page_attr due to commit (built with gcc-11):
commit: b38994948567e6d6b62947401c57f4ab2efe070c ("x86/mm: Implement native set_memory_rox()")
https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git x86/mm
[test failed on linux-next/master 0cdb3579f1ee4c1e55acf8dfb0697b660067b1f8]
in testcase: boot
on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G
caused the changes below (please refer to the attached dmesg/kmsg for the entire log/backtrace):
If you fix the issue, kindly add the following tags:
| Reported-by: kernel test robot <[email protected]>
| Link: https://lore.kernel.org/oe-lkp/[email protected]
[ 44.943065][ T11] ------------[ cut here ]------------
[ 44.943725][ T11] CPA detected W^X violation: 0000000000000060 -> 0000000000000063 range: 0xffff8881beca5000 - 0xffff8881beca5fff PFN 1beca5
[ 44.944929][ T11] WARNING: CPU: 0 PID: 11 at arch/x86/mm/pat/set_memory.c:609 __change_page_attr (arch/x86/mm/pat/set_memory.c:609 arch/x86/mm/pat/set_memory.c:1582)
[ 44.945824][ T11] Modules linked in:
[ 44.946229][ T11] CPU: 0 PID: 11 Comm: kworker/0:1 Tainted: G W 6.1.0-rc3-00010-gb38994948567 #1 f37474c2082f37dd433f70907b94c2b0df8d70b8
[ 44.947518][ T11] Workqueue: events bpf_prog_free_deferred
[ 44.948074][ T11] RIP: 0010:__change_page_attr (arch/x86/mm/pat/set_memory.c:609 arch/x86/mm/pat/set_memory.c:1582)
[ 44.948593][ T11] Code: f0 f6 83 06 01 90 48 c7 c7 c0 69 e7 84 4d 89 f9 4c 89 e9 4c 89 da 4d 8d 85 ff 0f 00 00 4c 89 f6 4c 89 1c 24 e8 7d 2d 4c 03 90 <0f> 0b 90 90 4c 8b 54 24 08 4c 8b 1c 24 4c 89 d8 4c 89 ff 4c 89 14
All code
========
0: f0 f6 83 06 01 90 48 lock testb $0xc7,0x48900106(%rbx)
7: c7
8: c7 c0 69 e7 84 4d mov $0x4d84e769,%eax
e: 89 f9 mov %edi,%ecx
10: 4c 89 e9 mov %r13,%rcx
13: 4c 89 da mov %r11,%rdx
16: 4d 8d 85 ff 0f 00 00 lea 0xfff(%r13),%r8
1d: 4c 89 f6 mov %r14,%rsi
20: 4c 89 1c 24 mov %r11,(%rsp)
24: e8 7d 2d 4c 03 callq 0x34c2da6
29: 90 nop
2a:* 0f 0b ud2 <-- trapping instruction
2c: 90 nop
2d: 90 nop
2e: 4c 8b 54 24 08 mov 0x8(%rsp),%r10
33: 4c 8b 1c 24 mov (%rsp),%r11
37: 4c 89 d8 mov %r11,%rax
3a: 4c 89 ff mov %r15,%rdi
3d: 4c rex.WR
3e: 89 .byte 0x89
3f: 14 .byte 0x14
Code starting with the faulting instruction
===========================================
0: 0f 0b ud2
2: 90 nop
3: 90 nop
4: 4c 8b 54 24 08 mov 0x8(%rsp),%r10
9: 4c 8b 1c 24 mov (%rsp),%r11
d: 4c 89 d8 mov %r11,%rax
10: 4c 89 ff mov %r15,%rdi
13: 4c rex.WR
14: 89 .byte 0x89
15: 14 .byte 0x14
[ 44.950258][ T11] RSP: 0018:ffffc900000bfa70 EFLAGS: 00010246
[ 44.950884][ T11] RAX: 0000000000000000 RBX: ffffffff86777910 RCX: 0000000000000000
[ 44.951640][ T11] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[ 44.952326][ T11] RBP: ffff888173dfc528 R08: 0000000000000000 R09: 0000000000000000
[ 44.953009][ T11] R10: 0000000000000000 R11: 0000000000000000 R12: ffffc900000bfba8
[ 44.953698][ T11] R13: ffff8881beca5000 R14: 0000000000000060 R15: 00000000001beca5
[ 44.954458][ T11] FS: 0000000000000000(0000) GS:ffffffff86732000(0000) knlGS:0000000000000000
[ 44.955274][ T11] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 44.955881][ T11] CR2: 00007f13ac3cd320 CR3: 000000000668a000 CR4: 00000000000406f0
[ 44.956577][ T11] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 44.957276][ T11] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 44.958018][ T11] Call Trace:
[ 44.958336][ T11] <TASK>
[ 44.958624][ T11] ? __should_split_large_page (arch/x86/mm/pat/set_memory.c:1552)
[ 44.959205][ T11] __change_page_attr_set_clr (include/linux/spinlock.h:390 arch/x86/mm/pat/set_memory.c:1706)
[ 44.959756][ T11] set_direct_map_default_noflush (arch/x86/mm/pat/set_memory.c:2307)
[ 44.960293][ T11] ? set_direct_map_invalid_noflush (arch/x86/mm/pat/set_memory.c:2307)
[ 44.960895][ T11] ? __mutex_unlock_slowpath (arch/x86/include/asm/atomic64_64.h:190 include/linux/atomic/atomic-long.h:449 include/linux/atomic/atomic-instrumented.h:1790 kernel/locking/mutex.c:924)
[ 44.961425][ T11] ? _vm_unmap_aliases (mm/vmalloc.c:2173 (discriminator 2))
[ 44.961896][ T11] __vunmap (mm/vmalloc.c:2620 mm/vmalloc.c:2673 mm/vmalloc.c:2699)
[ 44.962307][ T11] bpf_prog_pack_free (kernel/bpf/core.c:944)
[ 44.962764][ T11] bpf_jit_binary_pack_free (kernel/bpf/core.c:1137)
[ 44.963316][ T11] bpf_jit_free (include/linux/filter.h:1044 arch/x86/net/bpf_jit_comp.c:2579)
[ 44.963754][ T11] process_one_work (arch/x86/include/asm/atomic.h:29 include/linux/jump_label.h:259 include/linux/jump_label.h:269 include/trace/events/workqueue.h:108 kernel/workqueue.c:2294)
[ 44.964275][ T11] ? lock_release (kernel/locking/lockdep.c:5636)
[ 44.964742][ T11] ? cancel_delayed_work_sync (kernel/workqueue.c:2184)
[ 44.965238][ T11] ? io_schedule_timeout (kernel/sched/core.c:6385)
[ 44.965754][ T11] worker_thread (include/linux/list.h:292 kernel/workqueue.c:2437)
[ 44.966200][ T11] ? __kthread_parkme (arch/x86/include/asm/bitops.h:207 (discriminator 4) arch/x86/include/asm/bitops.h:239 (discriminator 4) include/asm-generic/bitops/instrumented-non-atomic.h:142 (discriminator 4) kernel/kthread.c:270 (discriminator 4))
[ 44.966630][ T11] ? schedule (arch/x86/include/asm/bitops.h:207 (discriminator 1) arch/x86/include/asm/bitops.h:239 (discriminator 1) include/asm-generic/bitops/instrumented-non-atomic.h:142 (discriminator 1) include/linux/thread_info.h:118 (discriminator 1) include/linux/sched.h:2229 (discriminator 1) kernel/sched/core.c:6581 (discriminator 1))
[ 44.967065][ T11] ? rescuer_thread (kernel/workqueue.c:2379)
[ 44.967543][ T11] ? rescuer_thread (kernel/workqueue.c:2379)
[ 44.968008][ T11] kthread (kernel/kthread.c:376)
[ 44.968392][ T11] ? kthread_complete_and_exit (kernel/kthread.c:331)
[ 44.968931][ T11] ret_from_fork (arch/x86/entry/entry_64.S:312)
[ 44.969374][ T11] </TASK>
[ 44.969653][ T11] irq event stamp: 38969
[ 44.970025][ T11] hardirqs last enabled at (38977): __up_console_sem (kernel/printk/printk.c:261 (discriminator 1))
[ 44.970883][ T11] hardirqs last disabled at (38986): __up_console_sem (kernel/printk/printk.c:259 (discriminator 1))
[ 44.971760][ T11] softirqs last enabled at (38298): rhashtable_rehash_chain (include/linux/instrumented.h:87 include/asm-generic/bitops/instrumented-lock.h:26 include/linux/bit_spinlock.h:63 include/linux/rhashtable.h:347 lib/rhashtable.c:290)
[ 44.972677][ T11] softirqs last disabled at (38296): rhashtable_rehash_chain (include/linux/bottom_half.h:20 include/linux/rhashtable.h:329 lib/rhashtable.c:283)
[ 44.973603][ T11] ---[ end trace 0000000000000000 ]---
[ 45.079703][ T194] LKP: stdout: 177: /lkp/lkp/src/bin/run-lkp /lkp/jobs/scheduled/vm-meta-182/boot-1-quantal-x86_64-core-20190426.cgz-b38994948567e6d6b62947401c57f4ab2efe070c-20221105-26780-hvpjtf-0.yaml
[ 45.079745][ T194]
[ 45.472417][ T280] udevd[280]: starting version 175
[ 45.863895][ T194] RESULT_ROOT=/result/boot/1/vm-snb/quantal-x86_64-core-20190426.cgz/x86_64-randconfig-a001-20220418/gcc-11/b38994948567e6d6b62947401c57f4ab2efe070c/8
[ 45.863939][ T194]
[ 45.898263][ T194] job=/lkp/jobs/scheduled/vm-meta-182/boot-1-quantal-x86_64-core-20190426.cgz-b38994948567e6d6b62947401c57f4ab2efe070c-20221105-26780-hvpjtf-0.yaml
[ 45.898304][ T194]
[ 46.250201][ T194] result_service: raw_upload, RESULT_MNT: /internal-lkp-server/result, RESULT_ROOT: /internal-lkp-server/result/boot/1/vm-snb/quantal-x86_64-core-20190426.cgz/x86_64-randconfig-a001-20220418/gcc-11/b38994948567e6d6b62947401c57f4ab2efe070c/8, TMP_RESULT_ROOT: /tmp/lkp/result
[ 46.250247][ T194]
[ 46.288136][ T194] run-job /lkp/jobs/scheduled/vm-meta-182/boot-1-quantal-x86_64-core-20190426.cgz-b38994948567e6d6b62947401c57f4ab2efe070c-20221105-26780-hvpjtf-0.yaml
[ 46.288179][ T194]
[ 47.618739][ T194] /usr/bin/wget -q --timeout=1800 --tries=1 --local-encoding=UTF-8 http://internal-lkp-server:80/~lkp/cgi-bin/lkp-jobfile-append-var?job_file=/lkp/jobs/scheduled/vm-meta-182/boot-1-quantal-x86_64-core-20190426.cgz-b38994948567e6d6b62947401c57f4ab2efe070c-20221105-26780-hvpjtf-0.yaml&job_state=running -O /dev/null
[ 47.618784][ T194]
[ 49.292432][ T381] calling parport_default_proc_register+0x0/0x1000 [parport] @ 381
[ 49.327753][ T381] initcall parport_default_proc_register+0x0/0x1000 [parport] returned 0 after 34505 usecs
[ 49.440207][ T381] calling parport_pc_init+0x0/0xac3 [parport_pc] @ 381
[ 49.487815][ T381] parport_pc 00:03: reported by Plug and Play ACPI
[ 49.502288][ T381] parport0: PC-style at 0x378, irq 7 [PCSPP(,...)]
[ 49.634328][ T381] initcall parport_pc_init+0x0/0xac3 [parport_pc] returned 0 after 193370 usecs
[ 49.660424][ T381] modprobe (381) used greatest stack depth: 27184 bytes left
[ 54.131749][ T1] init: failsafe main process (411) killed by TERM signal
[ 55.645580][ T442] calling ppdev_init+0x0/0x1000 [ppdev] @ 442
[ 55.671570][ T442] ppdev: user-space parallel port driver
[ 55.672172][ T442] initcall ppdev_init+0x0/0x1000 [ppdev] returned 0 after 25950 usecs
[ 56.978205][ T1] init: udev-fallback-graphics main process (456) terminated with status 1
[ 57.553215][ T1] init: networking main process (466) terminated with status 1
[ 59.730271][ T194] target ucode:
[ 59.730321][ T194]
[ 66.544620][ T1] init: tty4 main process (450) terminated with status 1
[ 66.546184][ T1] init: tty4 main process ended, respawning
[ 66.558350][ T194] sleep started
[ 66.558394][ T194]
[ 66.610521][ T1] init: tty5 main process (451) terminated with status 1
[ 66.612127][ T1] init: tty5 main process ended, respawning
[ 66.670353][ T1] init: tty2 main process (452) terminated with status 1
[ 66.671980][ T1] init: tty2 main process ended, respawning
[ 66.728761][ T1] init: tty3 main process (453) terminated with status 1
[ 66.730293][ T1] init: tty3 main process ended, respawning
[ 66.804600][ T1] init: tty6 main process (454) terminated with status 1
[ 66.806176][ T1] init: tty6 main process ended, respawning
[ 67.833563][ T194] /usr/bin/wget -q --timeout=1800 --tries=1 --local-encoding=UTF-8 http://internal-lkp-server:80/~lkp/cgi-bin/lkp-jobfile-append-var?job_file=/lkp/jobs/scheduled/vm-meta-182/boot-1-quantal-x86_64-core-20190426.cgz-b38994948567e6d6b62947401c57f4ab2efe070c-20221105-26780-hvpjtf-0.yaml&job_state=post_run -O /dev/null
[ 67.833607][ T194]
[ 76.667606][ T1] init: tty4 main process (583) terminated with status 1
[ 76.669116][ T1] init: tty4 main process ended, respawning
[ 76.725449][ T1] init: tty5 main process (584) terminated with status 1
[ 76.726898][ T1] init: tty5 main process ended, respawning
[ 76.777663][ T1] init: tty2 main process (585) terminated with status 1
[ 76.779234][ T1] init: tty2 main process ended, respawning
[ 76.821151][ T1] init: tty3 main process (586) terminated with status 1
[ 76.822692][ T1] init: tty3 main process ended, respawning
[ 76.891180][ T1] init: tty6 main process (587) terminated with status 1
[ 76.892742][ T1] init: tty6 main process ended, respawning
[ 80.967918][ T194] kill 506 vmstat -n 10
[ 80.967963][ T194]
[ 81.112072][ T194] kill 504 cat /proc/kmsg
[ 81.112113][ T194]
[ 81.260272][ T194] wait for background processes: 517 513 oom-killer meminfo
[ 81.260319][ T194]
[ 86.754428][ T1] init: tty4 main process (596) terminated with status 1
[ 86.755998][ T1] init: tty4 main process ended, respawning
[ 86.788212][ T1] init: tty5 main process (597) terminated with status 1
[ 86.789794][ T1] init: tty5 main process ended, respawning
[ 86.874442][ T1] init: tty2 main process (598) terminated with status 1
[ 86.876058][ T1] init: tty2 main process ended, respawning
[ 86.921154][ T1] init: tty3 main process (599) terminated with status 1
[ 86.922776][ T1] init: tty3 main process ended, respawning
[ 86.975117][ T1] init: tty6 main process (600) terminated with status 1
[ 86.988314][ T1] init: tty6 main process ended, respawning
To reproduce:
# build kernel
cd linux
cp config-6.1.0-rc3-00010-gb38994948567 .config
make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 olddefconfig prepare modules_prepare bzImage modules
make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 INSTALL_MOD_PATH=<mod-install-dir> modules_install
cd <mod-install-dir>
find lib/ | cpio -o -H newc --quiet | gzip > modules.cgz
git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
bin/lkp qemu -k <bzImage> -m modules.cgz job-script # job-script is attached in this email
# if you come across any failure that blocks the test,
# please remove ~/.lkp and /lkp dir to run from a clean state.
--
0-DAY CI Kernel Test Service
https://01.org/lkp
On Sun, Nov 06, 2022 at 08:38:27PM +0800, kernel test robot wrote:
>
> Greeting,
>
> FYI, we noticed WARNING:at_arch/x86/mm/pat/set_memory.c:#__change_page_attr due to commit (built with gcc-11):
>
> commit: b38994948567e6d6b62947401c57f4ab2efe070c ("x86/mm: Implement native set_memory_rox()")
> https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git x86/mm
>
> [test failed on linux-next/master 0cdb3579f1ee4c1e55acf8dfb0697b660067b1f8]
>
> in testcase: boot
>
> on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G
>
> caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
>
>
> If you fix the issue, kindly add following tag
> | Reported-by: kernel test robot <[email protected]>
> | Link: https://lore.kernel.org/oe-lkp/[email protected]
>
>
> [ 44.943065][ T11] ------------[ cut here ]------------
> [ 44.943725][ T11] CPA detected W^X violation: 0000000000000060 -> 0000000000000063 range: 0xffff8881beca5000 - 0xffff8881beca5fff PFN 1beca5
> [ 44.944929][ T11] WARNING: CPU: 0 PID: 11 at arch/x86/mm/pat/set_memory.c:609 __change_page_attr (arch/x86/mm/pat/set_memory.c:609 arch/x86/mm/pat/set_memory.c:1582)
> [ 44.945824][ T11] Modules linked in:
> [ 44.946229][ T11] CPU: 0 PID: 11 Comm: kworker/0:1 Tainted: G W 6.1.0-rc3-00010-gb38994948567 #1 f37474c2082f37dd433f70907b94c2b0df8d70b8
> [ 44.947518][ T11] Workqueue: events bpf_prog_free_deferred
> [ 44.948074][ T11] RIP: 0010:__change_page_attr (arch/x86/mm/pat/set_memory.c:609 arch/x86/mm/pat/set_memory.c:1582)
Urgh, as spotted by dhansen, the code in change_page_attr_set_clr(),
specifically the checkalias thing, seems to rely on NX only ever being
flipped on its own, i.e. as a single-bit change.
Let me try to make sense of this stuff....
On Mon, Nov 07, 2022 at 04:12:20PM +0100, Peter Zijlstra wrote:
> On Sun, Nov 06, 2022 at 08:38:27PM +0800, kernel test robot wrote:
> >
> > Greeting,
> >
> > FYI, we noticed WARNING:at_arch/x86/mm/pat/set_memory.c:#__change_page_attr due to commit (built with gcc-11):
> >
> > commit: b38994948567e6d6b62947401c57f4ab2efe070c ("x86/mm: Implement native set_memory_rox()")
> > https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git x86/mm
> >
> > [test failed on linux-next/master 0cdb3579f1ee4c1e55acf8dfb0697b660067b1f8]
> >
> > in testcase: boot
> >
> > on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G
> >
> > caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
> >
> >
> > If you fix the issue, kindly add following tag
> > | Reported-by: kernel test robot <[email protected]>
> > | Link: https://lore.kernel.org/oe-lkp/[email protected]
> >
> >
> > [ 44.943065][ T11] ------------[ cut here ]------------
> > [ 44.943725][ T11] CPA detected W^X violation: 0000000000000060 -> 0000000000000063 range: 0xffff8881beca5000 - 0xffff8881beca5fff PFN 1beca5
> > [ 44.944929][ T11] WARNING: CPU: 0 PID: 11 at arch/x86/mm/pat/set_memory.c:609 __change_page_attr (arch/x86/mm/pat/set_memory.c:609 arch/x86/mm/pat/set_memory.c:1582)
> > [ 44.945824][ T11] Modules linked in:
> > [ 44.946229][ T11] CPU: 0 PID: 11 Comm: kworker/0:1 Tainted: G W 6.1.0-rc3-00010-gb38994948567 #1 f37474c2082f37dd433f70907b94c2b0df8d70b8
> > [ 44.947518][ T11] Workqueue: events bpf_prog_free_deferred
> > [ 44.948074][ T11] RIP: 0010:__change_page_attr (arch/x86/mm/pat/set_memory.c:609 arch/x86/mm/pat/set_memory.c:1582)
>
> Urgh, as spotted by dhansen, the code in change_page_attr_set_clr(),
> specifically the checkalias thing, seems to rely on single bit flips for
> NX.
>
> Let me try to make sense of this stuff....
This appears to appease the test case, but I definitely need to look at
this again with a fresh mind; this is horrid stuff and I'm sure I hate
this patch.
---
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index f275605892df..07339ac8bc41 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -69,10 +69,11 @@ static const int cpa_warn_level = CPA_PROTECT;
*/
static DEFINE_SPINLOCK(cpa_lock);
-#define CPA_FLUSHTLB 1
-#define CPA_ARRAY 2
-#define CPA_PAGES_ARRAY 4
-#define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */
+#define CPA_FLUSHTLB 0x01
+#define CPA_ARRAY 0x02
+#define CPA_PAGES_ARRAY 0x04
+#define CPA_NO_CHECK_ALIAS 0x08 /* Do not search for aliases */
+#define CPA_HAS_NX 0x10
static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm)
{
@@ -1708,9 +1709,21 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
goto out;
if (checkalias) {
- ret = cpa_process_alias(cpa);
- if (ret)
- goto out;
+ pgprot_t set = cpa->mask_set;
+ pgprot_t clr = cpa->mask_clr;
+
+ if (cpa->flags & CPA_HAS_NX) {
+ cpa->mask_set.pgprot &= ~_PAGE_NX;
+ cpa->mask_clr.pgprot &= ~_PAGE_NX;
+ }
+ if (pgprot_val(cpa->mask_set) | pgprot_val(cpa->mask_clr)) {
+ ret = cpa_process_alias(cpa);
+ if (ret)
+ goto out;
+ }
+
+ cpa->mask_set = set;
+ cpa->mask_clr = clr;
}
/*
@@ -1788,8 +1801,10 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
cpa.flags |= in_flag;
- /* No alias checking for _NX bit modifications */
- checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
+ if ((pgprot_val(mask_set) | pgprot_val(mask_clr)) & _PAGE_NX)
+ cpa.flags |= CPA_HAS_NX;
+
+ checkalias = 1;
/* Has caller explicitly disabled alias checking? */
if (in_flag & CPA_NO_CHECK_ALIAS)
checkalias = 0;