The pv_op functions are patched from indirect to direct calls at runtime
using the alternative.c infrastructure.
alt_replace_call() patches an indirect branch in the form
call *disp(%rip)
to a direct branch.
For the paravirtual functions, this indirect branch is generated by
inline assembly that uses the PARAVIRT_CALL macro:
"call *%[paravirt_opptr];"
However, it is not actually enforced that RIP relative addressing must
be used.
The toolchain may instead produce an indirect branch in the form:
mov address, %reg
call *offset(%reg)
This has been observed at lower levels of compiler optimisation but
there is nothing preventing it from occurring generally.
This triggers a BUG in alt_replace_call():
[ 114.919130] SMP alternatives: ALT_FLAG_DIRECT_CALL set for unrecognized indirect call
[ 114.919137] ------------[ cut here ]------------
[ 114.919138] kernel BUG at arch/x86/kernel/alternative.c:429!
[ 114.919144] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
[ 114.919148] CPU: 13 PID: 14617 Comm: insmod Tainted: G OE 6.8.0-rc7 #16
[ 114.919151] Hardware name: Supermicro SYS-1019GP-TT-06-NC24B/X11SPG-TF, BIOS 3.3 02/21/2020
[ 114.919153] RIP: 0010:apply_alternatives+0x447/0x450
[ 114.919160] Code: 18 90 02 e9 7b fe ff ff 4c 89 f8 e9 65 ff ff ff 48 c7 c7 10 4f 90 8e e8 17 52 16 00 0f 0b 48 c7 c7 68 4f 90 8e e8 09 52 16 00 <0f> 0b e8 a2 0b fa 00 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90
[ 114.919163] RSP: 0018:ffffa9714d8fb9f8 EFLAGS: 00010246
[ 114.919166] RAX: 0000000000000049 RBX: ffffffffc2674690 RCX: 0000000000000000
[ 114.919168] RDX: 0000000000000000 RSI: ffff97ed101618c0 RDI: ffff97ed101618c0
[ 114.919170] RBP: 0000000000000006 R08: 0000000000000000 R09: ffffa9714d8fb8a8
[ 114.919171] R10: 0000000000000003 R11: ffffffff8ed46848 R12: ffffa9714d8fba0a
[ 114.919173] R13: ffffffffc267916e R14: ffffffffc21be935 R15: ffffffffc1fc750a
[ 114.919174] FS: 00007f9417d21000(0000) GS:ffff97ed10140000(0000) knlGS:0000000000000000
[ 114.919177] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 114.919179] CR2: 000056030cd5e520 CR3: 000000106587a006 CR4: 00000000007706f0
[ 114.919181] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 114.919182] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 114.919183] PKRU: 55555554
[ 114.919185] Call Trace:
[ 114.919187] <TASK>
[ 114.919189] ? die+0x36/0x90
[ 114.919196] ? do_trap+0xda/0x100
[ 114.919200] ? apply_alternatives+0x447/0x450
[ 114.919203] ? do_error_trap+0x6a/0x90
[ 114.919206] ? apply_alternatives+0x447/0x450
[ 114.919208] ? exc_invalid_op+0x50/0x70
[ 114.919214] ? apply_alternatives+0x447/0x450
[ 114.919216] ? asm_exc_invalid_op+0x1a/0x20
[ 114.919224] ? apply_alternatives+0x447/0x450
[ 114.919226] ? apply_alternatives+0x447/0x450
[ 114.919228] ? insn_get_opcode+0x36/0x1d0
[ 114.919232] ? insn_get_modrm+0x71/0x110
[ 114.919235] ? text_poke_early+0x6a/0xa0
[ 114.919238] ? apply_returns+0xcd/0x330
[ 114.919240] ? pcpu_block_update_hint_alloc+0xd5/0x300
[ 114.919247] module_finalize+0x23e/0x3b0
[ 114.919253] ? add_kallsyms+0x295/0x310
[ 114.919259] load_module+0x1af9/0x2180
[ 114.919264] ? ima_post_read_file+0xd0/0xe0
[ 114.919270] ? init_module_from_file+0x86/0xc0
[ 114.919273] init_module_from_file+0x86/0xc0
[ 114.919278] idempotent_init_module+0x109/0x2a0
[ 114.919282] __x64_sys_finit_module+0x5e/0xb0
[ 114.919285] do_syscall_64+0x86/0x170
[ 114.919288] ? do_syscall_64+0x96/0x170
[ 114.919290] ? do_user_addr_fault+0x304/0x670
[ 114.919296] ? exc_page_fault+0x7f/0x180
[ 114.919300] entry_SYSCALL_64_after_hwframe+0x6e/0x76
[ 114.919304] RIP: 0033:0x7f941751e88d
[ 114.919307] Code: 5b 41 5c c3 66 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 b5 0f 00 f7 d8 64 89 01 48
[ 114.919309] RSP: 002b:00007ffc77449ed8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[ 114.919312] RAX: ffffffffffffffda RBX: 000056030eb0d7c0 RCX: 00007f941751e88d
[ 114.919314] RDX: 0000000000000000 RSI: 000056030cd69cd2 RDI: 0000000000000003
[ 114.919315] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
[ 114.919317] R10: 0000000000000003 R11: 0000000000000246 R12: 000056030cd69cd2
[ 114.919318] R13: 000056030eb0d750 R14: 000056030cd68888 R15: 000056030eb0d8d0
[ 114.919321] </TASK>
[ 114.919322] Modules linked in: nvidia(OE) rfkill tls intel_rapl_msr intel_rapl_common intel_uncore_frequency intel_uncore_frequency_common isst_if_common skx_edac nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel binfmt_misc kvm vfat fat irqbypass ipmi_ssif rapl intel_cstate spi_nor mei_me joydev intel_uncore mei mtd acpi_ipmi ioatdma ipmi_si intel_pch_thermal ipmi_devintf ipmi_msghandler acpi_power_meter acpi_pad nfsd squashfs loop auth_rpcgss nfs_acl lockd grace dm_multipath scsi_dh_rdac scsi_dh_emc scsi_dh_alua sunrpc parport_pc ppdev lp fuse parport ip_tables raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 crct10dif_pclmul crc32_pclmul crc32c_intel polyval_clmulni polyval_generic ixgbe ghash_clmulni_intel sha512_ssse3 sha256_ssse3 ast sha1_ssse3 spi_intel_pci i2c_i801 dca i2c_algo_bit spi_intel lpc_ich i2c_smbus mdio wmi
[ 114.919378] ---[ end trace 0000000000000000 ]---
Before commit 60bc276b129e ("x86/paravirt: Switch mixed
paravirt/alternative calls to alternatives"), the paravirt patching did
not look at the indirect call to generate the direct call target. The
target was determined by the type in the .parainstructions section
entry. This meant the form of indirect call used did not matter.
Force PARAVIRT_CALL to generate a RIP relative call on x86_64 instead of
leaving the choice to the toolchain.
The inline assembly invocation is based on rip_rel_ptr() in asm.h.
Fixes: 60bc276b129e ("x86/paravirt: Switch mixed paravirt/alternative calls to alternatives")
Signed-off-by: Jordan Niethe <[email protected]>
---
arch/x86/include/asm/paravirt_types.h | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 8d4fbe1be489..a960ed301c6a 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -242,15 +242,28 @@ struct paravirt_patch_template {
extern struct pv_info pv_info;
extern struct paravirt_patch_template pv_ops;
-#define paravirt_ptr(op) [paravirt_opptr] "m" (pv_ops.op)
int paravirt_disable_iospace(void);
/* This generates an indirect call based on the operation type number. */
+#ifdef CONFIG_X86_64
+
+#define paravirt_ptr(op) [paravirt_opptr] "i" (&pv_ops.op)
+
+#define PARAVIRT_CALL \
+ ANNOTATE_RETPOLINE_SAFE \
+ "call *%c[paravirt_opptr](%%rip);"
+
+#else /* CONFIG_X86_32 */
+
+#define paravirt_ptr(op) [paravirt_opptr] "m" (pv_ops.op)
+
#define PARAVIRT_CALL \
ANNOTATE_RETPOLINE_SAFE \
"call *%[paravirt_opptr];"
+#endif /* CONFIG_X86_64 */
+
/*
* These macros are intended to wrap calls through one of the paravirt
* ops structs, so that they can be later identified and patched at
--
2.34.1
On Fri, May 17, 2024 at 12:42:24PM +1000, Jordan Niethe wrote:
> This has been observed at lower levels of compiler optimisation but
> there is nothing preventing it from occurring generally.
How can I reproduce this?
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
Borislav Petkov wrote:
> On Fri, May 17, 2024 at 12:42:24PM +1000, Jordan Niethe wrote:
>> This has been observed at lower levels of compiler optimisation but
>> there is nothing preventing it from occurring generally.
>
> How can I reproduce this?
>
> Thx.
>
Hi,
I ran into this while building a module that used paravirtual operations with -O1.
I'll try and get a minimal reproducing example.
Thanks,
Jordan.
On Mon, May 20, 2024 at 12:08:08PM +1000, Jordan Niethe wrote:
> I'll try and get a minimal reproducing example.
No need. I think we'll hold off on doing anything here until one can
trigger this with a valid kernel config. AFAICT, we build either with
-O2 or -Os...
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette