During activation of secondary CPUs, lapic_online is
invoked to initialize vectors. While lapic_online
installs legacy vectors on all CPUs, it does not set
the corresponding bits in per CPU bitmap maintained
under irq_matrix.
This may result in these legacy vectors getting allocated
by irq_matrix_alloc and if that happens subsequent invocation
of apic_update_vector will cause BUG like the one shown below:
[ 154.738226] kernel BUG at arch/x86/kernel/apic/vector.c:172!
[ 154.805956] invalid opcode: 0000 [#1] SMP PTI
[ 154.858092] CPU: 22 PID: 3569 Comm: ifup-eth Not tainted 5.8.0-20200716.x86_64 #1
[ 154.954939] Hardware name: Oracle Corporation ORACLE SERVER X6-2/ASM,MOTHERBOARD,1U
[ 155.073636] RIP: 0010:apic_update_vector+0xa7/0x190
[ 155.131996] Code: 01 00 4a 8b 14 ed 80 69 01 a6 48 89 c8 4a 8d 04 e0 48 8b 04 10 48
85 c0 0f 84 d2 00 00 00 48 3d 00 f0 ff ff 0f 87 c6 00 00 00 <0f> 0b 41 8b 46 10 48 0f
a3 05 6b 3e 7c 01 0f 92 c0 84 c0 0f 84 83
[ 155.356788] RSP: 0018:ffffb3848b417970 EFLAGS: 00010087
[ 155.419311] RAX: ffff9e9047c79000 RBX: 0000000000000000 RCX: 0000000000017040
[ 155.504719] RDX: ffff9e9fbf800000 RSI: 0000000000000182 RDI: ffff9e9fbe7936c0
[ 155.590127] RBP: ffffb3848b4179b0 R08: 0000000000000000 R09: 0004000000000000
[ 155.675533] R10: ffff000000000000 R11: 0000000000000246 R12: 0000000000000030
[ 155.760939] R13: 000000000000000a R14: ffff9e9fbe7939c0 R15: 0000000000000030
[ 155.846341] FS: 00007f6513279740(0000) GS:ffff9e979fb00000(0000) knlGS:0000000000000000
[ 155.943189] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 156.011947] CR2: 00007f6513280000 CR3: 00000007f2cbc003 CR4: 00000000003606e0
[ 156.097355] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 156.182761] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 156.268168] Call Trace:
[ 156.297409] ? irq_matrix_alloc+0x8a/0x150
[ 156.346408] assign_vector_locked+0xd2/0x170
[ 156.397489] x86_vector_activate+0x1b5/0x320
[ 156.448570] __irq_domain_activate_irq+0x64/0xa0
[ 156.503808] __irq_domain_activate_irq+0x38/0xa0
[ 156.559050] irq_domain_activate_irq+0x2b/0x40
[ 156.612213] irq_activate+0x25/0x30
[ 156.653930] __setup_irq+0x58f/0x7b0
[ 156.696690] request_threaded_irq+0xf8/0x1b0
[ 156.747784] ixgbe_open+0x3af/0x600 [ixgbe]
[ 156.797827] __dev_open+0xd8/0x160
[ 156.838503] dev_open+0x48/0x90
[ 156.876065] bond_enslave+0x2b6/0x12c0 [bonding]
[ 156.931310] ? vsscanf+0x5af/0x8e0
[ 156.971986] ? sscanf+0x4e/0x70
[ 157.009546] bond_option_slaves_set+0x112/0x1c0 [bonding]
[ 157.074148] __bond_opt_set+0xdc/0x320 [bonding]
[ 157.129389] __bond_opt_set_notify+0x2c/0x90 [bonding]
[ 157.190871] bond_opt_tryset_rtnl+0x56/0xa0 [bonding]
[ 157.251315] bonding_sysfs_store_option+0x52/0x90 [bonding]
This patch marks these legacy vectors as assigned in irq_matrix
so that corresponding bits in percpu bitmaps get set and these
legacy vectors don't get reallocted.
Signed-off-by: Imran Khan <[email protected]>
---
arch/x86/kernel/apic/vector.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 6dbdc7c22bb7..ea92b12614b9 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -806,6 +806,7 @@ static struct irq_desc *__setup_vector_irq(int vector)
void lapic_online(void)
{
unsigned int vector;
+ struct irq_desc *desc = VECTOR_UNUSED;
lockdep_assert_held(&vector_lock);
@@ -821,8 +822,17 @@ void lapic_online(void)
* must be installed on all CPUs. All non legacy interrupts can be
* cleared.
*/
- for (vector = 0; vector < NR_VECTORS; vector++)
- this_cpu_write(vector_irq[vector], __setup_vector_irq(vector));
+ for (vector = 0; vector < NR_VECTORS; vector++) {
+ desc = __setup_vector_irq(vector);
+ this_cpu_write(vector_irq[vector], desc);
+ /*
+ * Mark legacy vectors assigned, so that
+ * irq_matrix_alloc does not see them as
+ * free in bitmap
+ */
+ if (desc != VECTOR_UNUSED)
+ irq_matrix_assign(vector_matrix, vector);
+ }
}
void lapic_offline(void)
--
2.27.0
On Wed, May 19, 2021 at 11:39:28PM +0000, Imran Khan wrote:
> During activation of secondary CPUs, lapic_online is
> invoked to initialize vectors. While lapic_online
> installs legacy vectors on all CPUs, it does not set
> the corresponding bits in per CPU bitmap maintained
> under irq_matrix.
> This may result in these legacy vectors getting allocated
> by irq_matrix_alloc and if that happens subsequent invocation
> of apic_update_vector will cause BUG like the one shown below:
>
> [ 154.738226] kernel BUG at arch/x86/kernel/apic/vector.c:172!
> [ 154.805956] invalid opcode: 0000 [#1] SMP PTI
> [ 154.858092] CPU: 22 PID: 3569 Comm: ifup-eth Not tainted 5.8.0-20200716.x86_64 #1
> [ 154.954939] Hardware name: Oracle Corporation ORACLE SERVER X6-2/ASM,MOTHERBOARD,1U
> [ 155.073636] RIP: 0010:apic_update_vector+0xa7/0x190
> [ 155.131996] Code: 01 00 4a 8b 14 ed 80 69 01 a6 48 89 c8 4a 8d 04 e0 48 8b 04 10 48
> 85 c0 0f 84 d2 00 00 00 48 3d 00 f0 ff ff 0f 87 c6 00 00 00 <0f> 0b 41 8b 46 10 48 0f
> a3 05 6b 3e 7c 01 0f 92 c0 84 c0 0f 84 83
> [ 155.356788] RSP: 0018:ffffb3848b417970 EFLAGS: 00010087
> [ 155.419311] RAX: ffff9e9047c79000 RBX: 0000000000000000 RCX: 0000000000017040
> [ 155.504719] RDX: ffff9e9fbf800000 RSI: 0000000000000182 RDI: ffff9e9fbe7936c0
> [ 155.590127] RBP: ffffb3848b4179b0 R08: 0000000000000000 R09: 0004000000000000
> [ 155.675533] R10: ffff000000000000 R11: 0000000000000246 R12: 0000000000000030
> [ 155.760939] R13: 000000000000000a R14: ffff9e9fbe7939c0 R15: 0000000000000030
> [ 155.846341] FS: 00007f6513279740(0000) GS:ffff9e979fb00000(0000) knlGS:0000000000000000
> [ 155.943189] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 156.011947] CR2: 00007f6513280000 CR3: 00000007f2cbc003 CR4: 00000000003606e0
> [ 156.097355] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 156.182761] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [ 156.268168] Call Trace:
> [ 156.297409] ? irq_matrix_alloc+0x8a/0x150
> [ 156.346408] assign_vector_locked+0xd2/0x170
> [ 156.397489] x86_vector_activate+0x1b5/0x320
> [ 156.448570] __irq_domain_activate_irq+0x64/0xa0
> [ 156.503808] __irq_domain_activate_irq+0x38/0xa0
> [ 156.559050] irq_domain_activate_irq+0x2b/0x40
> [ 156.612213] irq_activate+0x25/0x30
> [ 156.653930] __setup_irq+0x58f/0x7b0
> [ 156.696690] request_threaded_irq+0xf8/0x1b0
> [ 156.747784] ixgbe_open+0x3af/0x600 [ixgbe]
> [ 156.797827] __dev_open+0xd8/0x160
> [ 156.838503] dev_open+0x48/0x90
> [ 156.876065] bond_enslave+0x2b6/0x12c0 [bonding]
> [ 156.931310] ? vsscanf+0x5af/0x8e0
> [ 156.971986] ? sscanf+0x4e/0x70
> [ 157.009546] bond_option_slaves_set+0x112/0x1c0 [bonding]
> [ 157.074148] __bond_opt_set+0xdc/0x320 [bonding]
> [ 157.129389] __bond_opt_set_notify+0x2c/0x90 [bonding]
> [ 157.190871] bond_opt_tryset_rtnl+0x56/0xa0 [bonding]
> [ 157.251315] bonding_sysfs_store_option+0x52/0x90 [bonding]
>
> This patch marks these legacy vectors as assigned in irq_matrix
> so that corresponding bits in percpu bitmaps get set and these
> legacy vectors don't get reallocted.
>
> Signed-off-by: Imran Khan <[email protected]>
> ---
> arch/x86/kernel/apic/vector.c | 14 ++++++++++++--
> 1 file changed, 12 insertions(+), 2 deletions(-)
<formletter>
This is not the correct way to submit patches for inclusion in the
stable kernel tree. Please read:
https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html
for how to do this properly.
</formletter>
On 20/5/21 3:56 pm, Greg KH wrote:
> On Wed, May 19, 2021 at 11:39:28PM +0000, Imran Khan wrote:
>> During activation of secondary CPUs, lapic_online is
>> invoked to initialize vectors. While lapic_online
>> installs legacy vectors on all CPUs, it does not set
>> the corresponding bits in per CPU bitmap maintained
>> under irq_matrix.
>> This may result in these legacy vectors getting allocated
>> by irq_matrix_alloc and if that happens subsequent invocation
>> of apic_update_vector will cause BUG like the one shown below:
>>
>> [ 154.738226] kernel BUG at arch/x86/kernel/apic/vector.c:172!
>> [ 154.805956] invalid opcode: 0000 [#1] SMP PTI
>> [ 154.858092] CPU: 22 PID: 3569 Comm: ifup-eth Not tainted 5.8.0-20200716.x86_64 #1
>> [ 154.954939] Hardware name: Oracle Corporation ORACLE SERVER X6-2/ASM,MOTHERBOARD,1U
>> [ 155.073636] RIP: 0010:apic_update_vector+0xa7/0x190
>> [ 155.131996] Code: 01 00 4a 8b 14 ed 80 69 01 a6 48 89 c8 4a 8d 04 e0 48 8b 04 10 48
>> 85 c0 0f 84 d2 00 00 00 48 3d 00 f0 ff ff 0f 87 c6 00 00 00 <0f> 0b 41 8b 46 10 48 0f
[.....]
>> [ 156.268168] Call Trace:
>> [ 156.297409] ? irq_matrix_alloc+0x8a/0x150
>> [ 156.346408] assign_vector_locked+0xd2/0x170
>> [ 156.397489] x86_vector_activate+0x1b5/0x320
>> [ 156.448570] __irq_domain_activate_irq+0x64/0xa0
>> [ 156.503808] __irq_domain_activate_irq+0x38/0xa0
>> [ 156.559050] irq_domain_activate_irq+0x2b/0x40
>> [ 156.612213] irq_activate+0x25/0x30
>> [ 156.653930] __setup_irq+0x58f/0x7b0
>> [ 156.696690] request_threaded_irq+0xf8/0x1b0
>> [ 156.747784] ixgbe_open+0x3af/0x600 [ixgbe]
>> [ 156.797827] __dev_open+0xd8/0x160
>> [ 156.838503] dev_open+0x48/0x90
>> [ 156.876065] bond_enslave+0x2b6/0x12c0 [bonding]
>> [ 156.931310] ? vsscanf+0x5af/0x8e0
>> [ 156.971986] ? sscanf+0x4e/0x70
>> [ 157.009546] bond_option_slaves_set+0x112/0x1c0 [bonding]
>> [ 157.074148] __bond_opt_set+0xdc/0x320 [bonding]
>> [ 157.129389] __bond_opt_set_notify+0x2c/0x90 [bonding]
>> [ 157.190871] bond_opt_tryset_rtnl+0x56/0xa0 [bonding]
>> [ 157.251315] bonding_sysfs_store_option+0x52/0x90 [bonding]
>>
>> This patch marks these legacy vectors as assigned in irq_matrix
>> so that corresponding bits in percpu bitmaps get set and these
>> legacy vectors don't get reallocted.
>>
>> Signed-off-by: Imran Khan <[email protected]>
>> ---
>> arch/x86/kernel/apic/vector.c | 14 ++++++++++++--
>> 1 file changed, 12 insertions(+), 2 deletions(-)
>
> <formletter>
>
> This is not the correct way to submit patches for inclusion in the
> stable kernel tree. Please read:
> https://urldefense.com/v3/__https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html__;!!GqivPVa7Brio!MYRIHQM1qBB0Raf823KeG1-OSUDCwlOyOxqTp5fzHTlxAL1H4LZW6XniBtajqKVb3w$
> for how to do this properly.
>
> </formletter>
>
Thanks for clarifying the process and providing the relevant doc. Looks
like CC-ing stable list for RFC patch was a mistake in first place. I
will wait for review comments and, if the patch gets included, will
inform via option 2 of the above mentioned document.
Thanks,
Imran
Imran,
On Wed, May 19 2021 at 23:39, Imran Khan wrote:
> During activation of secondary CPUs, lapic_online is
> invoked to initialize vectors. While lapic_online
> installs legacy vectors on all CPUs, it does not set
> the corresponding bits in per CPU bitmap maintained
> under irq_matrix.
> This may result in these legacy vectors getting allocated
> by irq_matrix_alloc and if that happens subsequent invocation
> of apic_update_vector will cause BUG like the one shown below:
>
> [ 154.738226] kernel BUG at arch/x86/kernel/apic/vector.c:172!
please trim the backtrace. It's not really relevant for understanding
the problem.
> This patch marks these legacy vectors as assigned in irq_matrix
git grep 'This patch' Documentation/process/
> so that corresponding bits in percpu bitmaps get set and these
> legacy vectors don't get reallocted.
This is just wrong.
True legacy interrupts (PIC delivery) are marked as system vectors. See
lapic_assign_legacy_vector(). That prevents them from being allocated.
> [ 154.858092] CPU: 22 PID: 3569 Comm: ifup-eth Not tainted 5.8.0-20200716.x86_64 #1
I have no idea what this 5.8.0-magic-date kernel is.
Have you verified that this problem exists with upstream?
Thanks,
tglx
On 20/5/21 6:17 pm, Thomas Gleixner wrote:
> Imran,
Hi Thomas,
Thanks for the review.
>
> On Wed, May 19 2021 at 23:39, Imran Khan wrote:
[...]
>>
>> [ 154.738226] kernel BUG at arch/x86/kernel/apic/vector.c:172!
>
> please trim the backtrace. It's not really relevant for understanding
> the problem.
>
Noted.
>> This patch marks these legacy vectors as assigned in irq_matrix
>
> git grep 'This patch' Documentation/process/
>
Noted.
>> so that corresponding bits in percpu bitmaps get set and these
>> legacy vectors don't get reallocted.
>
> This is just wrong.
>
> True legacy interrupts (PIC delivery) are marked as system vectors. See
> lapic_assign_legacy_vector(). That prevents them from being allocated.
>
Taking current ML tag v5.13-rc2, I can see lapic_assign_legacy_vector
getting invoked only for vector 2 (PIC_CASCADE_IR).
I do see that lapic_assign_system_vectors is assigning legacy
interrupts, but this is invoked only for boot CPU from native_init_IRQ.
>> [ 154.858092] CPU: 22 PID: 3569 Comm: ifup-eth Not tainted 5.8.0-20200716.x86_64 #1
>
> I have no idea what this 5.8.0-magic-date kernel is.
>
> Have you verified that this problem exists with upstream?
>
I have not yet tested current mainline tag (v5.13-rc2)
on this setup because of some pending porting stuff.
But I tested ML v5.13-rc2 on qemu based x86_64 setup (4 CPUs) and
observed some difference in system vector assignments depending
on whether kernel is booted with or without noapic option.
If kernel is booted with option noapic, the io_apic_irqs
bitmap is not set by setup_IO_APIC and this happens because
skip_ioapic_setup is found as set in this case. But in absence
of noapic kernel parameters, io_apic_irqs bitmap gets set by setup_IO_APIC.
Now in case of booting with noapic option, the check "test_bit(isairq,
&io_apic_irqs))" in __setup_vector_irq will fail for system vectors
and __setup_vector_irq will end up returning corresponding irq descriptor.
This irq descriptor gets assigned in per cpu vector_irq corresponding
to secondary CPUs.
But the corresponding bitmap in irq_matrix still remains unset,
because invocation of lapic_assign_system_vectors via native_init_IRQ
happens only for boot CPU.
As a result of this if any of these vectors get allocated for secondary
CPUs kernel will hit the BUG condition given in apic_update_vector.
Even my current setup that was crashing with in-house 5.4 and 5.8
kernels, boots fine if I boot it with noapic option removed from kernel
boot parameters.
So even though I have not yet tested current ML tag on my test setup, It
looks to me that cause of my problem is there in current ML tag as well
and gets manifested when kernel is booted with noapic option.
Please let me know if I am missing something. If you need additional
data (vector traces, debugfs content for VECTOR domain etc.) from my
qemu based experiment, I can provide that as well.
Thanks,
Imran
> Thanks,
>
> tglx
>
Imran,
On Mon, May 24 2021 at 13:29, imran f. khan wrote:
> On 20/5/21 6:17 pm, Thomas Gleixner wrote:
> But I tested ML v5.13-rc2 on qemu based x86_64 setup (4 CPUs) and
> observed some difference in system vector assignments depending
> on whether kernel is booted with or without noapic option.
Of course there is a difference.
> Even my current setup that was crashing with in-house 5.4 and 5.8
> kernels, boots fine if I boot it with noapic option removed from kernel
> boot parameters.
May I ask the obvious question why the changelog of your patch did not
mention that this happens only with 'noapic' on the kernel command line?
> If kernel is booted with option noapic, the io_apic_irqs
> bitmap is not set by setup_IO_APIC and this happens because
> skip_ioapic_setup is found as set in this case. But in absence
> of noapic kernel parameters, io_apic_irqs bitmap gets set by
> setup_IO_APIC.
Yes
> Now in case of booting with noapic option, the check "test_bit(isairq,
> &io_apic_irqs))" in __setup_vector_irq will fail for system vectors
> and __setup_vector_irq will end up returning corresponding irq descriptor.
> This irq descriptor gets assigned in per cpu vector_irq corresponding
> to secondary CPUs.
>
> But the corresponding bitmap in irq_matrix still remains unset,
> because invocation of lapic_assign_system_vectors via native_init_IRQ
> happens only for boot CPU.
Which is sufficient because system vectors are assigned system wide.
> As a result of this if any of these vectors get allocated for secondary
> CPUs kernel will hit the BUG condition given in apic_update_vector.
Correct. So you almost decoded the underlying problem.
In absence of IOAPIC all legacy interrupts are routed through the
PIC. The PIC does not support interrupt affinities and the interrupt can
end up on any online CPU. That's why we need to treat the PIC interrupts
special.
The problem is that lapic_assign_legacy_vector() is _NOT_ invoked for
any interrupt except PIC_CASCADE_IRQ if the IOAPIC is disabled.
So with your patch you work around that, but that falls flat on it's
nose when the IO/APIC is enabled and one of the legacy interrupts is in
PIC mode, e.g. IRQ0. If that happens then bit 0 is not set in
io_apic_irqs _AND_ the interrupt is correctly marked as system vector
already. So marking it again via irq_matrix_assign() will explode
in the matrix allocator.
So the right thing to do is to ensure that the legacy interrupts are
marked as system vectors in case that the IO/APIC is disabled via
config, kernel command line or lack of enumeration. See below.
Thanks,
tglx
---
Subject: x86/apic: Mark _all_ legacy interrupts when IO/APIC is missing
From: Thomas Gleixner <[email protected]>
Date: Tue, 25 May 2021 13:08:41 +0200
PIC interrupts do not support affinity setting and they can end up on any
online CPU. Therefore it's required to mark the associated vectors as
system wide reserved. Otherwise the corresponding irq descriptors are
copied to the secondary CPUs but the vectors are not marked as assigned or
reserved. This works correctly for the IO/APIC case.
When the IO/APIC is disabled via config, kernel command line or lack of
enumeration then all legacy interrupts are routed through the PIC, but
nothing marks them as system wide reserved vectors.
As a consequence a subsequent allocation on a secondary CPU can result in
allocating one on these vectors, which triggers the BUG() in
apic_update_vector() because the interrupt descriptor slot is not empty.
Imran tried to work around that by marking those interrupts as allocated
when a CPU comes online. But that's wrong in case that the IO/APIC is
available and one of the legacy interrupts, e.g. IRQ0, has been switched to
PIC mode because then marking them as allocated will fail as they are
already marked as system vectors.
Stay consistent and update the legacy vectors after attempting IO/APIC
initialization and mark them as system vectors in case that no IO/APIC is
available.
Reported-by: Imran Khan <[email protected]>
Fixes: 69cde0004a4b ("x86/vector: Use matrix allocator for vector assignment")
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: [email protected]
---
arch/x86/include/asm/apic.h | 1 +
arch/x86/kernel/apic/apic.c | 1 +
arch/x86/kernel/apic/vector.c | 20 ++++++++++++++++++++
3 files changed, 22 insertions(+)
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -174,6 +174,7 @@ static inline int apic_is_clustered_box(
extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
extern void lapic_assign_system_vectors(void);
extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
+extern void lapic_update_legacy_vectors(void);
extern void lapic_online(void);
extern void lapic_offline(void);
extern bool apic_needs_pit(void);
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2604,6 +2604,7 @@ static void __init apic_bsp_setup(bool u
end_local_APIC_setup();
irq_remap_enable_fault_handling();
setup_IO_APIC();
+ lapic_update_legacy_vectors();
}
#ifdef CONFIG_UP_LATE_INIT
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -738,6 +738,26 @@ void lapic_assign_legacy_vector(unsigned
irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace);
}
+void __init lapic_update_legacy_vectors(void)
+{
+ unsigned int i;
+
+ if (IS_ENABLED(CONFIG_X86_IO_APIC) && nr_ioapics > 0)
+ return;
+
+ /*
+ * If the IO/APIC is disabled via config, kernel command line or
+ * lack of enumeration then all legacy interrupts are routed
+ * through the PIC. Make sure that they are marked as legacy
+ * vectors. PIC_CASCADE_IRQ has already been marked in
+ * lapic_assign_system_vectors().
+ */
+ for (i = 0; i < nr_legacy_irqs(); i++) {
+ if (i != PIC_CASCADE_IR)
+ lapic_assign_legacy_vector(i, true);
+ }
+}
+
void __init lapic_assign_system_vectors(void)
{
unsigned int i, vector = 0;
Hi Thomas,
On 26/5/21 12:51 am, Thomas Gleixner wrote:
> Imran,
>
> On Mon, May 24 2021 at 13:29, imran f. khan wrote:
[...]
>
>> Even my current setup that was crashing with in-house 5.4 and 5.8
>> kernels, boots fine if I boot it with noapic option removed from kernel
>> boot parameters.
>
> May I ask the obvious question why the changelog of your patch did not
> mention that this happens only with 'noapic' on the kernel command line?
>
I was not aware of it earlier. After your first feedback I debugged the
code further with ML tag and found out about the noapic dependency.
[...]
>
> Correct. So you almost decoded the underlying problem.
>
[...]
> So the right thing to do is to ensure that the legacy interrupts are
> marked as system vectors in case that the IO/APIC is disabled via
> config, kernel command line or lack of enumeration. See below.
>
Thanks for clarifying it. I have tested your change in my local setup as
well and it works well.
Thanks,
Imran
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 7d65f9e80646c595e8c853640a9d0768a33e204c
Gitweb: https://git.kernel.org/tip/7d65f9e80646c595e8c853640a9d0768a33e204c
Author: Thomas Gleixner <[email protected]>
AuthorDate: Tue, 25 May 2021 13:08:41 +02:00
Committer: Borislav Petkov <[email protected]>
CommitterDate: Sat, 29 May 2021 11:41:14 +02:00
x86/apic: Mark _all_ legacy interrupts when IO/APIC is missing
PIC interrupts do not support affinity setting and they can end up on
any online CPU. Therefore, it's required to mark the associated vectors
as system-wide reserved. Otherwise, the corresponding irq descriptors
are copied to the secondary CPUs but the vectors are not marked as
assigned or reserved. This works correctly for the IO/APIC case.
When the IO/APIC is disabled via config, kernel command line or lack of
enumeration then all legacy interrupts are routed through the PIC, but
nothing marks them as system-wide reserved vectors.
As a consequence, a subsequent allocation on a secondary CPU can result in
allocating one of these vectors, which triggers the BUG() in
apic_update_vector() because the interrupt descriptor slot is not empty.
Imran tried to work around that by marking those interrupts as allocated
when a CPU comes online. But that's wrong in case that the IO/APIC is
available and one of the legacy interrupts, e.g. IRQ0, has been switched to
PIC mode because then marking them as allocated will fail as they are
already marked as system vectors.
Stay consistent and update the legacy vectors after attempting IO/APIC
initialization and mark them as system vectors in case that no IO/APIC is
available.
Fixes: 69cde0004a4b ("x86/vector: Use matrix allocator for vector assignment")
Reported-by: Imran Khan <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Cc: [email protected]
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/include/asm/apic.h | 1 +
arch/x86/kernel/apic/apic.c | 1 +
arch/x86/kernel/apic/vector.c | 20 ++++++++++++++++++++
3 files changed, 22 insertions(+)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 412b51e..48067af 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -174,6 +174,7 @@ static inline int apic_is_clustered_box(void)
extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
extern void lapic_assign_system_vectors(void);
extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
+extern void lapic_update_legacy_vectors(void);
extern void lapic_online(void);
extern void lapic_offline(void);
extern bool apic_needs_pit(void);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 4a39fb4..d262811 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2604,6 +2604,7 @@ static void __init apic_bsp_setup(bool upmode)
end_local_APIC_setup();
irq_remap_enable_fault_handling();
setup_IO_APIC();
+ lapic_update_legacy_vectors();
}
#ifdef CONFIG_UP_LATE_INIT
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 6dbdc7c..fb67ed5 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -738,6 +738,26 @@ void lapic_assign_legacy_vector(unsigned int irq, bool replace)
irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace);
}
+void __init lapic_update_legacy_vectors(void)
+{
+ unsigned int i;
+
+ if (IS_ENABLED(CONFIG_X86_IO_APIC) && nr_ioapics > 0)
+ return;
+
+ /*
+ * If the IO/APIC is disabled via config, kernel command line or
+ * lack of enumeration then all legacy interrupts are routed
+ * through the PIC. Make sure that they are marked as legacy
+ * vectors. PIC_CASCADE_IRQ has already been marked in
+ * lapic_assign_system_vectors().
+ */
+ for (i = 0; i < nr_legacy_irqs(); i++) {
+ if (i != PIC_CASCADE_IR)
+ lapic_assign_legacy_vector(i, true);
+ }
+}
+
void __init lapic_assign_system_vectors(void)
{
unsigned int i, vector = 0;