Device interrupts which go through do_IRQ() or the spurious interrupt
handler have their separate entry code on 64 bit for no good reason.
Both 32 and 64 bit transport the vector number through ORIG_[RE]AX in
pt_regs. Further the vector number is forced to fit into a u8 and is
complemented and offset by 0x80 for historical reasons.
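As a back-of-the-envelope check, the old round trip can be written out in plain C. This is a user-space sketch of the arithmetic only, assuming the usual FIRST_EXTERNAL_VECTOR (0x20) .. 0xff vector range; none of it is kernel code:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	for (int32_t vector = 0x20; vector <= 0xff; vector++) {
		/* What the per-vector stub pushes: always fits a signed byte */
		int32_t pushed = ~vector + 0x80;
		/* After the addl/addq $-0x80 fixup: ORIG_[RE]AX holds ~vector */
		int32_t orig_ax = pushed - 0x80;
		/* What the C handlers decode today: u8 vector = ~regs->orig_ax */
		uint8_t decoded = (uint8_t)~orig_ax;

		assert(pushed >= -0x80 && pushed <= 0x7f);
		assert(decoded == vector);
	}
	return 0;
}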
Push the vector number into the error code slot instead and hand the plain
vector number to the C functions.
Originally-by: Andy Lutomirski <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/entry/entry_32.S | 11 ++++++-----
arch/x86/entry/entry_64.S | 14 ++++++++------
arch/x86/include/asm/entry_arch.h | 2 +-
arch/x86/include/asm/hw_irq.h | 1 +
arch/x86/include/asm/irq.h | 2 +-
arch/x86/include/asm/traps.h | 3 ++-
arch/x86/kernel/apic/apic.c | 25 +++++++++++++++++++------
arch/x86/kernel/idt.c | 2 +-
arch/x86/kernel/irq.c | 6 ++----
9 files changed, 41 insertions(+), 25 deletions(-)
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1233,7 +1233,7 @@ SYM_FUNC_END(entry_INT80_32)
SYM_CODE_START(irq_entries_start)
vector=FIRST_EXTERNAL_VECTOR
.rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
- pushl $(~vector+0x80) /* Note: always in signed byte range */
+ pushl $(vector)
vector=vector+1
jmp common_interrupt
.align 8
@@ -1245,7 +1245,7 @@ SYM_CODE_END(irq_entries_start)
SYM_CODE_START(spurious_entries_start)
vector=FIRST_SYSTEM_VECTOR
.rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
- pushl $(~vector+0x80) /* Note: always in signed byte range */
+ pushl $(vector)
vector=vector+1
jmp common_spurious
.align 8
@@ -1254,11 +1254,12 @@ SYM_CODE_END(spurious_entries_start)
SYM_CODE_START_LOCAL(common_spurious)
ASM_CLAC
- addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
SAVE_ALL switch_stacks=1
ENCODE_FRAME_POINTER
TRACE_IRQS_OFF
movl %esp, %eax
+ movl PT_ORIG_EAX(%esp), %edx /* get the vector from stack */
+ movl $-1, PT_ORIG_EAX(%esp) /* no syscall to restart */
call smp_spurious_interrupt
jmp ret_from_intr
SYM_CODE_END(common_spurious)
@@ -1271,12 +1272,12 @@ SYM_CODE_END(common_spurious)
.p2align CONFIG_X86_L1_CACHE_SHIFT
SYM_CODE_START_LOCAL(common_interrupt)
ASM_CLAC
- addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
-
SAVE_ALL switch_stacks=1
ENCODE_FRAME_POINTER
TRACE_IRQS_OFF
movl %esp, %eax
+ movl PT_ORIG_EAX(%esp), %edx /* get the vector from stack */
+ movl $-1, PT_ORIG_EAX(%esp) /* no syscall to restart */
call do_IRQ
jmp ret_from_intr
SYM_CODE_END(common_interrupt)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -364,7 +364,7 @@ SYM_CODE_START(irq_entries_start)
vector=FIRST_EXTERNAL_VECTOR
.rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
UNWIND_HINT_IRET_REGS
- pushq $(~vector+0x80) /* Note: always in signed byte range */
+ pushq $(vector)
jmp common_interrupt
.align 8
vector=vector+1
@@ -376,7 +376,7 @@ SYM_CODE_START(spurious_entries_start)
vector=FIRST_SYSTEM_VECTOR
.rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
UNWIND_HINT_IRET_REGS
- pushq $(~vector+0x80) /* Note: always in signed byte range */
+ pushq $(vector)
jmp common_spurious
.align 8
vector=vector+1
@@ -756,9 +756,10 @@ SYM_CODE_END(interrupt_entry)
* then jump to common_spurious/interrupt.
*/
SYM_CODE_START_LOCAL(common_spurious)
- addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
call interrupt_entry
UNWIND_HINT_REGS indirect=1
+ movq ORIG_RAX(%rdi), %rsi /* get vector from stack */
+ movq $-1, ORIG_RAX(%rdi) /* no syscall to restart */
call smp_spurious_interrupt /* rdi points to pt_regs */
jmp ret_from_intr
SYM_CODE_END(common_spurious)
@@ -767,10 +768,11 @@ SYM_CODE_END(common_spurious)
/* common_interrupt is a hotpath. Align it */
.p2align CONFIG_X86_L1_CACHE_SHIFT
SYM_CODE_START_LOCAL(common_interrupt)
- addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
call interrupt_entry
UNWIND_HINT_REGS indirect=1
- call do_IRQ /* rdi points to pt_regs */
+ movq ORIG_RAX(%rdi), %rsi /* get vector from stack */
+ movq $-1, ORIG_RAX(%rdi) /* no syscall to restart */
+ call do_IRQ /* rdi points to pt_regs */
/* 0(%rsp): old RSP */
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_ANY)
@@ -1019,7 +1021,7 @@ apicinterrupt RESCHEDULE_VECTOR resche
#endif
apicinterrupt ERROR_APIC_VECTOR error_interrupt smp_error_interrupt
-apicinterrupt SPURIOUS_APIC_VECTOR spurious_interrupt smp_spurious_interrupt
+apicinterrupt SPURIOUS_APIC_VECTOR spurious_apic_interrupt smp_spurious_apic_interrupt
#ifdef CONFIG_IRQ_WORK
apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -35,7 +35,7 @@ BUILD_INTERRUPT(kvm_posted_intr_nested_i
BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
-BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
+BUILD_INTERRUPT(spurious_apic_interrupt,SPURIOUS_APIC_VECTOR)
BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
#ifdef CONFIG_IRQ_WORK
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -39,6 +39,7 @@ extern asmlinkage void irq_work_interrup
extern asmlinkage void uv_bau_message_intr1(void);
extern asmlinkage void spurious_interrupt(void);
+extern asmlinkage void spurious_apic_interrupt(void);
extern asmlinkage void thermal_interrupt(void);
extern asmlinkage void reschedule_interrupt(void);
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -36,7 +36,7 @@ extern void native_init_IRQ(void);
extern void handle_irq(struct irq_desc *desc, struct pt_regs *regs);
-extern __visible void do_IRQ(struct pt_regs *regs);
+extern __visible void do_IRQ(struct pt_regs *regs, unsigned long vector);
extern void init_ISA_irqs(void);
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -39,8 +39,9 @@ asmlinkage void smp_deferred_error_inter
#endif
void smp_apic_timer_interrupt(struct pt_regs *regs);
-void smp_spurious_interrupt(struct pt_regs *regs);
void smp_error_interrupt(struct pt_regs *regs);
+void smp_spurious_apic_interrupt(struct pt_regs *regs);
+void smp_spurious_interrupt(struct pt_regs *regs, unsigned long vector);
asmlinkage void smp_irq_move_cleanup_interrupt(void);
extern void ist_enter(struct pt_regs *regs);
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2158,12 +2158,20 @@ void __init register_lapic_address(unsig
* Local APIC interrupts
*/
-/*
- * This interrupt should _never_ happen with our APIC/SMP architecture
+/**
+ * smp_spurious_interrupt - Catch all for interrupts raised on unused vectors
+ * @regs: Pointer to pt_regs on stack
+ * @vector: Vector number
+ *
+ * This is invoked from ASM entry code to catch all interrupts which
+ * trigger on an entry which is routed to the common_spurious idtentry
+ * point.
+ *
+ * Also called from smp_spurious_apic_interrupt().
*/
-__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs,
+ unsigned long vector)
{
- u8 vector = ~regs->orig_ax;
u32 v;
entering_irq();
@@ -2187,11 +2195,11 @@ void __init register_lapic_address(unsig
*/
v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
if (v & (1 << (vector & 0x1f))) {
- pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
+ pr_info("Spurious interrupt (vector 0x%02lx) on CPU#%d. Acked\n",
vector, smp_processor_id());
ack_APIC_irq();
} else {
- pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
+ pr_info("Spurious interrupt (vector 0x%02lx) on CPU#%d. Not pending!\n",
vector, smp_processor_id());
}
out:
@@ -2199,6 +2207,11 @@ void __init register_lapic_address(unsig
exiting_irq();
}
+__visible void smp_spurious_apic_interrupt(struct pt_regs *regs)
+{
+ smp_spurious_interrupt(regs, SPURIOUS_APIC_VECTOR);
+}
+
/*
* This interrupt should never happen with our APIC/SMP architecture
*/
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -142,7 +142,7 @@ static const __initconst struct idt_data
#ifdef CONFIG_X86_UV
INTG(UV_BAU_MESSAGE, uv_bau_message_intr1),
#endif
- INTG(SPURIOUS_APIC_VECTOR, spurious_interrupt),
+ INTG(SPURIOUS_APIC_VECTOR, spurious_apic_interrupt),
INTG(ERROR_APIC_VECTOR, error_interrupt),
#endif
};
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -230,12 +230,10 @@ u64 arch_irq_stat(void)
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
-__visible void __irq_entry do_IRQ(struct pt_regs *regs)
+__visible void __irq_entry do_IRQ(struct pt_regs *regs, unsigned long vector)
{
struct pt_regs *old_regs = set_irq_regs(regs);
struct irq_desc * desc;
- /* high bit used in ret_from_ code */
- unsigned vector = ~regs->orig_ax;
entering_irq();
@@ -252,7 +250,7 @@ u64 arch_irq_stat(void)
ack_APIC_irq();
if (desc == VECTOR_UNUSED) {
- pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n",
+ pr_emerg_ratelimited("%s: %d.%lu No irq handler for vector\n",
__func__, smp_processor_id(),
vector);
} else {
On Tue, Feb 25, 2020 at 3:26 PM Thomas Gleixner <[email protected]> wrote:
>
> Device interrupts which go through do_IRQ() or the spurious interrupt
> handler have their separate entry code on 64 bit for no good reason.
>
> Both 32 and 64 bit transport the vector number through ORIG_[RE]AX in
> pt_regs. Further the vector number is forced to fit into a u8 and is
> complemented and offset by 0x80 for historical reasons.
>
> Push the vector number into the error code slot instead and hand the plain
> vector number to the C functions.
Reviewed-by: Andy Lutomirski <[email protected]>
On Tue, Feb 25, 2020 at 6:26 PM Thomas Gleixner <[email protected]> wrote:
>
> Device interrupts which go through do_IRQ() or the spurious interrupt
> handler have their separate entry code on 64 bit for no good reason.
>
> Both 32 and 64 bit transport the vector number through ORIG_[RE]AX in
> pt_regs. Further the vector number is forced to fit into a u8 and is
> complemented and offset by 0x80 for historical reasons.
The reason for the 0x80 offset is so that the push instruction only
takes two bytes. This allows each entry stub to be packed into a
fixed 8 bytes. idt_setup_apic_and_irq_gates() assumes this 8-byte
fixed length for the stubs, so now every odd vector after 0x80 is
broken.
508: 6a 7f pushq $0x7f
50a: e9 f1 08 00 00 jmpq e00 <common_interrupt>
50f: 90 nop
510: 68 80 00 00 00 pushq $0x80
515: e9 e6 08 00 00 jmpq e00 <common_interrupt>
51a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
520: 68 81 00 00 00 pushq $0x81
525: e9 d6 08 00 00 jmpq e00 <common_interrupt>
52a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
The 0x81 vector should start at 0x518, not 0x520.
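For context, the stride assumption lives in arch/x86/kernel/idt.c. Roughly, as a simplified sketch that skips the system_vectors bookkeeping of the real function (vector constants from <asm/irq_vectors.h>):

/* Gate addresses are derived from a hardcoded 8-byte stub stride, so any
 * stub that grows past 8 bytes shifts every following vector. */
extern char irq_entries_start[];
extern char spurious_entries_start[];

void __init idt_setup_apic_and_irq_gates(void)
{
	void *entry;
	int i;

	for (i = FIRST_EXTERNAL_VECTOR; i < FIRST_SYSTEM_VECTOR; i++) {
		entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR);
		set_intr_gate(i, entry);
	}
	for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++) {
		entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR);
		set_intr_gate(i, entry);
	}
}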
--
Brian Gerst
Brian Gerst <[email protected]> writes:
> On Tue, Feb 25, 2020 at 6:26 PM Thomas Gleixner <[email protected]> wrote:
>>
>> Device interrupts which go through do_IRQ() or the spurious interrupt
>> handler have their separate entry code on 64 bit for no good reason.
>>
>> Both 32 and 64 bit transport the vector number through ORIG_[RE]AX in
>> pt_regs. Further the vector number is forced to fit into a u8 and is
>> complemented and offset by 0x80 for historical reasons.
>
> The reason for the 0x80 offset is so that the push instruction only
> takes two bytes. This allows each entry stub to be packed into a
> fixed 8 bytes. idt_setup_apic_and_irq_gates() assumes this 8-byte
> fixed length for the stubs, so now every odd vector after 0x80 is
> broken.
>
> 508: 6a 7f pushq $0x7f
> 50a: e9 f1 08 00 00 jmpq e00 <common_interrupt>
> 50f: 90 nop
> 510: 68 80 00 00 00 pushq $0x80
> 515: e9 e6 08 00 00 jmpq e00 <common_interrupt>
> 51a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
> 520: 68 81 00 00 00 pushq $0x81
> 525: e9 d6 08 00 00 jmpq e00 <common_interrupt>
> 52a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
>
> The 0x81 vector should start at 0x518, not 0x520.
Bah, I somehow missed that big fat comment explaining it. :)
Thanks for catching it. So my testing has just been lucky to not hit one
of those.
Now the question is whether we care about the packed stubs or just make
them larger by using alignment to get rid of this silly +0x80 and
~vector fixup later on. The straightforward thing clearly has its charm
and I doubt it matters in measurable ways.
Thanks,
tglx
> On Feb 26, 2020, at 12:13 PM, Thomas Gleixner <[email protected]> wrote:
>
> Brian Gerst <[email protected]> writes:
>
>>> On Tue, Feb 25, 2020 at 6:26 PM Thomas Gleixner <[email protected]> wrote:
>>>
>>> Device interrupts which go through do_IRQ() or the spurious interrupt
>>> handler have their separate entry code on 64 bit for no good reason.
>>>
>>> Both 32 and 64 bit transport the vector number through ORIG_[RE]AX in
>>> pt_regs. Further the vector number is forced to fit into a u8 and is
>>> complemented and offset by 0x80 for historical reasons.
>>
>> The reason for the 0x80 offset is so that the push instruction only
>> takes two bytes. This allows each entry stub to be packed into a
>> fixed 8 bytes. idt_setup_apic_and_irq_gates() assumes this 8-byte
>> fixed length for the stubs, so now every odd vector after 0x80 is
>> broken.
>>
>> 508: 6a 7f pushq $0x7f
>> 50a: e9 f1 08 00 00 jmpq e00 <common_interrupt>
>> 50f: 90 nop
>> 510: 68 80 00 00 00 pushq $0x80
>> 515: e9 e6 08 00 00 jmpq e00 <common_interrupt>
>> 51a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
>> 520: 68 81 00 00 00 pushq $0x81
>> 525: e9 d6 08 00 00 jmpq e00 <common_interrupt>
>> 52a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
>>
>> The 0x81 vector should start at 0x518, not 0x520.
>
> Bah, I somehow missed that big fat comment explaining it. :)
>
> Thanks for catching it. So my testing has just been lucky to not hit one
> of those.
>
> Now the question is whether we care about the packed stubs or just make
> them larger by using alignment to get rid of this silly +0x80 and
> ~vector fixup later on. The straightforward thing clearly has its charm
> and I doubt it matters in measurable ways.
I agree it probably doesn’t matter. That being said, I have a distinct memory of fixing that asm so it would fail the build if the alignment was off.
>
> Thanks,
>
> tglx
>
>
On Wed, Feb 26, 2020 at 3:13 PM Thomas Gleixner <[email protected]> wrote:
>
> Brian Gerst <[email protected]> writes:
>
> > On Tue, Feb 25, 2020 at 6:26 PM Thomas Gleixner <[email protected]> wrote:
> >>
> >> Device interrupts which go through do_IRQ() or the spurious interrupt
> >> handler have their separate entry code on 64 bit for no good reason.
> >>
> >> Both 32 and 64 bit transport the vector number through ORIG_[RE]AX in
> >> pt_regs. Further the vector number is forced to fit into a u8 and is
> >> complemented and offset by 0x80 for historical reasons.
> >
> > The reason for the 0x80 offset is so that the push instruction only
> > takes two bytes. This allows each entry stub to be packed into a
> > fixed 8 bytes. idt_setup_apic_and_irq_gates() assumes this 8-byte
> > fixed length for the stubs, so now every odd vector after 0x80 is
> > broken.
> >
> > 508: 6a 7f pushq $0x7f
> > 50a: e9 f1 08 00 00 jmpq e00 <common_interrupt>
> > 50f: 90 nop
> > 510: 68 80 00 00 00 pushq $0x80
> > 515: e9 e6 08 00 00 jmpq e00 <common_interrupt>
> > 51a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
> > 520: 68 81 00 00 00 pushq $0x81
> > 525: e9 d6 08 00 00 jmpq e00 <common_interrupt>
> > 52a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
> >
> > The 0x81 vector should start at 0x518, not 0x520.
>
> Bah, I somehow missed that big fat comment explaining it. :)
>
> Thanks for catching it. So my testing has just been lucky to not hit one
> of those.
>
> Now the question is whether we care about the packed stubs or just make
> them larger by using alignment to get rid of this silly +0x80 and
> ~vector fixup later on. The straightforward thing clearly has its charm
> and I doubt it matters in measurable ways.
I think we can get rid of the inversion. That was done so orig_ax had
a negative number (signifying it's not a syscall), but if you replace
it with -1 that isn't necessary. A simple -0x80 offset should be
sufficient.
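The arithmetic for that variant checks out; a hypothetical user-space sketch (not a posted patch), with the entry code still free to overwrite ORIG_[RE]AX with -1 afterwards as this patch already does:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	for (int32_t vector = 0x20; vector <= 0xff; vector++) {
		/* Stub would push (vector - 0x80): still a 2-byte push imm8 */
		int32_t pushed = vector - 0x80;		/* range [-0x60, 0x7f] */
		/* Decode without any complement: just add the bias back */
		int32_t decoded = pushed + 0x80;

		assert(pushed >= -0x80 && pushed <= 0x7f);
		assert(decoded == vector);
	}
	return 0;
}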
I think it's a worthy optimization to keep. There are 240 of these
stubs, so increasing the allocation to 16 bytes would add 1920 bytes
to the kernel text.
--
Brian Gerst
Brian Gerst <[email protected]> writes:
> On Wed, Feb 26, 2020 at 3:13 PM Thomas Gleixner <[email protected]> wrote:
>> Brian Gerst <[email protected]> writes:
>> Now the question is whether we care about the packed stubs or just make
>> them larger by using alignment to get rid of this silly +0x80 and
>> ~vector fixup later on. The straightforward thing clearly has its charm
>> and I doubt it matters in measurable ways.
>
> I think we can get rid of the inversion. That was done so orig_ax had
> a negative number (signifying it's not a syscall), but if you replace
> it with -1 that isn't necessary. A simple -0x80 offset should be
> sufficient.
>
> I think it's a worthy optimization to keep. There are 240 of these
> stubs, so increasing the allocation to 16 bytes would add 1920 bytes
> to the kernel text.
I'd rather pay the 2k text size for readable and straightforward
code. Can you remind me why we are actually worrying at that level about
32bit x86 instead of making it depend on CONFIG_OBSCURE?
Thanks,
tglx
Andy Lutomirski <[email protected]> writes:
>> On Feb 26, 2020, at 12:13 PM, Thomas Gleixner <[email protected]> wrote:
>> Brian Gerst <[email protected]> writes:
>> Now the question is whether we care about the packed stubs or just make
>> them larger by using alignment to get rid of this silly +0x80 and
>> ~vector fixup later on. The straightforward thing clearly has its charm
>> and I doubt it matters in measurable ways.
>
> I agree it probably doesn’t matter. That being said, I have a distinct
> memory of fixing that asm so it would fail the build if the alignment
> was off.
Hrm. Doesn't look like. Gah, and I love the hardcoded * 8 in the IDT
code. Let me add something to catch such things in the future.
Thanks,
tglx
On Wed, Feb 26, 2020 at 6:43 PM Thomas Gleixner <[email protected]> wrote:
>
> Brian Gerst <[email protected]> writes:
> > On Wed, Feb 26, 2020 at 3:13 PM Thomas Gleixner <[email protected]> wrote:
> >> Brian Gerst <[email protected]> writes:
> >> Now the question is whether we care about the packed stubs or just make
> >> them larger by using alignment to get rid of this silly +0x80 and
> >> ~vector fixup later on. The straightforward thing clearly has its charm
> >> and I doubt it matters in measurable ways.
> >
> > I think we can get rid of the inversion. That was done so orig_ax had
> > a negative number (signifying it's not a syscall), but if you replace
> > it with -1 that isn't necessary. A simple -0x80 offset should be
> > sufficient.
> >
> > I think it's a worthy optimization to keep. There are 240 of these
> > stubs, so increasing the allocation to 16 bytes would add 1920 bytes
> > to the kernel text.
>
> I'd rather pay the 2k text size for readable and straightforward
> code. Can you remind me why we are actually worrying at that level about
> 32bit x86 instead of making it depend on CONFIG_OBSCURE?
Because this also applies to the 64-bit kernel?
--
Brian Gerst