Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751694AbbDHHoS (ORCPT ); Wed, 8 Apr 2015 03:44:18 -0400 Received: from terminus.zytor.com ([198.137.202.10]:34866 "EHLO terminus.zytor.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751637AbbDHHoN (ORCPT ); Wed, 8 Apr 2015 03:44:13 -0400 Date: Wed, 8 Apr 2015 00:43:35 -0700 From: tip-bot for Denys Vlasenko Message-ID: Cc: bp@suse.de, bp@alien8.de, luto@amacapital.net, linux-kernel@vger.kernel.org, wad@chromium.org, hpa@zytor.com, tglx@linutronix.de, mingo@kernel.org, keescook@chromium.org, dvlasenk@redhat.com, torvalds@linux-foundation.org, ast@plumgrid.com, rostedt@goodmis.org, fweisbec@gmail.com, oleg@redhat.com Reply-To: dvlasenk@redhat.com, rostedt@goodmis.org, torvalds@linux-foundation.org, ast@plumgrid.com, fweisbec@gmail.com, oleg@redhat.com, bp@suse.de, bp@alien8.de, luto@amacapital.net, wad@chromium.org, linux-kernel@vger.kernel.org, mingo@kernel.org, tglx@linutronix.de, hpa@zytor.com, keescook@chromium.org In-Reply-To: <1428090553-7283-1-git-send-email-dvlasenk@redhat.com> References: <1428090553-7283-1-git-send-email-dvlasenk@redhat.com> To: linux-tip-commits@vger.kernel.org Subject: [tip:x86/asm] x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout Git-Commit-ID: 3304c9c37bef30ebd2ef71d986e6568372ce80f8 X-Mailer: tip-git-log-daemon Robot-ID: Robot-Unsubscribe: Contact to get blacklisted from these emails MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset=UTF-8 Content-Disposition: inline Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 7989 Lines: 250 Commit-ID: 3304c9c37bef30ebd2ef71d986e6568372ce80f8 Gitweb: http://git.kernel.org/tip/3304c9c37bef30ebd2ef71d986e6568372ce80f8 Author: Denys Vlasenko AuthorDate: Fri, 3 Apr 2015 21:49:13 +0200 Committer: Ingo Molnar CommitDate: Wed, 8 Apr 2015 09:02:13 +0200 x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout Interrupt entry points are handled with the following code, each 32-byte code block contains seven entry points: ... [push][jump 22] // 4 bytes [push][jump 18] // 4 bytes [push][jump 14] // 4 bytes [push][jump 10] // 4 bytes [push][jump 6] // 4 bytes [push][jump 2] // 4 bytes [push][jump common_interrupt][padding] // 8 bytes [push][jump] [push][jump] [push][jump] [push][jump] [push][jump] [push][jump] [push][jump common_interrupt][padding] [padding_2] common_interrupt: And there is a table which holds pointers to every entry point, IOW: to every push. In cold cache, two jumps are still costlier than one, even though we get the benefit of them residing in the same cacheline. This change replaces short jumps with near ones to 'common_interrupt', and pads every push+jump pair to 8 bytes. This way, each interrupt takes only one jump. This change replaces ".p2align CONFIG_X86_L1_CACHE_SHIFT" before dispatch table with ".align 8" - we do not need anything stronger than that. The table of entry addresses (the interrupt[] array) is no longer necessary, the address of entries can be easily calculated as (irq_entries_start + i*8). text data bss dec hex filename 12546 0 0 12546 3102 entry_64.o.before 11626 0 0 11626 2d6a entry_64.o The size decrease is because 1656 bytes of .init.rodata are gone. That's initdata, though. The resident size does go up a bit. Run-tested (32 and 64 bits). Acked-and-Tested-by: Borislav Petkov Signed-off-by: Denys Vlasenko Cc: Alexei Starovoitov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Steven Rostedt Cc: Will Drewry Link: http://lkml.kernel.org/r/1428090553-7283-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 5 ++--- arch/x86/kernel/entry_32.S | 41 ++++++++++------------------------------- arch/x86/kernel/entry_64.S | 41 ++++++++++------------------------------- arch/x86/kernel/irqinit.c | 3 ++- arch/x86/lguest/boot.c | 3 ++- 5 files changed, 26 insertions(+), 67 deletions(-) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 9662290..e9571dd 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -181,10 +181,9 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *); extern __visible void smp_invalidate_interrupt(struct pt_regs *); #endif -extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR - - FIRST_EXTERNAL_VECTOR])(void); +extern char irq_entries_start[]; #ifdef CONFIG_TRACING -#define trace_interrupt interrupt +#define trace_irq_entries_start irq_entries_start #endif #define VECTOR_UNDEFINED (-1) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index effa279..02bec0f 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -723,43 +723,22 @@ END(sysenter_badsys) .endm /* - * Build the entry stubs and pointer table with some assembler magic. - * We pack 7 stubs into a single 32-byte chunk, which will fit in a - * single cache line on all modern x86 implementations. + * Build the entry stubs with some assembler magic. + * We pack 1 stub into every 8-byte block. */ -.section .init.rodata,"a" -ENTRY(interrupt) -.section .entry.text, "ax" - .p2align 5 - .p2align CONFIG_X86_L1_CACHE_SHIFT + .align 8 ENTRY(irq_entries_start) RING0_INT_FRAME -vector=FIRST_EXTERNAL_VECTOR -.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7 - .balign 32 - .rept 7 - .if vector < FIRST_SYSTEM_VECTOR - .if vector <> FIRST_EXTERNAL_VECTOR + vector=FIRST_EXTERNAL_VECTOR + .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) + pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */ + vector=vector+1 + jmp common_interrupt CFI_ADJUST_CFA_OFFSET -4 - .endif -1: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */ - .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 - jmp 2f - .endif - .previous - .long 1b - .section .entry.text, "ax" -vector=vector+1 - .endif - .endr -2: jmp common_interrupt -.endr + .align 8 + .endr END(irq_entries_start) -.previous -END(interrupt) -.previous - /* * the CPU automatically disables interrupts when executing an IRQ vector, * so IRQ-flags tracing has to follow that: diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e4c8103..4ca03c5 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -608,44 +608,23 @@ ENTRY(ret_from_fork) END(ret_from_fork) /* - * Build the entry stubs and pointer table with some assembler magic. - * We pack 7 stubs into a single 32-byte chunk, which will fit in a - * single cache line on all modern x86 implementations. + * Build the entry stubs with some assembler magic. + * We pack 1 stub into every 8-byte block. */ - .section .init.rodata,"a" -ENTRY(interrupt) - .section .entry.text - .p2align 5 - .p2align CONFIG_X86_L1_CACHE_SHIFT + .align 8 ENTRY(irq_entries_start) INTR_FRAME -vector=FIRST_EXTERNAL_VECTOR -.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7 - .balign 32 - .rept 7 - .if vector < FIRST_SYSTEM_VECTOR - .if vector <> FIRST_EXTERNAL_VECTOR + vector=FIRST_EXTERNAL_VECTOR + .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) + pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */ + vector=vector+1 + jmp common_interrupt CFI_ADJUST_CFA_OFFSET -8 - .endif -1: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */ - .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 - jmp 2f - .endif - .previous - .quad 1b - .section .entry.text -vector=vector+1 - .endif - .endr -2: jmp common_interrupt -.endr + .align 8 + .endr CFI_ENDPROC END(irq_entries_start) -.previous -END(interrupt) -.previous - /* * Interrupt entry/exit. * diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 70e181e..cd10a64 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -178,7 +178,8 @@ void __init native_init_IRQ(void) #endif for_each_clear_bit_from(i, used_vectors, first_system_vector) { /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ - set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); + set_intr_gate(i, irq_entries_start + + 8 * (i - FIRST_EXTERNAL_VECTOR)); } #ifdef CONFIG_X86_LOCAL_APIC for_each_clear_bit_from(i, used_vectors, NR_VECTORS) diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 8561585..717908b 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -868,7 +868,8 @@ static void __init lguest_init_IRQ(void) /* Some systems map "vectors" to interrupts weirdly. Not us! */ __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR); if (i != SYSCALL_VECTOR) - set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); + set_intr_gate(i, irq_entries_start + + 8 * (i - FIRST_EXTERNAL_VECTOR)); } /* -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/