Change from v3:
- Separate modifications to make review easy.
- Refactor implementations registering exception/irq_vector
handers. (Patch 1, 2, 3)
This series introduce page fault tracepoints.
Detailed descriptions are explained in each patch.
Any comments are welcome.
Seiji Aguchi (4):
Move set_intr_gate() into macro
Register exception handler to trace IDT
Delete __trace_alloc_intr_gate()
Add page fault tracepoints
arch/x86/include/asm/desc.h | 57 ++++++++++++++-------------------
arch/x86/include/asm/hw_irq.h | 3 ++
arch/x86/include/asm/segment.h | 3 ++
arch/x86/include/asm/trace/exceptions.h | 52 ++++++++++++++++++++++++++++++
arch/x86/include/asm/traps.h | 20 ++++++++++++
arch/x86/kernel/entry_32.S | 10 ++++++
arch/x86/kernel/entry_64.S | 13 +++++++-
arch/x86/kernel/head64.c | 2 +-
arch/x86/kernel/kvm.c | 2 +-
arch/x86/kernel/traps.c | 28 ++++++++--------
arch/x86/mm/Makefile | 2 ++
arch/x86/mm/fault.c | 23 +++++++++++++
12 files changed, 165 insertions(+), 50 deletions(-)
create mode 100644 arch/x86/include/asm/trace/exceptions.h
--
1.8.3.1
Move set_intr_gate() into a macro by removing __alloc_intr_gate().
The purpose is to avoid failing a kernel build after applying
a subsequent patch which changes set_intr_gate() to macro.
Signed-off-by: Seiji Aguchi <[email protected]>
---
arch/x86/include/asm/desc.h | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index b90e5df..d939567 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -395,15 +395,10 @@ static inline void trace_set_intr_gate(unsigned int gate, void *addr)
#define __trace_alloc_intr_gate(n, addr)
#endif
-static inline void __alloc_intr_gate(unsigned int n, void *addr)
-{
- set_intr_gate(n, addr);
-}
-
#define alloc_intr_gate(n, addr) \
do { \
alloc_system_vector(n); \
- __alloc_intr_gate(n, addr); \
+ set_intr_gate(n, addr); \
__trace_alloc_intr_gate(n, trace_##addr); \
} while (0)
--
1.8.3.1
This patch introduces page fault tracepoints to x86 architecture
by switching IDT.
Two events, for user and kernel spaces, are introduced at the beginning
of page fault handler for tracing.
- User space event
There is a request of page fault event for user space as below.
https://lkml.kernel.org/r/1368079520-11015-2-git-send-email-fdeslaur+()+gmail+!+com
https://lkml.kernel.org/r/1368079520-11015-1-git-send-email-fdeslaur+()+gmail+!+com
- Kernel space event:
When we meature an overhead in kernel space for investigating performance
issues, we can check if it comes from the page fault events.
Signed-off-by: Seiji Aguchi <[email protected]>
---
arch/x86/include/asm/trace/exceptions.h | 52 +++++++++++++++++++++++++++++++++
arch/x86/mm/Makefile | 2 ++
arch/x86/mm/fault.c | 13 +++++++++
3 files changed, 67 insertions(+)
create mode 100644 arch/x86/include/asm/trace/exceptions.h
diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h
new file mode 100644
index 0000000..86540c0
--- /dev/null
+++ b/arch/x86/include/asm/trace/exceptions.h
@@ -0,0 +1,52 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM exceptions
+
+#if !defined(_TRACE_PAGE_FAULT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PAGE_FAULT_H
+
+#include <linux/tracepoint.h>
+
+extern void trace_irq_vector_regfunc(void);
+extern void trace_irq_vector_unregfunc(void);
+
+DECLARE_EVENT_CLASS(x86_exceptions,
+
+ TP_PROTO(unsigned long address, struct pt_regs *regs,
+ unsigned long error_code),
+
+ TP_ARGS(address, regs, error_code),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, address )
+ __field( unsigned long, ip )
+ __field( unsigned long, error_code )
+ ),
+
+ TP_fast_assign(
+ __entry->address = address;
+ __entry->ip = regs->ip;
+ __entry->error_code = error_code;
+ ),
+
+ TP_printk("address=%pf ip=%pf error_code=0x%lx",
+ (void *)__entry->address, (void *)__entry->ip,
+ __entry->error_code) );
+
+#define DEFINE_PAGE_FAULT_EVENT(name) \
+DEFINE_EVENT_FN(x86_exceptions, name, \
+ TP_PROTO(unsigned long address, struct pt_regs *regs, \
+ unsigned long error_code), \
+ TP_ARGS(address, regs, error_code), \
+ trace_irq_vector_regfunc, \
+ trace_irq_vector_unregfunc);
+
+DEFINE_PAGE_FAULT_EVENT(user_page_fault);
+DEFINE_PAGE_FAULT_EVENT(kernel_page_fault);
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE exceptions
+#endif /* _TRACE_PAGE_FAULT_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 23d8e5f..6a19ad9 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -6,6 +6,8 @@ nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_physaddr.o := $(nostackp)
CFLAGS_setup_nx.o := $(nostackp)
+CFLAGS_fault.o := -I$(src)/../include/asm/trace
+
obj-$(CONFIG_X86_PAT) += pat_rbtree.o
obj-$(CONFIG_SMP) += tlb.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index fd3e281..f2730cbc 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -20,6 +20,9 @@
#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
#include <asm/fixmap.h> /* VSYSCALL_START */
+#define CREATE_TRACE_POINTS
+#include <asm/trace/exceptions.h>
+
/*
* Page fault error code bits:
*
@@ -1232,12 +1235,22 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
exception_exit(prev_state);
}
+static void trace_page_fault_entries(struct pt_regs *regs,
+ unsigned long error_code)
+{
+ if (user_mode(regs))
+ trace_user_page_fault(read_cr2(), regs, error_code);
+ else
+ trace_kernel_page_fault(read_cr2(), regs, error_code);
+}
+
dotraplinkage void __kprobes
trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
enum ctx_state prev_state;
prev_state = exception_enter();
+ trace_page_fault_entries(regs, error_code);
__do_page_fault(regs, error_code);
exception_exit(prev_state);
}
--
1.8.3.1
This patch registers exception handlers for tracing to a trace IDT.
To implemented it in set_intr_gate(), this patch does followings.
- Register the exception handlers to
the trace IDT by prepending "trace_" to the handler's names.
- Also, newly introduce trace_page_fault() to add tracepoints
in a subsequent patch.
Signed-off-by: Seiji Aguchi <[email protected]>
---
arch/x86/include/asm/desc.h | 28 +++++++++++++++++++++++-----
arch/x86/include/asm/hw_irq.h | 3 +++
arch/x86/include/asm/segment.h | 3 +++
arch/x86/include/asm/traps.h | 20 ++++++++++++++++++++
arch/x86/kernel/entry_32.S | 10 ++++++++++
arch/x86/kernel/entry_64.S | 13 ++++++++++++-
arch/x86/kernel/head64.c | 2 +-
arch/x86/kernel/kvm.c | 2 +-
arch/x86/kernel/traps.c | 28 ++++++++++++++--------------
arch/x86/mm/fault.c | 10 ++++++++++
10 files changed, 97 insertions(+), 22 deletions(-)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index d939567..3d73437 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -327,10 +327,25 @@ static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
{
write_idt_entry(trace_idt_table, entry, gate);
}
+
+static inline void _trace_set_gate(int gate, unsigned type, void *addr,
+ unsigned dpl, unsigned ist, unsigned seg)
+{
+ gate_desc s;
+
+ pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
+ /*
+ * does not need to be atomic because it is only done once at
+ * setup time
+ */
+ write_trace_idt_entry(gate, &s);
+}
#else
static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
{
}
+
+#define _trace_set_gate(gate, type, addr, dpl, ist, seg)
#endif
static inline void _set_gate(int gate, unsigned type, void *addr,
@@ -353,11 +368,14 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
* Pentium F0 0F bugfix can have resulted in the mapped
* IDT being write-protected.
*/
-static inline void set_intr_gate(unsigned int n, void *addr)
-{
- BUG_ON((unsigned)n > 0xFF);
- _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
-}
+#define set_intr_gate(n, addr) \
+ do { \
+ BUG_ON((unsigned)n > 0xFF); \
+ _set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \
+ __KERNEL_CS); \
+ _trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
+ 0, 0, __KERNEL_CS); \
+ } while (0)
extern int first_system_vector;
/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 92b3bae..cba45d9 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -187,6 +187,9 @@ extern __visible void smp_invalidate_interrupt(struct pt_regs *);
#endif
extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
+#ifdef CONFIG_TRACING
+#define trace_interrupt interrupt
+#endif
typedef int vector_irq_t[NR_VECTORS];
DECLARE_PER_CPU(vector_irq_t, vector_irq);
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index c48a950..6f1c3a8 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -214,6 +214,9 @@
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
+#ifdef CONFIG_TRACING
+#define trace_early_idt_handlers early_idt_handlers
+#endif
/*
* Load a segment. Fall back on loading the zero
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 7036cb6..58d66fe 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -37,6 +37,23 @@ asmlinkage void machine_check(void);
#endif /* CONFIG_X86_MCE */
asmlinkage void simd_coprocessor_error(void);
+#ifdef CONFIG_TRACING
+asmlinkage void trace_page_fault(void);
+#define trace_divide_error divide_error
+#define trace_bounds bounds
+#define trace_invalid_op invalid_op
+#define trace_device_not_available device_not_available
+#define trace_coprocessor_segment_overrun coprocessor_segment_overrun
+#define trace_invalid_TSS invalid_TSS
+#define trace_segment_not_present segment_not_present
+#define trace_general_protection general_protection
+#define trace_spurious_interrupt_bug spurious_interrupt_bug
+#define trace_coprocessor_error coprocessor_error
+#define trace_alignment_check alignment_check
+#define trace_simd_coprocessor_error simd_coprocessor_error
+#define trace_async_page_fault async_page_fault
+#endif
+
dotraplinkage void do_divide_error(struct pt_regs *, long);
dotraplinkage void do_debug(struct pt_regs *, long);
dotraplinkage void do_nmi(struct pt_regs *, long);
@@ -55,6 +72,9 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *);
#endif
dotraplinkage void do_general_protection(struct pt_regs *, long);
dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
+#ifdef CONFIG_TRACING
+dotraplinkage void trace_do_page_fault(struct pt_regs *, unsigned long);
+#endif
dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long);
dotraplinkage void do_coprocessor_error(struct pt_regs *, long);
dotraplinkage void do_alignment_check(struct pt_regs *, long);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f0dcb0c..0661abe 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1247,6 +1247,16 @@ return_to_handler:
*/
.pushsection .kprobes.text, "ax"
+#ifdef CONFIG_TRACING
+ENTRY(trace_page_fault)
+ RING0_EC_FRAME
+ ASM_CLAC
+ pushl_cfi $trace_do_page_fault
+ jmp error_code
+ CFI_ENDPROC
+END(trace_page_fault)
+#endif
+
ENTRY(page_fault)
RING0_EC_FRAME
ASM_CLAC
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b077f4c..8b7b169 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1280,6 +1280,17 @@ ENTRY(\sym)
END(\sym)
.endm
+#ifdef CONFIG_TRACING
+.macro trace_errorentry sym do_sym
+errorentry trace(\sym) trace(\do_sym)
+errorentry \sym \do_sym
+.endm
+#else
+.macro trace_errorentry sym do_sym
+errorentry \sym \do_sym
+.endm
+#endif
+
/* error code is on the stack already */
.macro paranoiderrorentry sym do_sym
ENTRY(\sym)
@@ -1482,7 +1493,7 @@ zeroentry xen_int3 do_int3
errorentry xen_stack_segment do_stack_segment
#endif
errorentry general_protection do_general_protection
-errorentry page_fault do_page_fault
+trace_errorentry page_fault do_page_fault
#ifdef CONFIG_KVM_GUEST
errorentry async_page_fault do_async_page_fault
#endif
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 1be8e43..85126cc 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -162,7 +162,7 @@ asmlinkage void __init x86_64_start_kernel(char * real_mode_data)
clear_bss();
for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
- set_intr_gate(i, &early_idt_handlers[i]);
+ set_intr_gate(i, early_idt_handlers[i]);
load_idt((const struct desc_ptr *)&idt_descr);
copy_bootdata(__va(real_mode_data));
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index a0e2a8a..028d7a4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -464,7 +464,7 @@ static struct notifier_block kvm_cpu_notifier = {
static void __init kvm_apf_trap_init(void)
{
- set_intr_gate(14, &async_page_fault);
+ set_intr_gate(14, async_page_fault);
}
void __init kvm_guest_init(void)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8c8093b..1c9d0ad 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -713,7 +713,7 @@ void __init early_trap_init(void)
/* int3 can be called from all */
set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
#ifdef CONFIG_X86_32
- set_intr_gate(X86_TRAP_PF, &page_fault);
+ set_intr_gate(X86_TRAP_PF, page_fault);
#endif
load_idt(&idt_descr);
}
@@ -721,7 +721,7 @@ void __init early_trap_init(void)
void __init early_trap_pf_init(void)
{
#ifdef CONFIG_X86_64
- set_intr_gate(X86_TRAP_PF, &page_fault);
+ set_intr_gate(X86_TRAP_PF, page_fault);
#endif
}
@@ -737,30 +737,30 @@ void __init trap_init(void)
early_iounmap(p, 4);
#endif
- set_intr_gate(X86_TRAP_DE, ÷_error);
+ set_intr_gate(X86_TRAP_DE, divide_error);
set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK);
/* int4 can be called from all */
set_system_intr_gate(X86_TRAP_OF, &overflow);
- set_intr_gate(X86_TRAP_BR, &bounds);
- set_intr_gate(X86_TRAP_UD, &invalid_op);
- set_intr_gate(X86_TRAP_NM, &device_not_available);
+ set_intr_gate(X86_TRAP_BR, bounds);
+ set_intr_gate(X86_TRAP_UD, invalid_op);
+ set_intr_gate(X86_TRAP_NM, device_not_available);
#ifdef CONFIG_X86_32
set_task_gate(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS);
#else
set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK);
#endif
- set_intr_gate(X86_TRAP_OLD_MF, &coprocessor_segment_overrun);
- set_intr_gate(X86_TRAP_TS, &invalid_TSS);
- set_intr_gate(X86_TRAP_NP, &segment_not_present);
+ set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
+ set_intr_gate(X86_TRAP_TS, invalid_TSS);
+ set_intr_gate(X86_TRAP_NP, segment_not_present);
set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK);
- set_intr_gate(X86_TRAP_GP, &general_protection);
- set_intr_gate(X86_TRAP_SPURIOUS, &spurious_interrupt_bug);
- set_intr_gate(X86_TRAP_MF, &coprocessor_error);
- set_intr_gate(X86_TRAP_AC, &alignment_check);
+ set_intr_gate(X86_TRAP_GP, general_protection);
+ set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
+ set_intr_gate(X86_TRAP_MF, coprocessor_error);
+ set_intr_gate(X86_TRAP_AC, alignment_check);
#ifdef CONFIG_X86_MCE
set_intr_gate_ist(X86_TRAP_MC, &machine_check, MCE_STACK);
#endif
- set_intr_gate(X86_TRAP_XF, &simd_coprocessor_error);
+ set_intr_gate(X86_TRAP_XF, simd_coprocessor_error);
/* Reserve all the builtin and the syscall vector: */
for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 3aaeffc..fd3e281 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1231,3 +1231,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
__do_page_fault(regs, error_code);
exception_exit(prev_state);
}
+
+dotraplinkage void __kprobes
+trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+ enum ctx_state prev_state;
+
+ prev_state = exception_enter();
+ __do_page_fault(regs, error_code);
+ exception_exit(prev_state);
+}
--
1.8.3.1
Currently irq vector handlers for tracing are registered in both set_intr_gate()
and __trace_alloc_intr_gate() in alloc_intr_gate().
But, we don't need to do that twice.
So, let's delete __trace_alloc_intr_gate().
Signed-off-by: Seiji Aguchi <[email protected]>
---
arch/x86/include/asm/desc.h | 22 ----------------------
1 file changed, 22 deletions(-)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 3d73437..50d033a 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -392,32 +392,10 @@ static inline void alloc_system_vector(int vector)
}
}
-#ifdef CONFIG_TRACING
-static inline void trace_set_intr_gate(unsigned int gate, void *addr)
-{
- gate_desc s;
-
- pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
- write_idt_entry(trace_idt_table, gate, &s);
-}
-
-static inline void __trace_alloc_intr_gate(unsigned int n, void *addr)
-{
- trace_set_intr_gate(n, addr);
-}
-#else
-static inline void trace_set_intr_gate(unsigned int gate, void *addr)
-{
-}
-
-#define __trace_alloc_intr_gate(n, addr)
-#endif
-
#define alloc_intr_gate(n, addr) \
do { \
alloc_system_vector(n); \
set_intr_gate(n, addr); \
- __trace_alloc_intr_gate(n, trace_##addr); \
} while (0)
/*
--
1.8.3.1