2013-10-30 20:42:28

by Seiji Aguchi

Subject: [PATCH v4 0/4] Introduce page fault tracepoints

Changes from v3:
- Split the modifications to make review easier.
- Refactor the implementation that registers exception/irq_vector
  handlers. (Patches 1, 2, 3)

This series introduces page fault tracepoints.

Detailed descriptions are provided in each patch.
Any comments are welcome.

Seiji Aguchi (4):
Move set_intr_gate() into macro
Register exception handler to trace IDT
Delete __trace_alloc_intr_gate()
Add page fault tracepoints

arch/x86/include/asm/desc.h | 57 ++++++++++++++-------------------
arch/x86/include/asm/hw_irq.h | 3 ++
arch/x86/include/asm/segment.h | 3 ++
arch/x86/include/asm/trace/exceptions.h | 52 ++++++++++++++++++++++++++++++
arch/x86/include/asm/traps.h | 20 ++++++++++++
arch/x86/kernel/entry_32.S | 10 ++++++
arch/x86/kernel/entry_64.S | 13 +++++++-
arch/x86/kernel/head64.c | 2 +-
arch/x86/kernel/kvm.c | 2 +-
arch/x86/kernel/traps.c | 28 ++++++++--------
arch/x86/mm/Makefile | 2 ++
arch/x86/mm/fault.c | 23 +++++++++++++
12 files changed, 165 insertions(+), 50 deletions(-)
create mode 100644 arch/x86/include/asm/trace/exceptions.h

--
1.8.3.1


2013-10-30 20:42:19

by Seiji Aguchi

Subject: [PATCH v4 1/4] Move set_intr_gate() into macro

Move the set_intr_gate() call directly into the alloc_intr_gate() macro
by removing the __alloc_intr_gate() wrapper.

The purpose is to avoid breaking the kernel build when a subsequent
patch converts set_intr_gate() itself into a macro.
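
To illustrate why the wrapper has to go, here is a stand-alone
user-space sketch (not kernel code; my_handler/trace_my_handler are
made-up names for the example):

#include <stdio.h>

static void my_handler(void)       { printf("my_handler\n"); }
static void trace_my_handler(void) { printf("trace_my_handler\n"); }

/*
 * Once set_intr_gate() is a macro, trace_##addr is pasted from the
 * caller's argument, so the macro must see the real handler name.
 */
#define set_intr_gate(n, addr)          \
        do {                            \
                addr();                 \
                trace_##addr();         \
        } while (0)

/*
 * A wrapper like the old __alloc_intr_gate() would expand the macro
 * with its parameter name instead, producing a call to a non-existent
 * trace_addr() and breaking the build:
 *
 *      static inline void __alloc_intr_gate(unsigned int n,
 *                                           void (*addr)(void))
 *      {
 *              set_intr_gate(n, addr);  // expands to trace_addr()
 *      }
 */

int main(void)
{
        set_intr_gate(14, my_handler);  /* pastes trace_my_handler() */
        return 0;
}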

Signed-off-by: Seiji Aguchi <[email protected]>
---
arch/x86/include/asm/desc.h | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index b90e5df..d939567 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -395,15 +395,10 @@ static inline void trace_set_intr_gate(unsigned int gate, void *addr)
#define __trace_alloc_intr_gate(n, addr)
#endif

-static inline void __alloc_intr_gate(unsigned int n, void *addr)
-{
- set_intr_gate(n, addr);
-}
-
#define alloc_intr_gate(n, addr) \
do { \
alloc_system_vector(n); \
- __alloc_intr_gate(n, addr); \
+ set_intr_gate(n, addr); \
__trace_alloc_intr_gate(n, trace_##addr); \
} while (0)

--
1.8.3.1

2013-10-30 20:42:23

by Seiji Aguchi

Subject: [PATCH v4 4/4] Add page fault tracepoints

This patch introduces page fault tracepoints to x86 architecture
by switching IDT.

Two events, one for user space and one for kernel space, are introduced
at the beginning of the page fault handler for tracing.

- User space event:
There is a request for a page fault event for user space, as in the
threads below.

https://lkml.kernel.org/r/[email protected]
https://lkml.kernel.org/r/[email protected]

- Kernel space event:
When we measure overhead in kernel space while investigating performance
issues, we can check whether it comes from the page fault events.
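
As a rough usage sketch (assuming debugfs is mounted at
/sys/kernel/debug and the kernel is built with CONFIG_TRACING; the
write_str() helper is made up for the example), the new events can be
enabled from user space like this:

#include <stdio.h>

static int write_str(const char *path, const char *val)
{
        FILE *f = fopen(path, "w");

        if (!f)
                return -1;
        fputs(val, f);
        fclose(f);
        return 0;
}

int main(void)
{
        const char *base = "/sys/kernel/debug/tracing/events/exceptions";
        char path[256];

        snprintf(path, sizeof(path), "%s/user_page_fault/enable", base);
        write_str(path, "1");
        snprintf(path, sizeof(path), "%s/kernel_page_fault/enable", base);
        write_str(path, "1");

        /*
         * The records then show up in /sys/kernel/debug/tracing/trace
         * with the address, ip and error_code fields.
         */
        return 0;
}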

Signed-off-by: Seiji Aguchi <[email protected]>
---
arch/x86/include/asm/trace/exceptions.h | 52 +++++++++++++++++++++++++++++++++
arch/x86/mm/Makefile | 2 ++
arch/x86/mm/fault.c | 13 +++++++++
3 files changed, 67 insertions(+)
create mode 100644 arch/x86/include/asm/trace/exceptions.h

diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h
new file mode 100644
index 0000000..86540c0
--- /dev/null
+++ b/arch/x86/include/asm/trace/exceptions.h
@@ -0,0 +1,52 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM exceptions
+
+#if !defined(_TRACE_PAGE_FAULT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PAGE_FAULT_H
+
+#include <linux/tracepoint.h>
+
+extern void trace_irq_vector_regfunc(void);
+extern void trace_irq_vector_unregfunc(void);
+
+DECLARE_EVENT_CLASS(x86_exceptions,
+
+ TP_PROTO(unsigned long address, struct pt_regs *regs,
+ unsigned long error_code),
+
+ TP_ARGS(address, regs, error_code),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, address )
+ __field( unsigned long, ip )
+ __field( unsigned long, error_code )
+ ),
+
+ TP_fast_assign(
+ __entry->address = address;
+ __entry->ip = regs->ip;
+ __entry->error_code = error_code;
+ ),
+
+ TP_printk("address=%pf ip=%pf error_code=0x%lx",
+ (void *)__entry->address, (void *)__entry->ip,
+ __entry->error_code) );
+
+#define DEFINE_PAGE_FAULT_EVENT(name) \
+DEFINE_EVENT_FN(x86_exceptions, name, \
+ TP_PROTO(unsigned long address, struct pt_regs *regs, \
+ unsigned long error_code), \
+ TP_ARGS(address, regs, error_code), \
+ trace_irq_vector_regfunc, \
+ trace_irq_vector_unregfunc);
+
+DEFINE_PAGE_FAULT_EVENT(user_page_fault);
+DEFINE_PAGE_FAULT_EVENT(kernel_page_fault);
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE exceptions
+#endif /* _TRACE_PAGE_FAULT_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 23d8e5f..6a19ad9 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -6,6 +6,8 @@ nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_physaddr.o := $(nostackp)
CFLAGS_setup_nx.o := $(nostackp)

+CFLAGS_fault.o := -I$(src)/../include/asm/trace
+
obj-$(CONFIG_X86_PAT) += pat_rbtree.o
obj-$(CONFIG_SMP) += tlb.o

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index fd3e281..f2730cbc 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -20,6 +20,9 @@
#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
#include <asm/fixmap.h> /* VSYSCALL_START */

+#define CREATE_TRACE_POINTS
+#include <asm/trace/exceptions.h>
+
/*
* Page fault error code bits:
*
@@ -1232,12 +1235,22 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
exception_exit(prev_state);
}

+static void trace_page_fault_entries(struct pt_regs *regs,
+ unsigned long error_code)
+{
+ if (user_mode(regs))
+ trace_user_page_fault(read_cr2(), regs, error_code);
+ else
+ trace_kernel_page_fault(read_cr2(), regs, error_code);
+}
+
dotraplinkage void __kprobes
trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
enum ctx_state prev_state;

prev_state = exception_enter();
+ trace_page_fault_entries(regs, error_code);
__do_page_fault(regs, error_code);
exception_exit(prev_state);
}
--
1.8.3.1

2013-10-30 20:42:31

by Seiji Aguchi

Subject: [PATCH v4 2/4] Register exception handler to trace IDT

This patch registers exception handlers for tracing in a trace IDT.

To implement this in set_intr_gate(), the patch does the following.
- Registers the exception handlers in the trace IDT by prepending
"trace_" to the handlers' names (see the illustrative sketch below).
- Introduces a new trace_page_fault() handler so that tracepoints can
be added to it in a subsequent patch.
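
As a purely illustrative user-space sketch of the resulting pattern
(not kernel code; the tables and handlers below are mocked up for the
example), one set_intr_gate() invocation now fills both the regular
table and the trace table:

#include <stdio.h>

#define NR_GATES 256

typedef void (*handler_t)(void);

static handler_t idt_table[NR_GATES];        /* stands in for the regular IDT */
static handler_t trace_idt_table[NR_GATES];  /* stands in for the trace IDT */

static void page_fault(void)       { printf("page_fault\n"); }
static void trace_page_fault(void) { printf("trace_page_fault\n"); }

/*
 * Mirrors the shape of the new set_intr_gate(): one call registers the
 * regular handler and its trace_-prefixed counterpart.
 */
#define set_intr_gate(n, addr)                          \
        do {                                            \
                idt_table[n] = addr;                    \
                trace_idt_table[n] = trace_##addr;      \
        } while (0)

int main(void)
{
        set_intr_gate(14, page_fault);   /* 14 == X86_TRAP_PF */
        idt_table[14]();                 /* regular entry */
        trace_idt_table[14]();           /* trace entry */
        return 0;
}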

Signed-off-by: Seiji Aguchi <[email protected]>
---
arch/x86/include/asm/desc.h | 28 +++++++++++++++++++++++-----
arch/x86/include/asm/hw_irq.h | 3 +++
arch/x86/include/asm/segment.h | 3 +++
arch/x86/include/asm/traps.h | 20 ++++++++++++++++++++
arch/x86/kernel/entry_32.S | 10 ++++++++++
arch/x86/kernel/entry_64.S | 13 ++++++++++++-
arch/x86/kernel/head64.c | 2 +-
arch/x86/kernel/kvm.c | 2 +-
arch/x86/kernel/traps.c | 28 ++++++++++++++--------------
arch/x86/mm/fault.c | 10 ++++++++++
10 files changed, 97 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index d939567..3d73437 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -327,10 +327,25 @@ static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
{
write_idt_entry(trace_idt_table, entry, gate);
}
+
+static inline void _trace_set_gate(int gate, unsigned type, void *addr,
+ unsigned dpl, unsigned ist, unsigned seg)
+{
+ gate_desc s;
+
+ pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
+ /*
+ * does not need to be atomic because it is only done once at
+ * setup time
+ */
+ write_trace_idt_entry(gate, &s);
+}
#else
static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
{
}
+
+#define _trace_set_gate(gate, type, addr, dpl, ist, seg)
#endif

static inline void _set_gate(int gate, unsigned type, void *addr,
@@ -353,11 +368,14 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
* Pentium F0 0F bugfix can have resulted in the mapped
* IDT being write-protected.
*/
-static inline void set_intr_gate(unsigned int n, void *addr)
-{
- BUG_ON((unsigned)n > 0xFF);
- _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
-}
+#define set_intr_gate(n, addr) \
+ do { \
+ BUG_ON((unsigned)n > 0xFF); \
+ _set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \
+ __KERNEL_CS); \
+ _trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
+ 0, 0, __KERNEL_CS); \
+ } while (0)

extern int first_system_vector;
/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 92b3bae..cba45d9 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -187,6 +187,9 @@ extern __visible void smp_invalidate_interrupt(struct pt_regs *);
#endif

extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
+#ifdef CONFIG_TRACING
+#define trace_interrupt interrupt
+#endif

typedef int vector_irq_t[NR_VECTORS];
DECLARE_PER_CPU(vector_irq_t, vector_irq);
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index c48a950..6f1c3a8 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -214,6 +214,9 @@
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
+#ifdef CONFIG_TRACING
+#define trace_early_idt_handlers early_idt_handlers
+#endif

/*
* Load a segment. Fall back on loading the zero
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 7036cb6..58d66fe 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -37,6 +37,23 @@ asmlinkage void machine_check(void);
#endif /* CONFIG_X86_MCE */
asmlinkage void simd_coprocessor_error(void);

+#ifdef CONFIG_TRACING
+asmlinkage void trace_page_fault(void);
+#define trace_divide_error divide_error
+#define trace_bounds bounds
+#define trace_invalid_op invalid_op
+#define trace_device_not_available device_not_available
+#define trace_coprocessor_segment_overrun coprocessor_segment_overrun
+#define trace_invalid_TSS invalid_TSS
+#define trace_segment_not_present segment_not_present
+#define trace_general_protection general_protection
+#define trace_spurious_interrupt_bug spurious_interrupt_bug
+#define trace_coprocessor_error coprocessor_error
+#define trace_alignment_check alignment_check
+#define trace_simd_coprocessor_error simd_coprocessor_error
+#define trace_async_page_fault async_page_fault
+#endif
+
dotraplinkage void do_divide_error(struct pt_regs *, long);
dotraplinkage void do_debug(struct pt_regs *, long);
dotraplinkage void do_nmi(struct pt_regs *, long);
@@ -55,6 +72,9 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *);
#endif
dotraplinkage void do_general_protection(struct pt_regs *, long);
dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
+#ifdef CONFIG_TRACING
+dotraplinkage void trace_do_page_fault(struct pt_regs *, unsigned long);
+#endif
dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long);
dotraplinkage void do_coprocessor_error(struct pt_regs *, long);
dotraplinkage void do_alignment_check(struct pt_regs *, long);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f0dcb0c..0661abe 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1247,6 +1247,16 @@ return_to_handler:
*/
.pushsection .kprobes.text, "ax"

+#ifdef CONFIG_TRACING
+ENTRY(trace_page_fault)
+ RING0_EC_FRAME
+ ASM_CLAC
+ pushl_cfi $trace_do_page_fault
+ jmp error_code
+ CFI_ENDPROC
+END(trace_page_fault)
+#endif
+
ENTRY(page_fault)
RING0_EC_FRAME
ASM_CLAC
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b077f4c..8b7b169 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1280,6 +1280,17 @@ ENTRY(\sym)
END(\sym)
.endm

+#ifdef CONFIG_TRACING
+.macro trace_errorentry sym do_sym
+errorentry trace(\sym) trace(\do_sym)
+errorentry \sym \do_sym
+.endm
+#else
+.macro trace_errorentry sym do_sym
+errorentry \sym \do_sym
+.endm
+#endif
+
/* error code is on the stack already */
.macro paranoiderrorentry sym do_sym
ENTRY(\sym)
@@ -1482,7 +1493,7 @@ zeroentry xen_int3 do_int3
errorentry xen_stack_segment do_stack_segment
#endif
errorentry general_protection do_general_protection
-errorentry page_fault do_page_fault
+trace_errorentry page_fault do_page_fault
#ifdef CONFIG_KVM_GUEST
errorentry async_page_fault do_async_page_fault
#endif
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 1be8e43..85126cc 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -162,7 +162,7 @@ asmlinkage void __init x86_64_start_kernel(char * real_mode_data)
clear_bss();

for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
- set_intr_gate(i, &early_idt_handlers[i]);
+ set_intr_gate(i, early_idt_handlers[i]);
load_idt((const struct desc_ptr *)&idt_descr);

copy_bootdata(__va(real_mode_data));
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index a0e2a8a..028d7a4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -464,7 +464,7 @@ static struct notifier_block kvm_cpu_notifier = {

static void __init kvm_apf_trap_init(void)
{
- set_intr_gate(14, &async_page_fault);
+ set_intr_gate(14, async_page_fault);
}

void __init kvm_guest_init(void)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8c8093b..1c9d0ad 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -713,7 +713,7 @@ void __init early_trap_init(void)
/* int3 can be called from all */
set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
#ifdef CONFIG_X86_32
- set_intr_gate(X86_TRAP_PF, &page_fault);
+ set_intr_gate(X86_TRAP_PF, page_fault);
#endif
load_idt(&idt_descr);
}
@@ -721,7 +721,7 @@ void __init early_trap_init(void)
void __init early_trap_pf_init(void)
{
#ifdef CONFIG_X86_64
- set_intr_gate(X86_TRAP_PF, &page_fault);
+ set_intr_gate(X86_TRAP_PF, page_fault);
#endif
}

@@ -737,30 +737,30 @@ void __init trap_init(void)
early_iounmap(p, 4);
#endif

- set_intr_gate(X86_TRAP_DE, &divide_error);
+ set_intr_gate(X86_TRAP_DE, divide_error);
set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK);
/* int4 can be called from all */
set_system_intr_gate(X86_TRAP_OF, &overflow);
- set_intr_gate(X86_TRAP_BR, &bounds);
- set_intr_gate(X86_TRAP_UD, &invalid_op);
- set_intr_gate(X86_TRAP_NM, &device_not_available);
+ set_intr_gate(X86_TRAP_BR, bounds);
+ set_intr_gate(X86_TRAP_UD, invalid_op);
+ set_intr_gate(X86_TRAP_NM, device_not_available);
#ifdef CONFIG_X86_32
set_task_gate(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS);
#else
set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK);
#endif
- set_intr_gate(X86_TRAP_OLD_MF, &coprocessor_segment_overrun);
- set_intr_gate(X86_TRAP_TS, &invalid_TSS);
- set_intr_gate(X86_TRAP_NP, &segment_not_present);
+ set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
+ set_intr_gate(X86_TRAP_TS, invalid_TSS);
+ set_intr_gate(X86_TRAP_NP, segment_not_present);
set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK);
- set_intr_gate(X86_TRAP_GP, &general_protection);
- set_intr_gate(X86_TRAP_SPURIOUS, &spurious_interrupt_bug);
- set_intr_gate(X86_TRAP_MF, &coprocessor_error);
- set_intr_gate(X86_TRAP_AC, &alignment_check);
+ set_intr_gate(X86_TRAP_GP, general_protection);
+ set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
+ set_intr_gate(X86_TRAP_MF, coprocessor_error);
+ set_intr_gate(X86_TRAP_AC, alignment_check);
#ifdef CONFIG_X86_MCE
set_intr_gate_ist(X86_TRAP_MC, &machine_check, MCE_STACK);
#endif
- set_intr_gate(X86_TRAP_XF, &simd_coprocessor_error);
+ set_intr_gate(X86_TRAP_XF, simd_coprocessor_error);

/* Reserve all the builtin and the syscall vector: */
for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 3aaeffc..fd3e281 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1231,3 +1231,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
__do_page_fault(regs, error_code);
exception_exit(prev_state);
}
+
+dotraplinkage void __kprobes
+trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+ enum ctx_state prev_state;
+
+ prev_state = exception_enter();
+ __do_page_fault(regs, error_code);
+ exception_exit(prev_state);
+}
--
1.8.3.1

2013-10-30 20:42:26

by Seiji Aguchi

Subject: [PATCH v4 3/4] Delete __trace_alloc_intr_gate()

Currently, the irq vector handlers for tracing are registered twice in
alloc_intr_gate(): once via set_intr_gate() and once via
__trace_alloc_intr_gate().
We don't need to do that twice, so delete __trace_alloc_intr_gate().

Signed-off-by: Seiji Aguchi <[email protected]>
---
arch/x86/include/asm/desc.h | 22 ----------------------
1 file changed, 22 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 3d73437..50d033a 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -392,32 +392,10 @@ static inline void alloc_system_vector(int vector)
}
}

-#ifdef CONFIG_TRACING
-static inline void trace_set_intr_gate(unsigned int gate, void *addr)
-{
- gate_desc s;
-
- pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
- write_idt_entry(trace_idt_table, gate, &s);
-}
-
-static inline void __trace_alloc_intr_gate(unsigned int n, void *addr)
-{
- trace_set_intr_gate(n, addr);
-}
-#else
-static inline void trace_set_intr_gate(unsigned int gate, void *addr)
-{
-}
-
-#define __trace_alloc_intr_gate(n, addr)
-#endif
-
#define alloc_intr_gate(n, addr) \
do { \
alloc_system_vector(n); \
set_intr_gate(n, addr); \
- __trace_alloc_intr_gate(n, trace_##addr); \
} while (0)

/*
--
1.8.3.1