Some background about why I think this might be useful.
When I was profiling a hugetlb-related program, I saw the page-faults event
count double when hugetlb was enabled. Looking into the code, I found that
page-faults come from two places, do_page_fault and gup. So, I tried to figure
out which one (or both) plays a role in my use case, but I couldn't find any
existing finer-grained tracing event for these sub page-faults in the current
mainline kernel.
So, I added the gup trace points support to have finer tracing events for
page-faults. The below events are added:
__get_user_pages
__get_user_pages_fast
fixup_user_fault
Both __get_user_pages and fixup_user_fault call handle_mm_fault.
Trace points are only added to the raw version, __get_user_pages, since all
variants ultimately call it to do the real work.
Although __get_user_pages_fast doesn't call handle_mm_fault, it might be useful
to have it to distinguish between slow and fast version.
Yang Shi (7):
trace/events: Add gup trace events
mm/gup: add gup trace points
x86: mm/gup: add gup trace points
mips: mm/gup: add gup trace points
s390: mm/gup: add gup trace points
sh: mm/gup: add gup trace points
sparc64: mm/gup: add gup trace points
arch/mips/mm/gup.c | 7 +++++++
arch/s390/mm/gup.c | 7 +++++++
arch/sh/mm/gup.c | 8 ++++++++
arch/sparc/mm/gup.c | 8 ++++++++
arch/x86/mm/gup.c | 7 +++++++
include/trace/events/gup.h | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
mm/gup.c | 8 ++++++++
7 files changed, 122 insertions(+)
page-faults events record invocations of handle_mm_fault, but an invocation
may come from do_page_fault or gup. In some use cases, a finer event count
may be needed, so add trace events support for:
__get_user_pages
__get_user_pages_fast
fixup_user_fault
Signed-off-by: Yang Shi <[email protected]>
---
include/trace/events/gup.h | 77 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 77 insertions(+)
create mode 100644 include/trace/events/gup.h
diff --git a/include/trace/events/gup.h b/include/trace/events/gup.h
new file mode 100644
index 0000000..37d18f9
--- /dev/null
+++ b/include/trace/events/gup.h
@@ -0,0 +1,77 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gup
+
+#if !defined(_TRACE_GUP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_GUP_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(gup_fixup_user_fault,
+
+ TP_PROTO(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long address, unsigned int fault_flags),
+
+ TP_ARGS(tsk, mm, address, fault_flags),
+
+ TP_STRUCT__entry(
+ __array( char, comm, TASK_COMM_LEN )
+ __field( unsigned long, address )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+ __entry->address = address;
+ ),
+
+ TP_printk("comm=%s address=%lx", __entry->comm, __entry->address)
+);
+
+TRACE_EVENT(gup_get_user_pages,
+
+ TP_PROTO(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, unsigned long nr_pages,
+ unsigned int gup_flags, struct page **pages,
+ struct vm_area_struct **vmas, int *nonblocking),
+
+ TP_ARGS(tsk, mm, start, nr_pages, gup_flags, pages, vmas, nonblocking),
+
+ TP_STRUCT__entry(
+ __array( char, comm, TASK_COMM_LEN )
+ __field( unsigned long, start )
+ __field( unsigned long, nr_pages )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+ __entry->start = start;
+ __entry->nr_pages = nr_pages;
+ ),
+
+ TP_printk("comm=%s start=%lx nr_pages=%lu", __entry->comm, __entry->start, __entry->nr_pages)
+);
+
+TRACE_EVENT(gup_get_user_pages_fast,
+
+ TP_PROTO(unsigned long start, int nr_pages, int write,
+ struct page **pages),
+
+ TP_ARGS(start, nr_pages, write, pages),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, start )
+ __field( unsigned long, nr_pages )
+ ),
+
+ TP_fast_assign(
+ __entry->start = start;
+ __entry->nr_pages = nr_pages;
+ ),
+
+ TP_printk("start=%lx nr_pages=%lu", __entry->start, __entry->nr_pages)
+);
+
+#endif /* _TRACE_GUP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
--
2.0.2
For the slow version, just add a trace point to the raw __get_user_pages, since
all slow variants ultimately call it to do the real work.
Signed-off-by: Yang Shi <[email protected]>
---
mm/gup.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/mm/gup.c b/mm/gup.c
index deafa2c..72de7af 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -13,6 +13,9 @@
#include <linux/rwsem.h>
#include <linux/hugetlb.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/gup.h>
+
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -462,6 +465,8 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
if (!nr_pages)
return 0;
+ trace_gup_get_user_pages(tsk, mm, start, nr_pages, gup_flags,
+ pages, vmas, nonblocking);
VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
/*
@@ -599,6 +604,7 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
if (!(vm_flags & vma->vm_flags))
return -EFAULT;
+ trace_gup_fixup_user_fault(tsk, mm, address, fault_flags);
ret = handle_mm_fault(mm, vma, address, fault_flags);
if (ret & VM_FAULT_ERROR) {
if (ret & VM_FAULT_OOM)
@@ -1340,6 +1346,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
start, len)))
return 0;
+ trace_gup_get_user_pages_fast(start, nr_pages, write, pages);
+
/*
* Disable interrupts. We use the nested form as we can already have
* interrupts disabled by get_futex_key.
--
2.0.2
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: [email protected]
Signed-off-by: Yang Shi <[email protected]>
---
arch/x86/mm/gup.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index ae9a37b..ed6cca9 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -10,6 +10,9 @@
#include <linux/highmem.h>
#include <linux/swap.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/gup.h>
+
#include <asm/pgtable.h>
static inline pte_t gup_get_pte(pte_t *ptep)
@@ -270,6 +273,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
(void __user *)start, len)))
return 0;
+ trace_gup_get_user_pages_fast(start, nr_pages, write, pages);
+
/*
* XXX: batch / limit 'nr', to avoid large irq off latency
* needs some instrumenting to determine the common sizes used by
@@ -342,6 +347,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
goto slow_irqon;
#endif
+ trace_gup_get_user_pages_fast(start, nr_pages, write, pages);
+
/*
* XXX: batch / limit 'nr', to avoid large irq off latency
* needs some instrumenting to determine the common sizes used by
--
2.0.2
Cc: Ralf Baechle <[email protected]>
Cc: [email protected]
Signed-off-by: Yang Shi <[email protected]>
---
arch/mips/mm/gup.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
index 349995d..3c5b8c8 100644
--- a/arch/mips/mm/gup.c
+++ b/arch/mips/mm/gup.c
@@ -12,6 +12,9 @@
#include <linux/swap.h>
#include <linux/hugetlb.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/gup.h>
+
#include <asm/cpu-features.h>
#include <asm/pgtable.h>
@@ -211,6 +214,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
(void __user *)start, len)))
return 0;
+ trace_gup_get_user_pages_fast(start, nr_pages, write, pages);
+
/*
* XXX: batch / limit 'nr', to avoid large irq off latency
* needs some instrumenting to determine the common sizes used by
@@ -277,6 +282,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
if (end < start || cpu_has_dc_aliases)
goto slow_irqon;
+ trace_gup_get_user_pages_fast(start, nr_pages, write, pages);
+
/* XXX: batch / limit 'nr' */
local_irq_disable();
pgdp = pgd_offset(mm, addr);
--
2.0.2
Cc: Martin Schwidefsky <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: [email protected]
Signed-off-by: Yang Shi <[email protected]>
---
arch/s390/mm/gup.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 12bbf0e..ac25e28 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -10,6 +10,10 @@
#include <linux/vmstat.h>
#include <linux/pagemap.h>
#include <linux/rwsem.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/gup.h>
+
#include <asm/pgtable.h>
/*
@@ -188,6 +192,9 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
end = start + len;
if ((end <= start) || (end > TASK_SIZE))
return 0;
+
+ trace_gup_get_user_pages_fast(start, nr_pages, write, pages);
+
/*
* local_irq_save() doesn't prevent pagetable teardown, but does
* prevent the pagetables from being freed on s390.
--
2.0.2
Cc: [email protected]
Signed-off-by: Yang Shi <[email protected]>
---
arch/sh/mm/gup.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c
index e7af6a6..6df3e97 100644
--- a/arch/sh/mm/gup.c
+++ b/arch/sh/mm/gup.c
@@ -12,6 +12,10 @@
#include <linux/mm.h>
#include <linux/vmstat.h>
#include <linux/highmem.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/gup.h>
+
#include <asm/pgtable.h>
static inline pte_t gup_get_pte(pte_t *ptep)
@@ -178,6 +182,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
(void __user *)start, len)))
return 0;
+ trace_gup_get_user_pages_fast(start, nr_pages, write, pages);
+
/*
* This doesn't prevent pagetable teardown, but does prevent
* the pagetables and pages from being freed.
@@ -231,6 +237,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
if (end < start)
goto slow_irqon;
+ trace_gup_get_user_pages_fast(start, nr_pages, write, pages);
+
local_irq_disable();
pgdp = pgd_offset(mm, addr);
do {
--
2.0.2
Cc: "David S. Miller" <[email protected]>
Cc: [email protected]
Signed-off-by: Yang Shi <[email protected]>
---
The context depends on the below patch:
https://www.mail-archive.com/[email protected]/msg1028752.html
arch/sparc/mm/gup.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index cf4fb47..6dcfc4d 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -10,6 +10,10 @@
#include <linux/vmstat.h>
#include <linux/pagemap.h>
#include <linux/rwsem.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/gup.h>
+
#include <asm/pgtable.h>
/*
@@ -177,6 +181,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
(void __user *)start, len)))
return 0;
+ trace_gup_get_user_pages_fast(start, nr_pages, write, pages);
+
local_irq_save(flags);
pgdp = pgd_offset(mm, addr);
do {
@@ -209,6 +215,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
if (end < start)
goto slow_irqon;
+ trace_gup_get_user_pages_fast(start, nr_pages, write, pages);
+
/*
* XXX: batch / limit 'nr', to avoid large irq off latency
* needs some instrumenting to determine the common sizes used by
--
2.0.2
On Tue, 1 Dec 2015 15:06:11 -0800
Yang Shi <[email protected]> wrote:
> page-faults events record the invoke to handle_mm_fault, but the invoke
> may come from do_page_fault or gup. In some use cases, the finer event count
> mey be needed, so add trace events support for:
>
> __get_user_pages
> __get_user_pages_fast
> fixup_user_fault
>
> Signed-off-by: Yang Shi <[email protected]>
> ---
> include/trace/events/gup.h | 77 ++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 77 insertions(+)
> create mode 100644 include/trace/events/gup.h
>
> diff --git a/include/trace/events/gup.h b/include/trace/events/gup.h
> new file mode 100644
> index 0000000..37d18f9
> --- /dev/null
> +++ b/include/trace/events/gup.h
> @@ -0,0 +1,77 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM gup
> +
> +#if !defined(_TRACE_GUP_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_GUP_H
> +
> +#include <linux/types.h>
> +#include <linux/tracepoint.h>
> +
> +TRACE_EVENT(gup_fixup_user_fault,
> +
> + TP_PROTO(struct task_struct *tsk, struct mm_struct *mm,
> + unsigned long address, unsigned int fault_flags),
> +
> + TP_ARGS(tsk, mm, address, fault_flags),
> +
> + TP_STRUCT__entry(
> + __array( char, comm, TASK_COMM_LEN )
Why save the comm? The tracing infrastructure should keep track of that.
> + __field( unsigned long, address )
> + ),
> +
> + TP_fast_assign(
> + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
> + __entry->address = address;
> + ),
> +
> + TP_printk("comm=%s address=%lx", __entry->comm, __entry->address)
> +);
> +
> +TRACE_EVENT(gup_get_user_pages,
> +
> + TP_PROTO(struct task_struct *tsk, struct mm_struct *mm,
> + unsigned long start, unsigned long nr_pages,
> + unsigned int gup_flags, struct page **pages,
> + struct vm_area_struct **vmas, int *nonblocking),
> +
> + TP_ARGS(tsk, mm, start, nr_pages, gup_flags, pages, vmas, nonblocking),
Why so many arguments? Most are not used.
-- Steve
> +
> + TP_STRUCT__entry(
> + __array( char, comm, TASK_COMM_LEN )
> + __field( unsigned long, start )
> + __field( unsigned long, nr_pages )
> + ),
> +
> + TP_fast_assign(
> + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
> + __entry->start = start;
> + __entry->nr_pages = nr_pages;
> + ),
> +
> + TP_printk("comm=%s start=%lx nr_pages=%lu", __entry->comm, __entry->start, __entry->nr_pages)
> +);
> +
> +TRACE_EVENT(gup_get_user_pages_fast,
> +
> + TP_PROTO(unsigned long start, int nr_pages, int write,
> + struct page **pages),
> +
> + TP_ARGS(start, nr_pages, write, pages),
> +
> + TP_STRUCT__entry(
> + __field( unsigned long, start )
> + __field( unsigned long, nr_pages )
> + ),
> +
> + TP_fast_assign(
> + __entry->start = start;
> + __entry->nr_pages = nr_pages;
> + ),
> +
> + TP_printk("start=%lx nr_pages=%lu", __entry->start, __entry->nr_pages)
> +);
> +
> +#endif /* _TRACE_GUP_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
On 12/1/2015 3:56 PM, Steven Rostedt wrote:
> On Tue, 1 Dec 2015 15:06:11 -0800
> Yang Shi <[email protected]> wrote:
>
>> page-faults events record the invoke to handle_mm_fault, but the invoke
>> may come from do_page_fault or gup. In some use cases, the finer event count
>> mey be needed, so add trace events support for:
>>
>> __get_user_pages
>> __get_user_pages_fast
>> fixup_user_fault
>>
>> Signed-off-by: Yang Shi <[email protected]>
>> ---
>> include/trace/events/gup.h | 77 ++++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 77 insertions(+)
>> create mode 100644 include/trace/events/gup.h
>>
>> diff --git a/include/trace/events/gup.h b/include/trace/events/gup.h
>> new file mode 100644
>> index 0000000..37d18f9
>> --- /dev/null
>> +++ b/include/trace/events/gup.h
>> @@ -0,0 +1,77 @@
>> +#undef TRACE_SYSTEM
>> +#define TRACE_SYSTEM gup
>> +
>> +#if !defined(_TRACE_GUP_H) || defined(TRACE_HEADER_MULTI_READ)
>> +#define _TRACE_GUP_H
>> +
>> +#include <linux/types.h>
>> +#include <linux/tracepoint.h>
>> +
>> +TRACE_EVENT(gup_fixup_user_fault,
>> +
>> + TP_PROTO(struct task_struct *tsk, struct mm_struct *mm,
>> + unsigned long address, unsigned int fault_flags),
>> +
>> + TP_ARGS(tsk, mm, address, fault_flags),
>> +
>> + TP_STRUCT__entry(
>> + __array( char, comm, TASK_COMM_LEN )
>
> Why save the comm? The tracing infrastructure should keep track of that.
The code was modeled on kmem.h, which copies comm. If it is
unnecessary, it can definitely be removed.
>
>> + __field( unsigned long, address )
>> + ),
>> +
>> + TP_fast_assign(
>> + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
>> + __entry->address = address;
>> + ),
>> +
>> + TP_printk("comm=%s address=%lx", __entry->comm, __entry->address)
>> +);
>> +
>> +TRACE_EVENT(gup_get_user_pages,
>> +
>> + TP_PROTO(struct task_struct *tsk, struct mm_struct *mm,
>> + unsigned long start, unsigned long nr_pages,
>> + unsigned int gup_flags, struct page **pages,
>> + struct vm_area_struct **vmas, int *nonblocking),
>> +
>> + TP_ARGS(tsk, mm, start, nr_pages, gup_flags, pages, vmas, nonblocking),
>
> Why so many arguments? Most are not used.
My understanding of TP_ARGS may not be right. Doesn't it require all the
args defined by the function? If not, it can definitely be shrunk.
Do I just need to keep the args used by TP_printk?
Thanks,
Yang
>
> -- Steve
>
>> +
>> + TP_STRUCT__entry(
>> + __array( char, comm, TASK_COMM_LEN )
>> + __field( unsigned long, start )
>> + __field( unsigned long, nr_pages )
>> + ),
>> +
>> + TP_fast_assign(
>> + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
>> + __entry->start = start;
>> + __entry->nr_pages = nr_pages;
>> + ),
>> +
>> + TP_printk("comm=%s start=%lx nr_pages=%lu", __entry->comm, __entry->start, __entry->nr_pages)
>> +);
>> +
>> +TRACE_EVENT(gup_get_user_pages_fast,
>> +
>> + TP_PROTO(unsigned long start, int nr_pages, int write,
>> + struct page **pages),
>> +
>> + TP_ARGS(start, nr_pages, write, pages),
>> +
>> + TP_STRUCT__entry(
>> + __field( unsigned long, start )
>> + __field( unsigned long, nr_pages )
>> + ),
>> +
>> + TP_fast_assign(
>> + __entry->start = start;
>> + __entry->nr_pages = nr_pages;
>> + ),
>> +
>> + TP_printk("start=%lx nr_pages=%lu", __entry->start, __entry->nr_pages)
>> +);
>> +
>> +#endif /* _TRACE_GUP_H */
>> +
>> +/* This part must be outside protection */
>> +#include <trace/define_trace.h>
>
On Tue, 01 Dec 2015 16:07:44 -0800
"Shi, Yang" <[email protected]> wrote:
> On 12/1/2015 3:56 PM, Steven Rostedt wrote:
> > On Tue, 1 Dec 2015 15:06:11 -0800
> > Yang Shi <[email protected]> wrote:
> >
> >> page-faults events record the invoke to handle_mm_fault, but the invoke
> >> may come from do_page_fault or gup. In some use cases, the finer event count
> >> mey be needed, so add trace events support for:
> >>
> >> __get_user_pages
> >> __get_user_pages_fast
> >> fixup_user_fault
> >>
> >> Signed-off-by: Yang Shi <[email protected]>
> >> ---
> >> include/trace/events/gup.h | 77 ++++++++++++++++++++++++++++++++++++++++++++++
> >> 1 file changed, 77 insertions(+)
> >> create mode 100644 include/trace/events/gup.h
> >>
> >> diff --git a/include/trace/events/gup.h b/include/trace/events/gup.h
> >> new file mode 100644
> >> index 0000000..37d18f9
> >> --- /dev/null
> >> +++ b/include/trace/events/gup.h
> >> @@ -0,0 +1,77 @@
> >> +#undef TRACE_SYSTEM
> >> +#define TRACE_SYSTEM gup
> >> +
> >> +#if !defined(_TRACE_GUP_H) || defined(TRACE_HEADER_MULTI_READ)
> >> +#define _TRACE_GUP_H
> >> +
> >> +#include <linux/types.h>
> >> +#include <linux/tracepoint.h>
> >> +
> >> +TRACE_EVENT(gup_fixup_user_fault,
> >> +
> >> + TP_PROTO(struct task_struct *tsk, struct mm_struct *mm,
> >> + unsigned long address, unsigned int fault_flags),
> >> +
> >> + TP_ARGS(tsk, mm, address, fault_flags),
> >> +
> >> + TP_STRUCT__entry(
> >> + __array( char, comm, TASK_COMM_LEN )
> >
> > Why save the comm? The tracing infrastructure should keep track of that.
>
> The code is referred to kmem.h which has comm copied. If it is
> unnecessary, it definitely could be removed.
Sometimes comm isn't that reliable. But really, the only tracepoint
that should record it is sched_switch, and sched_wakeup. With those
two, the rest of the trace points should be fine.
>
> >
> >> + __field( unsigned long, address )
> >> + ),
> >> +
> >> + TP_fast_assign(
> >> + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
> >> + __entry->address = address;
> >> + ),
> >> +
> >> + TP_printk("comm=%s address=%lx", __entry->comm, __entry->address)
> >> +);
> >> +
> >> +TRACE_EVENT(gup_get_user_pages,
> >> +
> >> + TP_PROTO(struct task_struct *tsk, struct mm_struct *mm,
> >> + unsigned long start, unsigned long nr_pages,
> >> + unsigned int gup_flags, struct page **pages,
> >> + struct vm_area_struct **vmas, int *nonblocking),
> >> +
> >> + TP_ARGS(tsk, mm, start, nr_pages, gup_flags, pages, vmas, nonblocking),
> >
> > Why so many arguments? Most are not used.
>
> My understanding to TP_ARGS may be not right. Doesn't it require all the
> args defined by the function? If not, it could definitely be shrunk.
> Just need keep the args used by TP_printk?
It only needs what is used by TP_fast_assign().
-- Steve
On 12/1/2015 4:18 PM, Steven Rostedt wrote:
> On Tue, 01 Dec 2015 16:07:44 -0800
> "Shi, Yang" <[email protected]> wrote:
>
>> On 12/1/2015 3:56 PM, Steven Rostedt wrote:
>>> On Tue, 1 Dec 2015 15:06:11 -0800
>>> Yang Shi <[email protected]> wrote:
>>>
>>>> page-faults events record the invoke to handle_mm_fault, but the invoke
>>>> may come from do_page_fault or gup. In some use cases, the finer event count
>>>> mey be needed, so add trace events support for:
>>>>
>>>> __get_user_pages
>>>> __get_user_pages_fast
>>>> fixup_user_fault
>>>>
>>>> Signed-off-by: Yang Shi <[email protected]>
>>>> ---
>>>> include/trace/events/gup.h | 77 ++++++++++++++++++++++++++++++++++++++++++++++
>>>> 1 file changed, 77 insertions(+)
>>>> create mode 100644 include/trace/events/gup.h
>>>>
>>>> diff --git a/include/trace/events/gup.h b/include/trace/events/gup.h
>>>> new file mode 100644
>>>> index 0000000..37d18f9
>>>> --- /dev/null
>>>> +++ b/include/trace/events/gup.h
>>>> @@ -0,0 +1,77 @@
>>>> +#undef TRACE_SYSTEM
>>>> +#define TRACE_SYSTEM gup
>>>> +
>>>> +#if !defined(_TRACE_GUP_H) || defined(TRACE_HEADER_MULTI_READ)
>>>> +#define _TRACE_GUP_H
>>>> +
>>>> +#include <linux/types.h>
>>>> +#include <linux/tracepoint.h>
>>>> +
>>>> +TRACE_EVENT(gup_fixup_user_fault,
>>>> +
>>>> + TP_PROTO(struct task_struct *tsk, struct mm_struct *mm,
>>>> + unsigned long address, unsigned int fault_flags),
>>>> +
>>>> + TP_ARGS(tsk, mm, address, fault_flags),
>>>> +
>>>> + TP_STRUCT__entry(
>>>> + __array( char, comm, TASK_COMM_LEN )
>>>
>>> Why save the comm? The tracing infrastructure should keep track of that.
>>
>> The code is referred to kmem.h which has comm copied. If it is
>> unnecessary, it definitely could be removed.
>
> Sometimes comm isn't that reliable. But really, the only tracepoint
> that should record it is sched_switch, and sched_wakeup. With those
> two, the rest of the trace points should be fine.
>
>>
>>>
>>>> + __field( unsigned long, address )
>>>> + ),
>>>> +
>>>> + TP_fast_assign(
>>>> + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
>>>> + __entry->address = address;
>>>> + ),
>>>> +
>>>> + TP_printk("comm=%s address=%lx", __entry->comm, __entry->address)
>>>> +);
>>>> +
>>>> +TRACE_EVENT(gup_get_user_pages,
>>>> +
>>>> + TP_PROTO(struct task_struct *tsk, struct mm_struct *mm,
>>>> + unsigned long start, unsigned long nr_pages,
>>>> + unsigned int gup_flags, struct page **pages,
>>>> + struct vm_area_struct **vmas, int *nonblocking),
>>>> +
>>>> + TP_ARGS(tsk, mm, start, nr_pages, gup_flags, pages, vmas, nonblocking),
>>>
>>> Why so many arguments? Most are not used.
>>
>> My understanding to TP_ARGS may be not right. Doesn't it require all the
>> args defined by the function? If not, it could definitely be shrunk.
>> Just need keep the args used by TP_printk?
>
> It only needs what is used by TP_fast_assign().
Thanks, will fix them in V2.
Yang
>
> -- Steve
>
On Tue, Dec 01, 2015 at 03:06:14PM -0800, Yang Shi wrote:
> arch/mips/mm/gup.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
> index 349995d..3c5b8c8 100644
> --- a/arch/mips/mm/gup.c
> +++ b/arch/mips/mm/gup.c
> @@ -12,6 +12,9 @@
> #include <linux/swap.h>
> #include <linux/hugetlb.h>
>
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/gup.h>
> +
> #include <asm/cpu-features.h>
> #include <asm/pgtable.h>
>
> @@ -211,6 +214,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
> (void __user *)start, len)))
> return 0;
>
> + trace_gup_get_user_pages_fast(start, nr_pages, write, pages);
> +
> /*
> * XXX: batch / limit 'nr', to avoid large irq off latency
> * needs some instrumenting to determine the common sizes used by
> @@ -277,6 +282,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
> if (end < start || cpu_has_dc_aliases)
> goto slow_irqon;
>
> + trace_gup_get_user_pages_fast(start, nr_pages, write, pages);
> +
> /* XXX: batch / limit 'nr' */
> local_irq_disable();
> pgdp = pgd_offset(mm, addr);
Acked-by: Ralf Baechle <[email protected]>
Please feel free to merge this upstream with the remainder of the
series once it's been acked.
Ralf