v5:
* Fixed a typo introduced by v4 rebase
* Removed redundant "#define CREATE_TRACE_POINTS" from architecture specifc
gup.c
v4:
* Adopted Steven's suggestion to use "unsigned int" for nr_pages to save
space in ring buffer since it is unlikely to have more than 0xffffffff
pages are touched by gup in one invoke
* Remove unnecessray type cast
v3:
* Adopted suggestion from Dave Hansen to move the gup header include to the last
* Adopted comments from Steven:
- Use DECLARE_EVENT_CLASS and DEFINE_EVENT
- Just keep necessary TP_ARGS
* Moved archtichture specific fall-backable fast version trace point after the
do while loop since it may jump to the slow version.
* Not implement recording return value since Steven plans to have it in generic
tracing code
v2:
* Adopted commetns from Steven
- remove all reference to tsk->comm since it is unnecessary for non-sched
trace points
- reduce arguments for __get_user_pages trace point and update mm/gup.c
accordingly
* Added Ralf's acked-by for patch 4/7.
Some background about why I think this might be useful.
When I was profiling some hugetlb related program, I got page-faults event
doubled when hugetlb is enabled. When I looked into the code, I found page-faults
come from two places, do_page_fault and gup. So, I tried to figure out which
play a role (or both) in my use case. But I can't find existing finer tracing
event for sub page-faults in current mainline kernel.
So, I added the gup trace points support to have finer tracing events for
page-faults. The below events are added:
__get_user_pages
__get_user_pages_fast
fixup_user_fault
Both __get_user_pages and fixup_user_fault call handle_mm_fault.
Just added trace points to raw version __get_user_pages since all variants
will call it finally to do real work.
Although __get_user_pages_fast doesn't call handle_mm_fault, it might be useful
to have it to distinguish between slow and fast version.
page-faults events record the invoke to handle_mm_fault, but the invoke
may come from do_page_fault or gup. In some use cases, the finer event count
mey be needed, so add trace events support for:
__get_user_pages
__get_user_pages_fast
fixup_user_fault
Signed-off-by: Yang Shi <[email protected]>
---
include/trace/events/gup.h | 63 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 63 insertions(+)
create mode 100644 include/trace/events/gup.h
diff --git a/include/trace/events/gup.h b/include/trace/events/gup.h
new file mode 100644
index 0000000..ac0e011
--- /dev/null
+++ b/include/trace/events/gup.h
@@ -0,0 +1,63 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gup
+
+#if !defined(_TRACE_GUP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_GUP_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(gup_fixup_user_fault,
+
+ TP_PROTO(unsigned long address),
+
+ TP_ARGS(address),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, address )
+ ),
+
+ TP_fast_assign(
+ __entry->address = address;
+ ),
+
+ TP_printk("address=%lx", __entry->address)
+);
+
+DECLARE_EVENT_CLASS(gup,
+
+ TP_PROTO(unsigned long start, unsigned int nr_pages),
+
+ TP_ARGS(start, nr_pages),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, start )
+ __field( unsigned int, nr_pages )
+ ),
+
+ TP_fast_assign(
+ __entry->start = start;
+ __entry->nr_pages = nr_pages;
+ ),
+
+ TP_printk("start=%lx nr_pages=%d", __entry->start, __entry->nr_pages)
+);
+
+DEFINE_EVENT(gup, gup_get_user_pages,
+
+ TP_PROTO(unsigned long start, unsigned int nr_pages),
+
+ TP_ARGS(start, nr_pages)
+);
+
+DEFINE_EVENT(gup, gup_get_user_pages_fast,
+
+ TP_PROTO(unsigned long start, unsigned int nr_pages),
+
+ TP_ARGS(start, nr_pages)
+);
+
+#endif /* _TRACE_GUP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
--
2.0.2
For slow version, just add trace point for raw __get_user_pages since all
slow variants call it to do the real work finally.
Signed-off-by: Yang Shi <[email protected]>
---
mm/gup.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/mm/gup.c b/mm/gup.c
index deafa2c..00a3cff 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -18,6 +18,9 @@
#include "internal.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/gup.h>
+
static struct page *no_page_table(struct vm_area_struct *vma,
unsigned int flags)
{
@@ -462,6 +465,8 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
if (!nr_pages)
return 0;
+ trace_gup_get_user_pages(start, nr_pages);
+
VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
/*
@@ -599,6 +604,7 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
if (!(vm_flags & vma->vm_flags))
return -EFAULT;
+ trace_gup_fixup_user_fault(address);
ret = handle_mm_fault(mm, vma, address, fault_flags);
if (ret & VM_FAULT_ERROR) {
if (ret & VM_FAULT_OOM)
@@ -1340,6 +1346,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
start, len)))
return 0;
+ trace_gup_get_user_pages_fast(start, nr_pages);
+
/*
* Disable interrupts. We use the nested form as we can already have
* interrupts disabled by get_futex_key.
--
2.0.2
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: [email protected]
Signed-off-by: Yang Shi <[email protected]>
---
arch/x86/mm/gup.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index ae9a37b..df5f3ab 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -12,6 +12,8 @@
#include <asm/pgtable.h>
+#include <trace/events/gup.h>
+
static inline pte_t gup_get_pte(pte_t *ptep)
{
#ifndef CONFIG_X86_PAE
@@ -270,6 +272,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
(void __user *)start, len)))
return 0;
+ trace_gup_get_user_pages_fast(start, nr_pages);
+
/*
* XXX: batch / limit 'nr', to avoid large irq off latency
* needs some instrumenting to determine the common sizes used by
@@ -373,6 +377,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
} while (pgdp++, addr = next, addr != end);
local_irq_enable();
+ trace_gup_get_user_pages_fast(start, nr_pages);
+
VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
return nr;
--
2.0.2
Cc: [email protected]
Acked-by: Ralf Baechle <[email protected]>
Signed-off-by: Yang Shi <[email protected]>
---
arch/mips/mm/gup.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
index 349995d..7d90883 100644
--- a/arch/mips/mm/gup.c
+++ b/arch/mips/mm/gup.c
@@ -15,6 +15,8 @@
#include <asm/cpu-features.h>
#include <asm/pgtable.h>
+#include <trace/events/gup.h>
+
static inline pte_t gup_get_pte(pte_t *ptep)
{
#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
@@ -211,6 +213,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
(void __user *)start, len)))
return 0;
+ trace_gup_get_user_pages_fast(start, nr_pages);
+
/*
* XXX: batch / limit 'nr', to avoid large irq off latency
* needs some instrumenting to determine the common sizes used by
@@ -291,6 +295,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
} while (pgdp++, addr = next, addr != end);
local_irq_enable();
+ trace_gup_get_user_pages_fast(start, nr_pages);
+
VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
return nr;
slow:
--
2.0.2
Cc: Martin Schwidefsky <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: [email protected]
Signed-off-by: Yang Shi <[email protected]>
---
arch/s390/mm/gup.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 12bbf0e..a1d5db7 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -12,6 +12,8 @@
#include <linux/rwsem.h>
#include <asm/pgtable.h>
+#include <trace/events/gup.h>
+
/*
* The performance critical leaf functions are made noinline otherwise gcc
* inlines everything into a single function which results in too much
@@ -188,6 +190,9 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
end = start + len;
if ((end <= start) || (end > TASK_SIZE))
return 0;
+
+ trace_gup_get_user_pages_fast(start, nr_pages);
+
/*
* local_irq_save() doesn't prevent pagetable teardown, but does
* prevent the pagetables from being freed on s390.
--
2.0.2
Cc: [email protected]
Signed-off-by: Yang Shi <[email protected]>
---
arch/sh/mm/gup.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c
index e7af6a6..dc80480 100644
--- a/arch/sh/mm/gup.c
+++ b/arch/sh/mm/gup.c
@@ -14,6 +14,8 @@
#include <linux/highmem.h>
#include <asm/pgtable.h>
+#include <trace/events/gup.h>
+
static inline pte_t gup_get_pte(pte_t *ptep)
{
#ifndef CONFIG_X2TLB
@@ -178,6 +180,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
(void __user *)start, len)))
return 0;
+ trace_gup_get_user_pages_fast(start, nr_pages);
+
/*
* This doesn't prevent pagetable teardown, but does prevent
* the pagetables and pages from being freed.
@@ -244,6 +248,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
} while (pgdp++, addr = next, addr != end);
local_irq_enable();
+ trace_gup_get_user_pages_fast(start, nr_pages);
+
VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
return nr;
--
2.0.2
Cc: "David S. Miller" <[email protected]>
Cc: [email protected]
Signed-off-by: Yang Shi <[email protected]>
---
arch/sparc/mm/gup.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index 2e5c4fc..5a06c34 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -12,6 +12,8 @@
#include <linux/rwsem.h>
#include <asm/pgtable.h>
+#include <trace/events/gup.h>
+
/*
* The performance critical leaf functions are made noinline otherwise gcc
* inlines everything into a single function which results in too much
@@ -174,6 +176,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
+ trace_gup_get_user_pages_fast(start, nr_pages);
+
local_irq_save(flags);
pgdp = pgd_offset(mm, addr);
do {
@@ -236,6 +240,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
local_irq_enable();
+ trace_gup_get_user_pages_fast(start, nr_pages);
+
VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
return nr;
--
2.0.2