2014-12-03 07:48:45

by Joonsoo Kim

[permalink] [raw]
Subject: [PATCH 1/3] mm/compaction: enhance trace output to know more about compaction internals

It'd be useful to know where both scanners start. It would also be
useful to know the current range in which compaction is working. This will
help to find odd behaviour or problems in compaction.

Signed-off-by: Joonsoo Kim <[email protected]>
---
include/linux/compaction.h | 2 +
include/trace/events/compaction.h | 79 +++++++++++++++++++++++++++----------
mm/compaction.c | 23 ++++++++---
3 files changed, 78 insertions(+), 26 deletions(-)

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 3238ffa..a9547b6 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -12,6 +12,7 @@
#define COMPACT_PARTIAL 3
/* The full zone was compacted */
#define COMPACT_COMPLETE 4
+/* When adding new state, please change compaction_status_string, too */

/* Used to signal whether compaction detected need_sched() or lock contention */
/* No contention detected */
@@ -22,6 +23,7 @@
#define COMPACT_CONTENDED_LOCK 2

#ifdef CONFIG_COMPACTION
+extern char *compaction_status_string[];
extern int sysctl_compact_memory;
extern int sysctl_compaction_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos);
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index c6814b9..139020b 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -11,39 +11,55 @@

DECLARE_EVENT_CLASS(mm_compaction_isolate_template,

- TP_PROTO(unsigned long nr_scanned,
+ TP_PROTO(
+ unsigned long start_pfn,
+ unsigned long end_pfn,
+ unsigned long nr_scanned,
unsigned long nr_taken),

- TP_ARGS(nr_scanned, nr_taken),
+ TP_ARGS(start_pfn, end_pfn, nr_scanned, nr_taken),

TP_STRUCT__entry(
+ __field(unsigned long, start_pfn)
+ __field(unsigned long, end_pfn)
__field(unsigned long, nr_scanned)
__field(unsigned long, nr_taken)
),

TP_fast_assign(
+ __entry->start_pfn = start_pfn;
+ __entry->end_pfn = end_pfn;
__entry->nr_scanned = nr_scanned;
__entry->nr_taken = nr_taken;
),

- TP_printk("nr_scanned=%lu nr_taken=%lu",
+ TP_printk("range=(0x%lx ~ 0x%lx) nr_scanned=%lu nr_taken=%lu",
+ __entry->start_pfn,
+ __entry->end_pfn,
__entry->nr_scanned,
__entry->nr_taken)
);

DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_migratepages,

- TP_PROTO(unsigned long nr_scanned,
+ TP_PROTO(
+ unsigned long start_pfn,
+ unsigned long end_pfn,
+ unsigned long nr_scanned,
unsigned long nr_taken),

- TP_ARGS(nr_scanned, nr_taken)
+ TP_ARGS(start_pfn, end_pfn, nr_scanned, nr_taken)
);

DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_freepages,
- TP_PROTO(unsigned long nr_scanned,
+
+ TP_PROTO(
+ unsigned long start_pfn,
+ unsigned long end_pfn,
+ unsigned long nr_scanned,
unsigned long nr_taken),

- TP_ARGS(nr_scanned, nr_taken)
+ TP_ARGS(start_pfn, end_pfn, nr_scanned, nr_taken)
);

TRACE_EVENT(mm_compaction_migratepages,
@@ -85,46 +101,67 @@ TRACE_EVENT(mm_compaction_migratepages,
);

TRACE_EVENT(mm_compaction_begin,
- TP_PROTO(unsigned long zone_start, unsigned long migrate_start,
- unsigned long free_start, unsigned long zone_end),
+ TP_PROTO(unsigned long zone_start, unsigned long migrate_pfn,
+ unsigned long free_pfn, unsigned long zone_end, bool sync),

- TP_ARGS(zone_start, migrate_start, free_start, zone_end),
+ TP_ARGS(zone_start, migrate_pfn, free_pfn, zone_end, sync),

TP_STRUCT__entry(
__field(unsigned long, zone_start)
- __field(unsigned long, migrate_start)
- __field(unsigned long, free_start)
+ __field(unsigned long, migrate_pfn)
+ __field(unsigned long, free_pfn)
__field(unsigned long, zone_end)
+ __field(bool, sync)
),

TP_fast_assign(
__entry->zone_start = zone_start;
- __entry->migrate_start = migrate_start;
- __entry->free_start = free_start;
+ __entry->migrate_pfn = migrate_pfn;
+ __entry->free_pfn = free_pfn;
__entry->zone_end = zone_end;
+ __entry->sync = sync;
),

- TP_printk("zone_start=%lu migrate_start=%lu free_start=%lu zone_end=%lu",
+ TP_printk("zone_start=0x%lx migrate_pfn=0x%lx free_pfn=0x%lx zone_end=0x%lx, mode=%s",
__entry->zone_start,
- __entry->migrate_start,
- __entry->free_start,
- __entry->zone_end)
+ __entry->migrate_pfn,
+ __entry->free_pfn,
+ __entry->zone_end,
+ __entry->sync ? "sync" : "async")
);

TRACE_EVENT(mm_compaction_end,
- TP_PROTO(int status),
+ TP_PROTO(unsigned long zone_start, unsigned long migrate_pfn,
+ unsigned long free_pfn, unsigned long zone_end, bool sync,
+ int status),

- TP_ARGS(status),
+ TP_ARGS(zone_start, migrate_pfn, free_pfn, zone_end, sync, status),

TP_STRUCT__entry(
+ __field(unsigned long, zone_start)
+ __field(unsigned long, migrate_pfn)
+ __field(unsigned long, free_pfn)
+ __field(unsigned long, zone_end)
+ __field(bool, sync)
__field(int, status)
),

TP_fast_assign(
+ __entry->zone_start = zone_start;
+ __entry->migrate_pfn = migrate_pfn;
+ __entry->free_pfn = free_pfn;
+ __entry->zone_end = zone_end;
+ __entry->sync = sync;
__entry->status = status;
),

- TP_printk("status=%d", __entry->status)
+ TP_printk("zone_start=0x%lx migrate_pfn=0x%lx free_pfn=0x%lx zone_end=0x%lx, mode=%s status=%s",
+ __entry->zone_start,
+ __entry->migrate_pfn,
+ __entry->free_pfn,
+ __entry->zone_end,
+ __entry->sync ? "sync" : "async",
+ compaction_status_string[__entry->status])
);

#endif /* _TRACE_COMPACTION_H */
diff --git a/mm/compaction.c b/mm/compaction.c
index a857225..4c7b837 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -19,6 +19,14 @@
#include "internal.h"

#ifdef CONFIG_COMPACTION
+char *compaction_status_string[] = {
+ "deferred",
+ "skipped",
+ "continue",
+ "partial",
+ "complete",
+};
+
static inline void count_compact_event(enum vm_event_item item)
{
count_vm_event(item);
@@ -421,11 +429,12 @@ isolate_fail:

}

+ trace_mm_compaction_isolate_freepages(*start_pfn, end_pfn,
+ nr_scanned, total_isolated);
+
/* Record how far we have got within the block */
*start_pfn = blockpfn;

- trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated);
-
/*
* If strict isolation is requested by CMA then check that all the
* pages requested were isolated. If there were any failures, 0 is
@@ -581,6 +590,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
unsigned long flags = 0;
bool locked = false;
struct page *page = NULL, *valid_page = NULL;
+ unsigned long start_pfn = low_pfn;

/*
* Ensure that there are not too many pages isolated from the LRU
@@ -741,7 +751,8 @@ isolate_success:
if (low_pfn == end_pfn)
update_pageblock_skip(cc, valid_page, nr_isolated, true);

- trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
+ trace_mm_compaction_isolate_migratepages(start_pfn, end_pfn,
+ nr_scanned, nr_isolated);

count_compact_events(COMPACTMIGRATE_SCANNED, nr_scanned);
if (nr_isolated)
@@ -1196,7 +1207,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
}

- trace_mm_compaction_begin(start_pfn, cc->migrate_pfn, cc->free_pfn, end_pfn);
+ trace_mm_compaction_begin(start_pfn, cc->migrate_pfn,
+ cc->free_pfn, end_pfn, sync);

migrate_prep_local();

@@ -1297,7 +1309,8 @@ out:
zone->compact_cached_free_pfn = free_pfn;
}

- trace_mm_compaction_end(ret);
+ trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
+ cc->free_pfn, end_pfn, sync, ret);

return ret;
}
--
1.7.9.5


2014-12-03 07:48:46

by Joonsoo Kim

[permalink] [raw]
Subject: [PATCH 3/3] mm/compaction: add tracepoint to observe behaviour of compaction defer

The compaction deferring logic is a heavy hammer that blocks the way to
compaction. It doesn't consider overall system state, so it could
falsely prevent the user from doing compaction. In other words,
even if the system has a suitable range of memory to compact, compaction
could be skipped due to the deferring logic. This patch adds new tracepoints
to help understand the operation of the deferring logic. They will also help
to check compaction success and failure.

Signed-off-by: Joonsoo Kim <[email protected]>
---
include/trace/events/compaction.h | 56 +++++++++++++++++++++++++++++++++++++
mm/compaction.c | 7 ++++-
2 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index 5e47cb2..673d59a 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -255,6 +255,62 @@ DEFINE_EVENT(mm_compaction_suitable_template, mm_compaction_suitable,
TP_ARGS(zone, order, alloc_flags, classzone_idx, ret)
);

+DECLARE_EVENT_CLASS(mm_compaction_defer_template,
+
+ TP_PROTO(struct zone *zone,
+ unsigned int order),
+
+ TP_ARGS(zone, order),
+
+ TP_STRUCT__entry(
+ __field(char *, name)
+ __field(unsigned int, order)
+ __field(unsigned int, considered)
+ __field(unsigned int, defer_shift)
+ __field(int, order_failed)
+ ),
+
+ TP_fast_assign(
+ __entry->name = (char *)zone->name;
+ __entry->order = order;
+ __entry->considered = zone->compact_considered;
+ __entry->defer_shift = zone->compact_defer_shift;
+ __entry->order_failed = zone->compact_order_failed;
+ ),
+
+ TP_printk("zone=%-8s order=%u order_failed=%u reason=%s consider=%u limit=%lu",
+ __entry->name,
+ __entry->order,
+ __entry->order_failed,
+ __entry->order < __entry->order_failed ? "order" : "try",
+ __entry->considered,
+ 1UL << __entry->defer_shift)
+);
+
+DEFINE_EVENT(mm_compaction_defer_template, mm_compaction_deffered,
+
+ TP_PROTO(struct zone *zone,
+ unsigned int order),
+
+ TP_ARGS(zone, order)
+);
+
+DEFINE_EVENT(mm_compaction_defer_template, mm_compaction_defer_compaction,
+
+ TP_PROTO(struct zone *zone,
+ unsigned int order),
+
+ TP_ARGS(zone, order)
+);
+
+DEFINE_EVENT(mm_compaction_defer_template, mm_compaction_defer_reset,
+
+ TP_PROTO(struct zone *zone,
+ unsigned int order),
+
+ TP_ARGS(zone, order)
+);
+
#endif /* _TRACE_COMPACTION_H */

/* This part must be outside protection */
diff --git a/mm/compaction.c b/mm/compaction.c
index f5d2405..e005620 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1413,8 +1413,10 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
int status;
int zone_contended;

- if (compaction_deferred(zone, order))
+ if (compaction_deferred(zone, order)) {
+ trace_mm_compaction_deffered(zone, order);
continue;
+ }

status = compact_zone_order(zone, order, gfp_mask, mode,
&zone_contended, alloc_flags, classzone_idx);
@@ -1435,6 +1437,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
* succeeds in this zone.
*/
compaction_defer_reset(zone, order, false);
+ trace_mm_compaction_defer_reset(zone, order);
+
/*
* It is possible that async compaction aborted due to
* need_resched() and the watermarks were ok thanks to
@@ -1456,6 +1460,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
* succeeding after all, it will be reset.
*/
defer_compaction(zone, order);
+ trace_mm_compaction_defer_compaction(zone, order);
}

/*
--
1.7.9.5

2014-12-03 07:48:44

by Joonsoo Kim

[permalink] [raw]
Subject: [PATCH 2/3] mm/compaction: add more trace to understand compaction start/finish condition

It is not well understood when compaction starts and when it
finishes. With these tracepoints for the compaction start/finish conditions, I
could find the following bug.

http://www.spinics.net/lists/linux-mm/msg81582.html

Signed-off-by: Joonsoo Kim <[email protected]>
---
include/linux/compaction.h | 2 +
include/trace/events/compaction.h | 91 +++++++++++++++++++++++++++++++++++++
mm/compaction.c | 40 ++++++++++++++--
3 files changed, 129 insertions(+), 4 deletions(-)

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index a9547b6..bdb4b99 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -12,6 +12,8 @@
#define COMPACT_PARTIAL 3
/* The full zone was compacted */
#define COMPACT_COMPLETE 4
+/* For more detailed tracepoint output, will be converted to COMPACT_CONTINUE */
+#define COMPACT_NOT_SUITABLE 5
/* When adding new state, please change compaction_status_string, too */

/* Used to signal whether compaction detected need_sched() or lock contention */
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index 139020b..5e47cb2 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -164,6 +164,97 @@ TRACE_EVENT(mm_compaction_end,
compaction_status_string[__entry->status])
);

+TRACE_EVENT(mm_compaction_try_to_compact_pages,
+
+ TP_PROTO(
+ unsigned int order,
+ gfp_t gfp_mask,
+ enum migrate_mode mode,
+ int alloc_flags,
+ int classzone_idx),
+
+ TP_ARGS(order, gfp_mask, mode, alloc_flags, classzone_idx),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, order)
+ __field(gfp_t, gfp_mask)
+ __field(enum migrate_mode, mode)
+ __field(int, alloc_flags)
+ __field(int, classzone_idx)
+ ),
+
+ TP_fast_assign(
+ __entry->order = order;
+ __entry->gfp_mask = gfp_mask;
+ __entry->mode = mode;
+ __entry->alloc_flags = alloc_flags;
+ __entry->classzone_idx = classzone_idx;
+ ),
+
+ TP_printk("order=%u gfp_mask=0x%x mode=%d alloc_flags=0x%x classzone_idx=%d",
+ __entry->order,
+ __entry->gfp_mask,
+ (int)__entry->mode,
+ __entry->alloc_flags,
+ __entry->classzone_idx)
+);
+
+DECLARE_EVENT_CLASS(mm_compaction_suitable_template,
+
+ TP_PROTO(struct zone *zone,
+ unsigned int order,
+ int alloc_flags,
+ int classzone_idx,
+ int ret),
+
+ TP_ARGS(zone, order, alloc_flags, classzone_idx, ret),
+
+ TP_STRUCT__entry(
+ __field(char *, name)
+ __field(unsigned int, order)
+ __field(int, alloc_flags)
+ __field(int, classzone_idx)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->name = (char *)zone->name;
+ __entry->order = order;
+ __entry->alloc_flags = alloc_flags;
+ __entry->classzone_idx = classzone_idx;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("zone=%-8s order=%u alloc_flags=0x%x classzone_idx=%d ret=%s",
+ __entry->name,
+ __entry->order,
+ __entry->alloc_flags,
+ __entry->classzone_idx,
+ compaction_status_string[__entry->ret])
+);
+
+DEFINE_EVENT(mm_compaction_suitable_template, mm_compaction_finished,
+
+ TP_PROTO(struct zone *zone,
+ unsigned int order,
+ int alloc_flags,
+ int classzone_idx,
+ int ret),
+
+ TP_ARGS(zone, order, alloc_flags, classzone_idx, ret)
+);
+
+DEFINE_EVENT(mm_compaction_suitable_template, mm_compaction_suitable,
+
+ TP_PROTO(struct zone *zone,
+ unsigned int order,
+ int alloc_flags,
+ int classzone_idx,
+ int ret),
+
+ TP_ARGS(zone, order, alloc_flags, classzone_idx, ret)
+);
+
#endif /* _TRACE_COMPACTION_H */

/* This part must be outside protection */
diff --git a/mm/compaction.c b/mm/compaction.c
index 4c7b837..f5d2405 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -25,6 +25,7 @@ char *compaction_status_string[] = {
"continue",
"partial",
"complete",
+ "not_suitable_page",
};

static inline void count_compact_event(enum vm_event_item item)
@@ -1048,7 +1049,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
}

-static int compact_finished(struct zone *zone, struct compact_control *cc,
+static int __compact_finished(struct zone *zone, struct compact_control *cc,
const int migratetype)
{
unsigned int order;
@@ -1103,7 +1104,21 @@ static int compact_finished(struct zone *zone, struct compact_control *cc,
return COMPACT_PARTIAL;
}

- return COMPACT_CONTINUE;
+ return COMPACT_NOT_SUITABLE;
+}
+
+static int compact_finished(struct zone *zone, struct compact_control *cc,
+ const int migratetype)
+{
+ int ret;
+
+ ret = __compact_finished(zone, cc, migratetype);
+ trace_mm_compaction_finished(zone, cc->order, cc->alloc_flags,
+ cc->classzone_idx, ret);
+ if (ret == COMPACT_NOT_SUITABLE)
+ ret = COMPACT_CONTINUE;
+
+ return ret;
}

/*
@@ -1113,7 +1128,7 @@ static int compact_finished(struct zone *zone, struct compact_control *cc,
* COMPACT_PARTIAL - If the allocation would succeed without compaction
* COMPACT_CONTINUE - If compaction should run now
*/
-unsigned long compaction_suitable(struct zone *zone, int order,
+static unsigned long __compaction_suitable(struct zone *zone, int order,
int alloc_flags, int classzone_idx)
{
int fragindex;
@@ -1157,11 +1172,25 @@ unsigned long compaction_suitable(struct zone *zone, int order,
*/
fragindex = fragmentation_index(zone, order);
if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
- return COMPACT_SKIPPED;
+ return COMPACT_NOT_SUITABLE;

return COMPACT_CONTINUE;
}

+unsigned long compaction_suitable(struct zone *zone, int order,
+ int alloc_flags, int classzone_idx)
+{
+ unsigned long ret;
+
+ ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx);
+ trace_mm_compaction_suitable(zone, order, alloc_flags,
+ classzone_idx, ret);
+ if (ret == COMPACT_NOT_SUITABLE)
+ ret = COMPACT_SKIPPED;
+
+ return ret;
+}
+
static int compact_zone(struct zone *zone, struct compact_control *cc)
{
int ret;
@@ -1375,6 +1404,9 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
if (!order || !may_enter_fs || !may_perform_io)
return COMPACT_SKIPPED;

+ trace_mm_compaction_try_to_compact_pages(order, gfp_mask, mode,
+ alloc_flags, classzone_idx);
+
/* Compact each zone in the list */
for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
nodemask) {
--
1.7.9.5