2018-11-22 13:43:29

by Steven Rostedt

Subject: [RFC][PATCH 11/14] function_graph: Convert ret_stack to a series of longs

From: "Steven Rostedt (VMware)" <[email protected]>

In order to make it possible to have multiple callbacks registered with the
function_graph tracer, the retstack needs to be converted from an array of
ftrace_ret_stack structures to an array of longs. This will allow the list
of callbacks to be stored on the stack for the return side of the functions.

[ Note, this currently breaks architectures that access the ret_stack of a
task to handle unwinding when 'return_to_handler' is on the stack ]

Signed-off-by: Steven Rostedt (VMware) <[email protected]>
---
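For anyone who wants to poke at the indexing scheme outside the kernel, here
is a minimal user-space sketch of what the new macros do: the shadow stack is
a flat array of longs, each entry occupies FGRAPH_RET_INDEX longs, and
curr_ret_stack counts longs rather than entries. The trimmed struct, DEPTH and
the push()/pop() helpers below are illustrative stand-ins only (the real
ftrace_ret_stack has more fields and the kernel operates on
current->curr_ret_stack); this is not part of the patch.

#include <stdio.h>

/* Trimmed stand-in for struct ftrace_ret_stack */
struct ftrace_ret_stack {
	unsigned long		ret;
	unsigned long		func;
	unsigned long long	calltime;
};

#define FGRAPH_RET_SIZE		sizeof(struct ftrace_ret_stack)
/* Round the entry size up to a whole number of longs */
#define FGRAPH_RET_INDEX	((FGRAPH_RET_SIZE + sizeof(long) - 1) / sizeof(long))
#define DEPTH			50
#define STACK_LONGS		(DEPTH * FGRAPH_RET_INDEX)

static unsigned long ret_stack[STACK_LONGS];
static int curr_ret_stack;	/* index in longs, not in entries */

/* View the longs starting at @idx as one ftrace_ret_stack entry */
#define RET_STACK(idx)	((struct ftrace_ret_stack *)&ret_stack[idx])

static void push(unsigned long ret, unsigned long func)
{
	struct ftrace_ret_stack *rs = RET_STACK(curr_ret_stack);

	rs->ret = ret;
	rs->func = func;
	curr_ret_stack += FGRAPH_RET_INDEX;	/* RET_STACK_INC() */
}

static void pop(void)
{
	curr_ret_stack -= FGRAPH_RET_INDEX;	/* RET_STACK_DEC() */
	printf("ret=%lx func=%lx\n",
	       RET_STACK(curr_ret_stack)->ret,
	       RET_STACK(curr_ret_stack)->func);
}

int main(void)
{
	push(0x1000, 0x2000);
	push(0x1100, 0x2100);
	pop();		/* prints ret=1100 func=2100 */
	pop();		/* prints ret=1000 func=2000 */
	return 0;
}
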
include/linux/sched.h | 2 +-
kernel/trace/fgraph.c | 123 +++++++++++++++++++++++-------------------
2 files changed, 70 insertions(+), 55 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d6183a55e8eb..71a084a300da 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1119,7 +1119,7 @@ struct task_struct {
int curr_ret_depth;

/* Stack of return addresses for return function tracing: */
- struct ftrace_ret_stack *ret_stack;
+ unsigned long *ret_stack;

/* Timestamp for last schedule: */
unsigned long long ftrace_timestamp;
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 9b85638ecded..1389fe39f64c 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -23,6 +23,17 @@
#define ASSIGN_OPS_HASH(opsname, val)
#endif

+#define FGRAPH_RET_SIZE (sizeof(struct ftrace_ret_stack))
+#define FGRAPH_RET_INDEX (ALIGN(FGRAPH_RET_SIZE, sizeof(long)) / sizeof(long))
+#define SHADOW_STACK_SIZE (FTRACE_RETFUNC_DEPTH * FGRAPH_RET_SIZE)
+#define SHADOW_STACK_INDEX \
+ (ALIGN(SHADOW_STACK_SIZE, sizeof(long)) / sizeof(long))
+#define SHADOW_STACK_MAX_INDEX (SHADOW_STACK_INDEX - FGRAPH_RET_INDEX)
+
+#define RET_STACK(t, index) ((struct ftrace_ret_stack *)(&(t)->ret_stack[index]))
+#define RET_STACK_INC(c) ({ c += FGRAPH_RET_INDEX; })
+#define RET_STACK_DEC(c) ({ c -= FGRAPH_RET_INDEX; })
+
static bool kill_ftrace_graph;
int ftrace_graph_active;

@@ -59,6 +70,7 @@ static int
ftrace_push_return_trace(unsigned long ret, unsigned long func,
unsigned long frame_pointer, unsigned long *retp)
{
+ struct ftrace_ret_stack *ret_stack;
unsigned long long calltime;
int index;

@@ -75,23 +87,25 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func,
smp_rmb();

/* The return trace stack is full */
- if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
+ if (current->curr_ret_stack > SHADOW_STACK_MAX_INDEX) {
atomic_inc(&current->trace_overrun);
return -EBUSY;
}

calltime = trace_clock_local();

- index = ++current->curr_ret_stack;
+ index = current->curr_ret_stack;
+ RET_STACK_INC(current->curr_ret_stack);
+ ret_stack = RET_STACK(current, index);
barrier();
- current->ret_stack[index].ret = ret;
- current->ret_stack[index].func = func;
- current->ret_stack[index].calltime = calltime;
+ ret_stack->ret = ret;
+ ret_stack->func = func;
+ ret_stack->calltime = calltime;
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- current->ret_stack[index].fp = frame_pointer;
+ ret_stack->fp = frame_pointer;
#endif
#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
- current->ret_stack[index].retp = retp;
+ ret_stack->retp = retp;
#endif
return 0;
}
@@ -114,7 +128,7 @@ int function_graph_enter(unsigned long ret, unsigned long func,

return 0;
out_ret:
- current->curr_ret_stack--;
+ RET_STACK_DEC(current->curr_ret_stack);
out:
current->curr_ret_depth--;
return -EBUSY;
@@ -125,11 +139,13 @@ static void
ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
unsigned long frame_pointer)
{
+ struct ftrace_ret_stack *ret_stack;
int index;

index = current->curr_ret_stack;
+ RET_STACK_DEC(index);

- if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {
+ if (unlikely(index < 0 || index > SHADOW_STACK_MAX_INDEX)) {
ftrace_graph_stop();
WARN_ON(1);
/* Might as well panic, otherwise we have no where to go */
@@ -137,6 +153,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
return;
}

+ ret_stack = RET_STACK(current, index);
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
/*
* The arch may choose to record the frame pointer used
@@ -152,22 +169,22 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
* Note, -mfentry does not use frame pointers, and this test
* is not needed if CC_USING_FENTRY is set.
*/
- if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
+ if (unlikely(ret_stack->fp != frame_pointer)) {
ftrace_graph_stop();
WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
" from func %ps return to %lx\n",
current->ret_stack[index].fp,
frame_pointer,
- (void *)current->ret_stack[index].func,
- current->ret_stack[index].ret);
+ (void *)ret_stack->func,
+ ret_stack->ret);
*ret = (unsigned long)panic;
return;
}
#endif

- *ret = current->ret_stack[index].ret;
- trace->func = current->ret_stack[index].func;
- trace->calltime = current->ret_stack[index].calltime;
+ *ret = ret_stack->ret;
+ trace->func = ret_stack->func;
+ trace->calltime = ret_stack->calltime;
trace->overrun = atomic_read(&current->trace_overrun);
trace->depth = current->curr_ret_depth--;
/*
@@ -221,7 +238,7 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
* curr_ret_stack is after that.
*/
barrier();
- current->curr_ret_stack--;
+ RET_STACK_DEC(current->curr_ret_stack);

if (unlikely(!ret)) {
ftrace_graph_stop();
@@ -236,12 +253,13 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
struct ftrace_ret_stack *
ftrace_graph_get_ret_stack(struct task_struct *task, int idx)
{
- idx = current->curr_ret_stack - idx;
+ int index = task->curr_ret_stack;

- if (idx >= 0 && idx <= task->curr_ret_stack)
- return &current->ret_stack[idx];
+ index -= FGRAPH_RET_INDEX * (idx + 1);
+ if (index < 0)
+ return NULL;

- return NULL;
+ return RET_STACK(task, index);
}

/**
@@ -263,18 +281,20 @@ ftrace_graph_get_ret_stack(struct task_struct *task, int idx)
unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
unsigned long ret, unsigned long *retp)
{
+ struct ftrace_ret_stack *ret_stack;
int index = task->curr_ret_stack;
int i;

if (ret != (unsigned long)return_to_handler)
return ret;

- if (index < 0)
- return ret;
+ RET_STACK_DEC(index);

- for (i = 0; i <= index; i++)
- if (task->ret_stack[i].retp == retp)
- return task->ret_stack[i].ret;
+ for (i = index; i >= 0; RET_STACK_DEC(i)) {
+ ret_stack = RET_STACK(task, i);
+ if (ret_stack->retp == retp)
+ return ret_stack->ret;
+ }

return ret;
}
@@ -288,14 +308,15 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
return ret;

task_idx = task->curr_ret_stack;
+ RET_STACK_DEC(task_idx);

if (!task->ret_stack || task_idx < *idx)
return ret;

task_idx -= *idx;
- (*idx)++;
+ RET_STACK_INC(*idx);

- return task->ret_stack[task_idx].ret;
+ return RET_STACK(task, task_idx)->ret;
}
#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */

@@ -329,7 +350,7 @@ trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;

/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
-static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
+static int alloc_retstack_tasklist(unsigned long **ret_stack_list)
{
int i;
int ret = 0;
@@ -337,10 +358,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
struct task_struct *g, *t;

for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
- ret_stack_list[i] =
- kmalloc_array(FTRACE_RETFUNC_DEPTH,
- sizeof(struct ftrace_ret_stack),
- GFP_KERNEL);
+ ret_stack_list[i] = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
if (!ret_stack_list[i]) {
start = 0;
end = i;
@@ -359,9 +377,9 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
if (t->ret_stack == NULL) {
atomic_set(&t->tracing_graph_pause, 0);
atomic_set(&t->trace_overrun, 0);
- t->curr_ret_stack = -1;
+ t->curr_ret_stack = 0;
t->curr_ret_depth = -1;
- /* Make sure the tasks see the -1 first: */
+ /* Make sure the tasks see the 0 first: */
smp_wmb();
t->ret_stack = ret_stack_list[start++];
}
@@ -379,6 +397,7 @@ static void
ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
struct task_struct *prev, struct task_struct *next)
{
+ struct ftrace_ret_stack *ret_stack;
unsigned long long timestamp;
int index;

@@ -403,8 +422,11 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
*/
timestamp -= next->ftrace_timestamp;

- for (index = next->curr_ret_stack; index >= 0; index--)
- next->ret_stack[index].calltime += timestamp;
+ for (index = next->curr_ret_stack - FGRAPH_RET_INDEX; index >= 0; ) {
+ ret_stack = RET_STACK(next, index);
+ ret_stack->calltime += timestamp;
+ index -= FGRAPH_RET_INDEX;
+ }
}

static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace)
@@ -447,10 +469,10 @@ void update_function_graph_func(void)
ftrace_graph_entry = __ftrace_graph_entry;
}

-static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
+static DEFINE_PER_CPU(unsigned long *, idle_ret_stack);

static void
-graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
+graph_init_task(struct task_struct *t, unsigned long *ret_stack)
{
atomic_set(&t->tracing_graph_pause, 0);
atomic_set(&t->trace_overrun, 0);
@@ -466,7 +488,7 @@ graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
*/
void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
{
- t->curr_ret_stack = -1;
+ t->curr_ret_stack = 0;
t->curr_ret_depth = -1;
/*
* The idle task has no parent, it either has its own
@@ -476,14 +498,11 @@ void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));

if (ftrace_graph_active) {
- struct ftrace_ret_stack *ret_stack;
+ unsigned long *ret_stack;

ret_stack = per_cpu(idle_ret_stack, cpu);
if (!ret_stack) {
- ret_stack =
- kmalloc_array(FTRACE_RETFUNC_DEPTH,
- sizeof(struct ftrace_ret_stack),
- GFP_KERNEL);
+ ret_stack = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
if (!ret_stack)
return;
per_cpu(idle_ret_stack, cpu) = ret_stack;
@@ -497,15 +516,13 @@ void ftrace_graph_init_task(struct task_struct *t)
{
/* Make sure we do not use the parent ret_stack */
t->ret_stack = NULL;
- t->curr_ret_stack = -1;
+ t->curr_ret_stack = 0;
t->curr_ret_depth = -1;

if (ftrace_graph_active) {
- struct ftrace_ret_stack *ret_stack;
+ unsigned long *ret_stack;

- ret_stack = kmalloc_array(FTRACE_RETFUNC_DEPTH,
- sizeof(struct ftrace_ret_stack),
- GFP_KERNEL);
+ ret_stack = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
if (!ret_stack)
return;
graph_init_task(t, ret_stack);
@@ -514,7 +531,7 @@ void ftrace_graph_init_task(struct task_struct *t)

void ftrace_graph_exit_task(struct task_struct *t)
{
- struct ftrace_ret_stack *ret_stack = t->ret_stack;
+ unsigned long *ret_stack = t->ret_stack;

t->ret_stack = NULL;
/* NULL must become visible to IRQs before we free it: */
@@ -526,12 +543,10 @@ void ftrace_graph_exit_task(struct task_struct *t)
/* Allocate a return stack for each task */
static int start_graph_tracing(void)
{
- struct ftrace_ret_stack **ret_stack_list;
+ unsigned long **ret_stack_list;
int ret, cpu;

- ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE,
- sizeof(struct ftrace_ret_stack *),
- GFP_KERNEL);
+ ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);

if (!ret_stack_list)
return -ENOMEM;
--
2.19.1




2018-11-24 09:04:00

by Joel Fernandes

Subject: Re: [RFC][PATCH 11/14] function_graph: Convert ret_stack to a series of longs

On Wed, Nov 21, 2018 at 08:27:19PM -0500, Steven Rostedt wrote:
> From: "Steven Rostedt (VMware)" <[email protected]>
>
> In order to make it possible to have multiple callbacks registered with the
> function_graph tracer, the retstack needs to be converted from an array of
> ftrace_ret_stack structures to an array of longs. This will allow to store
> the list of callbacks on the stack for the return side of the functions.
>
> [ Note, this currently breaks architectures that access the ret_stack of a
> task to handle unwinding when 'return_to_handler' is on the stack ]
>
> Signed-off-by: Steven Rostedt (VMware) <[email protected]>
> ---
> include/linux/sched.h | 2 +-
> kernel/trace/fgraph.c | 123 +++++++++++++++++++++++-------------------
> 2 files changed, 70 insertions(+), 55 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index d6183a55e8eb..71a084a300da 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1119,7 +1119,7 @@ struct task_struct {
> int curr_ret_depth;
>
> /* Stack of return addresses for return function tracing: */
> - struct ftrace_ret_stack *ret_stack;
> + unsigned long *ret_stack;
>
> /* Timestamp for last schedule: */
> unsigned long long ftrace_timestamp;
> diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
> index 9b85638ecded..1389fe39f64c 100644
> --- a/kernel/trace/fgraph.c
> +++ b/kernel/trace/fgraph.c
> @@ -23,6 +23,17 @@
> #define ASSIGN_OPS_HASH(opsname, val)
> #endif
>
> +#define FGRAPH_RET_SIZE (sizeof(struct ftrace_ret_stack))
> +#define FGRAPH_RET_INDEX (ALIGN(FGRAPH_RET_SIZE, sizeof(long)) / sizeof(long))
> +#define SHADOW_STACK_SIZE (FTRACE_RETFUNC_DEPTH * FGRAPH_RET_SIZE)
> +#define SHADOW_STACK_INDEX \
> + (ALIGN(SHADOW_STACK_SIZE, sizeof(long)) / sizeof(long))
> +#define SHADOW_STACK_MAX_INDEX (SHADOW_STACK_INDEX - FGRAPH_RET_INDEX)
> +
> +#define RET_STACK(t, index) ((struct ftrace_ret_stack *)(&(t)->ret_stack[index]))
> +#define RET_STACK_INC(c) ({ c += FGRAPH_RET_INDEX; })
> +#define RET_STACK_DEC(c) ({ c -= FGRAPH_RET_INDEX; })
> +
[...]
> @@ -514,7 +531,7 @@ void ftrace_graph_init_task(struct task_struct *t)
>
> void ftrace_graph_exit_task(struct task_struct *t)
> {
> - struct ftrace_ret_stack *ret_stack = t->ret_stack;
> + unsigned long *ret_stack = t->ret_stack;
>
> t->ret_stack = NULL;
> /* NULL must become visible to IRQs before we free it: */
> @@ -526,12 +543,10 @@ void ftrace_graph_exit_task(struct task_struct *t)
> /* Allocate a return stack for each task */
> static int start_graph_tracing(void)
> {
> - struct ftrace_ret_stack **ret_stack_list;
> + unsigned long **ret_stack_list;
> int ret, cpu;
>
> - ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE,
> - sizeof(struct ftrace_ret_stack *),
> - GFP_KERNEL);
> + ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
>

I had dumped the fgraph size related macros to understand the patch better, I
got:
[ 0.909528] val of FGRAPH_RET_SIZE is 40
[ 0.910250] val of FGRAPH_RET_INDEX is 5
[ 0.910866] val of FGRAPH_ARRAY_SIZE is 16
[ 0.911488] val of FGRAPH_ARRAY_MASK is 255
[ 0.912134] val of FGRAPH_MAX_INDEX is 16
[ 0.912751] val of FGRAPH_INDEX_SHIFT is 8
[ 0.913382] val of FGRAPH_FRAME_SIZE is 168
[ 0.914033] val of FGRAPH_FRAME_INDEX is 21
FTRACE_RETFUNC_DEPTH is 50
[ 0.914686] val of SHADOW_STACK_SIZE is 8400

I had a concern about memory overhead per-task. It seems the total memory
needed per task for the stack is 8400 bytes (with my configuration, with
FUNCTION_PROFILE turned off).

Whereas before it would be 32 * 40 = 1280 bytes. That looks like ~7 times
more than before.

On my system with ~4000 threads, that becomes ~32MB which seems a bit
wasteful especially if there was only one or 2 function graph callbacks
registered and most of the callback array in the stack isn't used.

Could we make the array size configurable at compile time and start it with a
small number like 4 or 6?
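
Rough math, assuming the later patches in this series size each frame as
FGRAPH_RET_SIZE + FGRAPH_ARRAY_SIZE * sizeof(long) (which is what the
168 = 40 + 16 * 8 dump above suggests):

  FGRAPH_ARRAY_SIZE = 16: 50 * (40 + 16 * 8) = 8400 bytes/task (~32 MB for 4000 threads)
  FGRAPH_ARRAY_SIZE =  6: 50 * (40 +  6 * 8) = 4400 bytes/task (~17 MB for 4000 threads)
  FGRAPH_ARRAY_SIZE =  4: 50 * (40 +  4 * 8) = 3600 bytes/task (~14 MB for 4000 threads)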

Also for patches 1 through 10:
Reviewed-by: Joel Fernandes (Google) <[email protected]>

thanks,

- Joel


2018-11-26 16:13:46

by Masami Hiramatsu

Subject: Re: [RFC][PATCH 11/14] function_graph: Convert ret_stack to a series of longs

On Fri, 23 Nov 2018 21:31:38 -0800
Joel Fernandes <[email protected]> wrote:

> On Wed, Nov 21, 2018 at 08:27:19PM -0500, Steven Rostedt wrote:
> > From: "Steven Rostedt (VMware)" <[email protected]>
> >
> > In order to make it possible to have multiple callbacks registered with the
> > function_graph tracer, the retstack needs to be converted from an array of
> > ftrace_ret_stack structures to an array of longs. This will allow to store
> > the list of callbacks on the stack for the return side of the functions.
> >
> > [ Note, this currently breaks architectures that access the ret_stack of a
> > task to handle unwinding when 'return_to_handler' is on the stack ]
> >
> > Signed-off-by: Steven Rostedt (VMware) <[email protected]>
> > ---
> > include/linux/sched.h | 2 +-
> > kernel/trace/fgraph.c | 123 +++++++++++++++++++++++-------------------
> > 2 files changed, 70 insertions(+), 55 deletions(-)
> >
> > diff --git a/include/linux/sched.h b/include/linux/sched.h
> > index d6183a55e8eb..71a084a300da 100644
> > --- a/include/linux/sched.h
> > +++ b/include/linux/sched.h
> > @@ -1119,7 +1119,7 @@ struct task_struct {
> > int curr_ret_depth;
> >
> > /* Stack of return addresses for return function tracing: */
> > - struct ftrace_ret_stack *ret_stack;
> > + unsigned long *ret_stack;
> >
> > /* Timestamp for last schedule: */
> > unsigned long long ftrace_timestamp;
> > diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
> > index 9b85638ecded..1389fe39f64c 100644
> > --- a/kernel/trace/fgraph.c
> > +++ b/kernel/trace/fgraph.c
> > @@ -23,6 +23,17 @@
> > #define ASSIGN_OPS_HASH(opsname, val)
> > #endif
> >
> > +#define FGRAPH_RET_SIZE (sizeof(struct ftrace_ret_stack))
> > +#define FGRAPH_RET_INDEX (ALIGN(FGRAPH_RET_SIZE, sizeof(long)) / sizeof(long))
> > +#define SHADOW_STACK_SIZE (FTRACE_RETFUNC_DEPTH * FGRAPH_RET_SIZE)
> > +#define SHADOW_STACK_INDEX \
> > + (ALIGN(SHADOW_STACK_SIZE, sizeof(long)) / sizeof(long))
> > +#define SHADOW_STACK_MAX_INDEX (SHADOW_STACK_INDEX - FGRAPH_RET_INDEX)
> > +
> > +#define RET_STACK(t, index) ((struct ftrace_ret_stack *)(&(t)->ret_stack[index]))
> > +#define RET_STACK_INC(c) ({ c += FGRAPH_RET_INDEX; })
> > +#define RET_STACK_DEC(c) ({ c -= FGRAPH_RET_INDEX; })
> > +
> [...]
> > @@ -514,7 +531,7 @@ void ftrace_graph_init_task(struct task_struct *t)
> >
> > void ftrace_graph_exit_task(struct task_struct *t)
> > {
> > - struct ftrace_ret_stack *ret_stack = t->ret_stack;
> > + unsigned long *ret_stack = t->ret_stack;
> >
> > t->ret_stack = NULL;
> > /* NULL must become visible to IRQs before we free it: */
> > @@ -526,12 +543,10 @@ void ftrace_graph_exit_task(struct task_struct *t)
> > /* Allocate a return stack for each task */
> > static int start_graph_tracing(void)
> > {
> > - struct ftrace_ret_stack **ret_stack_list;
> > + unsigned long **ret_stack_list;
> > int ret, cpu;
> >
> > - ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE,
> > - sizeof(struct ftrace_ret_stack *),
> > - GFP_KERNEL);
> > + ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
> >
>
> I had dumped the fgraph size related macros to understand the patch better, I
> got:
> [ 0.909528] val of FGRAPH_RET_SIZE is 40
> [ 0.910250] val of FGRAPH_RET_INDEX is 5
> [ 0.910866] val of FGRAPH_ARRAY_SIZE is 16
> [ 0.911488] val of FGRAPH_ARRAY_MASK is 255
> [ 0.912134] val of FGRAPH_MAX_INDEX is 16
> [ 0.912751] val of FGRAPH_INDEX_SHIFT is 8
> [ 0.913382] val of FGRAPH_FRAME_SIZE is 168
> [ 0.914033] val of FGRAPH_FRAME_INDEX is 21
> FTRACE_RETFUNC_DEPTH is 50
> [ 0.914686] val of SHADOW_STACK_SIZE is 8400
>
> I had a concern about memory overhead per-task. It seems the total memory
> needed per task for the stack is 8400 bytes (with my configuration with
> FUNCTION_PROFILE
> turned off).
>
> Where as before it would be 32 * 40 = 1280 bytes. That looks like ~7 times
> more than before.

Hmm, this seems too big... I thought the shadow-stack size should be
smaller than 1 page (4kB). Steve, can we give a 4k page for shadow stack
and define FTRACE_RETFUNC_DEPTH = 4096 / FGRAPH_RET_SIZE ?
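
(For reference, with the sizes Joel dumped that would be 4096 / 40 = 102 bare
ftrace_ret_stack entries per page, but only 4096 / 168 = 24 frames if each
frame ends up at the 168 bytes (FGRAPH_FRAME_SIZE) shown above.)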

> On my system with ~4000 threads, that becomes ~32MB which seems a bit
> wasteful especially if there was only one or 2 function graph callbacks
> registered and most of the callback array in the stack isn't used.
>
> Could we make the array size configurable at compile time and start it with a
> small number like 4 or 6?

Or, we can introduce online setting :)

Thank you,

>
> Also for patches 1 through 10:
> Reviewed-by: Joel Fernandes (Google) <[email protected]>
>
> thanks,
>
> - Joel
>


--
Masami Hiramatsu <[email protected]>

2018-11-26 16:29:30

by Steven Rostedt

Subject: Re: [RFC][PATCH 11/14] function_graph: Convert ret_stack to a series of longs

On Tue, 27 Nov 2018 01:07:55 +0900
Masami Hiramatsu <[email protected]> wrote:

> > > --- a/include/linux/sched.h
> > > +++ b/include/linux/sched.h
> > > @@ -1119,7 +1119,7 @@ struct task_struct {
> > > int curr_ret_depth;
> > >
> > > /* Stack of return addresses for return function tracing: */
> > > - struct ftrace_ret_stack *ret_stack;
> > > + unsigned long *ret_stack;
> > >
> > > /* Timestamp for last schedule: */
> > > unsigned long long ftrace_timestamp;
> > > diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
> > > index 9b85638ecded..1389fe39f64c 100644
> > > --- a/kernel/trace/fgraph.c
> > > +++ b/kernel/trace/fgraph.c
> > > @@ -23,6 +23,17 @@
> > > #define ASSIGN_OPS_HASH(opsname, val)
> > > #endif
> > >
> > > +#define FGRAPH_RET_SIZE (sizeof(struct ftrace_ret_stack))
> > > +#define FGRAPH_RET_INDEX (ALIGN(FGRAPH_RET_SIZE, sizeof(long)) / sizeof(long))
> > > +#define SHADOW_STACK_SIZE (FTRACE_RETFUNC_DEPTH * FGRAPH_RET_SIZE)
> > > +#define SHADOW_STACK_INDEX \
> > > + (ALIGN(SHADOW_STACK_SIZE, sizeof(long)) / sizeof(long))
> > > +#define SHADOW_STACK_MAX_INDEX (SHADOW_STACK_INDEX - FGRAPH_RET_INDEX)
> > > +
> > > +#define RET_STACK(t, index) ((struct ftrace_ret_stack *)(&(t)->ret_stack[index]))
> > > +#define RET_STACK_INC(c) ({ c += FGRAPH_RET_INDEX; })
> > > +#define RET_STACK_DEC(c) ({ c -= FGRAPH_RET_INDEX; })
> > > +
> > [...]
> > > @@ -514,7 +531,7 @@ void ftrace_graph_init_task(struct task_struct *t)
> > >
> > > void ftrace_graph_exit_task(struct task_struct *t)
> > > {
> > > - struct ftrace_ret_stack *ret_stack = t->ret_stack;
> > > + unsigned long *ret_stack = t->ret_stack;
> > >
> > > t->ret_stack = NULL;
> > > /* NULL must become visible to IRQs before we free it: */
> > > @@ -526,12 +543,10 @@ void ftrace_graph_exit_task(struct task_struct *t)
> > > /* Allocate a return stack for each task */
> > > static int start_graph_tracing(void)
> > > {
> > > - struct ftrace_ret_stack **ret_stack_list;
> > > + unsigned long **ret_stack_list;
> > > int ret, cpu;
> > >
> > > - ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE,
> > > - sizeof(struct ftrace_ret_stack *),
> > > - GFP_KERNEL);
> > > + ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
> > >
> >
> > I had dumped the fgraph size related macros to understand the patch better, I
> > got:
> > [ 0.909528] val of FGRAPH_RET_SIZE is 40
> > [ 0.910250] val of FGRAPH_RET_INDEX is 5
> > [ 0.910866] val of FGRAPH_ARRAY_SIZE is 16
> > [ 0.911488] val of FGRAPH_ARRAY_MASK is 255
> > [ 0.912134] val of FGRAPH_MAX_INDEX is 16
> > [ 0.912751] val of FGRAPH_INDEX_SHIFT is 8
> > [ 0.913382] val of FGRAPH_FRAME_SIZE is 168
> > [ 0.914033] val of FGRAPH_FRAME_INDEX is 21
> > FTRACE_RETFUNC_DEPTH is 50
> > [ 0.914686] val of SHADOW_STACK_SIZE is 8400
> >
> > I had a concern about memory overhead per-task. It seems the total memory
> > needed per task for the stack is 8400 bytes (with my configuration with
> > FUNCTION_PROFILE
> > turned off).
> >
> > Where as before it would be 32 * 40 = 1280 bytes. That looks like ~7 times
> > more than before.
>
> Hmm, this seems too big... I thought the shadow-stack size should be
> smaller than 1 page (4kB). Steve, can we give a 4k page for shadow stack
> and define FTRACE_RETFUNC_DEPTH = 4096 / FGRAPH_RET_SIZE ?

For the first pass, I decided not to worry about the size. It made the
code less complex :-)

Yes, I plan on working on making the size of the stack smaller, but
that will probably be added on patches to do so.

>
> > On my system with ~4000 threads, that becomes ~32MB which seems a bit
> > wasteful especially if there was only one or 2 function graph callbacks
> > registered and most of the callback array in the stack isn't used.

Note, all 4000 threads could be doing those trace backs, and if you are
doing full function graph tracing, it will use a lot.

> >
> > Could we make the array size configurable at compile time and start it with a
> > small number like 4 or 6?
>
> Or, we can introduce online setting :)

Yes, that can easily be added. I didn't try to make this into the
perfect solution, I wanted a solid one first, and then massage it into
something that is more efficient, both with memory consumption and
performance.

Joel and Masami, thanks for the feedback.

-- Steve

2018-11-26 21:32:57

by Steven Rostedt

Subject: Re: [RFC][PATCH 11/14] function_graph: Convert ret_stack to a series of longs

On Fri, 23 Nov 2018 21:31:38 -0800
Joel Fernandes <[email protected]> wrote:

> Also for patches 1 through 10:
> Reviewed-by: Joel Fernandes (Google) <[email protected]>

Thanks!

I'll move these over to my ftrace/core branch and start testing them
for linux-next. The rest can probably wait till the next merge window
after that.

-- Steve

2018-11-28 01:39:52

by Joel Fernandes

Subject: Re: [RFC][PATCH 11/14] function_graph: Convert ret_stack to a series of longs

On Mon, Nov 26, 2018 at 11:26:03AM -0500, Steven Rostedt wrote:
> On Tue, 27 Nov 2018 01:07:55 +0900
> Masami Hiramatsu <[email protected]> wrote:
>
> > > > --- a/include/linux/sched.h
> > > > +++ b/include/linux/sched.h
> > > > @@ -1119,7 +1119,7 @@ struct task_struct {
> > > > int curr_ret_depth;
> > > >
> > > > /* Stack of return addresses for return function tracing: */
> > > > - struct ftrace_ret_stack *ret_stack;
> > > > + unsigned long *ret_stack;
> > > >
> > > > /* Timestamp for last schedule: */
> > > > unsigned long long ftrace_timestamp;
> > > > diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
> > > > index 9b85638ecded..1389fe39f64c 100644
> > > > --- a/kernel/trace/fgraph.c
> > > > +++ b/kernel/trace/fgraph.c
> > > > @@ -23,6 +23,17 @@
> > > > #define ASSIGN_OPS_HASH(opsname, val)
> > > > #endif
> > > >
> > > > +#define FGRAPH_RET_SIZE (sizeof(struct ftrace_ret_stack))
> > > > +#define FGRAPH_RET_INDEX (ALIGN(FGRAPH_RET_SIZE, sizeof(long)) / sizeof(long))
> > > > +#define SHADOW_STACK_SIZE (FTRACE_RETFUNC_DEPTH * FGRAPH_RET_SIZE)
> > > > +#define SHADOW_STACK_INDEX \
> > > > + (ALIGN(SHADOW_STACK_SIZE, sizeof(long)) / sizeof(long))
> > > > +#define SHADOW_STACK_MAX_INDEX (SHADOW_STACK_INDEX - FGRAPH_RET_INDEX)
> > > > +
> > > > +#define RET_STACK(t, index) ((struct ftrace_ret_stack *)(&(t)->ret_stack[index]))
> > > > +#define RET_STACK_INC(c) ({ c += FGRAPH_RET_INDEX; })
> > > > +#define RET_STACK_DEC(c) ({ c -= FGRAPH_RET_INDEX; })
> > > > +
> > > [...]
> > > > @@ -514,7 +531,7 @@ void ftrace_graph_init_task(struct task_struct *t)
> > > >
> > > > void ftrace_graph_exit_task(struct task_struct *t)
> > > > {
> > > > - struct ftrace_ret_stack *ret_stack = t->ret_stack;
> > > > + unsigned long *ret_stack = t->ret_stack;
> > > >
> > > > t->ret_stack = NULL;
> > > > /* NULL must become visible to IRQs before we free it: */
> > > > @@ -526,12 +543,10 @@ void ftrace_graph_exit_task(struct task_struct *t)
> > > > /* Allocate a return stack for each task */
> > > > static int start_graph_tracing(void)
> > > > {
> > > > - struct ftrace_ret_stack **ret_stack_list;
> > > > + unsigned long **ret_stack_list;
> > > > int ret, cpu;
> > > >
> > > > - ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE,
> > > > - sizeof(struct ftrace_ret_stack *),
> > > > - GFP_KERNEL);
> > > > + ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
> > > >
> > >
> > > I had dumped the fgraph size related macros to understand the patch better, I
> > > got:
> > > [ 0.909528] val of FGRAPH_RET_SIZE is 40
> > > [ 0.910250] val of FGRAPH_RET_INDEX is 5
> > > [ 0.910866] val of FGRAPH_ARRAY_SIZE is 16
> > > [ 0.911488] val of FGRAPH_ARRAY_MASK is 255
> > > [ 0.912134] val of FGRAPH_MAX_INDEX is 16
> > > [ 0.912751] val of FGRAPH_INDEX_SHIFT is 8
> > > [ 0.913382] val of FGRAPH_FRAME_SIZE is 168
> > > [ 0.914033] val of FGRAPH_FRAME_INDEX is 21
> > > FTRACE_RETFUNC_DEPTH is 50
> > > [ 0.914686] val of SHADOW_STACK_SIZE is 8400
> > >
> > > I had a concern about memory overhead per-task. It seems the total memory
> > > needed per task for the stack is 8400 bytes (with my configuration with
> > > FUNCTION_PROFILE
> > > turned off).
> > >
> > > Where as before it would be 32 * 40 = 1280 bytes. That looks like ~7 times
> > > more than before.
> >
> > Hmm, this seems too big... I thought the shadow-stack size should be
> > smaller than 1 page (4kB). Steve, can we give a 4k page for shadow stack
> > and define FTRACE_RETFUNC_DEPTH = 4096 / FGRAPH_RET_SIZE ?
>
> For the first pass, I decided not to worry about the size. It made the
> code less complex :-)
>
> Yes, I plan on working on making the size of the stack smaller, but
> that will probably be added on patches to do so.

Cool, sounds good.

> > > On my system with ~4000 threads, that becomes ~32MB which seems a bit
> > > wasteful especially if there was only one or 2 function graph callbacks
> > > registered and most of the callback array in the stack isn't used.
>
> Note, all 4000 threads could be doing those trace backs, and if you are
> doing full function graph tracing, it will use a lot.

But I think each of the threads will only use N entries in the callback array
where N is the number of function graph callback users who registered, right?
So the remaining (total - N) allocated callback array entries per thread will not
be used.

> > > Could we make the array size configurable at compile time and start it with a
> > > small number like 4 or 6?
> >
> > Or, we can introduce online setting :)
>
> Yes, that can easily be added. I didn't try to make this into the
> perfect solution, I wanted a solid one first, and then massage it into
> something that is more efficient, both with memory consumption and
> performance.
>
> Joel and Masami, thanks for the feedback.

I agree the first step is good so far. Looking forward to the patches, thanks
a lot,

- Joel