From: Steven Rostedt <[email protected]>
Impact: more efficient code for ftrace graph tracer
This patch uses the dynamic patching, when available, to patch
the function graph code into the kernel.
This patch will ease the way for letting both function tracing
and function graph tracing run together.
Signed-off-by: Steven Rostedt <[email protected]>
---
arch/x86/kernel/entry_32.S | 5 ++++
arch/x86/kernel/ftrace.c | 48 ++++++++++++++++++++++++++++++++++++++++++-
include/linux/ftrace.h | 5 ++++
kernel/trace/ftrace.c | 35 ++++++++++++++-----------------
4 files changed, 72 insertions(+), 21 deletions(-)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 71cdda1..37feecf 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1185,6 +1185,11 @@ ftrace_call:
popl %edx
popl %ecx
popl %eax
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ jmp ftrace_stub
+#endif
.globl ftrace_stub
ftrace_stub:
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 26b2d92..7ef914e 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -111,7 +111,6 @@ static void ftrace_mod_code(void)
*/
mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
MCOUNT_INSN_SIZE);
-
}
void ftrace_nmi_enter(void)
@@ -325,7 +324,51 @@ int __init ftrace_dyn_arch_init(void *data)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-#ifndef CONFIG_DYNAMIC_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern void ftrace_graph_call(void);
+
+static int ftrace_mod_jmp(unsigned long ip,
+ int old_offset, int new_offset)
+{
+ unsigned char code[MCOUNT_INSN_SIZE];
+
+ if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
+ return -EINVAL;
+
+ *(int *)(&code[1]) = new_offset;
+
+ if (do_ftrace_mod_code(ip, &code))
+ return -EPERM;
+
+ return 0;
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ unsigned long ip = (unsigned long)(&ftrace_graph_call);
+ int old_offset, new_offset;
+
+ old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
+ new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
+
+ return ftrace_mod_jmp(ip, old_offset, new_offset);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ unsigned long ip = (unsigned long)(&ftrace_graph_call);
+ int old_offset, new_offset;
+
+ old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
+ new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
+
+ return ftrace_mod_jmp(ip, old_offset, new_offset);
+}
+
+#else /* CONFIG_DYNAMIC_FTRACE */
/*
* These functions are picked from those used on
@@ -343,6 +386,7 @@ void ftrace_nmi_exit(void)
{
atomic_dec(&in_nmi);
}
+
#endif /* !CONFIG_DYNAMIC_FTRACE */
/* Add a function return address to the trace stack on thread info.*/
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fc2d549..f9792c0 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -117,6 +117,11 @@ extern void ftrace_call(void);
extern void mcount_call(void);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
extern void ftrace_graph_caller(void);
+extern int ftrace_enable_ftrace_graph_caller(void);
+extern int ftrace_disable_ftrace_graph_caller(void);
+#else
+static inline int ftrace_enable_ftrace_graph_caller(void) { return 0; }
+static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; }
#endif
/**
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 00d98c6..5f7c864 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -281,6 +281,8 @@ enum {
FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
FTRACE_ENABLE_MCOUNT = (1 << 3),
FTRACE_DISABLE_MCOUNT = (1 << 4),
+ FTRACE_START_FUNC_RET = (1 << 5),
+ FTRACE_STOP_FUNC_RET = (1 << 6),
};
static int ftrace_filtered;
@@ -465,14 +467,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
unsigned long ip, fl;
unsigned long ftrace_addr;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- if (ftrace_tracing_type == FTRACE_TYPE_ENTER)
- ftrace_addr = (unsigned long)ftrace_caller;
- else
- ftrace_addr = (unsigned long)ftrace_graph_caller;
-#else
ftrace_addr = (unsigned long)ftrace_caller;
-#endif
ip = rec->ip;
@@ -605,6 +600,11 @@ static int __ftrace_modify_code(void *data)
if (*command & FTRACE_UPDATE_TRACE_FUNC)
ftrace_update_ftrace_func(ftrace_trace_function);
+ if (*command & FTRACE_START_FUNC_RET)
+ ftrace_enable_ftrace_graph_caller();
+ else if (*command & FTRACE_STOP_FUNC_RET)
+ ftrace_disable_ftrace_graph_caller();
+
return 0;
}
@@ -629,10 +629,8 @@ static void ftrace_startup_enable(int command)
ftrace_run_update_code(command);
}
-static void ftrace_startup(void)
+static void ftrace_startup(int command)
{
- int command = 0;
-
if (unlikely(ftrace_disabled))
return;
@@ -645,10 +643,8 @@ static void ftrace_startup(void)
mutex_unlock(&ftrace_start_lock);
}
-static void ftrace_shutdown(void)
+static void ftrace_shutdown(int command)
{
- int command = 0;
-
if (unlikely(ftrace_disabled))
return;
@@ -1453,8 +1449,9 @@ device_initcall(ftrace_nodyn_init);
static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
static inline void ftrace_startup_enable(int command) { }
-# define ftrace_startup() do { } while (0)
-# define ftrace_shutdown() do { } while (0)
+/* Keep as macros so we do not need to define the commands */
+# define ftrace_startup(command) do { } while (0)
+# define ftrace_shutdown(command) do { } while (0)
# define ftrace_startup_sysctl() do { } while (0)
# define ftrace_shutdown_sysctl() do { } while (0)
#endif /* CONFIG_DYNAMIC_FTRACE */
@@ -1585,7 +1582,7 @@ int register_ftrace_function(struct ftrace_ops *ops)
}
ret = __register_ftrace_function(ops);
- ftrace_startup();
+ ftrace_startup(0);
out:
mutex_unlock(&ftrace_sysctl_lock);
@@ -1604,7 +1601,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
mutex_lock(&ftrace_sysctl_lock);
ret = __unregister_ftrace_function(ops);
- ftrace_shutdown();
+ ftrace_shutdown(0);
mutex_unlock(&ftrace_sysctl_lock);
return ret;
@@ -1751,7 +1748,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
ftrace_tracing_type = FTRACE_TYPE_RETURN;
ftrace_graph_return = retfunc;
ftrace_graph_entry = entryfunc;
- ftrace_startup();
+ ftrace_startup(FTRACE_START_FUNC_RET);
out:
mutex_unlock(&ftrace_sysctl_lock);
@@ -1765,7 +1762,7 @@ void unregister_ftrace_graph(void)
atomic_dec(&ftrace_graph_active);
ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
ftrace_graph_entry = (trace_func_graph_ent_t)ftrace_stub;
- ftrace_shutdown();
+ ftrace_shutdown(FTRACE_STOP_FUNC_RET);
/* Restore normal tracing type */
ftrace_tracing_type = FTRACE_TYPE_ENTER;
--
1.5.6.5
--
On Wed, 26 Nov 2008 00:16:24 -0500 Steven Rostedt <[email protected]> wrote:
> From: Steven Rostedt <[email protected]>
>
> Impact: more efficient code for ftrace graph tracer
>
> This patch uses the dynamic patching, when available, to patch
> the function graph code into the kernel.
>
> This patch will ease the way for letting both function tracing
> and function graph tracing run together.
>
> ...
>
> +static int ftrace_mod_jmp(unsigned long ip,
> + int old_offset, int new_offset)
> +{
> + unsigned char code[MCOUNT_INSN_SIZE];
> +
> + if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
> + return -EFAULT;
> +
> + if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
erk. I suspect that there's a nicer way of doing this amongst our
forest of get_unaligned_foo() interfaces. Harvey will know.
> + return -EINVAL;
> +
> + *(int *)(&code[1]) = new_offset;
Might be able to use put_unaligned_foo() here.
The problem is that these functions use sizeof(*ptr) to work out what
to do, so a cast is still needed. A get_unaligned32(ptr) would be
nice. One which takes a void* and assumes CPU ordering.
> + if (do_ftrace_mod_code(ip, &code))
> + return -EPERM;
> +
> + return 0;
> +}
> +
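For illustration, a minimal sketch of the kind of helper described above: a hypothetical get_unaligned32() that takes a void pointer, assumes CPU byte order, and hides the cast inside the helper. The name and implementation are assumptions for the sake of the example, not an existing kernel interface.

#include <linux/types.h>
#include <linux/string.h>

/*
 * Hypothetical helper, sketched from the discussion above: read a
 * 32-bit value in CPU byte order from a possibly unaligned address.
 */
static inline u32 get_unaligned32(const void *p)
{
	u32 val;

	/* memcpy makes no alignment assumptions about p */
	memcpy(&val, p, sizeof(val));
	return val;
}

The check in ftrace_mod_jmp() could then read:

	if (code[0] != 0xe9 || old_offset != get_unaligned32(&code[1]))
		return -EINVAL;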
On Tue, 2008-11-25 at 21:35 -0800, Andrew Morton wrote:
> On Wed, 26 Nov 2008 00:16:24 -0500 Steven Rostedt <[email protected]> wrote:
>
> > From: Steven Rostedt <[email protected]>
> >
> > Impact: more efficient code for ftrace graph tracer
> >
> > This patch uses the dynamic patching, when available, to patch
> > the function graph code into the kernel.
> >
> > This patch will ease the way for letting both function tracing
> > and function graph tracing run together.
> >
> > ...
> >
> > +static int ftrace_mod_jmp(unsigned long ip,
> > + int old_offset, int new_offset)
> > +{
> > + unsigned char code[MCOUNT_INSN_SIZE];
> > +
> > + if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
> > + return -EFAULT;
> > +
> > + if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
>
> erk. I suspect that there's a nicer way of doing this amongst our
> forest of get_unaligned_foo() interfaces. Harvey will know.
>
if (code[0] != 0xe9 || old_offset != get_unaligned((int *)(&code[1])))
> > + return -EINVAL;
> > +
> > + *(int *)(&code[1]) = new_offset;
>
> Might be able to use put_unaligned_foo() here.
>
put_unaligned(new_offset, (int *)(&code[1]));
> The problem is that these functions use sizeof(*ptr) to work out what
> to do, so a cast is still needed. A get_unaligned32(ptr) would be
> nice. One which takes a void* and assumes CPU ordering.
I've been thinking similarly, I could investigate something that
goes in with the _noalign stuff?
I'll finish the documentation patch for the _noalign stuff and then see
about doing the host-order bits to fit in as well.
Harvey
On Tue, 25 Nov 2008 22:52:29 -0800 Harvey Harrison <[email protected]> wrote:
> On Tue, 2008-11-25 at 21:35 -0800, Andrew Morton wrote:
> > On Wed, 26 Nov 2008 00:16:24 -0500 Steven Rostedt <[email protected]> wrote:
> >
> > > From: Steven Rostedt <[email protected]>
> > >
> > > Impact: more efficient code for ftrace graph tracer
> > >
> > > This patch uses the dynamic patching, when available, to patch
> > > the function graph code into the kernel.
> > >
> > > This patch will ease the way for letting both function tracing
> > > and function graph tracing run together.
> > >
> > > ...
> > >
> > > +static int ftrace_mod_jmp(unsigned long ip,
> > > + int old_offset, int new_offset)
> > > +{
> > > + unsigned char code[MCOUNT_INSN_SIZE];
> > > +
> > > + if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
> > > + return -EFAULT;
> > > +
> > > + if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
> >
> > erk. I suspect that there's a nicer way of doing this amongst our
> > forest of get_unaligned_foo() interfaces. Harvey will know.
> >
>
> if (code[0] != 0xe9 || old_offset != get_unaligned((int *)(&code[1])))
urgh, OK, that didn't really improve anything except to document
something which was already rather obvious.
> > > + return -EINVAL;
> > > +
> > > + *(int *)(&code[1]) = new_offset;
> >
> > Might be able to use put_unaligned_foo() here.
> >
>
> put_unaligned(new_offset, (int *)(&code[1]));
>
> > The problem is that these functions use sizeof(*ptr) to work out what
> > to do, so a cast is still needed. A get_unaligned32(ptr) would be
> > nice. One which takes a void* and assumes CPU ordering.
>
> I've been thinking similarly, I could investigate something that
> goes in with the _noalign stuff?
If it's a commonly used pattern (you'd know better than I) then sure,
it would be clean to have
u32 just_gimme_the_u32_at(void *this_address);
> I'll finish the documentation patch for the _noalign stuff and then see
> about doing the host-order bits to fit in as well.
>
> Harvey
On Wed, 2008-11-26 at 00:04 -0800, Andrew Morton wrote:
> On Tue, 25 Nov 2008 22:52:29 -0800 Harvey Harrison <[email protected]> wrote:
> > > > + if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
> > >
> > > erk. I suspect that there's a nicer way of doing this amongst our
> > > forest of get_unaligned_foo() interfaces. Harvey will know.
> > >
> >
> > if (code[0] != 0xe9 || old_offset != get_unaligned((int *)(&code[1])))
>
> urgh, OK, that didn't really improve anything except to document
> something which was already rather obvious.
>
Yeah, it really isn't that great. It could be somewhat nicer if we had
the API for host-endian taking void and being explicit about the size:
if (code[0] != 0xe9 || old_offset != load32_noalign(&code[1]))
This is similar to the new API in -mm load_le32_noalign, but I
don't think it would be worth load_u32_noalign...load32 should
be enough.
> > > > + return -EINVAL;
> > > > +
> > > > + *(int *)(&code[1]) = new_offset;
> > >
> > > Might be able to use put_unaligned_foo() here.
> > >
> >
> > put_unaligned(new_offset, (int *)(&code[1]));
> >
In a similar vein to above, this becomes:
store32_noalign(&code[1], new_offset);
I rather like this as it would allow most (if not all) of the figure
out what size based on pointer type versions to be chucked.
Turns out all the pieces are there in -mm that the patch is pretty
trivial (not for application, just a heads-up):
include/asm-generic/unaligned.h | 14 ++++++++++++++
1 files changed, 14 insertions(+), 0 deletions(-)
diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h
index d2f3998..5a055f7 100644
--- a/include/asm-generic/unaligned.h
+++ b/include/asm-generic/unaligned.h
@@ -343,9 +343,23 @@ extern void __bad_unaligned_access_size(void);
#ifdef __LITTLE_ENDIAN
# define get_unaligned __get_unaligned_le
# define put_unaligned __put_unaligned_le
+
+# define load16_noalign(p) load_le16_noalign((void *)(p))
+# define load32_noalign(p) load_le32_noalign((void *)(p))
+# define load64_noalign(p) load_le64_noalign((void *)(p))
+# define store16_noalign(p, val) store_le16_noalign((void *)(p), (val))
+# define store32_noalign(p, val) store_le32_noalign((void *)(p), (val))
+# define store64_noalign(p, val) store_le64_noalign((void *)(p), (val))
#else
# define get_unaligned __get_unaligned_be
# define put_unaligned __put_unaligned_be
+
+# define load16_noalign(p) load_be16_noalign((void *)(p))
+# define load32_noalign(p) load_be32_noalign((void *)(p))
+# define load64_noalign(p) load_be64_noalign((void *)(p))
+# define store16_noalign(p, val) store_be16_noalign((void *)(p), (val))
+# define store32_noalign(p, val) store_be32_noalign((void *)(p), (val))
+# define store64_noalign(p, val) store_be64_noalign((void *)(p), (val))
#endif
#endif /* _ASM_GENERIC_UNALIGNED_H */
--
1.6.0.4.1044.g77718
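For reference, ftrace_mod_jmp() rewritten against those helpers might look as follows. This is only a sketch assuming the proposed load32_noalign()/store32_noalign() macros above land with the host-order semantics described; it is not part of any posted patch.

static int ftrace_mod_jmp(unsigned long ip,
			  int old_offset, int new_offset)
{
	unsigned char code[MCOUNT_INSN_SIZE];

	if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* 0xe9 is the opcode of a near jmp with a rel32 operand */
	if (code[0] != 0xe9 || old_offset != load32_noalign(&code[1]))
		return -EINVAL;

	store32_noalign(&code[1], new_offset);

	if (do_ftrace_mod_code(ip, &code))
		return -EPERM;

	return 0;
}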
On Wed, 26 Nov 2008 00:27:04 -0800 Harvey Harrison <[email protected]> wrote:
> if (code[0] != 0xe9 || old_offset != load32_noalign(&code[1]))
>
> This is similar to the new API in -mm load_le32_noalign, but I
> don't think it would be worth load_u32_noalign...load32 should
> be enough.
>
> > > > > + return -EINVAL;
> > > > > +
> > > > > + *(int *)(&code[1]) = new_offset;
> > > >
> > > > Might be able to use put_unaligned_foo() here.
> > > >
> > >
> > > put_unaligned(new_offset, (int *)(&code[1]));
> > >
>
> In a similar vein to above, this becomes:
>
> store32_noalign(&code[1], new_offset);
>
yes, that's much better than the party tricks with magical sizeof,
which forces you to run around and check the types of everything.
I've seen people doing get_user() on `long' types and such things
occasionally.
On Wed, 2008-11-26 at 00:35 -0800, Andrew Morton wrote:
> On Wed, 26 Nov 2008 00:27:04 -0800 Harvey Harrison <[email protected]> wrote:
>
> > if (code[0] != 0xe9 || old_offset != load32_noalign(&code[1]))
> >
> > This is similar to the new API in -mm load_le32_noalign, but I
> > don't think it would be worth load_u32_noalign...load32 should
> > be enough.
> >
> > > > > > + return -EINVAL;
> > > > > > +
> > > > > > + *(int *)(&code[1]) = new_offset;
> > > > >
> > > > > Might be able to use put_unaligned_foo() here.
> > > > >
> > > >
> > > > put_unaligned(new_offset, (int *)(&code[1]));
> > > >
> >
> > In a similar vein to above, this becomes:
> >
> > store32_noalign(&code[1], new_offset);
> >
>
> yes, that's much better than the party tricks with magical sizeof,
> which forces you to run around and check the types of everything.
>
> I've seen people doing get_user() on `long' types and such things
> occasionally.
>
Do you want to carry the patches to move to the new helpers until they
hit mainline, or would you rather I waited until they can go through
maintainer trees?
I've got the pile ready removing all of the get/put_{endian} and moving
to the load/store API. Not much left after that to just remove the
magical sizeof versions too. Just let me know what timing you'd prefer.
Also, all of this ends up being so intertwined in the aligned/unaligned
cases that I'd like to move most of Documentation/unaligned_memory_access.txt
into a new alignment_and_byteorder.txt to cover all of these new helpers.
I started most of a byteorder document, but constantly referring to the other
file made it a bit tiresome, would you mind a consolidated document?
Harvey
On Wed, 26 Nov 2008 00:44:48 -0800 Harvey Harrison <[email protected]> wrote:
> On Wed, 2008-11-26 at 00:35 -0800, Andrew Morton wrote:
> > On Wed, 26 Nov 2008 00:27:04 -0800 Harvey Harrison <[email protected]> wrote:
> >
> > > if (code[0] != 0xe9 || old_offset != load32_noalign(&code[1]))
> > >
> > > This is similar to the new API in -mm load_le32_noalign, but I
> > > don't think it would be worth load_u32_noalign...load32 should
> > > be enough.
> > >
> > > > > > > + return -EINVAL;
> > > > > > > +
> > > > > > > + *(int *)(&code[1]) = new_offset;
> > > > > >
> > > > > > Might be able to use put_unaligned_foo() here.
> > > > > >
> > > > >
> > > > > put_unaligned(new_offset, (int *)(&code[1]));
> > > > >
> > >
> > > In a similar vein to above, this becomes:
> > >
> > > store32_noalign(&code[1], new_offset);
> > >
> >
> > yes, that's much better than the party tricks with magical sizeof,
> > which forces you to run around and check the types of everything.
> >
> > I've seen people doing get_user() on `long' types and such things
> > occasionally.
> >
>
> Do you want to carry the patches to move to the new helpers until they
> hit mainline, or would you rather I waited until they can go through
> maintainer trees?
I don't know what that means, really.
> I've got the pile ready removing all of the get/put_{endian} and moving
> to the load/store API. Not much left after that to just remove the
> magical sizeof versions too. Just let me know what timing you'd prefer.
I don't understand the dependencies and relationships here.
If you have a bunch of patches which are applicable to current mainline
then just spray 'em out with suitable cc's and we'll see which bits
stick where. Or is it more complicated than that?
> Also, all of this ends up being so intertwined in the aligned/unaligned
> cases that I'd like to move most of Documentation/unaligned_memory_access.txt
> into a new alignment_and_byteorder.txt to cover all of these new helpers.
>
> I started most of a byteorder document, but constantly referring to the other
> file made it a bit tiresome, would you mind a consolidated document?
Whatever you think is best. Propose something...
On Wed, 2008-11-26 at 01:05 -0800, Andrew Morton wrote:
> >
> > Do you want to carry the patches to move to the new helpers until they
> > hit mainline, or would you rather I waited until they can go through
> > maintainer trees?
>
> I don't know what that means, really.
>
> > I've got the pile ready removing all of the get/put_{endian} and moving
> > to the load/store API. Not much left after that to just remove the
> > magical sizeof versions too. Just let me know what timing you'd prefer.
>
> I don't understand the dependencies and relationships here.
The pile I refer to depends on the full set of patches in -mm which
includes (I can give all the patch names if you want):
1) byteorder patches moving all remaining arches to use
include/linux/byteorder.h
2) unaligned access patches consolidating arches in
asm-generic/unaligned.h with no changes
3) move the memset-using arches and ARM to the asm-generic version
by moving __packed onto the struct rather than the struct members
4) introduce the load/store_{endian} API
Timing:
1) - 2) I hope all get into mainline in 2.6.29.
3) I believe could go in 2.6.29, and I'm pretty confident it is OK after
talking to some compiler folks.
4) can go in with 3)
My pile is just removing users of get_unaligned/put_unaligned/{endian}_to_cpup
and using the new typesafe versions. At the end of the series we remove the old API.
>
> If you have a bunch of patches which are applicable to current mainline
> then just spray 'em out with suitable cc's and we'll see which bits
> stick where. Or is it more complicated than that?
See above, not suitable for mainline.
>
> > Also, all of this ends up being so intertwined in the aligned/unaligned
> > cases that I'd like to move most of Documentation/unaligned_memory_access.txt
> > into a new alignment_and_byteorder.txt to cover all of these new helpers.
> >
> > I started most of a byteorder document, but constantly referring to the other
> > file made it a bit tiresome, would you mind a consolidated document?
>
> Whatever you think is best. Propose something...
Thinking some more, I'll just dump the new endian docs into unaligned_access.txt
and that at least gets the documentation out there. Later on it can be renamed to
something more reflective of its contents.
Harvey
On Tue, 25 Nov 2008, Andrew Morton wrote:
> On Wed, 26 Nov 2008 00:16:24 -0500 Steven Rostedt <[email protected]> wrote:
>
> > From: Steven Rostedt <[email protected]>
> >
> > Impact: more efficient code for ftrace graph tracer
> >
> > This patch uses the dynamic patching, when available, to patch
> > the function graph code into the kernel.
> >
> > This patch will ease the way for letting both function tracing
> > and function graph tracing run together.
> >
> > ...
> >
> > +static int ftrace_mod_jmp(unsigned long ip,
> > + int old_offset, int new_offset)
> > +{
> > + unsigned char code[MCOUNT_INSN_SIZE];
> > +
> > + if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
> > + return -EFAULT;
> > +
> > + if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
>
> erk. I suspect that there's a nicer way of doing this amongst our
> forest of get_unaligned_foo() interfaces. Harvey will know.
Hmm, I may be able to make a struct out of code.
struct {
unsigned char op;
unsigned int offset;
} code __attribute__((packed));
Would that look better?
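For concreteness, ftrace_mod_jmp() built around such a struct might read roughly as below. This is only an illustrative sketch, not part of the posted patch; it assumes the packed struct's size matches MCOUNT_INSN_SIZE (5 bytes on x86: one opcode byte plus a 32-bit relative offset).

static int ftrace_mod_jmp(unsigned long ip,
			  int old_offset, int new_offset)
{
	struct {
		unsigned char op;	/* expected to be 0xe9 (near jmp) */
		int offset;		/* rel32 operand of the jmp */
	} __attribute__((packed)) code;

	if (probe_kernel_read(&code, (void *)ip, sizeof(code)))
		return -EFAULT;

	if (code.op != 0xe9 || code.offset != old_offset)
		return -EINVAL;

	code.offset = new_offset;

	if (do_ftrace_mod_code(ip, &code))
		return -EPERM;

	return 0;
}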
>
> > + return -EINVAL;
> > +
> > + *(int *)(&code[1]) = new_offset;
>
> Might be able to use put_unaligned_foo() here.
>
> The problem is that these functions use sizeof(*ptr) to work out what
> to do, so a cast is still needed. A get_unaligned32(ptr) would be
> nice. One which takes a void* and assumes CPU ordering.
Is there a correctness concern here? This is arch specific code, so I'm
not worried about other archs.
-- Steve
>
> > + if (do_ftrace_mod_code(ip, &code))
> > + return -EPERM;
> > +
> > + return 0;
> > +}
> > +
>
>
>
On Wed, 26 Nov 2008 11:46:58 -0500 (EST) Steven Rostedt <[email protected]> wrote:
>
>
> On Tue, 25 Nov 2008, Andrew Morton wrote:
>
> > On Wed, 26 Nov 2008 00:16:24 -0500 Steven Rostedt <[email protected]> wrote:
> >
> > > From: Steven Rostedt <[email protected]>
> > >
> > > Impact: more efficient code for ftrace graph tracer
> > >
> > > This patch uses the dynamic patching, when available, to patch
> > > the function graph code into the kernel.
> > >
> > > This patch will ease the way for letting both function tracing
> > > and function graph tracing run together.
> > >
> > > ...
> > >
> > > +static int ftrace_mod_jmp(unsigned long ip,
> > > + int old_offset, int new_offset)
> > > +{
> > > + unsigned char code[MCOUNT_INSN_SIZE];
> > > +
> > > + if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
> > > + return -EFAULT;
> > > +
> > > + if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
> >
> > erk. I suspect that there's a nicer way of doing this amongst our
> > forest of get_unaligned_foo() interfaces. Harvey will know.
>
> Hmm, I may be able to make a struct out of code.
>
> struct {
> unsigned char op;
> unsigned int offset;
> } code __attribute__((packed));
>
> Would that look better?
nah, let's do something more generic for this.
> >
> > > + return -EINVAL;
> > > +
> > > + *(int *)(&code[1]) = new_offset;
> >
> > Might be able to use put_unaligned_foo() here.
> >
> > The problem is that these functions use sizeof(*ptr) to work out what
> > to do, so a cast is still needed. A get_unaligned32(ptr) would be
> > nice. One which takes a void* and assumes CPU ordering.
>
> Is there a correctness concern here? This is arch specific code, so I'm
> not worried about other archs.
No, the code is OK as-is.
It's just that "read a word from an [maybe-]unaligned address" is such
a common operation that there should be a nice clean simple function to
do it, rather than doing open-coded (and different) weird C tricks at each
and every site.
On Wed, 2008-11-26 at 10:02 -0800, Andrew Morton wrote:
> > >
> > > > + return -EINVAL;
> > > > +
> > > > + *(int *)(&code[1]) = new_offset;
> > >
> > > Might be able to use put_unaligned_foo() here.
> > >
> > > The problem is that these functions use sizeof(*ptr) to work out what
> > > to do, so a cast is still needed. A get_unaligned32(ptr) would be
> > > nice. One which takes a void* and assumes CPU ordering.
> >
> > Is there a correctness concern here? This is arch specific code, so I'm
> > not worried about other archs.
>
> No, the code is OK as-is.
>
> It's just that "read a word from an [maybe-]unaligned address" is such
> a common operation that there should be a nice clean simple function to
> do it, rather than doing open-coded (and different) weird C tricks at each
> and\ every site.
>
Also it is arch-specific, so if you know unaligned access is OK, just doing the
cast+deref is OK.
Harvey