2009-04-16 02:19:58

by Steven Rostedt

[permalink] [raw]
Subject: [PATCH 1/5] ftrace: use module notifier for function tracer

From: Steven Rostedt <[email protected]>

Impact: fix and clean up

The hooks in the module code for the function tracer must be called
before any of that module code runs. The function tracer hooks
modify the module (replacing calls to mcount to nops). If the code
is executed while the change occurs, then the CPU can take a GPF.

To handle the above with a bit of paranoia, I originally implemented
the hooks as calls directly from the module code.

After examining the notifier calls, it looks as though the start up
notify is called before any of the module's code is executed. This makes
the use of the notify safe with ftrace.

Only the startup notify is required to be "safe". The shutdown simply
removes the entries from the ftrace function list, and does not modify
any code.

This change has another benefit. It removes a issue with a reverse dependency
in the mutexes of ftrace_lock and module_mutex.

Cc: Rusty Russell <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
---
include/linux/ftrace.h | 7 ----
include/linux/module.h | 4 ++
kernel/module.c | 19 ++++------
kernel/trace/ftrace.c | 90 ++++++++++++++++++++++++++++++++++--------------
4 files changed, 75 insertions(+), 45 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 53869be..97c83e1 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -233,8 +233,6 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size);

extern int skip_trace(unsigned long ip);

-extern void ftrace_release(void *start, unsigned long size);
-
extern void ftrace_disable_daemon(void);
extern void ftrace_enable_daemon(void);
#else
@@ -325,13 +323,8 @@ static inline void __ftrace_enabled_restore(int enabled)

#ifdef CONFIG_FTRACE_MCOUNT_RECORD
extern void ftrace_init(void);
-extern void ftrace_init_module(struct module *mod,
- unsigned long *start, unsigned long *end);
#else
static inline void ftrace_init(void) { }
-static inline void
-ftrace_init_module(struct module *mod,
- unsigned long *start, unsigned long *end) { }
#endif

/*
diff --git a/include/linux/module.h b/include/linux/module.h
index 6155fa4..a8f2c0a 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -341,6 +341,10 @@ struct module
struct ftrace_event_call *trace_events;
unsigned int num_trace_events;
#endif
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+ unsigned long *ftrace_callsites;
+ unsigned int num_ftrace_callsites;
+#endif

#ifdef CONFIG_MODULE_UNLOAD
/* What modules depend on me? */
diff --git a/kernel/module.c b/kernel/module.c
index a039470..2383e60 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1490,9 +1490,6 @@ static void free_module(struct module *mod)
/* Free any allocated parameters. */
destroy_params(mod->kp, mod->num_kp);

- /* release any pointers to mcount in this module */
- ftrace_release(mod->module_core, mod->core_size);
-
/* This may be NULL, but that's OK */
module_free(mod, mod->module_init);
kfree(mod->args);
@@ -1893,11 +1890,9 @@ static noinline struct module *load_module(void __user *umod,
unsigned int symindex = 0;
unsigned int strindex = 0;
unsigned int modindex, versindex, infoindex, pcpuindex;
- unsigned int num_mcount;
struct module *mod;
long err = 0;
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
- unsigned long *mseg;
mm_segment_t old_fs;

DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -2179,7 +2174,13 @@ static noinline struct module *load_module(void __user *umod,
sizeof(*mod->trace_events),
&mod->num_trace_events);
#endif
-
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+ /* sechdrs[0].sh_size is always zero */
+ mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings,
+ "__mcount_loc",
+ sizeof(*mod->ftrace_callsites),
+ &mod->num_ftrace_callsites);
+#endif
#ifdef CONFIG_MODVERSIONS
if ((mod->num_syms && !mod->crcs)
|| (mod->num_gpl_syms && !mod->gpl_crcs)
@@ -2244,11 +2245,6 @@ static noinline struct module *load_module(void __user *umod,
dynamic_debug_setup(debug, num_debug);
}

- /* sechdrs[0].sh_size is always zero */
- mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
- sizeof(*mseg), &num_mcount);
- ftrace_init_module(mod, mseg, mseg + num_mcount);
-
err = module_finalize(hdr, sechdrs, mod);
if (err < 0)
goto cleanup;
@@ -2309,7 +2305,6 @@ static noinline struct module *load_module(void __user *umod,
cleanup:
kobject_del(&mod->mkobj.kobj);
kobject_put(&mod->mkobj.kobj);
- ftrace_release(mod->module_core, mod->core_size);
free_unload:
module_unload_free(mod);
#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a234889..5b606f4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -916,30 +916,6 @@ static void ftrace_free_rec(struct dyn_ftrace *rec)
rec->flags |= FTRACE_FL_FREE;
}

-void ftrace_release(void *start, unsigned long size)
-{
- struct dyn_ftrace *rec;
- struct ftrace_page *pg;
- unsigned long s = (unsigned long)start;
- unsigned long e = s + size;
-
- if (ftrace_disabled || !start)
- return;
-
- mutex_lock(&ftrace_lock);
- do_for_each_ftrace_rec(pg, rec) {
- if ((rec->ip >= s) && (rec->ip < e)) {
- /*
- * rec->ip is changed in ftrace_free_rec()
- * It should not between s and e if record was freed.
- */
- FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
- ftrace_free_rec(rec);
- }
- } while_for_each_ftrace_rec();
- mutex_unlock(&ftrace_lock);
-}
-
static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
{
struct dyn_ftrace *rec;
@@ -2752,14 +2728,72 @@ static int ftrace_convert_nops(struct module *mod,
return 0;
}

-void ftrace_init_module(struct module *mod,
- unsigned long *start, unsigned long *end)
+#ifdef CONFIG_MODULES
+void ftrace_release(void *start, void *end)
+{
+ struct dyn_ftrace *rec;
+ struct ftrace_page *pg;
+ unsigned long s = (unsigned long)start;
+ unsigned long e = (unsigned long)end;
+
+ if (ftrace_disabled || !start || start == end)
+ return;
+
+ mutex_lock(&ftrace_lock);
+ do_for_each_ftrace_rec(pg, rec) {
+ if ((rec->ip >= s) && (rec->ip < e)) {
+ /*
+ * rec->ip is changed in ftrace_free_rec()
+ * It should not between s and e if record was freed.
+ */
+ FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
+ ftrace_free_rec(rec);
+ }
+ } while_for_each_ftrace_rec();
+ mutex_unlock(&ftrace_lock);
+}
+
+static void ftrace_init_module(struct module *mod,
+ unsigned long *start, unsigned long *end)
{
if (ftrace_disabled || start == end)
return;
ftrace_convert_nops(mod, start, end);
}

+static int ftrace_module_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct module *mod = data;
+
+ switch (val) {
+ case MODULE_STATE_COMING:
+ ftrace_init_module(mod, mod->ftrace_callsites,
+ mod->ftrace_callsites +
+ mod->num_ftrace_callsites);
+ break;
+ case MODULE_STATE_GOING:
+ ftrace_release(mod->ftrace_callsites,
+ mod->ftrace_callsites +
+ mod->num_ftrace_callsites);
+ break;
+ }
+
+ return 0;
+}
+#else
+static int ftrace_module_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ return 0;
+}
+#endif /* CONFIG_MODULES */
+
+struct notifier_block ftrace_module_nb = {
+ .notifier_call = ftrace_module_notify,
+ .priority = 0,
+};
+
extern unsigned long __start_mcount_loc[];
extern unsigned long __stop_mcount_loc[];

@@ -2791,6 +2825,10 @@ void __init ftrace_init(void)
__start_mcount_loc,
__stop_mcount_loc);

+ ret = register_module_notifier(&ftrace_module_nb);
+ if (!ret)
+ pr_warning("Failed to register trace ftrace module notifier\n");
+
return;
failed:
ftrace_disabled = 1;
--
1.6.2.1

--


2009-04-16 15:36:36

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer

On Wed, Apr 15, 2009 at 10:18:31PM -0400, Steven Rostedt wrote:
> From: Steven Rostedt <[email protected]>
>
> Impact: fix and clean up
>
> The hooks in the module code for the function tracer must be called
> before any of that module code runs. The function tracer hooks
> modify the module (replacing calls to mcount to nops). If the code
> is executed while the change occurs, then the CPU can take a GPF.
>
> To handle the above with a bit of paranoia, I originally implemented
> the hooks as calls directly from the module code.
>
> After examining the notifier calls, it looks as though the start up
> notify is called before any of the module's code is executed. This makes
> the use of the notify safe with ftrace.
>
> Only the startup notify is required to be "safe". The shutdown simply
> removes the entries from the ftrace function list, and does not modify
> any code.
>
> This change has another benefit. It removes a issue with a reverse dependency
> in the mutexes of ftrace_lock and module_mutex.
>
> Cc: Rusty Russell <[email protected]>
> Signed-off-by: Steven Rostedt <[email protected]>
> ---
> include/linux/ftrace.h | 7 ----
> include/linux/module.h | 4 ++
> kernel/module.c | 19 ++++------
> kernel/trace/ftrace.c | 90 ++++++++++++++++++++++++++++++++++--------------
> 4 files changed, 75 insertions(+), 45 deletions(-)
>
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 53869be..97c83e1 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -233,8 +233,6 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size);
>
> extern int skip_trace(unsigned long ip);
>
> -extern void ftrace_release(void *start, unsigned long size);
> -
> extern void ftrace_disable_daemon(void);
> extern void ftrace_enable_daemon(void);
> #else
> @@ -325,13 +323,8 @@ static inline void __ftrace_enabled_restore(int enabled)
>
> #ifdef CONFIG_FTRACE_MCOUNT_RECORD
> extern void ftrace_init(void);
> -extern void ftrace_init_module(struct module *mod,
> - unsigned long *start, unsigned long *end);
> #else
> static inline void ftrace_init(void) { }
> -static inline void
> -ftrace_init_module(struct module *mod,
> - unsigned long *start, unsigned long *end) { }
> #endif
>
> /*
> diff --git a/include/linux/module.h b/include/linux/module.h
> index 6155fa4..a8f2c0a 100644
> --- a/include/linux/module.h
> +++ b/include/linux/module.h
> @@ -341,6 +341,10 @@ struct module
> struct ftrace_event_call *trace_events;
> unsigned int num_trace_events;
> #endif
> +#ifdef CONFIG_FTRACE_MCOUNT_RECORD
> + unsigned long *ftrace_callsites;
> + unsigned int num_ftrace_callsites;
> +#endif
>
> #ifdef CONFIG_MODULE_UNLOAD
> /* What modules depend on me? */
> diff --git a/kernel/module.c b/kernel/module.c
> index a039470..2383e60 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -1490,9 +1490,6 @@ static void free_module(struct module *mod)
> /* Free any allocated parameters. */
> destroy_params(mod->kp, mod->num_kp);
>
> - /* release any pointers to mcount in this module */
> - ftrace_release(mod->module_core, mod->core_size);
> -
> /* This may be NULL, but that's OK */
> module_free(mod, mod->module_init);
> kfree(mod->args);
> @@ -1893,11 +1890,9 @@ static noinline struct module *load_module(void __user *umod,
> unsigned int symindex = 0;
> unsigned int strindex = 0;
> unsigned int modindex, versindex, infoindex, pcpuindex;
> - unsigned int num_mcount;
> struct module *mod;
> long err = 0;
> void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
> - unsigned long *mseg;
> mm_segment_t old_fs;
>
> DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
> @@ -2179,7 +2174,13 @@ static noinline struct module *load_module(void __user *umod,
> sizeof(*mod->trace_events),
> &mod->num_trace_events);
> #endif
> -
> +#ifdef CONFIG_FTRACE_MCOUNT_RECORD
> + /* sechdrs[0].sh_size is always zero */
> + mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings,
> + "__mcount_loc",
> + sizeof(*mod->ftrace_callsites),
> + &mod->num_ftrace_callsites);
> +#endif
> #ifdef CONFIG_MODVERSIONS
> if ((mod->num_syms && !mod->crcs)
> || (mod->num_gpl_syms && !mod->gpl_crcs)
> @@ -2244,11 +2245,6 @@ static noinline struct module *load_module(void __user *umod,
> dynamic_debug_setup(debug, num_debug);
> }
>
> - /* sechdrs[0].sh_size is always zero */
> - mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
> - sizeof(*mseg), &num_mcount);
> - ftrace_init_module(mod, mseg, mseg + num_mcount);
> -
> err = module_finalize(hdr, sechdrs, mod);
> if (err < 0)
> goto cleanup;
> @@ -2309,7 +2305,6 @@ static noinline struct module *load_module(void __user *umod,
> cleanup:
> kobject_del(&mod->mkobj.kobj);
> kobject_put(&mod->mkobj.kobj);
> - ftrace_release(mod->module_core, mod->core_size);
> free_unload:
> module_unload_free(mod);
> #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index a234889..5b606f4 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -916,30 +916,6 @@ static void ftrace_free_rec(struct dyn_ftrace *rec)
> rec->flags |= FTRACE_FL_FREE;
> }
>
> -void ftrace_release(void *start, unsigned long size)
> -{
> - struct dyn_ftrace *rec;
> - struct ftrace_page *pg;
> - unsigned long s = (unsigned long)start;
> - unsigned long e = s + size;
> -
> - if (ftrace_disabled || !start)
> - return;
> -
> - mutex_lock(&ftrace_lock);
> - do_for_each_ftrace_rec(pg, rec) {
> - if ((rec->ip >= s) && (rec->ip < e)) {
> - /*
> - * rec->ip is changed in ftrace_free_rec()
> - * It should not between s and e if record was freed.
> - */
> - FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
> - ftrace_free_rec(rec);
> - }
> - } while_for_each_ftrace_rec();
> - mutex_unlock(&ftrace_lock);
> -}
> -
> static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
> {
> struct dyn_ftrace *rec;
> @@ -2752,14 +2728,72 @@ static int ftrace_convert_nops(struct module *mod,
> return 0;
> }
>
> -void ftrace_init_module(struct module *mod,
> - unsigned long *start, unsigned long *end)
> +#ifdef CONFIG_MODULES
> +void ftrace_release(void *start, void *end)
> +{
> + struct dyn_ftrace *rec;
> + struct ftrace_page *pg;
> + unsigned long s = (unsigned long)start;
> + unsigned long e = (unsigned long)end;
> +
> + if (ftrace_disabled || !start || start == end)
> + return;
> +
> + mutex_lock(&ftrace_lock);
> + do_for_each_ftrace_rec(pg, rec) {
> + if ((rec->ip >= s) && (rec->ip < e)) {
> + /*
> + * rec->ip is changed in ftrace_free_rec()
> + * It should not between s and e if record was freed.
> + */
> + FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
> + ftrace_free_rec(rec);
> + }
> + } while_for_each_ftrace_rec();
> + mutex_unlock(&ftrace_lock);
> +}
> +
> +static void ftrace_init_module(struct module *mod,
> + unsigned long *start, unsigned long *end)
> {
> if (ftrace_disabled || start == end)
> return;
> ftrace_convert_nops(mod, start, end);
> }
>
> +static int ftrace_module_notify(struct notifier_block *self,
> + unsigned long val, void *data)
> +{
> + struct module *mod = data;
> +
> + switch (val) {
> + case MODULE_STATE_COMING:
> + ftrace_init_module(mod, mod->ftrace_callsites,
> + mod->ftrace_callsites +
> + mod->num_ftrace_callsites);
> + break;
> + case MODULE_STATE_GOING:
> + ftrace_release(mod->ftrace_callsites,
> + mod->ftrace_callsites +
> + mod->num_ftrace_callsites);
> + break;
> + }
> +
> + return 0;
> +}
> +#else
> +static int ftrace_module_notify(struct notifier_block *self,
> + unsigned long val, void *data)
> +{
> + return 0;
> +}



You don't seem to like my __init idea :)



> +#endif /* CONFIG_MODULES */
> +
> +struct notifier_block ftrace_module_nb = {
> + .notifier_call = ftrace_module_notify,
> + .priority = 0,
> +};
> +



Neither the __initdata_or_module.


Frederic.

> extern unsigned long __start_mcount_loc[];
> extern unsigned long __stop_mcount_loc[];
>
> @@ -2791,6 +2825,10 @@ void __init ftrace_init(void)
> __start_mcount_loc,
> __stop_mcount_loc);
>
> + ret = register_module_notifier(&ftrace_module_nb);
> + if (!ret)
> + pr_warning("Failed to register trace ftrace module notifier\n");
> +
> return;
> failed:
> ftrace_disabled = 1;
> --
> 1.6.2.1
>
> --

2009-04-16 15:54:55

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer


On Thu, 16 Apr 2009, Frederic Weisbecker wrote:
> >
> > +static int ftrace_module_notify(struct notifier_block *self,
> > + unsigned long val, void *data)
> > +{
> > + struct module *mod = data;
> > +
> > + switch (val) {
> > + case MODULE_STATE_COMING:
> > + ftrace_init_module(mod, mod->ftrace_callsites,
> > + mod->ftrace_callsites +
> > + mod->num_ftrace_callsites);
> > + break;
> > + case MODULE_STATE_GOING:
> > + ftrace_release(mod->ftrace_callsites,
> > + mod->ftrace_callsites +
> > + mod->num_ftrace_callsites);
> > + break;
> > + }
> > +
> > + return 0;
> > +}
> > +#else
> > +static int ftrace_module_notify(struct notifier_block *self,
> > + unsigned long val, void *data)
> > +{
> > + return 0;
> > +}
>
>
>
> You don't seem to like my __init idea :)

Nah, just forgot about it.

>
>
>
> > +#endif /* CONFIG_MODULES */
> > +
> > +struct notifier_block ftrace_module_nb = {
> > + .notifier_call = ftrace_module_notify,
> > + .priority = 0,
> > +};
> > +
>
>
>
> Neither the __initdata_or_module.

Ooh, I never knew that existed.

-- Steve

2009-04-17 11:45:15

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer


Rusty,

Can I get your Acked-by on this patch too?

Thanks,

-- Steve


On Wed, 15 Apr 2009, Steven Rostedt wrote:

> From: Steven Rostedt <[email protected]>
>
> Impact: fix and clean up
>
> The hooks in the module code for the function tracer must be called
> before any of that module code runs. The function tracer hooks
> modify the module (replacing calls to mcount to nops). If the code
> is executed while the change occurs, then the CPU can take a GPF.
>
> To handle the above with a bit of paranoia, I originally implemented
> the hooks as calls directly from the module code.
>
> After examining the notifier calls, it looks as though the start up
> notify is called before any of the module's code is executed. This makes
> the use of the notify safe with ftrace.
>
> Only the startup notify is required to be "safe". The shutdown simply
> removes the entries from the ftrace function list, and does not modify
> any code.
>
> This change has another benefit. It removes a issue with a reverse dependency
> in the mutexes of ftrace_lock and module_mutex.
>
> Cc: Rusty Russell <[email protected]>
> Signed-off-by: Steven Rostedt <[email protected]>
> ---
> include/linux/ftrace.h | 7 ----
> include/linux/module.h | 4 ++
> kernel/module.c | 19 ++++------
> kernel/trace/ftrace.c | 90 ++++++++++++++++++++++++++++++++++--------------
> 4 files changed, 75 insertions(+), 45 deletions(-)
>
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 53869be..97c83e1 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -233,8 +233,6 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size);
>
> extern int skip_trace(unsigned long ip);
>
> -extern void ftrace_release(void *start, unsigned long size);
> -
> extern void ftrace_disable_daemon(void);
> extern void ftrace_enable_daemon(void);
> #else
> @@ -325,13 +323,8 @@ static inline void __ftrace_enabled_restore(int enabled)
>
> #ifdef CONFIG_FTRACE_MCOUNT_RECORD
> extern void ftrace_init(void);
> -extern void ftrace_init_module(struct module *mod,
> - unsigned long *start, unsigned long *end);
> #else
> static inline void ftrace_init(void) { }
> -static inline void
> -ftrace_init_module(struct module *mod,
> - unsigned long *start, unsigned long *end) { }
> #endif
>
> /*
> diff --git a/include/linux/module.h b/include/linux/module.h
> index 6155fa4..a8f2c0a 100644
> --- a/include/linux/module.h
> +++ b/include/linux/module.h
> @@ -341,6 +341,10 @@ struct module
> struct ftrace_event_call *trace_events;
> unsigned int num_trace_events;
> #endif
> +#ifdef CONFIG_FTRACE_MCOUNT_RECORD
> + unsigned long *ftrace_callsites;
> + unsigned int num_ftrace_callsites;
> +#endif
>
> #ifdef CONFIG_MODULE_UNLOAD
> /* What modules depend on me? */
> diff --git a/kernel/module.c b/kernel/module.c
> index a039470..2383e60 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -1490,9 +1490,6 @@ static void free_module(struct module *mod)
> /* Free any allocated parameters. */
> destroy_params(mod->kp, mod->num_kp);
>
> - /* release any pointers to mcount in this module */
> - ftrace_release(mod->module_core, mod->core_size);
> -
> /* This may be NULL, but that's OK */
> module_free(mod, mod->module_init);
> kfree(mod->args);
> @@ -1893,11 +1890,9 @@ static noinline struct module *load_module(void __user *umod,
> unsigned int symindex = 0;
> unsigned int strindex = 0;
> unsigned int modindex, versindex, infoindex, pcpuindex;
> - unsigned int num_mcount;
> struct module *mod;
> long err = 0;
> void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
> - unsigned long *mseg;
> mm_segment_t old_fs;
>
> DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
> @@ -2179,7 +2174,13 @@ static noinline struct module *load_module(void __user *umod,
> sizeof(*mod->trace_events),
> &mod->num_trace_events);
> #endif
> -
> +#ifdef CONFIG_FTRACE_MCOUNT_RECORD
> + /* sechdrs[0].sh_size is always zero */
> + mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings,
> + "__mcount_loc",
> + sizeof(*mod->ftrace_callsites),
> + &mod->num_ftrace_callsites);
> +#endif
> #ifdef CONFIG_MODVERSIONS
> if ((mod->num_syms && !mod->crcs)
> || (mod->num_gpl_syms && !mod->gpl_crcs)
> @@ -2244,11 +2245,6 @@ static noinline struct module *load_module(void __user *umod,
> dynamic_debug_setup(debug, num_debug);
> }
>
> - /* sechdrs[0].sh_size is always zero */
> - mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
> - sizeof(*mseg), &num_mcount);
> - ftrace_init_module(mod, mseg, mseg + num_mcount);
> -
> err = module_finalize(hdr, sechdrs, mod);
> if (err < 0)
> goto cleanup;
> @@ -2309,7 +2305,6 @@ static noinline struct module *load_module(void __user *umod,
> cleanup:
> kobject_del(&mod->mkobj.kobj);
> kobject_put(&mod->mkobj.kobj);
> - ftrace_release(mod->module_core, mod->core_size);
> free_unload:
> module_unload_free(mod);
> #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index a234889..5b606f4 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -916,30 +916,6 @@ static void ftrace_free_rec(struct dyn_ftrace *rec)
> rec->flags |= FTRACE_FL_FREE;
> }
>
> -void ftrace_release(void *start, unsigned long size)
> -{
> - struct dyn_ftrace *rec;
> - struct ftrace_page *pg;
> - unsigned long s = (unsigned long)start;
> - unsigned long e = s + size;
> -
> - if (ftrace_disabled || !start)
> - return;
> -
> - mutex_lock(&ftrace_lock);
> - do_for_each_ftrace_rec(pg, rec) {
> - if ((rec->ip >= s) && (rec->ip < e)) {
> - /*
> - * rec->ip is changed in ftrace_free_rec()
> - * It should not between s and e if record was freed.
> - */
> - FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
> - ftrace_free_rec(rec);
> - }
> - } while_for_each_ftrace_rec();
> - mutex_unlock(&ftrace_lock);
> -}
> -
> static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
> {
> struct dyn_ftrace *rec;
> @@ -2752,14 +2728,72 @@ static int ftrace_convert_nops(struct module *mod,
> return 0;
> }
>
> -void ftrace_init_module(struct module *mod,
> - unsigned long *start, unsigned long *end)
> +#ifdef CONFIG_MODULES
> +void ftrace_release(void *start, void *end)
> +{
> + struct dyn_ftrace *rec;
> + struct ftrace_page *pg;
> + unsigned long s = (unsigned long)start;
> + unsigned long e = (unsigned long)end;
> +
> + if (ftrace_disabled || !start || start == end)
> + return;
> +
> + mutex_lock(&ftrace_lock);
> + do_for_each_ftrace_rec(pg, rec) {
> + if ((rec->ip >= s) && (rec->ip < e)) {
> + /*
> + * rec->ip is changed in ftrace_free_rec()
> + * It should not between s and e if record was freed.
> + */
> + FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
> + ftrace_free_rec(rec);
> + }
> + } while_for_each_ftrace_rec();
> + mutex_unlock(&ftrace_lock);
> +}
> +
> +static void ftrace_init_module(struct module *mod,
> + unsigned long *start, unsigned long *end)
> {
> if (ftrace_disabled || start == end)
> return;
> ftrace_convert_nops(mod, start, end);
> }
>
> +static int ftrace_module_notify(struct notifier_block *self,
> + unsigned long val, void *data)
> +{
> + struct module *mod = data;
> +
> + switch (val) {
> + case MODULE_STATE_COMING:
> + ftrace_init_module(mod, mod->ftrace_callsites,
> + mod->ftrace_callsites +
> + mod->num_ftrace_callsites);
> + break;
> + case MODULE_STATE_GOING:
> + ftrace_release(mod->ftrace_callsites,
> + mod->ftrace_callsites +
> + mod->num_ftrace_callsites);
> + break;
> + }
> +
> + return 0;
> +}
> +#else
> +static int ftrace_module_notify(struct notifier_block *self,
> + unsigned long val, void *data)
> +{
> + return 0;
> +}
> +#endif /* CONFIG_MODULES */
> +
> +struct notifier_block ftrace_module_nb = {
> + .notifier_call = ftrace_module_notify,
> + .priority = 0,
> +};
> +
> extern unsigned long __start_mcount_loc[];
> extern unsigned long __stop_mcount_loc[];
>
> @@ -2791,6 +2825,10 @@ void __init ftrace_init(void)
> __start_mcount_loc,
> __stop_mcount_loc);
>
> + ret = register_module_notifier(&ftrace_module_nb);
> + if (!ret)
> + pr_warning("Failed to register trace ftrace module notifier\n");
> +
> return;
> failed:
> ftrace_disabled = 1;
> --
> 1.6.2.1
>
> --
>

2009-04-19 11:25:26

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer

On Thu, 16 Apr 2009 11:48:31 am Steven Rostedt wrote:
> From: Steven Rostedt <[email protected]>
>
> Impact: fix and clean up
>
> The hooks in the module code for the function tracer must be called
> before any of that module code runs. The function tracer hooks
> modify the module (replacing calls to mcount to nops). If the code
> is executed while the change occurs, then the CPU can take a GPF.
>
> To handle the above with a bit of paranoia, I originally implemented
> the hooks as calls directly from the module code.
>
> After examining the notifier calls, it looks as though the start up
> notify is called before any of the module's code is executed. This makes
> the use of the notify safe with ftrace.

Hi Steven,

Unfortunately not: we do parse_args, which can call into the module code.
(Though it shouldn't do anything "significant", as it won't get a chance to
clean up if module load fails later).

I think you need to do something else in general. Share the module_mutex for
the ftrace code? The ksplice guys have a similar issue, so maybe we should
generalize this into a "kernel_text" mutex?

Thanks,
Rusty.

2009-04-20 13:58:09

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer



On Sun, 19 Apr 2009, Rusty Russell wrote:

> On Thu, 16 Apr 2009 11:48:31 am Steven Rostedt wrote:
> > From: Steven Rostedt <[email protected]>
> >
> > Impact: fix and clean up
> >
> > The hooks in the module code for the function tracer must be called
> > before any of that module code runs. The function tracer hooks
> > modify the module (replacing calls to mcount to nops). If the code
> > is executed while the change occurs, then the CPU can take a GPF.
> >
> > To handle the above with a bit of paranoia, I originally implemented
> > the hooks as calls directly from the module code.
> >
> > After examining the notifier calls, it looks as though the start up
> > notify is called before any of the module's code is executed. This makes
> > the use of the notify safe with ftrace.
>
> Hi Steven,
>
> Unfortunately not: we do parse_args, which can call into the module code.
> (Though it shouldn't do anything "significant", as it won't get a chance to
> clean up if module load fails later).
>
> I think you need to do something else in general. Share the module_mutex for
> the ftrace code? The ksplice guys have a similar issue, so maybe we should
> generalize this into a "kernel_text" mutex?

Hi Rusty,

Thanks, for the update. I think we may still be OK.

The thing that dynamic ftrace must watch out for is not running of code
per say, but the executing of code on one cpu that is being modified on
another.

Can those parse_args kick off threads? Hmm, probably. Sounds nasty to
me.

The other thing is, if the parse_args code is only in "__init" then they
also will not be touched.

I guess we can keep the code as notify if we never expect to run kthreads
from parse_args. Or is it possible to move parse_args after notify?

-- Steve

2009-04-21 05:16:05

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer

On Mon, 20 Apr 2009 11:27:35 pm Steven Rostedt wrote:
>
> On Sun, 19 Apr 2009, Rusty Russell wrote:
>
> > On Thu, 16 Apr 2009 11:48:31 am Steven Rostedt wrote:
> > > From: Steven Rostedt <[email protected]>
> > >
> > > Impact: fix and clean up
> > >
> > > The hooks in the module code for the function tracer must be called
> > > before any of that module code runs. The function tracer hooks
> > > modify the module (replacing calls to mcount to nops). If the code
> > > is executed while the change occurs, then the CPU can take a GPF.
> > >
> > > To handle the above with a bit of paranoia, I originally implemented
> > > the hooks as calls directly from the module code.
> > >
> > > After examining the notifier calls, it looks as though the start up
> > > notify is called before any of the module's code is executed. This makes
> > > the use of the notify safe with ftrace.
> >
> > Hi Steven,
> >
> > Unfortunately not: we do parse_args, which can call into the module code.
> > (Though it shouldn't do anything "significant", as it won't get a chance to
> > clean up if module load fails later).
> >
> > I think you need to do something else in general. Share the module_mutex for
> > the ftrace code? The ksplice guys have a similar issue, so maybe we should
> > generalize this into a "kernel_text" mutex?
>
> Hi Rusty,
>
> Thanks, for the update. I think we may still be OK.

Agreed, just wanted to make sure you were aware.

> Can those parse_args kick off threads? Hmm, probably. Sounds nasty to
> me.

Not without a bug. Imagine you have a "create_threads" module_param, someone
loads the module with two args "create_threads crap". We call the
create_threads parse function via parse_args, then hit the crap parameter
and free the module. Oops.

> The other thing is, if the parse_args code is only in "__init" then they
> also will not be touched.

It can be non-init for sysfs access.

FWIW:
Acked-by: Rusty Russell <[email protected]>

Thanks,
Rusty.

2009-04-21 13:13:19

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer


On Tue, 21 Apr 2009, Rusty Russell wrote:

> On Mon, 20 Apr 2009 11:27:35 pm Steven Rostedt wrote:
> >
> > On Sun, 19 Apr 2009, Rusty Russell wrote:
> >
> > > On Thu, 16 Apr 2009 11:48:31 am Steven Rostedt wrote:
> > > > From: Steven Rostedt <[email protected]>
> > > >
> > > > Impact: fix and clean up
> > > >
> > > > The hooks in the module code for the function tracer must be called
> > > > before any of that module code runs. The function tracer hooks
> > > > modify the module (replacing calls to mcount to nops). If the code
> > > > is executed while the change occurs, then the CPU can take a GPF.
> > > >
> > > > To handle the above with a bit of paranoia, I originally implemented
> > > > the hooks as calls directly from the module code.
> > > >
> > > > After examining the notifier calls, it looks as though the start up
> > > > notify is called before any of the module's code is executed. This makes
> > > > the use of the notify safe with ftrace.
> > >
> > > Hi Steven,
> > >
> > > Unfortunately not: we do parse_args, which can call into the module code.
> > > (Though it shouldn't do anything "significant", as it won't get a chance to
> > > clean up if module load fails later).
> > >
> > > I think you need to do something else in general. Share the module_mutex for
> > > the ftrace code? The ksplice guys have a similar issue, so maybe we should
> > > generalize this into a "kernel_text" mutex?
> >
> > Hi Rusty,
> >
> > Thanks, for the update. I think we may still be OK.
>
> Agreed, just wanted to make sure you were aware.
>
> > Can those parse_args kick off threads? Hmm, probably. Sounds nasty to
> > me.
>
> Not without a bug. Imagine you have a "create_threads" module_param, someone
> loads the module with two args "create_threads crap". We call the
> create_threads parse function via parse_args, then hit the crap parameter
> and free the module. Oops.
>
> > The other thing is, if the parse_args code is only in "__init" then they
> > also will not be touched.
>
> It can be non-init for sysfs access.
>
> FWIW:
> Acked-by: Rusty Russell <[email protected]>

Thanks Rusty!

Ingo,

This change also fixes a possible deadlock in mainline between the
ftrace_lock and the module_mutex. Perhaps we should push this to Linus?

The possible deadlock is if a user unloads/loads modules at the same time
starts the function graph tracer. Highly unlikely to happen, but it does
spit out a lockdep warning.

-- Steve

2009-04-21 13:59:00

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer


* Steven Rostedt <[email protected]> wrote:

>
> On Tue, 21 Apr 2009, Rusty Russell wrote:
>
> > On Mon, 20 Apr 2009 11:27:35 pm Steven Rostedt wrote:
> > >
> > > On Sun, 19 Apr 2009, Rusty Russell wrote:
> > >
> > > > On Thu, 16 Apr 2009 11:48:31 am Steven Rostedt wrote:
> > > > > From: Steven Rostedt <[email protected]>
> > > > >
> > > > > Impact: fix and clean up
> > > > >
> > > > > The hooks in the module code for the function tracer must be called
> > > > > before any of that module code runs. The function tracer hooks
> > > > > modify the module (replacing calls to mcount to nops). If the code
> > > > > is executed while the change occurs, then the CPU can take a GPF.
> > > > >
> > > > > To handle the above with a bit of paranoia, I originally implemented
> > > > > the hooks as calls directly from the module code.
> > > > >
> > > > > After examining the notifier calls, it looks as though the start up
> > > > > notify is called before any of the module's code is executed. This makes
> > > > > the use of the notify safe with ftrace.
> > > >
> > > > Hi Steven,
> > > >
> > > > Unfortunately not: we do parse_args, which can call into the module code.
> > > > (Though it shouldn't do anything "significant", as it won't get a chance to
> > > > clean up if module load fails later).
> > > >
> > > > I think you need to do something else in general. Share the module_mutex for
> > > > the ftrace code? The ksplice guys have a similar issue, so maybe we should
> > > > generalize this into a "kernel_text" mutex?
> > >
> > > Hi Rusty,
> > >
> > > Thanks, for the update. I think we may still be OK.
> >
> > Agreed, just wanted to make sure you were aware.
> >
> > > Can those parse_args kick off threads? Hmm, probably. Sounds nasty to
> > > me.
> >
> > Not without a bug. Imagine you have a "create_threads" module_param, someone
> > loads the module with two args "create_threads crap". We call the
> > create_threads parse function via parse_args, then hit the crap parameter
> > and free the module. Oops.
> >
> > > The other thing is, if the parse_args code is only in "__init" then they
> > > also will not be touched.
> >
> > It can be non-init for sysfs access.
> >
> > FWIW:
> > Acked-by: Rusty Russell <[email protected]>
>
> Thanks Rusty!
>
> Ingo,
>
> This change also fixes a possible deadlock in mainline between the
> ftrace_lock and the module_mutex. Perhaps we should push this to
> Linus?
>
> The possible deadlock is if a user unloads/loads modules at the
> same time starts the function graph tracer. Highly unlikely to
> happen, but it does spit out a lockdep warning.

It impacts the module code so it would be nicer if Rusty pushed it.

To me it's a bit too large:

4 files changed, 75 insertions(+), 45 deletions(-)

and the race too narrow. (has it been reported in practice?)

Ingo

2009-04-21 17:56:18

by Tim Abbott

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer

On Sun, 19 Apr 2009, Rusty Russell wrote:

> I think you need to do something else in general. Share the module_mutex for
> the ftrace code? The ksplice guys have a similar issue, so maybe we should
> generalize this into a "kernel_text" mutex?

Yes, a kernel_text mutex is on my list of things to propose once Ksplice
gets merged.

There are at present several systems that modify the kernel text after a
machine is booted (e.g. dynamic ftrace and toggling of smp_locks when
hotplugging a cpu). Currently, they avoid stepping on each other by only
making changes inside stop_machine.

However, Ksplice does its run-pre matching checks outside stop_machine,
and Ksplice needs a way to prevent e.g. dynamic ftrace from changing the
code out from under it between those checks and actually applying the
patches [1]. A kernel_text mutex would be a reasonable solution to this
problem.

-Tim Abbott

[1] At present this isn't a real problem because Ksplice and ftrace
conflict with each other for unrelated reasons.

2009-04-21 18:17:47

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer


[ added Arjan ]

On Tue, 21 Apr 2009, Tim Abbott wrote:

> On Sun, 19 Apr 2009, Rusty Russell wrote:
>
> > I think you need to do something else in general. Share the module_mutex for
> > the ftrace code? The ksplice guys have a similar issue, so maybe we should
> > generalize this into a "kernel_text" mutex?
>
> Yes, a kernel_text mutex is on my list of things to propose once Ksplice
> gets merged.
>
> There are at present several systems that modify the kernel text after a
> machine is booted (e.g. dynamic ftrace and toggling of smp_locks when
> hotplugging a cpu). Currently, they avoid stepping on each other by only
> making changes inside stop_machine.
>
> However, Ksplice does its run-pre matching checks outside stop_machine,

Does Ksplice use break points to do the modification? I'd be interested in
knowing how Ksplice protects against the nastiness of code executing on
one CPU that is being modified by another CPU.

-- Steve


> and Ksplice needs a way to prevent e.g. dynamic ftrace from changing the
> code out from under it between those checks and actually applying the
> patches [1]. A kernel_text mutex would be a reasonable solution to this
> problem.
>
> -Tim Abbott
>
> [1] At present this isn't a real problem because Ksplice and ftrace
> conflict with each other for unrelated reasons.
>

2009-04-21 18:49:07

by Tim Abbott

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer

On Tue, 21 Apr 2009, Steven Rostedt wrote:

> > However, Ksplice does its run-pre matching checks outside stop_machine,
>
> Does Ksplice use break points to do the modification?

No, we use jump instructions.

> I'd be interested in knowing how Ksplice protects against the nastiness
> of code executing on one CPU that is being modified by another CPU.

The run-pre matching checks, which verify that the pre-patch code the user
gave Ksplice matches the running kernel, occur before stop_machine with
the module_mutex held. Then, inside stop_machine, we check that no thread
is in the process of executing a to-be-patched function before inserting
the jump instructions. We need the transition from the old code to the
new code to be atomic (i.e. no old code can run after any new code is
run). So, we need to check all the return addresses on each thread's
kernel stack as well as the instruction pointers.

-Tim Abbott

2009-04-22 09:17:33

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer


* Tim Abbott <[email protected]> wrote:

> On Sun, 19 Apr 2009, Rusty Russell wrote:
>
> > I think you need to do something else in general. Share the module_mutex for
> > the ftrace code? The ksplice guys have a similar issue, so maybe we should
> > generalize this into a "kernel_text" mutex?
>
> Yes, a kernel_text mutex is on my list of things to propose once
> Ksplice gets merged.

too late :-) These bits were written 1.5 months ago and are already
upstream:

3945dab: tracing, Text Edit Lock - SMP alternatives support
4460fda: tracing, Text Edit Lock - kprobes architecture independent support
0e39ac4: tracing, Text Edit Lock - Architecture Independent Code

I suspect ksplice could make use of text_mutex?

Ingo

2009-04-22 22:22:15

by Tim Abbott

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer

On Wed, 22 Apr 2009, Ingo Molnar wrote:

> I suspect ksplice could make use of text_mutex?

Yes, this is what I was imagining. Thanks for pointing this out! I plan
to make Ksplice use this for the next version of the Ksplice patch series.

However, since Ksplice can be built as a module, we'll want to
EXPORT_SYMBOL_GPL(text_mutex). The comment above text_mutex's definition
suggests some people would prefer not to have modules patching the kernel
text, and so this point might be contentious. I've found support for
building Ksplice as a module to be a useful feature in case one needs to
use multiple different versions of Ksplice on the same system (e.g.
because the version built in to the kernel doesn't support a particular
patch).

-Tim Abbott

2009-04-22 22:58:38

by Masami Hiramatsu

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer

Tim Abbott wrote:
> On Wed, 22 Apr 2009, Ingo Molnar wrote:
>
>> I suspect ksplice could make use of text_mutex?
>
> Yes, this is what I was imagining. Thanks for pointing this out! I plan
> to make Ksplice use this for the next version of the Ksplice patch series.

Could you also add "use text_poke() on x86" to your plan? :-)

> However, since Ksplice can be built as a module, we'll want to
> EXPORT_SYMBOL_GPL(text_mutex). The comment above text_mutex's definition
> suggests some people would prefer not to have modules patching the kernel
> text, and so this point might be contentious. I've found support for
> building Ksplice as a module to be a useful feature in case one needs to
> use multiple different versions of Ksplice on the same system (e.g.
> because the version built in to the kernel doesn't support a particular
> patch).

Hmm, I can't agree that we allow module to modify kernel text...
Is that possible to separate kernel-text swapping routine from Ksplice
module? In that case, we don't need to export text_mutex.

Thank you,

--
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America) Inc.
Software Solutions Division

e-mail: [email protected]

2009-04-23 19:42:19

by Anders Kaseorg

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer

On Wed, 22 Apr 2009, Masami Hiramatsu wrote:
> Could you also add "use text_poke() on x86" to your plan? :-)

That should be possible now that it is usable inside stop_machine(). It
would be nicer to have an API that isn’t x86-specific, though.

Another issue is that Ksplice supports patching rodata as well as text,
and text_poke() does not support changes bigger than PAGE_SIZE. Though
perhaps text_poke() is not the right function for rodata patches anyway.

> Hmm, I can't agree that we allow module to modify kernel text...
> Is that possible to separate kernel-text swapping routine from Ksplice
> module? In that case, we don't need to export text_mutex.

No, it’s not enough for Ksplice to lock the kernel text only while
actively swapping it. We also need to prevent changes to the kernel text
while Ksplice is doing run-pre matching to ensure safety. This means that
Ksplice wants to hold text_mutex for essentially the entire time it’s
running.

Anders

2009-04-23 19:58:16

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer

* Anders Kaseorg ([email protected]) wrote:
> On Wed, 22 Apr 2009, Masami Hiramatsu wrote:
> > Could you also add "use text_poke() on x86" to your plan? :-)
>
> That should be possible now that it is usable inside stop_machine(). It
> would be nicer to have an API that isn’t x86-specific, though.
>
> Another issue is that Ksplice supports patching rodata as well as text,
> and text_poke() does not support changes bigger than PAGE_SIZE. Though
> perhaps text_poke() is not the right function for rodata patches anyway.
>
> > Hmm, I can't agree that we allow module to modify kernel text...
> > Is that possible to separate kernel-text swapping routine from Ksplice
> > module? In that case, we don't need to export text_mutex.
>
> No, it’s not enough for Ksplice to lock the kernel text only while
> actively swapping it. We also need to prevent changes to the kernel text
> while Ksplice is doing run-pre matching to ensure safety. This means that
> Ksplice wants to hold text_mutex for essentially the entire time it’s
> running.
>

how about a kernel Ksplice API which lets your patch modules get
their handlers executed under the right execution context ? E.g. :

int do_ksplice(void (*pre_hook)(struct blah *context),
void (*code_update_hook)(struct blah *context),
void (*post_hook)(struct blah *context));

Which returns either 0 or -ESOMETHING. It would take care of locking and
everything within core kernel code. The "patch" modules would simply
provide the functions that does the code updates, assuming that the
do_ksplice function makes sure to provide correct locking semantics.
This way, the kernel code-base would contain the tricky locking bits.
It seems much better than exporting the text_mutex to modules.

Mathieu



> Anders
>

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-04-23 20:08:01

by Masami Hiramatsu

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer

Anders Kaseorg wrote:
> On Wed, 22 Apr 2009, Masami Hiramatsu wrote:
>> Could you also add "use text_poke() on x86" to your plan? :-)
>
> That should be possible now that it is usable inside stop_machine(). It
> would be nicer to have an API that isn’t x86-specific, though.

Now, text_poke() become atomic and can be called inside stop_machine().
I agree with that text_poke() is currently implemented only on x86,
but adding a generic text_poke() which just do memcpy() is easy.

> Another issue is that Ksplice supports patching rodata as well as text,
> and text_poke() does not support changes bigger than PAGE_SIZE. Though
> perhaps text_poke() is not the right function for rodata patches anyway.

PAGE_SIZE limitation is not a problem, because you can call text_poke()
repeatedly.
But indeed, handling rodata in text_poke() is a bit odd...

I assume that you are using alias pages (mapping ro-pages to rw-area) in
Ksplice, and if so, I think we can make it as a generic function and
share it with text_poke().

>> Hmm, I can't agree that we allow module to modify kernel text...
>> Is that possible to separate kernel-text swapping routine from Ksplice
>> module? In that case, we don't need to export text_mutex.
>
> No, it’s not enough for Ksplice to lock the kernel text only while
> actively swapping it. We also need to prevent changes to the kernel text
> while Ksplice is doing run-pre matching to ensure safety. This means that
> Ksplice wants to hold text_mutex for essentially the entire time it’s
> running.

Then, why can't you move that "matching" routine into the kernel too? :-)

Thank you,


--
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America) Inc.
Software Solutions Division

e-mail: [email protected]

2009-04-23 22:32:59

by Tim Abbott

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer

On Thu, 23 Apr 2009, Mathieu Desnoyers wrote:
> how about a kernel Ksplice API which lets your patch modules get
> their handlers executed under the right execution context ? E.g. :
[...]
> This way, the kernel code-base would contain the tricky locking bits.
> It seems much better than exporting the text_mutex to modules.

On Thu, 23 Apr 2009, Masami Hiramatsu wrote:
> Then, why can't you move that "matching" routine into the kernel too? :-)

I think both of you are assuming that Ksplice is grabbing text_mutex in
the patch modules for each update, which isn't what's going on. There are
two types of modules associated with Ksplice, the per-update patch modules
and the Ksplice core, which could be built as a module.

The patch modules for each update are already stub modules that just load
some data into a data structure and call into the Ksplice core at
module_init/cleanup (by Ksplice core, I mean kernel/ksplice.c). All the
tricky bits including the run-pre matching routine and the locking code
are in the Ksplice core.

The reason that we want to export text_mutex is that it can be useful for
the Ksplice core (still kernel/ksplice.c) to be built as a module. This
is desirable when a different version of the kernel/ksplice.c code is
needed in order to apply a particular update (e.g. because there was a bug
in Ksplice).


I agree that great care should be taken when writing code that patches the
kernel's text. However, I don't think failing to export text_mutex to GPL
modules helps ensure that. In particular, not exporting text_mutex
doesn't prevent modules from patching the kernel text. It just prevents
any modules that do so from grabbing the appropriate lock.

-Tim Abbott

2009-04-24 03:13:17

by Masami Hiramatsu

[permalink] [raw]
Subject: Re: [PATCH 1/5] ftrace: use module notifier for function tracer

Tim Abbott wrote:
> The reason that we want to export text_mutex is that it can be useful for
> the Ksplice core (still kernel/ksplice.c) to be built as a module. This
> is desirable when a different version of the kernel/ksplice.c code is
> needed in order to apply a particular update (e.g. because there was a bug
> in Ksplice).

Ah, OK. Certainly, Ksplice can't patch itself. It might be a good reason for
exporting text_mutex.


> I agree that great care should be taken when writing code that patches the
> kernel's text. However, I don't think failing to export text_mutex to GPL
> modules helps ensure that. In particular, not exporting text_mutex
> doesn't prevent modules from patching the kernel text. It just prevents
> any modules that do so from grabbing the appropriate lock.

What I'm concerned about exporting locking staffs is that may cause
a dead lock. However, we've already exported module_mutex.
I don't think that exporting text_mutex is more considerable than that. :-)

Thank you,

--
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America) Inc.
Software Solutions Division

e-mail: [email protected]