2006-09-19 18:34:51

by Mathieu Desnoyers

[permalink] [raw]
Subject: [PATCH] Linux Kernel Markers 0.2 for Linux 2.6.17

Good afternoon,

Following some very interesting ideas from Martin that show that even
static function calls and inlined functions can be used in interesting
ways with markers to deploy dynamic tracers with easy access to local
function variables, I send this slightly improved version of Linux Kernel
Markers.

It has the same capabilities as the previous one and additionally checks
for string format consistency in every kernel configuration. The idea
behind this is to be told by the compiler as soon as a marker is broken.

These last emails convince me even more that a markup mechanism must
interface with every kind of instrumentation hooking we can think about,
both dynamic and static.

Mathieu

--- BEGIN ---

--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -1082,6 +1082,8 @@ config KPROBES
for kernel debugging, non-intrusive instrumentation and testing.
If in doubt, say "N".

+source "kernel/Kconfig.marker"
+
source "ltt/Kconfig"

endmenu
--- /dev/null
+++ b/include/asm-i386/marker.h
@@ -0,0 +1,12 @@
+/*****************************************************************************
+ * marker.h
+ *
+ * Code markup for dynamic and static tracing. i386 support.
+ *
+ * Mathieu Desnoyers <[email protected]>
+ *
+ * September 2006
+ */
+
+#define JPROBE_TARGET \
+ __asm__ ( GENERIC_NOP5 )
--- /dev/null
+++ b/include/linux/marker.h
@@ -0,0 +1,93 @@
+/*****************************************************************************
+ * marker.h
+ *
+ * Code markup for dynamic and static tracing.
+ *
+ * Use either :
+ * MARK
+ * MARK_NOPRINT (will never call printk)
+ * MARK_STATIC (not dynamically instrumentable, will never call printk)
+ *
+ * Example :
+ *
+ * MARK(subsystem_event, "%d %s", someint, somestring);
+ * Where :
+ * - Subsystem is the name of your subsystem.
+ * - event is the name of the event to mark.
+ * - "%d %s" is the formatted string for printk.
+ * - someint is an integer.
+ * - somestring is a char *.
+ * - subsystem_event must be unique throughout the kernel!
+ *
+ * Mathieu Desnoyers <[email protected]>
+ *
+ * September 2006
+ */
+
+#include <asm/marker.h>
+
+#define MARK_SYM(event) \
+ __asm__ ( "__mark_" #event ":" )
+
+#define MARK_INACTIVE(event, format, args...) \
+ __mark_check_format(format, ## args)
+
+#define MARK_PRINT(event, format, args...) \
+ do { \
+ __mark_check_format(format, ## args); \
+ printk(#event ": " format, ## args); \
+ } while(0)
+
+#define MARK_FPROBE(event, format, args...) \
+ do { \
+ __mark_check_format(format, ## args); \
+ fprobe_##event(args); \
+ } while(0)
+
+#define MARK_KPROBE(event, format, args...) \
+ do { \
+ __mark_check_format(format, ## args); \
+ MARK_SYM(event); \
+ } while(0)
+
+#define MARK_JPROBE(event, format, args...) \
+ do { \
+ __mark_check_format(format, ## args); \
+ MARK_SYM(event); \
+ JPROBE_TARGET; \
+ } while(0)
+
+/* Menu configured markers */
+#ifndef CONFIG_MARK
+#define MARK MARK_INACTIVE
+#elif defined(CONFIG_MARK_PRINT)
+#define MARK MARK_PRINT
+#elif defined(CONFIG_MARK_FPROBE)
+#define MARK MARK_FPROBE
+#elif defined(CONFIG_MARK_KPROBE)
+#define MARK MARK_KPROBE
+#elif defined(CONFIG_MARK_JPROBE)
+#define MARK MARK_JPROBE
+#endif
+
+#ifndef CONFIG_MARK_NOPRINT
+#define MARK_NOPRINT MARK_INACTIVE
+#elif defined(CONFIG_MARK_NOPRINT_FPROBE)
+#define MARK_NOPRINT MARK_FPROBE
+#elif defined(CONFIG_MARK_NOPRINT_KPROBE)
+#define MARK_NOPRINT MARK_KPROBE
+#elif defined(CONFIG_MARK_NOPRINT_JPROBE)
+#define MARK_NOPRINT MARK_JPROBE
+#endif
+
+#ifndef CONFIG_MARK_STATIC
+#define MARK_STATIC MARK_INACTIVE
+#else
+#define MARK_STATIC MARK_FPROBE
+#endif
+
+static inline void __mark_check_format(const char *fmt, ...)
+ __attribute__ ((format (printf, 1, 2)));
+void __mark_check_format(const char *fmt, ...) { }
+
+
--- /dev/null
+++ b/kernel/Kconfig.marker
@@ -0,0 +1,75 @@
+# Code markers configuration
+
+menu "Marker configuration"
+
+
+config MARK
+ bool "Enable MARK code markers"
+ default y
+ help
+ Activate markers that can call printk or can be instrumented
+ dynamically.
+
+choice
+ prompt "MARK code marker behavior"
+ default MARK_KPROBE
+ depends on MARK
+ help
+ Configuration of markers that can call printk or can be
+ instrumented dynamically.
+
+config MARK_KPROBE
+ bool "KPROBE"
+ ---help---
+ Change markers for a symbol "__mark_modulename_event".
+config MARK_JPROBE
+ bool "JPROBE"
+ ---help---
+ Change markers for a symbol "__mark_modulename_event"
+ and create a target for a high speed dynamic probe.
+config MARK_FPROBE
+ bool "FPROBE"
+ ---help---
+ Change markers for a function call.
+config MARK_PRINT
+ bool "PRINT"
+ ---help---
+ Call printk from the marker.
+endchoice
+
+config MARK_NOPRINT
+ bool "Enable MARK_NOPRINT code markers"
+ default y
+ help
+ Activate markers that cannot call printk.
+
+choice
+ prompt "MARK_NOPRINT code marker behavior"
+ default MARK_NOPRINT_KPROBE
+ depends on MARK_NOPRINT
+ help
+ Configuration of markers that cannot call printk.
+
+config MARK_NOPRINT_KPROBE
+ bool "KPROBE"
+ ---help---
+ Change markers for a symbol "__mark_modulename_event".
+config MARK_NOPRINT_JPROBE
+ bool "JPROBE"
+ ---help---
+ Change markers for a symbol "__mark_modulename_event"
+ and create a target for a high speed dynamic probe.
+config MARK_NOPRINT_FPROBE
+ bool "FPROBE"
+ ---help---
+ Change markers for a function call.
+endchoice
+
+config MARK_STATIC
+ bool "Enable MARK_STATIC code markers"
+ default n
+ help
+ Activate markers that cannot be instrumented dynamically. They will
+ generate function calls to each function-style probe.
+
+endmenu

--- END ---

OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68


2006-09-19 18:51:47

by Randy Dunlap

[permalink] [raw]
Subject: Re: [PATCH] Linux Kernel Markers 0.2 for Linux 2.6.17

On Tue, 19 Sep 2006, Mathieu Desnoyers wrote:

> Good afternoon,
>
> Following some very interesting ideas from Martin that shows that even
> static function calls and inlined functions can be used in interesting
> ways with markers to deploy dynamic tracers with easy access to local
> function variables, I send this slightly improved version of Linux Kernel
> Markers.
>
> It has the same capabilities as the previous one and additionnaly checks
> for string format consistency in every kernel configuration. The idea
> behind this is to be told be the compiler as soon as a marker is broken.
>
> These last emails convince me even more that a markup mechanism must
> interface with every kind of instrumentation hooking we can think about,
> both dynamic and static.
>
> Mathieu
>
> --- BEGIN ---
>
> --- a/arch/i386/Kconfig
> +++ b/arch/i386/Kconfig
> @@ -1082,6 +1082,8 @@ config KPROBES
> for kernel debugging, non-intrusive instrumentation and testing.
> If in doubt, say "N".
>
> +source "kernel/Kconfig.marker"
> +
> source "ltt/Kconfig"
>
> endmenu
> --- /dev/null
> +++ b/include/asm-i386/marker.h
> @@ -0,0 +1,12 @@
> +/*****************************************************************************
> + * marker.h
> + *
> + * Code markup for dynamic and static tracing. i386 support.
> + *
> + * Mathieu Desnoyers <[email protected]>
> + *
> + * September 2006
> + */
> +
> +#define JPROBE_TARGET \
> + __asm__ ( GENERIC_NOP5 )

Too many spaces; use:
__asm__ (GENERIC_NOP5)

> --- /dev/null
> +++ b/include/linux/marker.h
> @@ -0,0 +1,93 @@
> +/*****************************************************************************
> + * marker.h
> + *
> + * Code markup for dynamic and static tracing.
> + *
> + * Use either :
> + * MARK
> + * MARK_NOPRINT (will never call printk)
> + * MARK_STATIC (not dynamically instrumentable, will never call printk)
> + *
> + * Example :
> + *
> + * MARK(subsystem_event, "%d %s", someint, somestring);
> + * Where :
> + * - Subsystem is the name of your subsystem.
> + * - event is the name of the event to mark.
> + * - "%d %s" is the formatted string for printk.
> + * - someint is an integer.
> + * - somestring is a char *.
> + * - subsystem_event must be unique thorough the kernel!
> + *
> + * Mathieu Desnoyers <[email protected]>
> + *
> + * September 2006
> + */
> +
> +#include <asm/marker.h>
> +
> +#define MARK_SYM(event) \
> + __asm__ ( "__mark_" #event ":" )
> +
> +#define MARK_INACTIVE(event, format, args...) \
> + __mark_check_format(format, ## args)
> +
> +#define MARK_PRINT(event, format, args...) \
> + do { \
> + __mark_check_format(format, ## args); \
> + printk(#event ": " format, ## args); \
> + } while(0)
> +
> +#define MARK_FPROBE(event, format, args...) \
> + do { \
> + __mark_check_format(format, ## args); \
> + fprobe_##event(args); \
> + } while(0)
> +
> +#define MARK_KPROBE(event, format, args...) \
> + do { \
> + __mark_check_format(format, ## args); \
> + MARK_SYM(event); \
> + } while(0)
> +
> +#define MARK_JPROBE(event, format, args...) \
> + do { \
> + __mark_check_format(format, ## args); \
> + MARK_SYM(event); \
> + JPROBE_TARGET; \
> + } while(0)
> +
> +/* Menu configured markers */
> +#ifndef CONFIG_MARK
> +#define MARK MARK_INACTIVE
> +#elif defined(CONFIG_MARK_PRINT)
> +#define MARK MARK_PRINT
> +#elif defined(CONFIG_MARK_FPROBE)
> +#define MARK MARK_FPROBE
> +#elif defined(CONFIG_MARK_KPROBE)
> +#define MARK MARK_KPROBE
> +#elif defined(CONFIG_MARK_JPROBE)
> +#define MARK MARK_JPROBE
> +#endif
> +
> +#ifndef CONFIG_MARK_NOPRINT
> +#define MARK_NOPRINT MARK_INACTIVE
> +#elif defined(CONFIG_MARK_NOPRINT_FPROBE)
> +#define MARK_NOPRINT MARK_FPROBE
> +#elif defined(CONFIG_MARK_NOPRINT_KPROBE)
> +#define MARK_NOPRINT MARK_KPROBE
> +#elif defined(CONFIG_MARK_NOPRINT_JPROBE)
> +#define MARK_NOPRINT MARK_JPROBE
> +#endif
> +
> +#ifndef CONFIG_MARK_STATIC
> +#define MARK_STATIC MARK_INACTIVE
> +#else
> +#define MARK_STATIC MARK_FPROBE
> +#endif
> +
> +static inline void __mark_check_format(const char *fmt, ...)
> + __attribute__ ((format (printf, 1, 2)));
> +void __mark_check_format(const char *fmt, ...) { }

That last line is confusing (to me). What's it for?
Is it just an empty (inline) function definition?
If so, why repeat the void + function name?

> --- /dev/null
> +++ b/kernel/Kconfig.marker
> @@ -0,0 +1,75 @@
> +# Code markers configuration
> +
> +menu "Marker configuration"
> +
> +
> +config MARK
> + bool "Enable MARK code markers"
> + default y

Please justify using 'y' as the default value.

> + help
> + Activate markers that can call printk or can be instrumented
> + dynamically.
> +
> +choice
> + prompt "MARK code marker behavior"
> + default MARK_KPROBE
> + depends on MARK
> + help
> + Configuration of markers that can call printk or can be
> + instrumented dynamically.
> +
> +config MARK_KPROBE
> + bool "KPROBE"
> + ---help---
> + Change markers for a symbol "__mark_modulename_event".

Indent help text by 2 spaces (above & below here).

> +config MARK_JPROBE
> + bool "JPROBE"
> + ---help---
> + Change markers for a symbol "__mark_modulename_event"
> + and create a target for a high speed dynamic probe.
> +config MARK_FPROBE
> + bool "FPROBE"
> + ---help---
> + Change markers for a function call.
> +config MARK_PRINT
> + bool "PRINT"
> + ---help---
> + Call printk from the marker.
> +endchoice
> +
> +config MARK_NOPRINT
> + bool "Enable MARK_NOPRINT code markers"
> + default y
> + help
> + Activate markers that cannot call printk.
> +
> +choice
> + prompt "MARK_NOPRINT code marker behavior"
> + default MARK_NOPRINT_KPROBE
> + depends on MARK_NOPRINT
> + help
> + Configuration of markers that cannot call printk.
> +
> +config MARK_NOPRINT_KPROBE
> + bool "KPROBE"
> + ---help---
> + Change markers for a symbol "__mark_modulename_event".
> +config MARK_NOPRINT_JPROBE
> + bool "JPROBE"
> + ---help---
> + Change markers for a symbol "__mark_modulename_event"
> + and create a target for a high speed dynamic probe.
> +config MARK_NOPRINT_FPROBE
> + bool "FPROBE"
> + ---help---
> + Change markers for a function call.
> +endchoice
> +
> +config MARK_STATIC
> + bool "Enable MARK_STATIC code markers"
> + default n
> + help
> + Activate markers that cannot be instrumented dynamically. They will
> + generate function calls to each function-style probe.
> +
> +endmenu
>
> --- END ---

--
~Randy

2006-09-19 19:08:53

by S. P. Prasanna

[permalink] [raw]
Subject: Re: [PATCH] Linux Kernel Markers 0.2 for Linux 2.6.17

On Tue, Sep 19, 2006 at 02:34:47PM -0400, Mathieu Desnoyers wrote:
[....]
> --- /dev/null
> +++ b/kernel/Kconfig.marker
> @@ -0,0 +1,75 @@
> +# Code markers configuration
> +
> +menu "Marker configuration"
> +
> +
> +config MARK
> + bool "Enable MARK code markers"
> + default y
> + help
> + Activate markers that can call printk or can be instrumented
> + dynamically.
> +
> +choice
> + prompt "MARK code marker behavior"
> + default MARK_KPROBE
> + depends on MARK
> + help
> + Configuration of markers that can call printk or can be
> + instrumented dynamically.
> +
> +config MARK_KPROBE
> + bool "KPROBE"
> + ---help---
> + Change markers for a symbol "__mark_modulename_event".
> +config MARK_JPROBE
> + bool "JPROBE"
> + ---help---
> + Change markers for a symbol "__mark_modulename_event"
> + and create a target for a high speed dynamic probe.
> +config MARK_FPROBE
> + bool "FPROBE"
> + ---help---
> + Change markers for a function call.
> +config MARK_PRINT
> + bool "PRINT"
> + ---help---
> + Call printk from the marker.
> +endchoice
> +
> +config MARK_NOPRINT
> + bool "Enable MARK_NOPRINT code markers"
> + default y
> + help
> + Activate markers that cannot call printk.
> +
> +choice
> + prompt "MARK_NOPRINT code marker behavior"
> + default MARK_NOPRINT_KPROBE
> + depends on MARK_NOPRINT
> + help
> + Configuration of markers that cannot call printk.
> +
> +config MARK_NOPRINT_KPROBE
> + bool "KPROBE"
> + ---help---
> + Change markers for a symbol "__mark_modulename_event".
> +config MARK_NOPRINT_JPROBE
> + bool "JPROBE"
> + ---help---
> + Change markers for a symbol "__mark_modulename_event"
> + and create a target for a high speed dynamic probe.
> +config MARK_NOPRINT_FPROBE
> + bool "FPROBE"
> + ---help---
> + Change markers for a function call.
> +endchoice
> +
> +config MARK_STATIC
> + bool "Enable MARK_STATIC code markers"
> + default n
> + help
> + Activate markers that cannot be instrumented dynamically. They will
> + generate function calls to each function-style probe.
> +
> +endmenu

I think having so many config options is not a good idea, you can group them
and reduce the number of config options.

Thanks
Prasanna

--
Prasanna S.P.
Linux Technology Center
India Software Labs, IBM Bangalore
Email: [email protected]
Ph: 91-80-41776329

2006-09-19 19:13:23

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH] Linux Kernel Markers 0.2 for Linux 2.6.17

Hi Randy,

Thanks for the comments, see below :

* Randy.Dunlap ([email protected]) wrote:
> > +static inline void __mark_check_format(const char *fmt, ...)
> > + __attribute__ ((format (printf, 1, 2)));
> > +void __mark_check_format(const char *fmt, ...) { }
>
> That last line is confusing (to me). What's it for?
> Is it just an empty (inline) function definition?
> If so, why repeat the void + function name?
>

The goal of this "empty" function is just to have the compiler check the string
format consistency.

I separated the function declaration and implementation because I have seen some
compilers complain about having the two merged together.

I will change it to

static inline __attribute__ ((format (printf, 1, 2)))
void __mark_check_format(const char *fmt, ...)
{ }

And hope every compiler will like it.

The empty implementation is because the function is called (must therefore be
implemented), but I expect the compiler to completely optimize it away.

> > --- /dev/null
> > +++ b/kernel/Kconfig.marker
> > @@ -0,0 +1,75 @@
> > +# Code markers configuration
> > +
> > +menu "Marker configuration"
> > +
> > +
> > +config MARK
> > + bool "Enable MARK code markers"
> > + default y
>
> Please justify using 'y' as the default value.
>

It has to be debated. The default for markers will put a symbol for all the
markers, so that kprobe can easily attach to it. It has no impact that I am
aware of except to boost the number of symbols.

Mathieu

OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2006-09-19 19:24:11

by Frank Ch. Eigler

[permalink] [raw]
Subject: Re: [PATCH] Linux Kernel Markers 0.2 for Linux 2.6.17


Mathieu Desnoyers <[email protected]> writes:

> Following some very interesting ideas from Martin that shows that
> even static function calls and inlined functions can be used in
> interesting ways with markers to deploy dynamic tracers with easy
> access to local function variables [...]

While that has been an interesting idea, its implementation would need
to be sketched out far beyond the speculative, in order to override
what I perceive as the rough consensus reached earlier in the
thread.

> [...] These last emails convince me even more that a markup
> mechanism must interface with every kind of instrumentation hooking
> we can think about, both dynamic and static. [...]

If you don't allow yourself to presume on-the-fly function
recompilation, then these markers would need to be made run-time
rather than compile-time configurable. That is, not like this:

> +/* Menu configured markers */
> +#ifndef CONFIG_MARK
> +#define MARK MARK_INACTIVE
> +#elif defined(CONFIG_MARK_PRINT)
> +#define MARK MARK_PRINT
> +#elif defined(CONFIG_MARK_FPROBE)
> +#define MARK MARK_FPROBE
> +#elif defined(CONFIG_MARK_KPROBE)
> +#define MARK MARK_KPROBE
> +#elif defined(CONFIG_MARK_JPROBE)
> +#define MARK MARK_JPROBE
> +#endif

- FChE

2006-09-19 19:36:28

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH] Linux Kernel Markers 0.2 for Linux 2.6.17

* Frank Ch. Eigler ([email protected]) wrote:
> If you don't allow yourself to presume on-the-fly function
> recompilation, then these markers would need to be made run-time
> rather than compile-time configurable. That is, not like this:
>
> > +/* Menu configured markers */
> > +#ifndef CONFIG_MARK
> > +#define MARK MARK_INACTIVE
> > +#elif defined(CONFIG_MARK_PRINT)
> > +#define MARK MARK_PRINT
> > +#elif defined(CONFIG_MARK_FPROBE)
> > +#define MARK MARK_FPROBE
> > +#elif defined(CONFIG_MARK_KPROBE)
> > +#define MARK MARK_KPROBE
> > +#elif defined(CONFIG_MARK_JPROBE)
> > +#define MARK MARK_JPROBE
> > +#endif

By making them run-time configurable, I don't see any way not to bloat the
kernel. How can we embed calls to printk+function+kprobe+djprobe without
having some kind of performance impact ?

Do you have any suggestion for this ? (or maybe am I missing your point ?)

Mathieu


OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2006-09-19 19:46:09

by Frank Ch. Eigler

[permalink] [raw]
Subject: Re: [PATCH] Linux Kernel Markers 0.2 for Linux 2.6.17

Hi -

On Tue, Sep 19, 2006 at 03:36:24PM -0400, Mathieu Desnoyers wrote:
> [...]
> > If you don't allow yourself to presume on-the-fly function
> > recompilation, then these markers would need to be made run-time
> > rather than compile-time configurable. That is, not like this:
> > [...]

> By making them run-time configurable, I don't see any whay not to bloat the
> kernel. How can be embed calls to printk+function+kprobe+djprobe without
> having some kind of performance impact ?

In order to have what we appear to need, we cannot avoid having some
impact. (Even NOPs have impact.)

Suppose that mbligh's clever but speculative idea has some nasty flaw,
once someone tried to reduce it to code. Do you see that markers
along the lines you've posted would be unsatisfactory? With that in
mind, is there a point in adding such markers now?

- FChE


Attachments:
(No filename) (853.00 B)
(No filename) (189.00 B)
Download all attachments

2006-09-19 20:10:28

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH] Linux Kernel Markers 0.2 for Linux 2.6.17

Hi,

* S. P. Prasanna ([email protected]) wrote:
> I think having so many config options is not a good idea, you can group them
> and reduce the number of config options.
>

Then we would have to determine what the scenarios are. The problem is to cover
all interesting instrumentation mixes efficiently.

I think it could be a good enough list :

Fprobes only
Dynamic + Fprobes (supports dynamic probes and uses fprobes for non probable
code)
Dynamic only
Printk only

Which would be expressed in the following menu :

choice Marker behavior
* Inactive
* Dynamic probes
* Function probes (Fprobes)
* Dynamic probes complemented with Fprobes
* Printk

if selected "Dynamic probes" or "Dynamic probes complemented with Fprobes"
choice2 Dynamic probes behavior
* Kprobes
* Jprobes

Any thoughts ?

Mathieu


OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2006-09-19 20:28:07

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH] Linux Kernel Markers 0.2 for Linux 2.6.17

* Frank Ch. Eigler ([email protected]) wrote:
> Hi -
>
> On Tue, Sep 19, 2006 at 03:36:24PM -0400, Mathieu Desnoyers wrote:
> > [...]
> > > If you don't allow yourself to presume on-the-fly function
> > > recompilation, then these markers would need to be made run-time
> > > rather than compile-time configurable. That is, not like this:
> > > [...]
>
> > By making them run-time configurable, I don't see any whay not to bloat the
> > kernel. How can be embed calls to printk+function+kprobe+djprobe without
> > having some kind of performance impact ?
>
> In order to have what we appear to need, we cannot avoid having some
> impact. (Even NOPs have impact.)
>

I am all for giving this decision to the end-user or the distribution which will
configure the kernel. There is no "perfect" or "for all" solution that I am
aware of.

* Users debugging servers will more likely want the kprobe or jprobe option.
* Users interested in high performance tracing will want fprobe and/or jprobe.
* Users interested in embedded systems will want to avoid tools outside the
kernel that rely on module loading : their kernels often do not even support
modules. -> fprobe

> Suppose that mbligh's clever but speculative idea has some nasty flaw,
> once someone tried to reduce it to code. Do you see that markers
> along the lines you've posted would be unsatisfactory? With that in
> mind, is there point adding such markers now?
>

M. Bligh's idea is an interesting use of fprobes through modules that could make
dynamic tracing more effective for accessing local variables. It does not
change anything to the various needs of the above-mentioned classes of users,
except that it may make life of high performance and server users easier. With
or without his idea, the goal of this marker mechanism is to meet all those
users' different needs.

Mathieu


OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68


Attachments:
(No filename) (1.96 kB)
signature.asc (189.00 B)
Digital signature
Download all attachments

2006-09-19 21:09:38

by Frank Ch. Eigler

[permalink] [raw]
Subject: Re: [PATCH] Linux Kernel Markers 0.2 for Linux 2.6.17

Hi -

On Tue, Sep 19, 2006 at 04:28:02PM -0400, Mathieu Desnoyers wrote:

> [...]
> > In order to have what we appear to need, we cannot avoid having some
> > impact. (Even NOPs have impact.)

> I am all for giving this decision to the end-user or the
> distribution which will configure the kernel. [...]

If the decision you're talking about is whether all markers in the
system should behave one way or another, then this is a degree of
central control that we have not contemplated during the entire
thread, until now.

It is an end-user such as an administrator who will figure out which
probes/markers/tracing elements need what kind of processing attached.
They don't want to recompile the kernel to switch. They will want
different types of processing, or none at all, for different markers
during a system lifetime.


> * Users debugging servers will more likely want the kprobe or jprobe option.
> * Users interested in high performance tracing will want fprobe
> and/or jprobe.
> * Users interested in embedded systems will want to avoid tools
> outside the kernel that rely on module loading: their kernel often
> not even support modules. -> fprobe

This line of thinking makes me worry that we've forgotten all that we
learned during the weekend. Amongst the insights apparently agreed
was that on *any given system*, a mixture of static and dynamic probing
was likely necessary. For the static part of the instrumentation, a
marker that could be hooked up to either type of probing system was
desirable, which implies some sort of run-time changeability.

(Regarding module loading being considered a blocker for a tool like
systemtap, don't. We will support pre-compiled boot-time
instrumentation loaded from e.g. initrd or linked right into vmlinux.)


> M. Bligh's idea is an interesting use of fprobes through modules
> that could make dynamic tracing more effective for accessing local
> variables. [...]

That's if it works, if it can be implemented, if it does not create
conflicts between multiple tracing/probing systems, if ...

Yes, in theory it might bridge the gulf between compile-time and
run-time configuration, but aren't these all big "if"s right now?


> With or without his idea, the goal of this marker mechanism is to
> meet all those user's different needs.

I don't understand how this new compile-time configured style of
marker is to serve anyone who wants to use something other than a
single distribution-picked tracing/probing tool. I thought we had
abandoned that model some time ago.


- FChE


Attachments:
(No filename) (2.49 kB)
(No filename) (189.00 B)
Download all attachments

2006-09-19 22:01:32

by Karim Yaghmour

[permalink] [raw]
Subject: Re: [PATCH] Linux Kernel Markers 0.2 for Linux 2.6.17


Frank Ch. Eigler wrote:
> If the decision you're talking about is whether all markers in the
> system should behave one way or another, then this is a degree of
> central control that we have not contemplated during the entire
> thread, until now.
>
> It is an end-user such as an administrator who will figure out which
> probes/markers/tracing elements need what kind of processing attached.
> They don't want to recompile the kernel to switch. They will want
> different types of processing, or none at all, for different markers
> during a system lifetime.

Sure. I'm sure there's just a slight miscommunication here because
my understanding of Mathieu's attempts goes in the direction of
what you're saying here. He might not have gotten it right, but
that can be worked on.

> This line of thinking makes me worry that we've forgotten all that we
> learned during the weekend. Amongst the insights apparently agreed
> was that on *any given system*, a mixture of static an dynamic probing
> was likely necessary. For the static part of the instrumentation, a
> marker that could be hooked up to either type of probing system was
> desirable, which implies some sort of run-time changeability.

Ok. So if I get what you're saying here, you'd like to be able to
overload a marker? Can you suggest a macro that can do what you'd
like? I'm sure Mathieu would gladly take a close look at it.

> That's if it works, if it can be implemented, if it does not create
> conflicts between multiple tracing/probing systems, if ...
>
> Yes, in theory it might bridge the gulf between compile-time and
> run-time configuration, but aren't these all big "if"s right now?

Lots of "if"s in this thread, and this weekend does teach us that
highlighting the problems with other people's "if"s is dangerous.
I'm sure you'd agree that concentrating on the areas where there
is agreement would be best.

> I don't understand how this new compile-time configured style of
> marker is to serve anyone who wants to use something other than a
> single distribution-picked tracing/probing tool. I though we had
> abandoned that model some time ago.

We did, and I'm sure there's a fundamental misunderstanding here.
It would likely help if you could give a concrete example of how
you would like Mathieu's proposal to be changed or, if you don't
like it at all, what you would like to see. Anything purely
technical that will avoid any of the pitfalls generated by
differences in perspective.

Thanks,

Karim

2006-09-20 13:21:16

by Frank Ch. Eigler

[permalink] [raw]
Subject: Re: [PATCH] Linux Kernel Markers 0.2 for Linux 2.6.17

Hi -

> > [...] For the static part of the instrumentation, a
> > marker that could be hooked up to either type of probing system was
> > desirable, which implies some sort of run-time changeability.
>
> Ok. So if I get what you're saying here, you'd like to be able to
> overload a marker?

Sort of. Remember, we discussed markers as *marking* places and
things, with the intent that they be decoupled from the actual
*action* that is taken when the marker is hit.

> Can you suggest a macro that can do what you'd like. [...]

Compare the kind of marker I showed at OLS and presently supported by
systemtap. Its unparametrized version looks like this:

#define STAP_MARK(name) do { \
static void (*__mark_##name##_)(); \
if (unlikely (__mark_##name##_)) \
(void) (__mark_##name##_()); \
} while (0)

A tracing/probing tool would hook up to a particular and specific
marker at run time by locating the __mark_NAME static variable (a
function pointer) in the data segment, for example using the ordinary
symbol table, and swapping into it the address of a compatible
back-end handler function. When a particular tracing/probing session
ends, the function pointer is reset to null.

Note that this technique:

- operates at run time
- is portable
- in its parametrized variants, is type-safe
- does not require any future technology
- does impose some overhead even when a marker is not active


- FChE


Attachments:
(No filename) (1.38 kB)
(No filename) (189.00 B)
Download all attachments

2006-09-20 13:43:48

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH] Linux Kernel Markers 0.2 for Linux 2.6.17

* Frank Ch. Eigler ([email protected]) wrote:
> Hi -
>
> > > [...] For the static part of the instrumentation, a
> > > marker that could be hooked up to either type of probing system was
> > > desirable, which implies some sort of run-time changeability.
> >
> > Ok. So if I get what you're saying here, you'd like to be able to
> > overload a marker?
>
> Sort of. Remember, we discussed markers as *marking* places and
> things, with the intent that they be decoupled from the actual
> *action* that is taken when the marker is hit.
>
> > Can you suggest a macro that can do what you'd like. [...]
>
> Compare the kind of marker I showed at OLS and presently supported by
> systemtap. Its unparametrized version looks like this:
>
> #define STAP_MARK(name) do { \
> static void (*__mark_##name##_)(); \
> if (unlikely (__mark_##name##_)) \
> (void) (__mark_##name##_()); \
> } while (0)
>
> A tracing/probing tool would hook up to a particular and specific
> marker at run time by locating the __mark_NAME static variable (a
> function pointer) in the data segment, for example using the ordinary
> symbol table, and swapping into it the address of a compatible
> back-end handler function. When a particular tracing/probing session
> ends, the function pointer is reset to null.
>
> Note that this technique:
>
> - operates at run time
> - is portable
> - in its parametrized variants, is type-safe
> - does not require any future technology
> - does impose some overhead even when a marker is not active
>
>
Hi Frank,

Yes, I think there is much to gain to switch from the 5 nops "jumpprobe" to
this scheme. In its parametrized variant, the jump will probably jump over a
stack setup and function call. Do you think I should simply switch from the
5 nops marker to this technique ? I guess the performance impact of a
predicted branch will be similar to 5 nops anyway...

The clear advantage I see in the parametrized variant is that the parameters
will be ready for the called function : it makes it trivial to access any
variable from the traced function.

Mathieu


OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68


Attachments:
(No filename) (2.20 kB)
signature.asc (189.00 B)
Digital signature
Download all attachments

2006-09-20 13:51:20

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH] Linux Kernel Markers 0.2 for Linux 2.6.17

Thinking about it, I would leave a choice of two options :

1 - the probe mechanism you described at OLS + passing arguments
2 - a simple symbol for kprobe

Any thoughts ?

Mathieu

* Frank Ch. Eigler ([email protected]) wrote:
> Hi -
>
> > > [...] For the static part of the instrumentation, a
> > > marker that could be hooked up to either type of probing system was
> > > desirable, which implies some sort of run-time changeability.
> >
> > Ok. So if I get what you're saying here, you'd like to be able to
> > overload a marker?
>
> Sort of. Remember, we discussed markers as *marking* places and
> things, with the intent that they be decoupled from the actual
> *action* that is taken when the marker is hit.
>
> > Can you suggest a macro that can do what you'd like. [...]
>
> Compare the kind of marker I showed at OLS and presently supported by
> systemtap. Its unparametrized version looks like this:
>
> #define STAP_MARK(name) do { \
> static void (*__mark_##name##_)(); \
> if (unlikely (__mark_##name##_)) \
> (void) (__mark_##name##_()); \
> } while (0)
>
> A tracing/probing tool would hook up to a particular and specific
> marker at run time by locating the __mark_NAME static variable (a
> function pointer) in the data segment, for example using the ordinary
> symbol table, and swapping into it the address of a compatible
> back-end handler function. When a particular tracing/probing session
> ends, the function pointer is reset to null.
>
> Note that this technique:
>
> - operates at run time
> - is portable
> - in its parametrized variants, is type-safe
> - does not require any future technology
> - does impose some overhead even when a marker is not active
>
>
> - FChE


OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68


Attachments:
(No filename) (1.84 kB)
signature.asc (189.00 B)
Digital signature
Download all attachments

2006-09-20 14:57:42

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH] Linux Kernel Markers 0.2 for Linux 2.6.17

Hi Frank,

Here is a revised proposal (just the marker.h). Do you have ideas on how we can
export the function symbol ? (is it necessary ?)

Any thoughts ?

----- BEGIN -----


#include <asm/marker.h>

/*
 * MARK_SYM(name): when CONFIG_MARK_SYMBOL is defined, emits the assembler
 * label __mark_kprobe_<name> at the point of use; otherwise expands to
 * nothing.
 *
 * NOTE(review): the label text is fixed per marker name, so if the compiler
 * duplicates the enclosing code (inlining, loop unrolling) the same label
 * could be emitted twice -- confirm this cannot happen at the marked sites.
 */
#ifdef CONFIG_MARK_SYMBOL
#define MARK_SYM(name) \
do { \
__asm__ ( "__mark_kprobe_" #name ":" ); \
} while(0)
#else
#define MARK_SYM(name)
#endif


/*
 * MARK_CALL(name, format, args...): when CONFIG_MARK_CALL is defined,
 * declares a block-scope static function pointer taking (const char *fmt,
 * ...) whose assembler-level name is __mark_call_<name>, and calls it with
 * the format string and arguments if it is non-null.  The pointer has no
 * initializer, so it starts out null.  Without CONFIG_MARK_CALL the macro
 * expands to nothing.
 */
#ifdef CONFIG_MARK_CALL
#define MARK_CALL(name, format, args...) \
do {\
static void (*__mark_call_##name##_)(const char *fmt, ...) \
asm ("__mark_call_"#name); \
if (unlikely (__mark_call_##name##_)) \
(void) (__mark_call_##name##_(format, ## args)); \
} while(0)
#else
#define MARK_CALL(name, format, args...)
#endif

/*
 * MARK(name, format, args...): full marker.  In order, it:
 *   1. passes format/args to __mark_check_format() so the compiler checks
 *      the arguments against the format string,
 *   2. expands MARK_SYM(name),
 *   3. expands MARK_CALL(name, format, args).
 *
 * The arguments appear both in the check call and in MARK_CALL, so an
 * argument expression with side effects may be evaluated more than once.
 */
#define MARK(name, format, args...) \
do { \
__mark_check_format(format, ## args); \
MARK_SYM(name); \
MARK_CALL(name, format, ## args); \
} while(0)

/*
 * Compile-time format checker: does nothing at run time.  The printf-style
 * format attribute (format string is argument 1, variadic arguments start
 * at argument 2) lets the compiler verify the arguments against fmt.
 */
static inline void __mark_check_format(const char *fmt, ...)
__attribute__ ((format (printf, 1, 2)));

static inline void __mark_check_format(const char *fmt, ...)
{
}


---- END ----



* Mathieu Desnoyers ([email protected]) wrote:
> * Frank Ch. Eigler ([email protected]) wrote:
> > Hi -
> >
> > > > [...] For the static part of the instrumentation, a
> > > > marker that could be hooked up to either type of probing system was
> > > > desirable, which implies some sort of run-time changeability.
> > >
> > > Ok. So if I get what you're saying here, you'd like to be able to
> > > overload a marker?
> >
> > Sort of. Remember, we discussed markers as *marking* places and
> > things, with the intent that they be decoupled from the actual
> > *action* that is taken when the marker is hit.
> >
> > > Can you suggest a macro that can do what you'd like. [...]
> >
> > Compare the kind of marker I showed at OLS and presently supported by
> > systemtap. Its unparametrized version looks like this:
> >
> > #define STAP_MARK(name) do { \
> > static void (*__mark_##name##_)(); \
> > if (unlikely (__mark_##name##_)) \
> > (void) (__mark_##name##_()); \
> > } while (0)
> >
> > A tracing/probing tool would hook up to a particular and specific
> > marker at run time by locating the __mark_NAME static variable (a
> > function pointer) in the data segment, for example using the ordinary
> > symbol table, and swapping into it the address of a compatible
> > back-end handler function. When a particular tracing/probing session
> > ends, the function pointer is reset to null.
> >
> > Note that this technique:
> >
> > - operates at run time
> > - is portable
> > - in its parametrized variants, is type-safe
> > - does not require any future technology
> > - does impose some overhead even when a marker is not active
> >
> >
> Hi Frank,
>
> Yes, I think there is much to gain to switch from the 5 nops "jumpprobe" to
> this scheme. In its parametrized variant, the jump will probably jump over a
> stack setup and function call. Do you think I should simply switch from the
> 5 nops marker to this technique ? I guess the performance impact of a
> predicted branch will be similar to 5 nops anyway...
>
> The clear advantage I see in the parametrized variant is that the parameters
> will be ready for the called function : it makes it trivial to access any
> variable from the traced function.
>
> Mathieu
>
>
> OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg
> Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68


OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68


Attachments:
(No filename) (3.63 kB)
signature.asc (189.00 B)
Digital signature
Download all attachments

2006-09-20 15:55:13

by Frank Ch. Eigler

[permalink] [raw]
Subject: Re: [PATCH] Linux Kernel Markers 0.2 for Linux 2.6.17

Hi -

Mathieu Desnoyers wrote:

> [...] Do you have ideas on how we can export the function symbol?
> (is it necessary ?)

It turns out that static variables like that get included in the
ordinary symbol tables along with other (un)initialized globals - it
has been making it into /proc/kallsyms. If the normal symbol table is
not available, then some other measure would be needed to find the
variable containing the function pointer.

> [...]
> #define MARK(name, format, args...) \
> do { \
> __mark_check_format(format, ## args); \
> MARK_SYM(name); \
> MARK_CALL(name, format, ## args); \
> } while(0)

While varargs simplify some things, it sacrifices type-safety, in that
a handler function would have to be varargs too. For the systemtap
marker prototype, parametrized variants use scores of (automatically
generated) macros, with different arity/type permutations, each
self-describing and type-safe.

Regarding a marker variant that would require kprobes (inserting a
labelled NOP or few), it may be an appropriate choice where dormant
marker overhead must be minimal and robust parameter passing is less
important.

- FChE


Attachments:
(No filename) (1.17 kB)
(No filename) (189.00 B)
Download all attachments

2006-09-20 16:37:34

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH] Linux Kernel Markers 0.2 for Linux 2.6.17

* Frank Ch. Eigler ([email protected]) wrote:
> While varargs simplify some things, it sacrifices type-safety, in that
> a handler function would have to be varargs too. For the systemtap
> marker prototype, parametrized variants use scores of (automatically
> generated) macros, with different arity/type permutations, each
> self-describing and type-safe.
>
The format string could be used to provide some kind of type safety : the
compiler will check that arguments match the format string provided. From there,
a simple script can parse the format string and generate a function prototype
accordingly. Correct me if I am wrong, but I think that if the called function
has the exact same parameter layout as the varargs caller stack, the function
call should work (without the called function having a variable arguments list).

> Regarding a marker variant that would require kprobes (inserting a
> labelled NOP or few), it may be an appropriate choice where dormant
> marker overhead must be minimal and robust parameter passing is less
> important.
>

I even came up with the following idea :

Instead of using a test + conditional predicted branch, we could jump to an
address located just after the probe.

jmp to over_symbol address
call_symbol
call function pointer
over_symbol

This way, we could have portable :
- direct unconditional jump to an address following the marked site when
disabled
- Enable stack setup and function call by setting the function pointer and
changing the jmp target to be "call_symbol"
- Enable "direct jump to arbitrary assembly" by setting the jump target to
arbitrary code, where this code can end by jumping to over_symbol.

The generated binary on x86 looks like :

10: a1 24 00 00 00 mov 0x24,%eax
15: ff e0 jmp *%eax
17: c7 44 24 04 01 00 00 movl $0x1,0x4(%esp)
1e: 00
1f: c7 04 24 00 00 00 00 movl $0x0,(%esp)
26: ff 15 1c 00 00 00 call *0x1c

With those symbols :

f8875c08 b __mark_subsys_mark1_call [test_mark] (function pointer)
f8875620 d __mark_subsys_mark1_jump_call [test_mark]
f8875624 d __mark_subsys_mark1_jump_over [test_mark]

The macro doing that :

/*
 * MARK_CALL(name, format, args...): jump-based marker.
 *
 * Declares two local labels and three block-scope statics:
 *   - __mark_<name>_jump_over: holds the address of over_label,
 *   - __mark_<name>_jump_call: holds the address of call_label (marked
 *     unused because the macro itself never reads it),
 *   - __mark_<name>_call: function pointer, initially __mark_empty_function.
 * Each static is given the matching assembler-level name via asm("...").
 *
 * Execution always does an indirect goto through __mark_<name>_jump_over.
 * With its initial value that lands on over_label and skips the call; if
 * the pointer is rewritten to the call_label address, control reaches the
 * call through __mark_<name>_call(format, args) and then falls through to
 * over_label.  The trailing empty do/while gives over_label a statement to
 * attach to.
 */
#define MARK_CALL(name, format, args...) \
do {\
__label__ call_label, over_label; \
static void *__mark_##name##_jump_over \
asm ("__mark_"#name"_jump_over") = \
&&over_label; \
static void *__mark_##name##_jump_call \
asm ("__mark_"#name"_jump_call") \
__attribute__((unused)) = \
&&call_label; \
static void (*__mark_##name##_call)(const char *fmt, ...) \
asm ("__mark_"#name"_call") = __mark_empty_function; \
goto *__mark_##name##_jump_over; \
call_label: \
(void) (__mark_##name##_call(format, ## args)); \
over_label: \
do {} while(0); \
} while(0)

A problem I saw in your approach was that there was no way to remove the
function pointer without taking the risk of breaking everything.

The solution I came up with is to set the function to an empty
__mark_empty_function when disabled, and set another function pointer to enable
it.

Any thoughts ?

Mathieu


OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68


Attachments:
(No filename) (3.45 kB)
signature.asc (189.00 B)
Digital signature
Download all attachments

2006-09-20 17:19:26

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH] Linux Kernel Markers 0.2 for Linux 2.6.17

And here is the sample module to use my jump-marker symbols :

(yes, it works!)

Addresses are taken by hand from /proc/kallsyms for now.

---BEGIN---

/* test-mark.c
*
*/

#include <linux/marker.h>
#include <linux/module.h>

/*
 * Hard-coded addresses used to reach the marker's variables and the empty
 * handler from this module.  They are literal values, so they are only
 * valid for the particular load they were read from.
 * NOTE(review): the jump_call initializer is cast to (void*) while the two
 * declarations above it use (void**) -- presumably an oversight; it still
 * converts implicitly to void **.
 */
static void **__mark_subsys_mark1_call = (void**)0xf887580c;
static void **__mark_subsys_mark1_jump_over = (void**)0xf8875814;
static void **__mark_subsys_mark1_jump_call = (void*)0xf8875810;
static void *__this_mark_empty_function = (void*)0xf8875000;

/* Jump target read from the jump_over slot in init_module, restored on exit. */
static void *saved_over;

/*
 * Handler stored into the marker's call pointer by init_module().
 * Prints the integer argument; the format parameter is not used.
 * NOTE(review): the jump-based MARK_CALL macro declares its pointer as
 * void (*)(const char *fmt, ...), whereas this handler is non-variadic --
 * presumably this relies on both using the same argument layout; confirm
 * per architecture.
 */
void do_mark1(const char *format, int value)
{
printk("value is %d\n", value);
}

/*
 * Module entry point: enables the marker.  Always returns 0.
 */
int init_module(void)
{
/* Install the handler before redirecting the jump towards the call. */
*__mark_subsys_mark1_call = (void*)do_mark1;
/* Remember the current jump target so cleanup_module() can restore it. */
saved_over = *__mark_subsys_mark1_jump_over;
/* Overwrite the jump_over slot with the value held in the jump_call slot. */
*__mark_subsys_mark1_jump_over = *__mark_subsys_mark1_jump_call;

return 0;
}

/*
 * Module exit point: disables the marker again.
 */
void cleanup_module(void)
{
/* Restore the saved jump target first, then swap the handler back out. */
*__mark_subsys_mark1_jump_over = saved_over;
*__mark_subsys_mark1_call = __this_mark_empty_function;
}

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mathieu Desnoyers");
MODULE_DESCRIPTION("Marker Test");

---END---




OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68


Attachments:
(No filename) (1.15 kB)
signature.asc (189.00 B)
Digital signature
Download all attachments