Subject: [PATCH -tip tracing/kprobes v2] Powerpc port of the kprobe-based event tracer

This patch ports the kprobe-based event tracer to powerpc. This patch
is based in x86 port. This brings powerpc on par with x86.

ChangeLog - v2:
- Removed regs_get_argument_nth() API as function argument access syntax
is dropped from kprobe-tracer. Please refer to patches below on Linux PPC
mailing list:
http://lists.ozlabs.org/pipermail/linuxppc-dev/2010-January/079331.html
http://lists.ozlabs.org/pipermail/linuxppc-dev/2010-January/079332.html
- Rebased to commit f61d6b1dcb06d62bc20d40e51c7a1e80275a80ab of -tip

This patch is dependent on following patch:
http://lists.ozlabs.org/pipermail/linuxppc-dev/2010-January/079331.html

Port the following API's to ppc for accessing registers and stack entries
from pt_regs.

- regs_query_register_offset(const char *name)
Query the offset of "name" register.

- regs_query_register_name(unsigned int offset)
Query the name of register by its offset.

- regs_get_register(struct pt_regs *regs, unsigned int offset)
Get the value of a register by its offset.

- regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
Check the address is in the kernel stack.

- regs_get_kernel_stack_nth(struct pt_regs *reg, unsigned int nth)
Get Nth entry of the kernel stack. (N >= 0)

Signed-off-by: Mahesh Salgaonkar <[email protected]>
Acked-by: Masami Hiramatsu <[email protected]>
---

arch/powerpc/include/asm/ptrace.h | 61 ++++++++++++++++++++++
arch/powerpc/kernel/ptrace.c | 102 +++++++++++++++++++++++++++++++++++++
kernel/trace/Kconfig | 2 -
3 files changed, 164 insertions(+), 1 deletions(-)


diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index cbd759e..cb2eeda 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -83,6 +83,7 @@ struct pt_regs {

#define instruction_pointer(regs) ((regs)->nip)
#define user_stack_pointer(regs) ((regs)->gpr[1])
+#define kernel_stack_pointer(regs) ((regs)->gpr[1])
#define regs_return_value(regs) ((regs)->gpr[3])

#ifdef CONFIG_SMP
@@ -131,6 +132,66 @@ do { \
} while (0)
#endif /* __powerpc64__ */

+/* Query offset/name of register from its name/offset */
+#include <linux/stddef.h>
+#include <linux/thread_info.h>
+extern int regs_query_register_offset(const char *name);
+extern const char *regs_query_register_name(unsigned int offset);
+#define MAX_REG_OFFSET (offsetof(struct pt_regs, result))
+
+/**
+ * regs_get_register() - get register value from its offset
+ * @regs: pt_regs from which register value is gotten
+ * @offset: offset number of the register.
+ *
+ * regs_get_register returns the value of a register whose offset from @regs.
+ * The @offset is the offset of the register in struct pt_regs.
+ * If @offset is bigger than MAX_REG_OFFSET, this returns 0.
+ */
+static inline unsigned long regs_get_register(struct pt_regs *regs,
+ unsigned int offset)
+{
+ if (unlikely(offset > MAX_REG_OFFSET))
+ return 0;
+ return *(unsigned long *)((unsigned long)regs + offset);
+}
+
+/**
+ * regs_within_kernel_stack() - check the address in the stack
+ * @regs: pt_regs which contains kernel stack pointer.
+ * @addr: address which is checked.
+ *
+ * regs_within_kernel_stack() checks @addr is within the kernel stack page(s).
+ * If @addr is within the kernel stack, it returns true. If not, returns false.
+ */
+
+static inline bool regs_within_kernel_stack(struct pt_regs *regs,
+ unsigned long addr)
+{
+ return ((addr & ~(THREAD_SIZE - 1)) ==
+ (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs: pt_regs which contains kernel stack pointer.
+ * @n: stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
+ * is specified by @regs. If the @n th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
+ unsigned int n)
+{
+ unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+ addr += n;
+ if (regs_within_kernel_stack(regs, (unsigned long)addr))
+ return *addr;
+ else
+ return 0;
+}
+
/*
* These are defined as per linux/ptrace.h, which see.
*/
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index ef14988..e816aba 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -39,6 +39,108 @@
#include <asm/system.h>

/*
+ * The parameter save area on the stack is used to store arguments being passed
+ * to callee function and is located at fixed offset from stack pointer.
+ */
+#ifdef CONFIG_PPC32
+#define PARAMETER_SAVE_AREA_OFFSET 24 /* bytes */
+#else /* CONFIG_PPC32 */
+#define PARAMETER_SAVE_AREA_OFFSET 48 /* bytes */
+#endif
+
+struct pt_regs_offset {
+ const char *name;
+ int offset;
+};
+
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+static const struct pt_regs_offset regoffset_table[] = {
+ REG_OFFSET_NAME(gpr[0]),
+ REG_OFFSET_NAME(gpr[1]),
+ REG_OFFSET_NAME(gpr[2]),
+ REG_OFFSET_NAME(gpr[3]),
+ REG_OFFSET_NAME(gpr[4]),
+ REG_OFFSET_NAME(gpr[5]),
+ REG_OFFSET_NAME(gpr[6]),
+ REG_OFFSET_NAME(gpr[7]),
+ REG_OFFSET_NAME(gpr[8]),
+ REG_OFFSET_NAME(gpr[9]),
+ REG_OFFSET_NAME(gpr[10]),
+ REG_OFFSET_NAME(gpr[11]),
+ REG_OFFSET_NAME(gpr[12]),
+ REG_OFFSET_NAME(gpr[13]),
+ REG_OFFSET_NAME(gpr[14]),
+ REG_OFFSET_NAME(gpr[15]),
+ REG_OFFSET_NAME(gpr[16]),
+ REG_OFFSET_NAME(gpr[17]),
+ REG_OFFSET_NAME(gpr[18]),
+ REG_OFFSET_NAME(gpr[19]),
+ REG_OFFSET_NAME(gpr[20]),
+ REG_OFFSET_NAME(gpr[21]),
+ REG_OFFSET_NAME(gpr[22]),
+ REG_OFFSET_NAME(gpr[23]),
+ REG_OFFSET_NAME(gpr[24]),
+ REG_OFFSET_NAME(gpr[25]),
+ REG_OFFSET_NAME(gpr[26]),
+ REG_OFFSET_NAME(gpr[27]),
+ REG_OFFSET_NAME(gpr[28]),
+ REG_OFFSET_NAME(gpr[29]),
+ REG_OFFSET_NAME(gpr[30]),
+ REG_OFFSET_NAME(gpr[31]),
+ REG_OFFSET_NAME(nip),
+ REG_OFFSET_NAME(msr),
+ REG_OFFSET_NAME(orig_gpr3),
+ REG_OFFSET_NAME(ctr),
+ REG_OFFSET_NAME(link),
+ REG_OFFSET_NAME(xer),
+ REG_OFFSET_NAME(ccr),
+#ifdef CONFIG_PPC64
+ REG_OFFSET_NAME(softe),
+#else
+ REG_OFFSET_NAME(mq),
+#endif
+ REG_OFFSET_NAME(trap),
+ REG_OFFSET_NAME(dar),
+ REG_OFFSET_NAME(dsisr),
+ REG_OFFSET_NAME(result),
+ REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name: the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL;
+ */
+int regs_query_register_offset(const char *name)
+{
+ const struct pt_regs_offset *roff;
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (!strcmp(roff->name, name))
+ return roff->offset;
+ return -EINVAL;
+}
+
+/**
+ * regs_query_register_name() - query register name from its offset
+ * @offset: the offset of a register in struct pt_regs.
+ *
+ * regs_query_register_name() returns the name of a register from its
+ * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
+ */
+const char *regs_query_register_name(unsigned int offset)
+{
+ const struct pt_regs_offset *roff;
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (roff->offset == offset)
+ return roff->name;
+ return NULL;
+}
+
+/*
* does not yet catch signals sent when the child dies.
* in exit.c or in signal.c.
*/
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 6c22d8a..4d95966 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -451,7 +451,7 @@ config BLK_DEV_IO_TRACE

config KPROBE_EVENT
depends on KPROBES
- depends on X86
+ depends on X86 || PPC
bool "Enable kprobes-based dynamic events"
select TRACING
default y


Subject: Re: [PATCH -tip tracing/kprobes v2] Powerpc port of the kprobe-based event tracer

Benjamin Herrenschmidt wrote:
> Hi Mahesh !
>
Hi Benjamin,

>> +/**
>> + * regs_within_kernel_stack() - check the address in the stack
>> + * @regs: pt_regs which contains kernel stack pointer.
>> + * @addr: address which is checked.
>> + *
>> + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s).
>> + * If @addr is within the kernel stack, it returns true. If not, returns false.
>> + */
>> +
>> +static inline bool regs_within_kernel_stack(struct pt_regs *regs,
>> + unsigned long addr)
>> +{
>> + return ((addr & ~(THREAD_SIZE - 1)) ==
>> + (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
>> +}
>
> Out of curiosity, what is the above meant for ? I'm trying to understand
> because it will not work with such things as interrupt or softirq
> stack...
>
This function is used to validate whether the given address falls within
the stack boundary. This gets invoked through kprobe pre/post handler
routine's while fetching nth entry from the stack. Even though it is
called in interrupt context, the pt_regs that we are using is in context
of kernel routine where the probe was hit.

>> +/**
>> + * regs_get_kernel_stack_nth() - get Nth entry of the stack
>> + * @regs: pt_regs which contains kernel stack pointer.
>> + * @n: stack entry number.
>> + *
>> + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
>> + * is specified by @regs. If the @n th entry is NOT in the kernel stack,
>> + * this returns 0.
>> + */
>> +static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
>> + unsigned int n)
>> +{
>> + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
>> + addr += n;
>> + if (regs_within_kernel_stack(regs, (unsigned long)addr))
>> + return *addr;
>> + else
>> + return 0;
>> +}
>
> Is this meant to fetch stack based arguments or do backtraces or similar
> or does this have a different purpose ?
>
The purpose is to allow kernel developer to fetch/monitor n'th entry on
the stack when the probe is hit, through ftrace plugin. It can also be
used to fetch stack based arguments.

Following is the example how kernel developer may use it to fetch n'th
entry from the stack:

# echo p:myprobe do_sys_open S1=\$stack1 S2=\$stack2 >
/sys/kernel/debug/tracing/kprobe_events
# echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable

# cat trace
# tracer: nop
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
pcscd-4159 [001] 668343.548210: myprobe:
(.do_sys_open+0x0/0x1a4) S1=c000000000c29d20 S2=c00000000019d774
pcscd-4159 [001] 668343.548308: myprobe:
(.do_sys_open+0x0/0x1a4) S1=c000000000c29d20 S2=c00000000019d774
pcscd-4159 [001] 668343.548331: myprobe:
(.do_sys_open+0x0/0x1a4) S1=c000000000c29d20 S2=c0000000001f1f30
pcscd-4159 [001] 668343.548377: myprobe:
(.do_sys_open+0x0/0x1a4) S1=c000000000c29d20 S2=c00000000019d774
pcscd-4159 [001] 668343.548399: myprobe:
(.do_sys_open+0x0/0x1a4) S1=c000000000c29d20 S2=c00000000019d774


>> /*
>> * These are defined as per linux/ptrace.h, which see.
>> */
>> diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
>> index ef14988..e816aba 100644
>> --- a/arch/powerpc/kernel/ptrace.c
>> +++ b/arch/powerpc/kernel/ptrace.c
>> @@ -39,6 +39,108 @@
>> #include <asm/system.h>
>>
>> /*
>> + * The parameter save area on the stack is used to store arguments being passed
>> + * to callee function and is located at fixed offset from stack pointer.
>> + */
>> +#ifdef CONFIG_PPC32
>> +#define PARAMETER_SAVE_AREA_OFFSET 24 /* bytes */
>> +#else /* CONFIG_PPC32 */
>> +#define PARAMETER_SAVE_AREA_OFFSET 48 /* bytes */
>> +#endif
>> +
>> +struct pt_regs_offset {
>> + const char *name;
>> + int offset;
>> +};
>> +
>> +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
>> +#define REG_OFFSET_END {.name = NULL, .offset = 0}
>> +
>> +static const struct pt_regs_offset regoffset_table[] = {
>> + REG_OFFSET_NAME(gpr[0]),
>> + REG_OFFSET_NAME(gpr[1]),
>> + REG_OFFSET_NAME(gpr[2]),
>> + REG_OFFSET_NAME(gpr[3]),
>> + REG_OFFSET_NAME(gpr[4]),
>> + REG_OFFSET_NAME(gpr[5]),
>> + REG_OFFSET_NAME(gpr[6]),
>> + REG_OFFSET_NAME(gpr[7]),
>> + REG_OFFSET_NAME(gpr[8]),
>> + REG_OFFSET_NAME(gpr[9]),
>> + REG_OFFSET_NAME(gpr[10]),
>> + REG_OFFSET_NAME(gpr[11]),
>> + REG_OFFSET_NAME(gpr[12]),
>> + REG_OFFSET_NAME(gpr[13]),
>> + REG_OFFSET_NAME(gpr[14]),
>> + REG_OFFSET_NAME(gpr[15]),
>> + REG_OFFSET_NAME(gpr[16]),
>> + REG_OFFSET_NAME(gpr[17]),
>> + REG_OFFSET_NAME(gpr[18]),
>> + REG_OFFSET_NAME(gpr[19]),
>> + REG_OFFSET_NAME(gpr[20]),
>> + REG_OFFSET_NAME(gpr[21]),
>> + REG_OFFSET_NAME(gpr[22]),
>> + REG_OFFSET_NAME(gpr[23]),
>> + REG_OFFSET_NAME(gpr[24]),
>> + REG_OFFSET_NAME(gpr[25]),
>> + REG_OFFSET_NAME(gpr[26]),
>> + REG_OFFSET_NAME(gpr[27]),
>> + REG_OFFSET_NAME(gpr[28]),
>> + REG_OFFSET_NAME(gpr[29]),
>> + REG_OFFSET_NAME(gpr[30]),
>> + REG_OFFSET_NAME(gpr[31]),
>
> I find it weird that you have the [ an ] in the name ... We usually name
> these guys gpr0...gpr31 or even r0...r31. Is that name user visible ?
> Maybe you should use a different macro GPR_OFFSET_NAME(num) that
> generates both gpr[num] for the offsetof and r##num for the register
> name.
>
Yup, make sense. I will make the change. Will use gpr0...gpr31 to be
consistent with pt_regs structure member name "gpr".

>> + REG_OFFSET_NAME(nip),
>> + REG_OFFSET_NAME(msr),
>> + REG_OFFSET_NAME(orig_gpr3),
>> + REG_OFFSET_NAME(ctr),
>> + REG_OFFSET_NAME(link),
>> + REG_OFFSET_NAME(xer),
>> + REG_OFFSET_NAME(ccr),
>> +#ifdef CONFIG_PPC64
>> + REG_OFFSET_NAME(softe),
>> +#else
>> + REG_OFFSET_NAME(mq),
>> +#endif
>> + REG_OFFSET_NAME(trap),
>> + REG_OFFSET_NAME(dar),
>> + REG_OFFSET_NAME(dsisr),
>> + REG_OFFSET_NAME(result),
>> + REG_OFFSET_END,
>> +};
>
> Do you need to expose orig_gpr3 and result there ?
>
I had included them to give an exact representation of pt_regs
structure. However, I am ok with dropping them.
> Cheers,
> Ben.
>
>

Thanks for reviewing.
-Mahesh.

2010-01-15 03:13:22

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH -tip tracing/kprobes v2] Powerpc port of the kprobe-based event tracer

Hi Mahesh !

> +/**
> + * regs_within_kernel_stack() - check the address in the stack
> + * @regs: pt_regs which contains kernel stack pointer.
> + * @addr: address which is checked.
> + *
> + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s).
> + * If @addr is within the kernel stack, it returns true. If not, returns false.
> + */
> +
> +static inline bool regs_within_kernel_stack(struct pt_regs *regs,
> + unsigned long addr)
> +{
> + return ((addr & ~(THREAD_SIZE - 1)) ==
> + (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
> +}

Out of curiosity, what is the above meant for ? I'm trying to understand
because it will not work with such things as interrupt or softirq
stack...

> +/**
> + * regs_get_kernel_stack_nth() - get Nth entry of the stack
> + * @regs: pt_regs which contains kernel stack pointer.
> + * @n: stack entry number.
> + *
> + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
> + * is specified by @regs. If the @n th entry is NOT in the kernel stack,
> + * this returns 0.
> + */
> +static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
> + unsigned int n)
> +{
> + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
> + addr += n;
> + if (regs_within_kernel_stack(regs, (unsigned long)addr))
> + return *addr;
> + else
> + return 0;
> +}

Is this meant to fetch stack based arguments or do backtraces or similar
or does this have a different purpose ?

> /*
> * These are defined as per linux/ptrace.h, which see.
> */
> diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
> index ef14988..e816aba 100644
> --- a/arch/powerpc/kernel/ptrace.c
> +++ b/arch/powerpc/kernel/ptrace.c
> @@ -39,6 +39,108 @@
> #include <asm/system.h>
>
> /*
> + * The parameter save area on the stack is used to store arguments being passed
> + * to callee function and is located at fixed offset from stack pointer.
> + */
> +#ifdef CONFIG_PPC32
> +#define PARAMETER_SAVE_AREA_OFFSET 24 /* bytes */
> +#else /* CONFIG_PPC32 */
> +#define PARAMETER_SAVE_AREA_OFFSET 48 /* bytes */
> +#endif
> +
> +struct pt_regs_offset {
> + const char *name;
> + int offset;
> +};
> +
> +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
> +#define REG_OFFSET_END {.name = NULL, .offset = 0}
> +
> +static const struct pt_regs_offset regoffset_table[] = {
> + REG_OFFSET_NAME(gpr[0]),
> + REG_OFFSET_NAME(gpr[1]),
> + REG_OFFSET_NAME(gpr[2]),
> + REG_OFFSET_NAME(gpr[3]),
> + REG_OFFSET_NAME(gpr[4]),
> + REG_OFFSET_NAME(gpr[5]),
> + REG_OFFSET_NAME(gpr[6]),
> + REG_OFFSET_NAME(gpr[7]),
> + REG_OFFSET_NAME(gpr[8]),
> + REG_OFFSET_NAME(gpr[9]),
> + REG_OFFSET_NAME(gpr[10]),
> + REG_OFFSET_NAME(gpr[11]),
> + REG_OFFSET_NAME(gpr[12]),
> + REG_OFFSET_NAME(gpr[13]),
> + REG_OFFSET_NAME(gpr[14]),
> + REG_OFFSET_NAME(gpr[15]),
> + REG_OFFSET_NAME(gpr[16]),
> + REG_OFFSET_NAME(gpr[17]),
> + REG_OFFSET_NAME(gpr[18]),
> + REG_OFFSET_NAME(gpr[19]),
> + REG_OFFSET_NAME(gpr[20]),
> + REG_OFFSET_NAME(gpr[21]),
> + REG_OFFSET_NAME(gpr[22]),
> + REG_OFFSET_NAME(gpr[23]),
> + REG_OFFSET_NAME(gpr[24]),
> + REG_OFFSET_NAME(gpr[25]),
> + REG_OFFSET_NAME(gpr[26]),
> + REG_OFFSET_NAME(gpr[27]),
> + REG_OFFSET_NAME(gpr[28]),
> + REG_OFFSET_NAME(gpr[29]),
> + REG_OFFSET_NAME(gpr[30]),
> + REG_OFFSET_NAME(gpr[31]),

I find it weird that you have the [ an ] in the name ... We usually name
these guys gpr0...gpr31 or even r0...r31. Is that name user visible ?
Maybe you should use a different macro GPR_OFFSET_NAME(num) that
generates both gpr[num] for the offsetof and r##num for the register
name.

> + REG_OFFSET_NAME(nip),
> + REG_OFFSET_NAME(msr),
> + REG_OFFSET_NAME(orig_gpr3),
> + REG_OFFSET_NAME(ctr),
> + REG_OFFSET_NAME(link),
> + REG_OFFSET_NAME(xer),
> + REG_OFFSET_NAME(ccr),
> +#ifdef CONFIG_PPC64
> + REG_OFFSET_NAME(softe),
> +#else
> + REG_OFFSET_NAME(mq),
> +#endif
> + REG_OFFSET_NAME(trap),
> + REG_OFFSET_NAME(dar),
> + REG_OFFSET_NAME(dsisr),
> + REG_OFFSET_NAME(result),
> + REG_OFFSET_END,
> +};

Do you need to expose orig_gpr3 and result there ?

Cheers,
Ben.