2022-11-16 12:24:18

by Song Chen

[permalink] [raw]
Subject: [PATCH 1/4] kernel/trace: Introduce new APIs to process probe arguments

Introduce 3 new APIs:
1. trace_probe_get_data_size: get arguments' data size
2. trace_probe_store_args: store aruguments into ring buffer
3. trace_probe_print_args: print arguments into trace file

Those APIs are going to merge similar implementations respectively
in kprobe/uprobe/eprobe.

Signed-off-by: Song Chen <[email protected]>
---
kernel/trace/trace_probe.c | 305 ++++++++++++++++++++++++++++++
kernel/trace/trace_probe.h | 5 +
kernel/trace/trace_probe_common.h | 69 +++++++
kernel/trace/trace_probe_user.h | 95 ++++++++++
4 files changed, 474 insertions(+)
create mode 100644 kernel/trace/trace_probe_common.h
create mode 100644 kernel/trace/trace_probe_user.h

diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 36dff277de46..303f057bd2f7 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -12,6 +12,9 @@
#define pr_fmt(fmt) "trace_probe: " fmt

#include "trace_probe.h"
+#include "trace_probe_kernel.h"
+#include "trace_probe_user.h"
+#include "trace_probe_common.h"

#undef C
#define C(a, b) b
@@ -1218,3 +1221,305 @@ int trace_probe_create(const char *raw_command, int (*createfn)(int, const char

return ret;
}
+
+/* From the 2nd stage, routine is same */
+static nokprobe_inline int
+process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
+ void *dest, void *base, int flags)
+{
+ struct fetch_insn *s3 = NULL;
+ int total = 0, ret = 0, i = 0;
+ u32 loc = 0;
+ unsigned long lval = val;
+ int is_uprobe = flags & TRACE_EVENT_FL_UPROBE;
+
+stage2:
+ /* 2nd stage: dereference memory if needed */
+ do {
+ if (code->op == FETCH_OP_DEREF) {
+ lval = val;
+ ret = probe_mem_read(&val, (void *)val + code->offset,
+ sizeof(val));
+ } else if (code->op == FETCH_OP_UDEREF) {
+ lval = val;
+ ret = probe_mem_read_user(&val,
+ (void *)val + code->offset, sizeof(val));
+ } else
+ break;
+ if (ret)
+ return ret;
+ code++;
+ } while (1);
+
+ s3 = code;
+stage3:
+ /* 3rd stage: store value to buffer */
+ if (unlikely(!dest)) {
+ if (code->op == FETCH_OP_ST_STRING) {
+ if (is_uprobe)
+ ret = user_fetch_store_strlen(val + code->offset);
+ else
+ ret = kern_fetch_store_strlen(val + code->offset);
+ code++;
+ goto array;
+ } else if (code->op == FETCH_OP_ST_USTRING) {
+ if (is_uprobe)
+ ret += user_fetch_store_strlen_user(val + code->offset);
+ else
+ ret += kern_fetch_store_strlen_user(val + code->offset);
+ code++;
+ goto array;
+ } else
+ return -EILSEQ;
+ }
+
+ switch (code->op) {
+ case FETCH_OP_ST_RAW:
+ fetch_store_raw(val, code, dest);
+ break;
+ case FETCH_OP_ST_MEM:
+ probe_mem_read(dest, (void *)val + code->offset, code->size);
+ break;
+ case FETCH_OP_ST_UMEM:
+ probe_mem_read_user(dest, (void *)val + code->offset, code->size);
+ break;
+ case FETCH_OP_ST_STRING:
+ loc = *(u32 *)dest;
+ if (is_uprobe)
+ ret = user_fetch_store_string(val + code->offset, dest, base);
+ else
+ ret = kern_fetch_store_string(val + code->offset, dest, base);
+ break;
+ case FETCH_OP_ST_USTRING:
+ loc = *(u32 *)dest;
+ if (is_uprobe)
+ ret = user_fetch_store_string_user(val + code->offset, dest, base);
+ else
+ ret = kern_fetch_store_string_user(val + code->offset, dest, base);
+ break;
+ default:
+ return -EILSEQ;
+ }
+ code++;
+
+ /* 4th stage: modify stored value if needed */
+ if (code->op == FETCH_OP_MOD_BF) {
+ fetch_apply_bitfield(code, dest);
+ code++;
+ }
+
+array:
+ /* the last stage: Loop on array */
+ if (code->op == FETCH_OP_LP_ARRAY) {
+ total += ret;
+ if (++i < code->param) {
+ code = s3;
+ if (s3->op != FETCH_OP_ST_STRING &&
+ s3->op != FETCH_OP_ST_USTRING) {
+ dest += s3->size;
+ val += s3->size;
+ goto stage3;
+ }
+ code--;
+ val = lval + sizeof(char *);
+ if (dest) {
+ dest += sizeof(u32);
+ *(u32 *)dest = update_data_loc(loc, ret);
+ }
+ goto stage2;
+ }
+ code++;
+ ret = total;
+ }
+
+ return code->op == FETCH_OP_END ? ret : -EILSEQ;
+}
+
+static unsigned long get_event_field(struct fetch_insn *code, void *rec)
+{
+ struct ftrace_event_field *field = code->data;
+ unsigned long val;
+ void *addr;
+
+ addr = rec + field->offset;
+
+ if (is_string_field(field)) {
+ switch (field->filter_type) {
+ case FILTER_DYN_STRING:
+ val = (unsigned long)(rec + (*(unsigned int *)addr & 0xffff));
+ break;
+ case FILTER_RDYN_STRING:
+ val = (unsigned long)(addr + (*(unsigned int *)addr & 0xffff));
+ break;
+ case FILTER_STATIC_STRING:
+ val = (unsigned long)addr;
+ break;
+ case FILTER_PTR_STRING:
+ val = (unsigned long)(*(char *)addr);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+ return val;
+ }
+
+ switch (field->size) {
+ case 1:
+ if (field->is_signed)
+ val = *(char *)addr;
+ else
+ val = *(unsigned char *)addr;
+ break;
+ case 2:
+ if (field->is_signed)
+ val = *(short *)addr;
+ else
+ val = *(unsigned short *)addr;
+ break;
+ case 4:
+ if (field->is_signed)
+ val = *(int *)addr;
+ else
+ val = *(unsigned int *)addr;
+ break;
+ default:
+ if (field->is_signed)
+ val = *(long *)addr;
+ else
+ val = *(unsigned long *)addr;
+ break;
+ }
+ return val;
+}
+
+/* Note that we don't verify it, since the code does not come from user space */
+static int
+process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
+ void *base, int flags)
+{
+ struct pt_regs *regs = rec;
+ unsigned long val;
+ int is_uprobe = flags & TRACE_EVENT_FL_UPROBE;
+
+retry:
+ /* 1st stage: get value from context */
+ switch (code->op) {
+ case FETCH_OP_REG:
+ val = regs_get_register(regs, code->param);
+ break;
+ case FETCH_OP_STACK:
+ if (is_uprobe)
+ val = get_user_stack_nth(regs, code->param);
+ else
+ val = regs_get_kernel_stack_nth(regs, code->param);
+ break;
+ case FETCH_OP_STACKP:
+ if (is_uprobe)
+ val = user_stack_pointer(regs);
+ else
+ val = kernel_stack_pointer(regs);
+ break;
+ case FETCH_OP_RETVAL:
+ val = regs_return_value(regs);
+ break;
+ case FETCH_OP_IMM:
+ val = code->immediate;
+ break;
+ case FETCH_OP_COMM:
+ val = (unsigned long)current->comm;
+ break;
+ case FETCH_OP_DATA:
+ val = (unsigned long)code->data;
+ break;
+#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+ case FETCH_OP_ARG:
+ val = regs_get_kernel_argument(regs, code->param);
+ break;
+#endif
+ case FETCH_NOP_SYMBOL: /* Ignore a place holder */
+ code++;
+ goto retry;
+ case FETCH_OP_TP_ARG:
+ val = get_event_field(code, rec);
+ break;
+ default:
+ return -EILSEQ;
+ }
+ code++;
+
+ return process_fetch_insn_bottom(code, val, dest, base, flags);
+}
+NOKPROBE_SYMBOL(process_fetch_insn)
+
+/* Sum up total data length for dynamic arrays (strings) */
+int trace_probe_get_data_size(struct trace_probe *tp, struct pt_regs *regs)
+{
+ struct probe_arg *arg;
+ int i, len, ret = 0;
+ struct trace_event_call *call = trace_probe_event_call(tp);
+
+ for (i = 0; i < tp->nr_args; i++) {
+ arg = tp->args + i;
+ if (unlikely(arg->dynamic)) {
+ len = process_fetch_insn(arg->code, regs, NULL, NULL, call->flags);
+ if (len > 0)
+ ret += len;
+ }
+ }
+
+ return ret;
+}
+
+void trace_probe_store_args(void *data, struct trace_probe *tp, void *rec,
+ int header_size, int maxlen)
+{
+ struct probe_arg *arg;
+ void *base = data - header_size;
+ void *dyndata = data + tp->size;
+ u32 *dl; /* Data location */
+ int ret, i;
+ struct trace_event_call *call = trace_probe_event_call(tp);
+
+ for (i = 0; i < tp->nr_args; i++) {
+ arg = tp->args + i;
+ dl = data + arg->offset;
+ /* Point the dynamic data area if needed */
+ if (unlikely(arg->dynamic))
+ *dl = make_data_loc(maxlen, dyndata - base);
+ ret = process_fetch_insn(arg->code, rec, dl, base, call->flags);
+ if (unlikely(ret < 0 && arg->dynamic)) {
+ *dl = make_data_loc(0, dyndata - base);
+ } else {
+ dyndata += ret;
+ maxlen -= ret;
+ }
+ }
+}
+
+int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
+ u8 *data, void *field)
+{
+ void *p;
+ int i, j;
+
+ for (i = 0; i < nr_args; i++) {
+ struct probe_arg *a = args + i;
+
+ trace_seq_printf(s, " %s=", a->name);
+ if (likely(!a->count)) {
+ if (!a->type->print(s, data + a->offset, field))
+ return -ENOMEM;
+ continue;
+ }
+ trace_seq_putc(s, '{');
+ p = data + a->offset;
+ for (j = 0; j < a->count; j++) {
+ if (!a->type->print(s, p, field))
+ return -ENOMEM;
+ trace_seq_putc(s, j == a->count - 1 ? '}' : ',');
+ p += a->type->size;
+ }
+ }
+ return 0;
+}
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index de38f1c03776..4f044047b748 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -343,6 +343,11 @@ int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b);
bool trace_probe_match_command_args(struct trace_probe *tp,
int argc, const char **argv);
int trace_probe_create(const char *raw_command, int (*createfn)(int, const char **));
+int trace_probe_get_data_size(struct trace_probe *tp, struct pt_regs *regs);
+void trace_probe_store_args(void *data, struct trace_probe *tp, void *rec,
+ int header_size, int maxlen);
+int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
+ u8 *data, void *field);

#define trace_probe_for_each_link(pos, tp) \
list_for_each_entry(pos, &(tp)->event->files, list)
diff --git a/kernel/trace/trace_probe_common.h b/kernel/trace/trace_probe_common.h
new file mode 100644
index 000000000000..b8d77447fe0c
--- /dev/null
+++ b/kernel/trace/trace_probe_common.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TRACE_PROBE_COMMON_H_
+#define __TRACE_PROBE_COMMON_H_
+
+#define FAULT_STRING "(fault)"
+
+static nokprobe_inline void
+fetch_store_raw(unsigned long val, struct fetch_insn *code, void *buf)
+{
+ switch (code->size) {
+ case 1:
+ *(u8 *)buf = (u8)val;
+ break;
+ case 2:
+ *(u16 *)buf = (u16)val;
+ break;
+ case 4:
+ *(u32 *)buf = (u32)val;
+ break;
+ case 8:
+ //TBD: 32bit signed
+ *(u64 *)buf = (u64)val;
+ break;
+ default:
+ *(unsigned long *)buf = val;
+ }
+}
+
+static nokprobe_inline void
+fetch_apply_bitfield(struct fetch_insn *code, void *buf)
+{
+ switch (code->basesize) {
+ case 1:
+ *(u8 *)buf <<= code->lshift;
+ *(u8 *)buf >>= code->rshift;
+ break;
+ case 2:
+ *(u16 *)buf <<= code->lshift;
+ *(u16 *)buf >>= code->rshift;
+ break;
+ case 4:
+ *(u32 *)buf <<= code->lshift;
+ *(u32 *)buf >>= code->rshift;
+ break;
+ case 8:
+ *(u64 *)buf <<= code->lshift;
+ *(u64 *)buf >>= code->rshift;
+ break;
+ }
+}
+
+static nokprobe_inline int
+probe_mem_read_user(void *dest, void *src, size_t size)
+{
+ const void __user *uaddr = (__force const void __user *)src;
+
+ return copy_from_user_nofault(dest, uaddr, size);
+}
+
+static nokprobe_inline int
+probe_mem_read(void *dest, void *src, size_t size)
+{
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+ if ((unsigned long)src < TASK_SIZE)
+ return probe_mem_read_user(dest, src, size);
+#endif
+ return copy_from_kernel_nofault(dest, src, size);
+}
+#endif /* __TRACE_PROBE_COMMON_H_ */
diff --git a/kernel/trace/trace_probe_user.h b/kernel/trace/trace_probe_user.h
new file mode 100644
index 000000000000..2104ccb44d56
--- /dev/null
+++ b/kernel/trace/trace_probe_user.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TRACE_PROBE_USER_H_
+#define __TRACE_PROBE_USER_H_
+
+#define FAULT_STRING "(fault)"
+
+/*
+ * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
+ * length and relative data location.
+ */
+static nokprobe_inline int
+user_fetch_store_string(unsigned long addr, void *dest, void *base)
+{
+ long ret;
+ u32 loc = *(u32 *)dest;
+ int maxlen = get_loc_len(loc);
+ u8 *dst = get_loc_data(dest, base);
+ void __user *src = (void __force __user *) addr;
+
+ if (unlikely(!maxlen))
+ return -ENOMEM;
+
+ if (addr == FETCH_TOKEN_COMM)
+ ret = strlcpy(dst, current->comm, maxlen);
+ else
+ ret = strncpy_from_user(dst, src, maxlen);
+ if (ret >= 0) {
+ if (ret == maxlen)
+ dst[ret - 1] = '\0';
+ else
+ /*
+ * Include the terminating null byte. In this case it
+ * was copied by strncpy_from_user but not accounted
+ * for in ret.
+ */
+ ret++;
+ *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
+ }
+
+ return ret;
+}
+
+static nokprobe_inline int
+user_fetch_store_string_user(unsigned long addr, void *dest, void *base)
+{
+ return user_fetch_store_string(addr, dest, base);
+}
+
+/* Return the length of string -- including null terminal byte */
+static nokprobe_inline int
+user_fetch_store_strlen(unsigned long addr)
+{
+ int len;
+ void __user *vaddr = (void __force __user *) addr;
+
+ if (addr == FETCH_TOKEN_COMM)
+ len = strlen(current->comm) + 1;
+ else
+ len = strnlen_user(vaddr, MAX_STRING_SIZE);
+
+ return (len > MAX_STRING_SIZE) ? 0 : len;
+}
+
+static nokprobe_inline int
+user_fetch_store_strlen_user(unsigned long addr)
+{
+ return user_fetch_store_strlen(addr);
+}
+
+#ifdef CONFIG_STACK_GROWSUP
+static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
+{
+ return addr - (n * sizeof(long));
+}
+#else
+static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
+{
+ return addr + (n * sizeof(long));
+}
+#endif
+
+static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+ unsigned long ret;
+ unsigned long addr = user_stack_pointer(regs);
+
+ addr = adjust_stack_addr(addr, n);
+
+ if (copy_from_user(&ret, (void __force __user *) addr, sizeof(ret)))
+ return 0;
+
+ return ret;
+}
+
+#endif /* __TRACE_PROBE_USER_H_ */
--
2.25.1



2022-11-21 22:59:04

by Masami Hiramatsu

[permalink] [raw]
Subject: Re: [PATCH 1/4] kernel/trace: Introduce new APIs to process probe arguments

Hi Song,

On Wed, 16 Nov 2022 20:25:07 +0800
Song Chen <[email protected]> wrote:

> Introduce 3 new APIs:
> 1. trace_probe_get_data_size: get arguments' data size
> 2. trace_probe_store_args: store aruguments into ring buffer
> 3. trace_probe_print_args: print arguments into trace file

Ah, I meant that split the patches into

Introduce trace_probe_get_data_size() and use it in *probes
Introduce trace_probe_store_args() and use it in *probes
Introduce trace_probe_print_args and use it in *probes

Then we can easily understand by its meaning.

Can you reform this series again?

Thank you,

>
> Those APIs are going to merge similar implementations respectively
> in kprobe/uprobe/eprobe.
>
> Signed-off-by: Song Chen <[email protected]>
> ---
> kernel/trace/trace_probe.c | 305 ++++++++++++++++++++++++++++++
> kernel/trace/trace_probe.h | 5 +
> kernel/trace/trace_probe_common.h | 69 +++++++
> kernel/trace/trace_probe_user.h | 95 ++++++++++
> 4 files changed, 474 insertions(+)
> create mode 100644 kernel/trace/trace_probe_common.h
> create mode 100644 kernel/trace/trace_probe_user.h
>
> diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
> index 36dff277de46..303f057bd2f7 100644
> --- a/kernel/trace/trace_probe.c
> +++ b/kernel/trace/trace_probe.c
> @@ -12,6 +12,9 @@
> #define pr_fmt(fmt) "trace_probe: " fmt
>
> #include "trace_probe.h"
> +#include "trace_probe_kernel.h"
> +#include "trace_probe_user.h"
> +#include "trace_probe_common.h"
>
> #undef C
> #define C(a, b) b
> @@ -1218,3 +1221,305 @@ int trace_probe_create(const char *raw_command, int (*createfn)(int, const char
>
> return ret;
> }
> +
> +/* From the 2nd stage, routine is same */
> +static nokprobe_inline int
> +process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
> + void *dest, void *base, int flags)
> +{
> + struct fetch_insn *s3 = NULL;
> + int total = 0, ret = 0, i = 0;
> + u32 loc = 0;
> + unsigned long lval = val;
> + int is_uprobe = flags & TRACE_EVENT_FL_UPROBE;
> +
> +stage2:
> + /* 2nd stage: dereference memory if needed */
> + do {
> + if (code->op == FETCH_OP_DEREF) {
> + lval = val;
> + ret = probe_mem_read(&val, (void *)val + code->offset,
> + sizeof(val));
> + } else if (code->op == FETCH_OP_UDEREF) {
> + lval = val;
> + ret = probe_mem_read_user(&val,
> + (void *)val + code->offset, sizeof(val));
> + } else
> + break;
> + if (ret)
> + return ret;
> + code++;
> + } while (1);
> +
> + s3 = code;
> +stage3:
> + /* 3rd stage: store value to buffer */
> + if (unlikely(!dest)) {
> + if (code->op == FETCH_OP_ST_STRING) {
> + if (is_uprobe)
> + ret = user_fetch_store_strlen(val + code->offset);
> + else
> + ret = kern_fetch_store_strlen(val + code->offset);
> + code++;
> + goto array;
> + } else if (code->op == FETCH_OP_ST_USTRING) {
> + if (is_uprobe)
> + ret += user_fetch_store_strlen_user(val + code->offset);
> + else
> + ret += kern_fetch_store_strlen_user(val + code->offset);
> + code++;
> + goto array;
> + } else
> + return -EILSEQ;
> + }
> +
> + switch (code->op) {
> + case FETCH_OP_ST_RAW:
> + fetch_store_raw(val, code, dest);
> + break;
> + case FETCH_OP_ST_MEM:
> + probe_mem_read(dest, (void *)val + code->offset, code->size);
> + break;
> + case FETCH_OP_ST_UMEM:
> + probe_mem_read_user(dest, (void *)val + code->offset, code->size);
> + break;
> + case FETCH_OP_ST_STRING:
> + loc = *(u32 *)dest;
> + if (is_uprobe)
> + ret = user_fetch_store_string(val + code->offset, dest, base);
> + else
> + ret = kern_fetch_store_string(val + code->offset, dest, base);
> + break;
> + case FETCH_OP_ST_USTRING:
> + loc = *(u32 *)dest;
> + if (is_uprobe)
> + ret = user_fetch_store_string_user(val + code->offset, dest, base);
> + else
> + ret = kern_fetch_store_string_user(val + code->offset, dest, base);
> + break;
> + default:
> + return -EILSEQ;
> + }
> + code++;
> +
> + /* 4th stage: modify stored value if needed */
> + if (code->op == FETCH_OP_MOD_BF) {
> + fetch_apply_bitfield(code, dest);
> + code++;
> + }
> +
> +array:
> + /* the last stage: Loop on array */
> + if (code->op == FETCH_OP_LP_ARRAY) {
> + total += ret;
> + if (++i < code->param) {
> + code = s3;
> + if (s3->op != FETCH_OP_ST_STRING &&
> + s3->op != FETCH_OP_ST_USTRING) {
> + dest += s3->size;
> + val += s3->size;
> + goto stage3;
> + }
> + code--;
> + val = lval + sizeof(char *);
> + if (dest) {
> + dest += sizeof(u32);
> + *(u32 *)dest = update_data_loc(loc, ret);
> + }
> + goto stage2;
> + }
> + code++;
> + ret = total;
> + }
> +
> + return code->op == FETCH_OP_END ? ret : -EILSEQ;
> +}
> +
> +static unsigned long get_event_field(struct fetch_insn *code, void *rec)
> +{
> + struct ftrace_event_field *field = code->data;
> + unsigned long val;
> + void *addr;
> +
> + addr = rec + field->offset;
> +
> + if (is_string_field(field)) {
> + switch (field->filter_type) {
> + case FILTER_DYN_STRING:
> + val = (unsigned long)(rec + (*(unsigned int *)addr & 0xffff));
> + break;
> + case FILTER_RDYN_STRING:
> + val = (unsigned long)(addr + (*(unsigned int *)addr & 0xffff));
> + break;
> + case FILTER_STATIC_STRING:
> + val = (unsigned long)addr;
> + break;
> + case FILTER_PTR_STRING:
> + val = (unsigned long)(*(char *)addr);
> + break;
> + default:
> + WARN_ON_ONCE(1);
> + return 0;
> + }
> + return val;
> + }
> +
> + switch (field->size) {
> + case 1:
> + if (field->is_signed)
> + val = *(char *)addr;
> + else
> + val = *(unsigned char *)addr;
> + break;
> + case 2:
> + if (field->is_signed)
> + val = *(short *)addr;
> + else
> + val = *(unsigned short *)addr;
> + break;
> + case 4:
> + if (field->is_signed)
> + val = *(int *)addr;
> + else
> + val = *(unsigned int *)addr;
> + break;
> + default:
> + if (field->is_signed)
> + val = *(long *)addr;
> + else
> + val = *(unsigned long *)addr;
> + break;
> + }
> + return val;
> +}
> +
> +/* Note that we don't verify it, since the code does not come from user space */
> +static int
> +process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
> + void *base, int flags)
> +{
> + struct pt_regs *regs = rec;
> + unsigned long val;
> + int is_uprobe = flags & TRACE_EVENT_FL_UPROBE;
> +
> +retry:
> + /* 1st stage: get value from context */
> + switch (code->op) {
> + case FETCH_OP_REG:
> + val = regs_get_register(regs, code->param);
> + break;
> + case FETCH_OP_STACK:
> + if (is_uprobe)
> + val = get_user_stack_nth(regs, code->param);
> + else
> + val = regs_get_kernel_stack_nth(regs, code->param);
> + break;
> + case FETCH_OP_STACKP:
> + if (is_uprobe)
> + val = user_stack_pointer(regs);
> + else
> + val = kernel_stack_pointer(regs);
> + break;
> + case FETCH_OP_RETVAL:
> + val = regs_return_value(regs);
> + break;
> + case FETCH_OP_IMM:
> + val = code->immediate;
> + break;
> + case FETCH_OP_COMM:
> + val = (unsigned long)current->comm;
> + break;
> + case FETCH_OP_DATA:
> + val = (unsigned long)code->data;
> + break;
> +#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
> + case FETCH_OP_ARG:
> + val = regs_get_kernel_argument(regs, code->param);
> + break;
> +#endif
> + case FETCH_NOP_SYMBOL: /* Ignore a place holder */
> + code++;
> + goto retry;
> + case FETCH_OP_TP_ARG:
> + val = get_event_field(code, rec);
> + break;
> + default:
> + return -EILSEQ;
> + }
> + code++;
> +
> + return process_fetch_insn_bottom(code, val, dest, base, flags);
> +}
> +NOKPROBE_SYMBOL(process_fetch_insn)
> +
> +/* Sum up total data length for dynamic arrays (strings) */
> +int trace_probe_get_data_size(struct trace_probe *tp, struct pt_regs *regs)
> +{
> + struct probe_arg *arg;
> + int i, len, ret = 0;
> + struct trace_event_call *call = trace_probe_event_call(tp);
> +
> + for (i = 0; i < tp->nr_args; i++) {
> + arg = tp->args + i;
> + if (unlikely(arg->dynamic)) {
> + len = process_fetch_insn(arg->code, regs, NULL, NULL, call->flags);
> + if (len > 0)
> + ret += len;
> + }
> + }
> +
> + return ret;
> +}
> +
> +void trace_probe_store_args(void *data, struct trace_probe *tp, void *rec,
> + int header_size, int maxlen)
> +{
> + struct probe_arg *arg;
> + void *base = data - header_size;
> + void *dyndata = data + tp->size;
> + u32 *dl; /* Data location */
> + int ret, i;
> + struct trace_event_call *call = trace_probe_event_call(tp);
> +
> + for (i = 0; i < tp->nr_args; i++) {
> + arg = tp->args + i;
> + dl = data + arg->offset;
> + /* Point the dynamic data area if needed */
> + if (unlikely(arg->dynamic))
> + *dl = make_data_loc(maxlen, dyndata - base);
> + ret = process_fetch_insn(arg->code, rec, dl, base, call->flags);
> + if (unlikely(ret < 0 && arg->dynamic)) {
> + *dl = make_data_loc(0, dyndata - base);
> + } else {
> + dyndata += ret;
> + maxlen -= ret;
> + }
> + }
> +}
> +
> +int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
> + u8 *data, void *field)
> +{
> + void *p;
> + int i, j;
> +
> + for (i = 0; i < nr_args; i++) {
> + struct probe_arg *a = args + i;
> +
> + trace_seq_printf(s, " %s=", a->name);
> + if (likely(!a->count)) {
> + if (!a->type->print(s, data + a->offset, field))
> + return -ENOMEM;
> + continue;
> + }
> + trace_seq_putc(s, '{');
> + p = data + a->offset;
> + for (j = 0; j < a->count; j++) {
> + if (!a->type->print(s, p, field))
> + return -ENOMEM;
> + trace_seq_putc(s, j == a->count - 1 ? '}' : ',');
> + p += a->type->size;
> + }
> + }
> + return 0;
> +}
> diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
> index de38f1c03776..4f044047b748 100644
> --- a/kernel/trace/trace_probe.h
> +++ b/kernel/trace/trace_probe.h
> @@ -343,6 +343,11 @@ int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b);
> bool trace_probe_match_command_args(struct trace_probe *tp,
> int argc, const char **argv);
> int trace_probe_create(const char *raw_command, int (*createfn)(int, const char **));
> +int trace_probe_get_data_size(struct trace_probe *tp, struct pt_regs *regs);
> +void trace_probe_store_args(void *data, struct trace_probe *tp, void *rec,
> + int header_size, int maxlen);
> +int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
> + u8 *data, void *field);
>
> #define trace_probe_for_each_link(pos, tp) \
> list_for_each_entry(pos, &(tp)->event->files, list)
> diff --git a/kernel/trace/trace_probe_common.h b/kernel/trace/trace_probe_common.h
> new file mode 100644
> index 000000000000..b8d77447fe0c
> --- /dev/null
> +++ b/kernel/trace/trace_probe_common.h
> @@ -0,0 +1,69 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __TRACE_PROBE_COMMON_H_
> +#define __TRACE_PROBE_COMMON_H_
> +
> +#define FAULT_STRING "(fault)"
> +
> +static nokprobe_inline void
> +fetch_store_raw(unsigned long val, struct fetch_insn *code, void *buf)
> +{
> + switch (code->size) {
> + case 1:
> + *(u8 *)buf = (u8)val;
> + break;
> + case 2:
> + *(u16 *)buf = (u16)val;
> + break;
> + case 4:
> + *(u32 *)buf = (u32)val;
> + break;
> + case 8:
> + //TBD: 32bit signed
> + *(u64 *)buf = (u64)val;
> + break;
> + default:
> + *(unsigned long *)buf = val;
> + }
> +}
> +
> +static nokprobe_inline void
> +fetch_apply_bitfield(struct fetch_insn *code, void *buf)
> +{
> + switch (code->basesize) {
> + case 1:
> + *(u8 *)buf <<= code->lshift;
> + *(u8 *)buf >>= code->rshift;
> + break;
> + case 2:
> + *(u16 *)buf <<= code->lshift;
> + *(u16 *)buf >>= code->rshift;
> + break;
> + case 4:
> + *(u32 *)buf <<= code->lshift;
> + *(u32 *)buf >>= code->rshift;
> + break;
> + case 8:
> + *(u64 *)buf <<= code->lshift;
> + *(u64 *)buf >>= code->rshift;
> + break;
> + }
> +}
> +
> +static nokprobe_inline int
> +probe_mem_read_user(void *dest, void *src, size_t size)
> +{
> + const void __user *uaddr = (__force const void __user *)src;
> +
> + return copy_from_user_nofault(dest, uaddr, size);
> +}
> +
> +static nokprobe_inline int
> +probe_mem_read(void *dest, void *src, size_t size)
> +{
> +#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> + if ((unsigned long)src < TASK_SIZE)
> + return probe_mem_read_user(dest, src, size);
> +#endif
> + return copy_from_kernel_nofault(dest, src, size);
> +}
> +#endif /* __TRACE_PROBE_COMMON_H_ */
> diff --git a/kernel/trace/trace_probe_user.h b/kernel/trace/trace_probe_user.h
> new file mode 100644
> index 000000000000..2104ccb44d56
> --- /dev/null
> +++ b/kernel/trace/trace_probe_user.h
> @@ -0,0 +1,95 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __TRACE_PROBE_USER_H_
> +#define __TRACE_PROBE_USER_H_
> +
> +#define FAULT_STRING "(fault)"
> +
> +/*
> + * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
> + * length and relative data location.
> + */
> +static nokprobe_inline int
> +user_fetch_store_string(unsigned long addr, void *dest, void *base)
> +{
> + long ret;
> + u32 loc = *(u32 *)dest;
> + int maxlen = get_loc_len(loc);
> + u8 *dst = get_loc_data(dest, base);
> + void __user *src = (void __force __user *) addr;
> +
> + if (unlikely(!maxlen))
> + return -ENOMEM;
> +
> + if (addr == FETCH_TOKEN_COMM)
> + ret = strlcpy(dst, current->comm, maxlen);
> + else
> + ret = strncpy_from_user(dst, src, maxlen);
> + if (ret >= 0) {
> + if (ret == maxlen)
> + dst[ret - 1] = '\0';
> + else
> + /*
> + * Include the terminating null byte. In this case it
> + * was copied by strncpy_from_user but not accounted
> + * for in ret.
> + */
> + ret++;
> + *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
> + }
> +
> + return ret;
> +}
> +
> +static nokprobe_inline int
> +user_fetch_store_string_user(unsigned long addr, void *dest, void *base)
> +{
> + return user_fetch_store_string(addr, dest, base);
> +}
> +
> +/* Return the length of string -- including null terminal byte */
> +static nokprobe_inline int
> +user_fetch_store_strlen(unsigned long addr)
> +{
> + int len;
> + void __user *vaddr = (void __force __user *) addr;
> +
> + if (addr == FETCH_TOKEN_COMM)
> + len = strlen(current->comm) + 1;
> + else
> + len = strnlen_user(vaddr, MAX_STRING_SIZE);
> +
> + return (len > MAX_STRING_SIZE) ? 0 : len;
> +}
> +
> +static nokprobe_inline int
> +user_fetch_store_strlen_user(unsigned long addr)
> +{
> + return user_fetch_store_strlen(addr);
> +}
> +
> +#ifdef CONFIG_STACK_GROWSUP
> +static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
> +{
> + return addr - (n * sizeof(long));
> +}
> +#else
> +static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
> +{
> + return addr + (n * sizeof(long));
> +}
> +#endif
> +
> +static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n)
> +{
> + unsigned long ret;
> + unsigned long addr = user_stack_pointer(regs);
> +
> + addr = adjust_stack_addr(addr, n);
> +
> + if (copy_from_user(&ret, (void __force __user *) addr, sizeof(ret)))
> + return 0;
> +
> + return ret;
> +}
> +
> +#endif /* __TRACE_PROBE_USER_H_ */
> --
> 2.25.1
>


--
Masami Hiramatsu (Google) <[email protected]>

2022-11-21 23:05:35

by Masami Hiramatsu

[permalink] [raw]
Subject: Re: [PATCH 1/4] kernel/trace: Introduce new APIs to process probe arguments

On Wed, 16 Nov 2022 20:25:07 +0800
Song Chen <[email protected]> wrote:

> Introduce 3 new APIs:
> 1. trace_probe_get_data_size: get arguments' data size
> 2. trace_probe_store_args: store aruguments into ring buffer
> 3. trace_probe_print_args: print arguments into trace file
>
> Those APIs are going to merge similar implementations respectively
> in kprobe/uprobe/eprobe.
>
> Signed-off-by: Song Chen <[email protected]>
> ---
> kernel/trace/trace_probe.c | 305 ++++++++++++++++++++++++++++++
> kernel/trace/trace_probe.h | 5 +
> kernel/trace/trace_probe_common.h | 69 +++++++
> kernel/trace/trace_probe_user.h | 95 ++++++++++
> 4 files changed, 474 insertions(+)
> create mode 100644 kernel/trace/trace_probe_common.h
> create mode 100644 kernel/trace/trace_probe_user.h
>
> diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
> index 36dff277de46..303f057bd2f7 100644
> --- a/kernel/trace/trace_probe.c
> +++ b/kernel/trace/trace_probe.c
> @@ -12,6 +12,9 @@
> #define pr_fmt(fmt) "trace_probe: " fmt
>
> #include "trace_probe.h"
> +#include "trace_probe_kernel.h"
> +#include "trace_probe_user.h"
> +#include "trace_probe_common.h"
>
> #undef C
> #define C(a, b) b
> @@ -1218,3 +1221,305 @@ int trace_probe_create(const char *raw_command, int (*createfn)(int, const char
>
> return ret;
> }
> +
> +/* From the 2nd stage, routine is same */
> +static nokprobe_inline int
> +process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
> + void *dest, void *base, int flags)
> +{
> + struct fetch_insn *s3 = NULL;
> + int total = 0, ret = 0, i = 0;
> + u32 loc = 0;
> + unsigned long lval = val;
> + int is_uprobe = flags & TRACE_EVENT_FL_UPROBE;
> +
> +stage2:
> + /* 2nd stage: dereference memory if needed */
> + do {
> + if (code->op == FETCH_OP_DEREF) {
> + lval = val;
> + ret = probe_mem_read(&val, (void *)val + code->offset,
> + sizeof(val));
> + } else if (code->op == FETCH_OP_UDEREF) {
> + lval = val;
> + ret = probe_mem_read_user(&val,
> + (void *)val + code->offset, sizeof(val));
> + } else
> + break;
> + if (ret)
> + return ret;
> + code++;
> + } while (1);
> +
> + s3 = code;
> +stage3:
> + /* 3rd stage: store value to buffer */
> + if (unlikely(!dest)) {
> + if (code->op == FETCH_OP_ST_STRING) {
> + if (is_uprobe)
> + ret = user_fetch_store_strlen(val + code->offset);
> + else
> + ret = kern_fetch_store_strlen(val + code->offset);
> + code++;
> + goto array;
> + } else if (code->op == FETCH_OP_ST_USTRING) {
> + if (is_uprobe)
> + ret += user_fetch_store_strlen_user(val + code->offset);
> + else
> + ret += kern_fetch_store_strlen_user(val + code->offset);

Also, I would not like to see this "is_uprobe" check in the code. That is useless
when running this in the kernel. This is why I did this in trace_probe_tmpl.h as
an inlined function.

So, sorting print_args function is OK for me, but the process_fetch_insn* should be
kept as inlined. Maybe some common code can be move to the trace_probe_tmpl.h as
inlined functions?


> + code++;
> + goto array;
> + } else
> + return -EILSEQ;
> + }
> +
> + switch (code->op) {
> + case FETCH_OP_ST_RAW:
> + fetch_store_raw(val, code, dest);
> + break;
> + case FETCH_OP_ST_MEM:
> + probe_mem_read(dest, (void *)val + code->offset, code->size);
> + break;
> + case FETCH_OP_ST_UMEM:
> + probe_mem_read_user(dest, (void *)val + code->offset, code->size);
> + break;
> + case FETCH_OP_ST_STRING:
> + loc = *(u32 *)dest;
> + if (is_uprobe)
> + ret = user_fetch_store_string(val + code->offset, dest, base);
> + else
> + ret = kern_fetch_store_string(val + code->offset, dest, base);
> + break;
> + case FETCH_OP_ST_USTRING:
> + loc = *(u32 *)dest;
> + if (is_uprobe)
> + ret = user_fetch_store_string_user(val + code->offset, dest, base);
> + else
> + ret = kern_fetch_store_string_user(val + code->offset, dest, base);
> + break;
> + default:
> + return -EILSEQ;
> + }
> + code++;
> +
> + /* 4th stage: modify stored value if needed */
> + if (code->op == FETCH_OP_MOD_BF) {
> + fetch_apply_bitfield(code, dest);
> + code++;
> + }
> +
> +array:
> + /* the last stage: Loop on array */
> + if (code->op == FETCH_OP_LP_ARRAY) {
> + total += ret;
> + if (++i < code->param) {
> + code = s3;
> + if (s3->op != FETCH_OP_ST_STRING &&
> + s3->op != FETCH_OP_ST_USTRING) {
> + dest += s3->size;
> + val += s3->size;
> + goto stage3;
> + }
> + code--;
> + val = lval + sizeof(char *);
> + if (dest) {
> + dest += sizeof(u32);
> + *(u32 *)dest = update_data_loc(loc, ret);
> + }
> + goto stage2;
> + }
> + code++;
> + ret = total;
> + }
> +
> + return code->op == FETCH_OP_END ? ret : -EILSEQ;
> +}
> +
> +static unsigned long get_event_field(struct fetch_insn *code, void *rec)
> +{
> + struct ftrace_event_field *field = code->data;
> + unsigned long val;
> + void *addr;
> +
> + addr = rec + field->offset;
> +
> + if (is_string_field(field)) {
> + switch (field->filter_type) {
> + case FILTER_DYN_STRING:
> + val = (unsigned long)(rec + (*(unsigned int *)addr & 0xffff));
> + break;
> + case FILTER_RDYN_STRING:
> + val = (unsigned long)(addr + (*(unsigned int *)addr & 0xffff));
> + break;
> + case FILTER_STATIC_STRING:
> + val = (unsigned long)addr;
> + break;
> + case FILTER_PTR_STRING:
> + val = (unsigned long)(*(char *)addr);
> + break;
> + default:
> + WARN_ON_ONCE(1);
> + return 0;
> + }
> + return val;
> + }
> +
> + switch (field->size) {
> + case 1:
> + if (field->is_signed)
> + val = *(char *)addr;
> + else
> + val = *(unsigned char *)addr;
> + break;
> + case 2:
> + if (field->is_signed)
> + val = *(short *)addr;
> + else
> + val = *(unsigned short *)addr;
> + break;
> + case 4:
> + if (field->is_signed)
> + val = *(int *)addr;
> + else
> + val = *(unsigned int *)addr;
> + break;
> + default:
> + if (field->is_signed)
> + val = *(long *)addr;
> + else
> + val = *(unsigned long *)addr;
> + break;
> + }
> + return val;
> +}
> +
> +/* Note that we don't verify it, since the code does not come from user space */
> +static int
> +process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
> + void *base, int flags)
> +{
> + struct pt_regs *regs = rec;
> + unsigned long val;
> + int is_uprobe = flags & TRACE_EVENT_FL_UPROBE;
> +
> +retry:
> + /* 1st stage: get value from context */
> + switch (code->op) {
> + case FETCH_OP_REG:
> + val = regs_get_register(regs, code->param);
> + break;
> + case FETCH_OP_STACK:
> + if (is_uprobe)
> + val = get_user_stack_nth(regs, code->param);
> + else
> + val = regs_get_kernel_stack_nth(regs, code->param);
> + break;

Ditto.

> + case FETCH_OP_STACKP:
> + if (is_uprobe)
> + val = user_stack_pointer(regs);
> + else
> + val = kernel_stack_pointer(regs);
> + break;
> + case FETCH_OP_RETVAL:
> + val = regs_return_value(regs);
> + break;
> + case FETCH_OP_IMM:
> + val = code->immediate;
> + break;
> + case FETCH_OP_COMM:
> + val = (unsigned long)current->comm;
> + break;
> + case FETCH_OP_DATA:
> + val = (unsigned long)code->data;
> + break;
> +#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
> + case FETCH_OP_ARG:
> + val = regs_get_kernel_argument(regs, code->param);
> + break;
> +#endif
> + case FETCH_NOP_SYMBOL: /* Ignore a place holder */
> + code++;
> + goto retry;
> + case FETCH_OP_TP_ARG:
> + val = get_event_field(code, rec);
> + break;
> + default:
> + return -EILSEQ;
> + }
> + code++;
> +
> + return process_fetch_insn_bottom(code, val, dest, base, flags);
> +}
> +NOKPROBE_SYMBOL(process_fetch_insn)
> +
> +/* Sum up total data length for dynamic arrays (strings) */
> +int trace_probe_get_data_size(struct trace_probe *tp, struct pt_regs *regs)
> +{
> + struct probe_arg *arg;
> + int i, len, ret = 0;
> + struct trace_event_call *call = trace_probe_event_call(tp);
> +
> + for (i = 0; i < tp->nr_args; i++) {
> + arg = tp->args + i;
> + if (unlikely(arg->dynamic)) {
> + len = process_fetch_insn(arg->code, regs, NULL, NULL, call->flags);
> + if (len > 0)
> + ret += len;
> + }
> + }
> +
> + return ret;
> +}
> +
> +void trace_probe_store_args(void *data, struct trace_probe *tp, void *rec,
> + int header_size, int maxlen)
> +{
> + struct probe_arg *arg;
> + void *base = data - header_size;
> + void *dyndata = data + tp->size;
> + u32 *dl; /* Data location */
> + int ret, i;
> + struct trace_event_call *call = trace_probe_event_call(tp);
> +
> + for (i = 0; i < tp->nr_args; i++) {
> + arg = tp->args + i;
> + dl = data + arg->offset;
> + /* Point the dynamic data area if needed */
> + if (unlikely(arg->dynamic))
> + *dl = make_data_loc(maxlen, dyndata - base);
> + ret = process_fetch_insn(arg->code, rec, dl, base, call->flags);
> + if (unlikely(ret < 0 && arg->dynamic)) {
> + *dl = make_data_loc(0, dyndata - base);
> + } else {
> + dyndata += ret;
> + maxlen -= ret;
> + }
> + }
> +}
> +
> +int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
> + u8 *data, void *field)
> +{
> + void *p;
> + int i, j;
> +
> + for (i = 0; i < nr_args; i++) {
> + struct probe_arg *a = args + i;
> +
> + trace_seq_printf(s, " %s=", a->name);
> + if (likely(!a->count)) {
> + if (!a->type->print(s, data + a->offset, field))
> + return -ENOMEM;
> + continue;
> + }
> + trace_seq_putc(s, '{');
> + p = data + a->offset;
> + for (j = 0; j < a->count; j++) {
> + if (!a->type->print(s, p, field))
> + return -ENOMEM;
> + trace_seq_putc(s, j == a->count - 1 ? '}' : ',');
> + p += a->type->size;
> + }
> + }
> + return 0;
> +}
> diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
> index de38f1c03776..4f044047b748 100644
> --- a/kernel/trace/trace_probe.h
> +++ b/kernel/trace/trace_probe.h
> @@ -343,6 +343,11 @@ int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b);
> bool trace_probe_match_command_args(struct trace_probe *tp,
> int argc, const char **argv);
> int trace_probe_create(const char *raw_command, int (*createfn)(int, const char **));
> +int trace_probe_get_data_size(struct trace_probe *tp, struct pt_regs *regs);
> +void trace_probe_store_args(void *data, struct trace_probe *tp, void *rec,
> + int header_size, int maxlen);
> +int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
> + u8 *data, void *field);
>
> #define trace_probe_for_each_link(pos, tp) \
> list_for_each_entry(pos, &(tp)->event->files, list)
> diff --git a/kernel/trace/trace_probe_common.h b/kernel/trace/trace_probe_common.h
> new file mode 100644
> index 000000000000..b8d77447fe0c
> --- /dev/null
> +++ b/kernel/trace/trace_probe_common.h

Please do not add an internal header file only just for 1 file.

> @@ -0,0 +1,69 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __TRACE_PROBE_COMMON_H_
> +#define __TRACE_PROBE_COMMON_H_
> +
> +#define FAULT_STRING "(fault)"
> +
> +static nokprobe_inline void
> +fetch_store_raw(unsigned long val, struct fetch_insn *code, void *buf)
> +{
> + switch (code->size) {
> + case 1:
> + *(u8 *)buf = (u8)val;
> + break;
> + case 2:
> + *(u16 *)buf = (u16)val;
> + break;
> + case 4:
> + *(u32 *)buf = (u32)val;
> + break;
> + case 8:
> + //TBD: 32bit signed
> + *(u64 *)buf = (u64)val;
> + break;
> + default:
> + *(unsigned long *)buf = val;
> + }
> +}
> +
> +static nokprobe_inline void
> +fetch_apply_bitfield(struct fetch_insn *code, void *buf)
> +{
> + switch (code->basesize) {
> + case 1:
> + *(u8 *)buf <<= code->lshift;
> + *(u8 *)buf >>= code->rshift;
> + break;
> + case 2:
> + *(u16 *)buf <<= code->lshift;
> + *(u16 *)buf >>= code->rshift;
> + break;
> + case 4:
> + *(u32 *)buf <<= code->lshift;
> + *(u32 *)buf >>= code->rshift;
> + break;
> + case 8:
> + *(u64 *)buf <<= code->lshift;
> + *(u64 *)buf >>= code->rshift;
> + break;
> + }
> +}
> +
> +static nokprobe_inline int
> +probe_mem_read_user(void *dest, void *src, size_t size)
> +{
> + const void __user *uaddr = (__force const void __user *)src;
> +
> + return copy_from_user_nofault(dest, uaddr, size);
> +}
> +
> +static nokprobe_inline int
> +probe_mem_read(void *dest, void *src, size_t size)
> +{
> +#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> + if ((unsigned long)src < TASK_SIZE)
> + return probe_mem_read_user(dest, src, size);
> +#endif
> + return copy_from_kernel_nofault(dest, src, size);
> +}
> +#endif /* __TRACE_PROBE_COMMON_H_ */
> diff --git a/kernel/trace/trace_probe_user.h b/kernel/trace/trace_probe_user.h
> new file mode 100644
> index 000000000000..2104ccb44d56
> --- /dev/null
> +++ b/kernel/trace/trace_probe_user.h

Ditto.

Thank you,

> @@ -0,0 +1,95 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __TRACE_PROBE_USER_H_
> +#define __TRACE_PROBE_USER_H_
> +
> +#define FAULT_STRING "(fault)"
> +
> +/*
> + * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
> + * length and relative data location.
> + */
> +static nokprobe_inline int
> +user_fetch_store_string(unsigned long addr, void *dest, void *base)
> +{
> + long ret;
> + u32 loc = *(u32 *)dest;
> + int maxlen = get_loc_len(loc);
> + u8 *dst = get_loc_data(dest, base);
> + void __user *src = (void __force __user *) addr;
> +
> + if (unlikely(!maxlen))
> + return -ENOMEM;
> +
> + if (addr == FETCH_TOKEN_COMM)
> + ret = strlcpy(dst, current->comm, maxlen);
> + else
> + ret = strncpy_from_user(dst, src, maxlen);
> + if (ret >= 0) {
> + if (ret == maxlen)
> + dst[ret - 1] = '\0';
> + else
> + /*
> + * Include the terminating null byte. In this case it
> + * was copied by strncpy_from_user but not accounted
> + * for in ret.
> + */
> + ret++;
> + *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
> + }
> +
> + return ret;
> +}
> +
> +static nokprobe_inline int
> +user_fetch_store_string_user(unsigned long addr, void *dest, void *base)
> +{
> + return user_fetch_store_string(addr, dest, base);
> +}
> +
> +/* Return the length of string -- including null terminal byte */
> +static nokprobe_inline int
> +user_fetch_store_strlen(unsigned long addr)
> +{
> + int len;
> + void __user *vaddr = (void __force __user *) addr;
> +
> + if (addr == FETCH_TOKEN_COMM)
> + len = strlen(current->comm) + 1;
> + else
> + len = strnlen_user(vaddr, MAX_STRING_SIZE);
> +
> + return (len > MAX_STRING_SIZE) ? 0 : len;
> +}
> +
> +static nokprobe_inline int
> +user_fetch_store_strlen_user(unsigned long addr)
> +{
> + return user_fetch_store_strlen(addr);
> +}
> +
> +#ifdef CONFIG_STACK_GROWSUP
> +static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
> +{
> + return addr - (n * sizeof(long));
> +}
> +#else
> +static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
> +{
> + return addr + (n * sizeof(long));
> +}
> +#endif
> +
> +static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n)
> +{
> + unsigned long ret;
> + unsigned long addr = user_stack_pointer(regs);
> +
> + addr = adjust_stack_addr(addr, n);
> +
> + if (copy_from_user(&ret, (void __force __user *) addr, sizeof(ret)))
> + return 0;
> +
> + return ret;
> +}
> +
> +#endif /* __TRACE_PROBE_USER_H_ */
> --
> 2.25.1
>

Thanks,

--
Masami Hiramatsu (Google) <[email protected]>

2022-11-22 02:05:40

by Song Chen

[permalink] [raw]
Subject: Re: [PATCH 1/4] kernel/trace: Introduce new APIs to process probe arguments

Hi,

在 2022/11/22 06:01, Masami Hiramatsu (Google) 写道:
> Hi Song,
>
> On Wed, 16 Nov 2022 20:25:07 +0800
> Song Chen <[email protected]> wrote:
>
>> Introduce 3 new APIs:
>> 1. trace_probe_get_data_size: get arguments' data size
>> 2. trace_probe_store_args: store aruguments into ring buffer
>> 3. trace_probe_print_args: print arguments into trace file
>
> Ah, I meant that split the patches into
>
> Introduce trace_probe_get_data_size() and use it in *probes
> Introduce trace_probe_store_args() and use it in *probes
> Introduce trace_probe_print_args and use it in *probes
>
> Then we can easily understand by its meaning.
>
> Can you reform this series again?
>
> Thank you,
>

understood, will do , thanks.

/Song

>>
>> Those APIs are going to merge similar implementations respectively
>> in kprobe/uprobe/eprobe.
>>
>> Signed-off-by: Song Chen <[email protected]>
>> ---
>> kernel/trace/trace_probe.c | 305 ++++++++++++++++++++++++++++++
>> kernel/trace/trace_probe.h | 5 +
>> kernel/trace/trace_probe_common.h | 69 +++++++
>> kernel/trace/trace_probe_user.h | 95 ++++++++++
>> 4 files changed, 474 insertions(+)
>> create mode 100644 kernel/trace/trace_probe_common.h
>> create mode 100644 kernel/trace/trace_probe_user.h
>>
>> diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
>> index 36dff277de46..303f057bd2f7 100644
>> --- a/kernel/trace/trace_probe.c
>> +++ b/kernel/trace/trace_probe.c
>> @@ -12,6 +12,9 @@
>> #define pr_fmt(fmt) "trace_probe: " fmt
>>
>> #include "trace_probe.h"
>> +#include "trace_probe_kernel.h"
>> +#include "trace_probe_user.h"
>> +#include "trace_probe_common.h"
>>
>> #undef C
>> #define C(a, b) b
>> @@ -1218,3 +1221,305 @@ int trace_probe_create(const char *raw_command, int (*createfn)(int, const char
>>
>> return ret;
>> }
>> +
>> +/* From the 2nd stage, routine is same */
>> +static nokprobe_inline int
>> +process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
>> + void *dest, void *base, int flags)
>> +{
>> + struct fetch_insn *s3 = NULL;
>> + int total = 0, ret = 0, i = 0;
>> + u32 loc = 0;
>> + unsigned long lval = val;
>> + int is_uprobe = flags & TRACE_EVENT_FL_UPROBE;
>> +
>> +stage2:
>> + /* 2nd stage: dereference memory if needed */
>> + do {
>> + if (code->op == FETCH_OP_DEREF) {
>> + lval = val;
>> + ret = probe_mem_read(&val, (void *)val + code->offset,
>> + sizeof(val));
>> + } else if (code->op == FETCH_OP_UDEREF) {
>> + lval = val;
>> + ret = probe_mem_read_user(&val,
>> + (void *)val + code->offset, sizeof(val));
>> + } else
>> + break;
>> + if (ret)
>> + return ret;
>> + code++;
>> + } while (1);
>> +
>> + s3 = code;
>> +stage3:
>> + /* 3rd stage: store value to buffer */
>> + if (unlikely(!dest)) {
>> + if (code->op == FETCH_OP_ST_STRING) {
>> + if (is_uprobe)
>> + ret = user_fetch_store_strlen(val + code->offset);
>> + else
>> + ret = kern_fetch_store_strlen(val + code->offset);
>> + code++;
>> + goto array;
>> + } else if (code->op == FETCH_OP_ST_USTRING) {
>> + if (is_uprobe)
>> + ret += user_fetch_store_strlen_user(val + code->offset);
>> + else
>> + ret += kern_fetch_store_strlen_user(val + code->offset);
>> + code++;
>> + goto array;
>> + } else
>> + return -EILSEQ;
>> + }
>> +
>> + switch (code->op) {
>> + case FETCH_OP_ST_RAW:
>> + fetch_store_raw(val, code, dest);
>> + break;
>> + case FETCH_OP_ST_MEM:
>> + probe_mem_read(dest, (void *)val + code->offset, code->size);
>> + break;
>> + case FETCH_OP_ST_UMEM:
>> + probe_mem_read_user(dest, (void *)val + code->offset, code->size);
>> + break;
>> + case FETCH_OP_ST_STRING:
>> + loc = *(u32 *)dest;
>> + if (is_uprobe)
>> + ret = user_fetch_store_string(val + code->offset, dest, base);
>> + else
>> + ret = kern_fetch_store_string(val + code->offset, dest, base);
>> + break;
>> + case FETCH_OP_ST_USTRING:
>> + loc = *(u32 *)dest;
>> + if (is_uprobe)
>> + ret = user_fetch_store_string_user(val + code->offset, dest, base);
>> + else
>> + ret = kern_fetch_store_string_user(val + code->offset, dest, base);
>> + break;
>> + default:
>> + return -EILSEQ;
>> + }
>> + code++;
>> +
>> + /* 4th stage: modify stored value if needed */
>> + if (code->op == FETCH_OP_MOD_BF) {
>> + fetch_apply_bitfield(code, dest);
>> + code++;
>> + }
>> +
>> +array:
>> + /* the last stage: Loop on array */
>> + if (code->op == FETCH_OP_LP_ARRAY) {
>> + total += ret;
>> + if (++i < code->param) {
>> + code = s3;
>> + if (s3->op != FETCH_OP_ST_STRING &&
>> + s3->op != FETCH_OP_ST_USTRING) {
>> + dest += s3->size;
>> + val += s3->size;
>> + goto stage3;
>> + }
>> + code--;
>> + val = lval + sizeof(char *);
>> + if (dest) {
>> + dest += sizeof(u32);
>> + *(u32 *)dest = update_data_loc(loc, ret);
>> + }
>> + goto stage2;
>> + }
>> + code++;
>> + ret = total;
>> + }
>> +
>> + return code->op == FETCH_OP_END ? ret : -EILSEQ;
>> +}
>> +
>> +static unsigned long get_event_field(struct fetch_insn *code, void *rec)
>> +{
>> + struct ftrace_event_field *field = code->data;
>> + unsigned long val;
>> + void *addr;
>> +
>> + addr = rec + field->offset;
>> +
>> + if (is_string_field(field)) {
>> + switch (field->filter_type) {
>> + case FILTER_DYN_STRING:
>> + val = (unsigned long)(rec + (*(unsigned int *)addr & 0xffff));
>> + break;
>> + case FILTER_RDYN_STRING:
>> + val = (unsigned long)(addr + (*(unsigned int *)addr & 0xffff));
>> + break;
>> + case FILTER_STATIC_STRING:
>> + val = (unsigned long)addr;
>> + break;
>> + case FILTER_PTR_STRING:
>> + val = (unsigned long)(*(char *)addr);
>> + break;
>> + default:
>> + WARN_ON_ONCE(1);
>> + return 0;
>> + }
>> + return val;
>> + }
>> +
>> + switch (field->size) {
>> + case 1:
>> + if (field->is_signed)
>> + val = *(char *)addr;
>> + else
>> + val = *(unsigned char *)addr;
>> + break;
>> + case 2:
>> + if (field->is_signed)
>> + val = *(short *)addr;
>> + else
>> + val = *(unsigned short *)addr;
>> + break;
>> + case 4:
>> + if (field->is_signed)
>> + val = *(int *)addr;
>> + else
>> + val = *(unsigned int *)addr;
>> + break;
>> + default:
>> + if (field->is_signed)
>> + val = *(long *)addr;
>> + else
>> + val = *(unsigned long *)addr;
>> + break;
>> + }
>> + return val;
>> +}
>> +
>> +/* Note that we don't verify it, since the code does not come from user space */
>> +static int
>> +process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
>> + void *base, int flags)
>> +{
>> + struct pt_regs *regs = rec;
>> + unsigned long val;
>> + int is_uprobe = flags & TRACE_EVENT_FL_UPROBE;
>> +
>> +retry:
>> + /* 1st stage: get value from context */
>> + switch (code->op) {
>> + case FETCH_OP_REG:
>> + val = regs_get_register(regs, code->param);
>> + break;
>> + case FETCH_OP_STACK:
>> + if (is_uprobe)
>> + val = get_user_stack_nth(regs, code->param);
>> + else
>> + val = regs_get_kernel_stack_nth(regs, code->param);
>> + break;
>> + case FETCH_OP_STACKP:
>> + if (is_uprobe)
>> + val = user_stack_pointer(regs);
>> + else
>> + val = kernel_stack_pointer(regs);
>> + break;
>> + case FETCH_OP_RETVAL:
>> + val = regs_return_value(regs);
>> + break;
>> + case FETCH_OP_IMM:
>> + val = code->immediate;
>> + break;
>> + case FETCH_OP_COMM:
>> + val = (unsigned long)current->comm;
>> + break;
>> + case FETCH_OP_DATA:
>> + val = (unsigned long)code->data;
>> + break;
>> +#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
>> + case FETCH_OP_ARG:
>> + val = regs_get_kernel_argument(regs, code->param);
>> + break;
>> +#endif
>> + case FETCH_NOP_SYMBOL: /* Ignore a place holder */
>> + code++;
>> + goto retry;
>> + case FETCH_OP_TP_ARG:
>> + val = get_event_field(code, rec);
>> + break;
>> + default:
>> + return -EILSEQ;
>> + }
>> + code++;
>> +
>> + return process_fetch_insn_bottom(code, val, dest, base, flags);
>> +}
>> +NOKPROBE_SYMBOL(process_fetch_insn)
>> +
>> +/* Sum up total data length for dynamic arrays (strings) */
>> +int trace_probe_get_data_size(struct trace_probe *tp, struct pt_regs *regs)
>> +{
>> + struct probe_arg *arg;
>> + int i, len, ret = 0;
>> + struct trace_event_call *call = trace_probe_event_call(tp);
>> +
>> + for (i = 0; i < tp->nr_args; i++) {
>> + arg = tp->args + i;
>> + if (unlikely(arg->dynamic)) {
>> + len = process_fetch_insn(arg->code, regs, NULL, NULL, call->flags);
>> + if (len > 0)
>> + ret += len;
>> + }
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +void trace_probe_store_args(void *data, struct trace_probe *tp, void *rec,
>> + int header_size, int maxlen)
>> +{
>> + struct probe_arg *arg;
>> + void *base = data - header_size;
>> + void *dyndata = data + tp->size;
>> + u32 *dl; /* Data location */
>> + int ret, i;
>> + struct trace_event_call *call = trace_probe_event_call(tp);
>> +
>> + for (i = 0; i < tp->nr_args; i++) {
>> + arg = tp->args + i;
>> + dl = data + arg->offset;
>> + /* Point the dynamic data area if needed */
>> + if (unlikely(arg->dynamic))
>> + *dl = make_data_loc(maxlen, dyndata - base);
>> + ret = process_fetch_insn(arg->code, rec, dl, base, call->flags);
>> + if (unlikely(ret < 0 && arg->dynamic)) {
>> + *dl = make_data_loc(0, dyndata - base);
>> + } else {
>> + dyndata += ret;
>> + maxlen -= ret;
>> + }
>> + }
>> +}
>> +
>> +int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
>> + u8 *data, void *field)
>> +{
>> + void *p;
>> + int i, j;
>> +
>> + for (i = 0; i < nr_args; i++) {
>> + struct probe_arg *a = args + i;
>> +
>> + trace_seq_printf(s, " %s=", a->name);
>> + if (likely(!a->count)) {
>> + if (!a->type->print(s, data + a->offset, field))
>> + return -ENOMEM;
>> + continue;
>> + }
>> + trace_seq_putc(s, '{');
>> + p = data + a->offset;
>> + for (j = 0; j < a->count; j++) {
>> + if (!a->type->print(s, p, field))
>> + return -ENOMEM;
>> + trace_seq_putc(s, j == a->count - 1 ? '}' : ',');
>> + p += a->type->size;
>> + }
>> + }
>> + return 0;
>> +}
>> diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
>> index de38f1c03776..4f044047b748 100644
>> --- a/kernel/trace/trace_probe.h
>> +++ b/kernel/trace/trace_probe.h
>> @@ -343,6 +343,11 @@ int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b);
>> bool trace_probe_match_command_args(struct trace_probe *tp,
>> int argc, const char **argv);
>> int trace_probe_create(const char *raw_command, int (*createfn)(int, const char **));
>> +int trace_probe_get_data_size(struct trace_probe *tp, struct pt_regs *regs);
>> +void trace_probe_store_args(void *data, struct trace_probe *tp, void *rec,
>> + int header_size, int maxlen);
>> +int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
>> + u8 *data, void *field);
>>
>> #define trace_probe_for_each_link(pos, tp) \
>> list_for_each_entry(pos, &(tp)->event->files, list)
>> diff --git a/kernel/trace/trace_probe_common.h b/kernel/trace/trace_probe_common.h
>> new file mode 100644
>> index 000000000000..b8d77447fe0c
>> --- /dev/null
>> +++ b/kernel/trace/trace_probe_common.h
>> @@ -0,0 +1,69 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef __TRACE_PROBE_COMMON_H_
>> +#define __TRACE_PROBE_COMMON_H_
>> +
>> +#define FAULT_STRING "(fault)"
>> +
>> +static nokprobe_inline void
>> +fetch_store_raw(unsigned long val, struct fetch_insn *code, void *buf)
>> +{
>> + switch (code->size) {
>> + case 1:
>> + *(u8 *)buf = (u8)val;
>> + break;
>> + case 2:
>> + *(u16 *)buf = (u16)val;
>> + break;
>> + case 4:
>> + *(u32 *)buf = (u32)val;
>> + break;
>> + case 8:
>> + //TBD: 32bit signed
>> + *(u64 *)buf = (u64)val;
>> + break;
>> + default:
>> + *(unsigned long *)buf = val;
>> + }
>> +}
>> +
>> +static nokprobe_inline void
>> +fetch_apply_bitfield(struct fetch_insn *code, void *buf)
>> +{
>> + switch (code->basesize) {
>> + case 1:
>> + *(u8 *)buf <<= code->lshift;
>> + *(u8 *)buf >>= code->rshift;
>> + break;
>> + case 2:
>> + *(u16 *)buf <<= code->lshift;
>> + *(u16 *)buf >>= code->rshift;
>> + break;
>> + case 4:
>> + *(u32 *)buf <<= code->lshift;
>> + *(u32 *)buf >>= code->rshift;
>> + break;
>> + case 8:
>> + *(u64 *)buf <<= code->lshift;
>> + *(u64 *)buf >>= code->rshift;
>> + break;
>> + }
>> +}
>> +
>> +static nokprobe_inline int
>> +probe_mem_read_user(void *dest, void *src, size_t size)
>> +{
>> + const void __user *uaddr = (__force const void __user *)src;
>> +
>> + return copy_from_user_nofault(dest, uaddr, size);
>> +}
>> +
>> +static nokprobe_inline int
>> +probe_mem_read(void *dest, void *src, size_t size)
>> +{
>> +#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
>> + if ((unsigned long)src < TASK_SIZE)
>> + return probe_mem_read_user(dest, src, size);
>> +#endif
>> + return copy_from_kernel_nofault(dest, src, size);
>> +}
>> +#endif /* __TRACE_PROBE_COMMON_H_ */
>> diff --git a/kernel/trace/trace_probe_user.h b/kernel/trace/trace_probe_user.h
>> new file mode 100644
>> index 000000000000..2104ccb44d56
>> --- /dev/null
>> +++ b/kernel/trace/trace_probe_user.h
>> @@ -0,0 +1,95 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef __TRACE_PROBE_USER_H_
>> +#define __TRACE_PROBE_USER_H_
>> +
>> +#define FAULT_STRING "(fault)"
>> +
>> +/*
>> + * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
>> + * length and relative data location.
>> + */
>> +static nokprobe_inline int
>> +user_fetch_store_string(unsigned long addr, void *dest, void *base)
>> +{
>> + long ret;
>> + u32 loc = *(u32 *)dest;
>> + int maxlen = get_loc_len(loc);
>> + u8 *dst = get_loc_data(dest, base);
>> + void __user *src = (void __force __user *) addr;
>> +
>> + if (unlikely(!maxlen))
>> + return -ENOMEM;
>> +
>> + if (addr == FETCH_TOKEN_COMM)
>> + ret = strlcpy(dst, current->comm, maxlen);
>> + else
>> + ret = strncpy_from_user(dst, src, maxlen);
>> + if (ret >= 0) {
>> + if (ret == maxlen)
>> + dst[ret - 1] = '\0';
>> + else
>> + /*
>> + * Include the terminating null byte. In this case it
>> + * was copied by strncpy_from_user but not accounted
>> + * for in ret.
>> + */
>> + ret++;
>> + *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +static nokprobe_inline int
>> +user_fetch_store_string_user(unsigned long addr, void *dest, void *base)
>> +{
>> + return user_fetch_store_string(addr, dest, base);
>> +}
>> +
>> +/* Return the length of string -- including null terminal byte */
>> +static nokprobe_inline int
>> +user_fetch_store_strlen(unsigned long addr)
>> +{
>> + int len;
>> + void __user *vaddr = (void __force __user *) addr;
>> +
>> + if (addr == FETCH_TOKEN_COMM)
>> + len = strlen(current->comm) + 1;
>> + else
>> + len = strnlen_user(vaddr, MAX_STRING_SIZE);
>> +
>> + return (len > MAX_STRING_SIZE) ? 0 : len;
>> +}
>> +
>> +static nokprobe_inline int
>> +user_fetch_store_strlen_user(unsigned long addr)
>> +{
>> + return user_fetch_store_strlen(addr);
>> +}
>> +
>> +#ifdef CONFIG_STACK_GROWSUP
>> +static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
>> +{
>> + return addr - (n * sizeof(long));
>> +}
>> +#else
>> +static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
>> +{
>> + return addr + (n * sizeof(long));
>> +}
>> +#endif
>> +
>> +static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n)
>> +{
>> + unsigned long ret;
>> + unsigned long addr = user_stack_pointer(regs);
>> +
>> + addr = adjust_stack_addr(addr, n);
>> +
>> + if (copy_from_user(&ret, (void __force __user *) addr, sizeof(ret)))
>> + return 0;
>> +
>> + return ret;
>> +}
>> +
>> +#endif /* __TRACE_PROBE_USER_H_ */
>> --
>> 2.25.1
>>
>
>

2022-11-22 02:30:02

by Song Chen

[permalink] [raw]
Subject: Re: [PATCH 1/4] kernel/trace: Introduce new APIs to process probe arguments

Hi,

在 2022/11/22 06:16, Masami Hiramatsu (Google) 写道:
> On Wed, 16 Nov 2022 20:25:07 +0800
> Song Chen <[email protected]> wrote:
>
>> Introduce 3 new APIs:
>> 1. trace_probe_get_data_size: get arguments' data size
>> 2. trace_probe_store_args: store aruguments into ring buffer
>> 3. trace_probe_print_args: print arguments into trace file
>>
>> Those APIs are going to merge similar implementations respectively
>> in kprobe/uprobe/eprobe.
>>
>> Signed-off-by: Song Chen <[email protected]>
>> ---
>> kernel/trace/trace_probe.c | 305 ++++++++++++++++++++++++++++++
>> kernel/trace/trace_probe.h | 5 +
>> kernel/trace/trace_probe_common.h | 69 +++++++
>> kernel/trace/trace_probe_user.h | 95 ++++++++++
>> 4 files changed, 474 insertions(+)
>> create mode 100644 kernel/trace/trace_probe_common.h
>> create mode 100644 kernel/trace/trace_probe_user.h
>>
>> diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
>> index 36dff277de46..303f057bd2f7 100644
>> --- a/kernel/trace/trace_probe.c
>> +++ b/kernel/trace/trace_probe.c
>> @@ -12,6 +12,9 @@
>> #define pr_fmt(fmt) "trace_probe: " fmt
>>
>> #include "trace_probe.h"
>> +#include "trace_probe_kernel.h"
>> +#include "trace_probe_user.h"
>> +#include "trace_probe_common.h"
>>
>> #undef C
>> #define C(a, b) b
>> @@ -1218,3 +1221,305 @@ int trace_probe_create(const char *raw_command, int (*createfn)(int, const char
>>
>> return ret;
>> }
>> +
>> +/* From the 2nd stage, routine is same */
>> +static nokprobe_inline int
>> +process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
>> + void *dest, void *base, int flags)
>> +{
>> + struct fetch_insn *s3 = NULL;
>> + int total = 0, ret = 0, i = 0;
>> + u32 loc = 0;
>> + unsigned long lval = val;
>> + int is_uprobe = flags & TRACE_EVENT_FL_UPROBE;
>> +
>> +stage2:
>> + /* 2nd stage: dereference memory if needed */
>> + do {
>> + if (code->op == FETCH_OP_DEREF) {
>> + lval = val;
>> + ret = probe_mem_read(&val, (void *)val + code->offset,
>> + sizeof(val));
>> + } else if (code->op == FETCH_OP_UDEREF) {
>> + lval = val;
>> + ret = probe_mem_read_user(&val,
>> + (void *)val + code->offset, sizeof(val));
>> + } else
>> + break;
>> + if (ret)
>> + return ret;
>> + code++;
>> + } while (1);
>> +
>> + s3 = code;
>> +stage3:
>> + /* 3rd stage: store value to buffer */
>> + if (unlikely(!dest)) {
>> + if (code->op == FETCH_OP_ST_STRING) {
>> + if (is_uprobe)
>> + ret = user_fetch_store_strlen(val + code->offset);
>> + else
>> + ret = kern_fetch_store_strlen(val + code->offset);
>> + code++;
>> + goto array;
>> + } else if (code->op == FETCH_OP_ST_USTRING) {
>> + if (is_uprobe)
>> + ret += user_fetch_store_strlen_user(val + code->offset);
>> + else
>> + ret += kern_fetch_store_strlen_user(val + code->offset);
>
> Also, I would not like to see this "is_uprobe" check in the code. That is useless
> when running this in the kernel. This is why I did this in trace_probe_tmpl.h as
> an inlined function.
>

one of the intention is to merge process_fetch_insn*, that's why
is_uprobe is introduced here, otherwise, process_fetch_insn* doesn't
know which kind of probe it's dealing with.

> So, sorting print_args function is OK for me, but the process_fetch_insn* should be
> kept as inlined. Maybe some common code can be move to the trace_probe_tmpl.h as
> inlined functions?
>

if i understand you correctly, you would like to remain
process_fetch_insn* as the way they are, no change.

Maybe i went too far, i will submit a new patchset to restore.

/Song

>
>> + code++;
>> + goto array;
>> + } else
>> + return -EILSEQ;
>> + }
>> +
>> + switch (code->op) {
>> + case FETCH_OP_ST_RAW:
>> + fetch_store_raw(val, code, dest);
>> + break;
>> + case FETCH_OP_ST_MEM:
>> + probe_mem_read(dest, (void *)val + code->offset, code->size);
>> + break;
>> + case FETCH_OP_ST_UMEM:
>> + probe_mem_read_user(dest, (void *)val + code->offset, code->size);
>> + break;
>> + case FETCH_OP_ST_STRING:
>> + loc = *(u32 *)dest;
>> + if (is_uprobe)
>> + ret = user_fetch_store_string(val + code->offset, dest, base);
>> + else
>> + ret = kern_fetch_store_string(val + code->offset, dest, base);
>> + break;
>> + case FETCH_OP_ST_USTRING:
>> + loc = *(u32 *)dest;
>> + if (is_uprobe)
>> + ret = user_fetch_store_string_user(val + code->offset, dest, base);
>> + else
>> + ret = kern_fetch_store_string_user(val + code->offset, dest, base);
>> + break;
>> + default:
>> + return -EILSEQ;
>> + }
>> + code++;
>> +
>> + /* 4th stage: modify stored value if needed */
>> + if (code->op == FETCH_OP_MOD_BF) {
>> + fetch_apply_bitfield(code, dest);
>> + code++;
>> + }
>> +
>> +array:
>> + /* the last stage: Loop on array */
>> + if (code->op == FETCH_OP_LP_ARRAY) {
>> + total += ret;
>> + if (++i < code->param) {
>> + code = s3;
>> + if (s3->op != FETCH_OP_ST_STRING &&
>> + s3->op != FETCH_OP_ST_USTRING) {
>> + dest += s3->size;
>> + val += s3->size;
>> + goto stage3;
>> + }
>> + code--;
>> + val = lval + sizeof(char *);
>> + if (dest) {
>> + dest += sizeof(u32);
>> + *(u32 *)dest = update_data_loc(loc, ret);
>> + }
>> + goto stage2;
>> + }
>> + code++;
>> + ret = total;
>> + }
>> +
>> + return code->op == FETCH_OP_END ? ret : -EILSEQ;
>> +}
>> +
>> +static unsigned long get_event_field(struct fetch_insn *code, void *rec)
>> +{
>> + struct ftrace_event_field *field = code->data;
>> + unsigned long val;
>> + void *addr;
>> +
>> + addr = rec + field->offset;
>> +
>> + if (is_string_field(field)) {
>> + switch (field->filter_type) {
>> + case FILTER_DYN_STRING:
>> + val = (unsigned long)(rec + (*(unsigned int *)addr & 0xffff));
>> + break;
>> + case FILTER_RDYN_STRING:
>> + val = (unsigned long)(addr + (*(unsigned int *)addr & 0xffff));
>> + break;
>> + case FILTER_STATIC_STRING:
>> + val = (unsigned long)addr;
>> + break;
>> + case FILTER_PTR_STRING:
>> + val = (unsigned long)(*(char *)addr);
>> + break;
>> + default:
>> + WARN_ON_ONCE(1);
>> + return 0;
>> + }
>> + return val;
>> + }
>> +
>> + switch (field->size) {
>> + case 1:
>> + if (field->is_signed)
>> + val = *(char *)addr;
>> + else
>> + val = *(unsigned char *)addr;
>> + break;
>> + case 2:
>> + if (field->is_signed)
>> + val = *(short *)addr;
>> + else
>> + val = *(unsigned short *)addr;
>> + break;
>> + case 4:
>> + if (field->is_signed)
>> + val = *(int *)addr;
>> + else
>> + val = *(unsigned int *)addr;
>> + break;
>> + default:
>> + if (field->is_signed)
>> + val = *(long *)addr;
>> + else
>> + val = *(unsigned long *)addr;
>> + break;
>> + }
>> + return val;
>> +}
>> +
>> +/* Note that we don't verify it, since the code does not come from user space */
>> +static int
>> +process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
>> + void *base, int flags)
>> +{
>> + struct pt_regs *regs = rec;
>> + unsigned long val;
>> + int is_uprobe = flags & TRACE_EVENT_FL_UPROBE;
>> +
>> +retry:
>> + /* 1st stage: get value from context */
>> + switch (code->op) {
>> + case FETCH_OP_REG:
>> + val = regs_get_register(regs, code->param);
>> + break;
>> + case FETCH_OP_STACK:
>> + if (is_uprobe)
>> + val = get_user_stack_nth(regs, code->param);
>> + else
>> + val = regs_get_kernel_stack_nth(regs, code->param);
>> + break;
>
> Ditto.

will do.

/Song

>
>> + case FETCH_OP_STACKP:
>> + if (is_uprobe)
>> + val = user_stack_pointer(regs);
>> + else
>> + val = kernel_stack_pointer(regs);
>> + break;
>> + case FETCH_OP_RETVAL:
>> + val = regs_return_value(regs);
>> + break;
>> + case FETCH_OP_IMM:
>> + val = code->immediate;
>> + break;
>> + case FETCH_OP_COMM:
>> + val = (unsigned long)current->comm;
>> + break;
>> + case FETCH_OP_DATA:
>> + val = (unsigned long)code->data;
>> + break;
>> +#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
>> + case FETCH_OP_ARG:
>> + val = regs_get_kernel_argument(regs, code->param);
>> + break;
>> +#endif
>> + case FETCH_NOP_SYMBOL: /* Ignore a place holder */
>> + code++;
>> + goto retry;
>> + case FETCH_OP_TP_ARG:
>> + val = get_event_field(code, rec);
>> + break;
>> + default:
>> + return -EILSEQ;
>> + }
>> + code++;
>> +
>> + return process_fetch_insn_bottom(code, val, dest, base, flags);
>> +}
>> +NOKPROBE_SYMBOL(process_fetch_insn)
>> +
>> +/* Sum up total data length for dynamic arrays (strings) */
>> +int trace_probe_get_data_size(struct trace_probe *tp, struct pt_regs *regs)
>> +{
>> + struct probe_arg *arg;
>> + int i, len, ret = 0;
>> + struct trace_event_call *call = trace_probe_event_call(tp);
>> +
>> + for (i = 0; i < tp->nr_args; i++) {
>> + arg = tp->args + i;
>> + if (unlikely(arg->dynamic)) {
>> + len = process_fetch_insn(arg->code, regs, NULL, NULL, call->flags);
>> + if (len > 0)
>> + ret += len;
>> + }
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +void trace_probe_store_args(void *data, struct trace_probe *tp, void *rec,
>> + int header_size, int maxlen)
>> +{
>> + struct probe_arg *arg;
>> + void *base = data - header_size;
>> + void *dyndata = data + tp->size;
>> + u32 *dl; /* Data location */
>> + int ret, i;
>> + struct trace_event_call *call = trace_probe_event_call(tp);
>> +
>> + for (i = 0; i < tp->nr_args; i++) {
>> + arg = tp->args + i;
>> + dl = data + arg->offset;
>> + /* Point the dynamic data area if needed */
>> + if (unlikely(arg->dynamic))
>> + *dl = make_data_loc(maxlen, dyndata - base);
>> + ret = process_fetch_insn(arg->code, rec, dl, base, call->flags);
>> + if (unlikely(ret < 0 && arg->dynamic)) {
>> + *dl = make_data_loc(0, dyndata - base);
>> + } else {
>> + dyndata += ret;
>> + maxlen -= ret;
>> + }
>> + }
>> +}
>> +
>> +int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
>> + u8 *data, void *field)
>> +{
>> + void *p;
>> + int i, j;
>> +
>> + for (i = 0; i < nr_args; i++) {
>> + struct probe_arg *a = args + i;
>> +
>> + trace_seq_printf(s, " %s=", a->name);
>> + if (likely(!a->count)) {
>> + if (!a->type->print(s, data + a->offset, field))
>> + return -ENOMEM;
>> + continue;
>> + }
>> + trace_seq_putc(s, '{');
>> + p = data + a->offset;
>> + for (j = 0; j < a->count; j++) {
>> + if (!a->type->print(s, p, field))
>> + return -ENOMEM;
>> + trace_seq_putc(s, j == a->count - 1 ? '}' : ',');
>> + p += a->type->size;
>> + }
>> + }
>> + return 0;
>> +}
>> diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
>> index de38f1c03776..4f044047b748 100644
>> --- a/kernel/trace/trace_probe.h
>> +++ b/kernel/trace/trace_probe.h
>> @@ -343,6 +343,11 @@ int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b);
>> bool trace_probe_match_command_args(struct trace_probe *tp,
>> int argc, const char **argv);
>> int trace_probe_create(const char *raw_command, int (*createfn)(int, const char **));
>> +int trace_probe_get_data_size(struct trace_probe *tp, struct pt_regs *regs);
>> +void trace_probe_store_args(void *data, struct trace_probe *tp, void *rec,
>> + int header_size, int maxlen);
>> +int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
>> + u8 *data, void *field);
>>
>> #define trace_probe_for_each_link(pos, tp) \
>> list_for_each_entry(pos, &(tp)->event->files, list)
>> diff --git a/kernel/trace/trace_probe_common.h b/kernel/trace/trace_probe_common.h
>> new file mode 100644
>> index 000000000000..b8d77447fe0c
>> --- /dev/null
>> +++ b/kernel/trace/trace_probe_common.h
>
> Please do not add an internal header file only just for 1 file.

Will remove.

/Song

>
>> @@ -0,0 +1,69 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef __TRACE_PROBE_COMMON_H_
>> +#define __TRACE_PROBE_COMMON_H_
>> +
>> +#define FAULT_STRING "(fault)"
>> +
>> +static nokprobe_inline void
>> +fetch_store_raw(unsigned long val, struct fetch_insn *code, void *buf)
>> +{
>> + switch (code->size) {
>> + case 1:
>> + *(u8 *)buf = (u8)val;
>> + break;
>> + case 2:
>> + *(u16 *)buf = (u16)val;
>> + break;
>> + case 4:
>> + *(u32 *)buf = (u32)val;
>> + break;
>> + case 8:
>> + //TBD: 32bit signed
>> + *(u64 *)buf = (u64)val;
>> + break;
>> + default:
>> + *(unsigned long *)buf = val;
>> + }
>> +}
>> +
>> +static nokprobe_inline void
>> +fetch_apply_bitfield(struct fetch_insn *code, void *buf)
>> +{
>> + switch (code->basesize) {
>> + case 1:
>> + *(u8 *)buf <<= code->lshift;
>> + *(u8 *)buf >>= code->rshift;
>> + break;
>> + case 2:
>> + *(u16 *)buf <<= code->lshift;
>> + *(u16 *)buf >>= code->rshift;
>> + break;
>> + case 4:
>> + *(u32 *)buf <<= code->lshift;
>> + *(u32 *)buf >>= code->rshift;
>> + break;
>> + case 8:
>> + *(u64 *)buf <<= code->lshift;
>> + *(u64 *)buf >>= code->rshift;
>> + break;
>> + }
>> +}
>> +
>> +static nokprobe_inline int
>> +probe_mem_read_user(void *dest, void *src, size_t size)
>> +{
>> + const void __user *uaddr = (__force const void __user *)src;
>> +
>> + return copy_from_user_nofault(dest, uaddr, size);
>> +}
>> +
>> +static nokprobe_inline int
>> +probe_mem_read(void *dest, void *src, size_t size)
>> +{
>> +#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
>> + if ((unsigned long)src < TASK_SIZE)
>> + return probe_mem_read_user(dest, src, size);
>> +#endif
>> + return copy_from_kernel_nofault(dest, src, size);
>> +}
>> +#endif /* __TRACE_PROBE_COMMON_H_ */
>> diff --git a/kernel/trace/trace_probe_user.h b/kernel/trace/trace_probe_user.h
>> new file mode 100644
>> index 000000000000..2104ccb44d56
>> --- /dev/null
>> +++ b/kernel/trace/trace_probe_user.h
>
> Ditto.
>
> Thank you,

Ditto, thanks.

/Song

>
>> @@ -0,0 +1,95 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef __TRACE_PROBE_USER_H_
>> +#define __TRACE_PROBE_USER_H_
>> +
>> +#define FAULT_STRING "(fault)"
>> +
>> +/*
>> + * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
>> + * length and relative data location.
>> + */
>> +static nokprobe_inline int
>> +user_fetch_store_string(unsigned long addr, void *dest, void *base)
>> +{
>> + long ret;
>> + u32 loc = *(u32 *)dest;
>> + int maxlen = get_loc_len(loc);
>> + u8 *dst = get_loc_data(dest, base);
>> + void __user *src = (void __force __user *) addr;
>> +
>> + if (unlikely(!maxlen))
>> + return -ENOMEM;
>> +
>> + if (addr == FETCH_TOKEN_COMM)
>> + ret = strlcpy(dst, current->comm, maxlen);
>> + else
>> + ret = strncpy_from_user(dst, src, maxlen);
>> + if (ret >= 0) {
>> + if (ret == maxlen)
>> + dst[ret - 1] = '\0';
>> + else
>> + /*
>> + * Include the terminating null byte. In this case it
>> + * was copied by strncpy_from_user but not accounted
>> + * for in ret.
>> + */
>> + ret++;
>> + *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +static nokprobe_inline int
>> +user_fetch_store_string_user(unsigned long addr, void *dest, void *base)
>> +{
>> + return user_fetch_store_string(addr, dest, base);
>> +}
>> +
>> +/* Return the length of string -- including null terminal byte */
>> +static nokprobe_inline int
>> +user_fetch_store_strlen(unsigned long addr)
>> +{
>> + int len;
>> + void __user *vaddr = (void __force __user *) addr;
>> +
>> + if (addr == FETCH_TOKEN_COMM)
>> + len = strlen(current->comm) + 1;
>> + else
>> + len = strnlen_user(vaddr, MAX_STRING_SIZE);
>> +
>> + return (len > MAX_STRING_SIZE) ? 0 : len;
>> +}
>> +
>> +static nokprobe_inline int
>> +user_fetch_store_strlen_user(unsigned long addr)
>> +{
>> + return user_fetch_store_strlen(addr);
>> +}
>> +
>> +#ifdef CONFIG_STACK_GROWSUP
>> +static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
>> +{
>> + return addr - (n * sizeof(long));
>> +}
>> +#else
>> +static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
>> +{
>> + return addr + (n * sizeof(long));
>> +}
>> +#endif
>> +
>> +static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n)
>> +{
>> + unsigned long ret;
>> + unsigned long addr = user_stack_pointer(regs);
>> +
>> + addr = adjust_stack_addr(addr, n);
>> +
>> + if (copy_from_user(&ret, (void __force __user *) addr, sizeof(ret)))
>> + return 0;
>> +
>> + return ret;
>> +}
>> +
>> +#endif /* __TRACE_PROBE_USER_H_ */
>> --
>> 2.25.1
>>
>
> Thanks,
>

2022-11-22 04:53:25

by Masami Hiramatsu

[permalink] [raw]
Subject: Re: [PATCH 1/4] kernel/trace: Introduce new APIs to process probe arguments

On Tue, 22 Nov 2022 10:05:07 +0800
Song Chen <[email protected]> wrote:

> Hi,
>
> 在 2022/11/22 06:16, Masami Hiramatsu (Google) 写道:
> > On Wed, 16 Nov 2022 20:25:07 +0800
> > Song Chen <[email protected]> wrote:
> >
> >> Introduce 3 new APIs:
> >> 1. trace_probe_get_data_size: get arguments' data size
> >> 2. trace_probe_store_args: store aruguments into ring buffer
> >> 3. trace_probe_print_args: print arguments into trace file
> >>
> >> Those APIs are going to merge similar implementations respectively
> >> in kprobe/uprobe/eprobe.
> >>
> >> Signed-off-by: Song Chen <[email protected]>
> >> ---
> >> kernel/trace/trace_probe.c | 305 ++++++++++++++++++++++++++++++
> >> kernel/trace/trace_probe.h | 5 +
> >> kernel/trace/trace_probe_common.h | 69 +++++++
> >> kernel/trace/trace_probe_user.h | 95 ++++++++++
> >> 4 files changed, 474 insertions(+)
> >> create mode 100644 kernel/trace/trace_probe_common.h
> >> create mode 100644 kernel/trace/trace_probe_user.h
> >>
> >> diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
> >> index 36dff277de46..303f057bd2f7 100644
> >> --- a/kernel/trace/trace_probe.c
> >> +++ b/kernel/trace/trace_probe.c
> >> @@ -12,6 +12,9 @@
> >> #define pr_fmt(fmt) "trace_probe: " fmt
> >>
> >> #include "trace_probe.h"
> >> +#include "trace_probe_kernel.h"
> >> +#include "trace_probe_user.h"
> >> +#include "trace_probe_common.h"
> >>
> >> #undef C
> >> #define C(a, b) b
> >> @@ -1218,3 +1221,305 @@ int trace_probe_create(const char *raw_command, int (*createfn)(int, const char
> >>
> >> return ret;
> >> }
> >> +
> >> +/* From the 2nd stage, routine is same */
> >> +static nokprobe_inline int
> >> +process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
> >> + void *dest, void *base, int flags)
> >> +{
> >> + struct fetch_insn *s3 = NULL;
> >> + int total = 0, ret = 0, i = 0;
> >> + u32 loc = 0;
> >> + unsigned long lval = val;
> >> + int is_uprobe = flags & TRACE_EVENT_FL_UPROBE;
> >> +
> >> +stage2:
> >> + /* 2nd stage: dereference memory if needed */
> >> + do {
> >> + if (code->op == FETCH_OP_DEREF) {
> >> + lval = val;
> >> + ret = probe_mem_read(&val, (void *)val + code->offset,
> >> + sizeof(val));
> >> + } else if (code->op == FETCH_OP_UDEREF) {
> >> + lval = val;
> >> + ret = probe_mem_read_user(&val,
> >> + (void *)val + code->offset, sizeof(val));
> >> + } else
> >> + break;
> >> + if (ret)
> >> + return ret;
> >> + code++;
> >> + } while (1);
> >> +
> >> + s3 = code;
> >> +stage3:
> >> + /* 3rd stage: store value to buffer */
> >> + if (unlikely(!dest)) {
> >> + if (code->op == FETCH_OP_ST_STRING) {
> >> + if (is_uprobe)
> >> + ret = user_fetch_store_strlen(val + code->offset);
> >> + else
> >> + ret = kern_fetch_store_strlen(val + code->offset);
> >> + code++;
> >> + goto array;
> >> + } else if (code->op == FETCH_OP_ST_USTRING) {
> >> + if (is_uprobe)
> >> + ret += user_fetch_store_strlen_user(val + code->offset);
> >> + else
> >> + ret += kern_fetch_store_strlen_user(val + code->offset);
> >
> > Also, I would not like to see this "is_uprobe" check in the code. That is useless
> > when running this in the kernel. This is why I did this in trace_probe_tmpl.h as
> > an inlined function.
> >
>
> one of the intention is to merge process_fetch_insn*, that's why
> is_uprobe is introduced here, otherwise, process_fetch_insn* doesn't
> know which kind of probe it's dealing with.
>
> > So, sorting print_args function is OK for me, but the process_fetch_insn* should be
> > kept as inlined. Maybe some common code can be move to the trace_probe_tmpl.h as
> > inlined functions?
> >
>
> if i understand you correctly, you would like to remain
> process_fetch_insn* as the way they are, no change.

Yeah, that's the point. Maybe we can put more things in trace_probe_tmpl.h but
not completely make it a big function.

Thank you,

>
> Maybe i went too far, i will submit a new patchset to restore.
>
> /Song
>
> >
> >> + code++;
> >> + goto array;
> >> + } else
> >> + return -EILSEQ;
> >> + }
> >> +
> >> + switch (code->op) {
> >> + case FETCH_OP_ST_RAW:
> >> + fetch_store_raw(val, code, dest);
> >> + break;
> >> + case FETCH_OP_ST_MEM:
> >> + probe_mem_read(dest, (void *)val + code->offset, code->size);
> >> + break;
> >> + case FETCH_OP_ST_UMEM:
> >> + probe_mem_read_user(dest, (void *)val + code->offset, code->size);
> >> + break;
> >> + case FETCH_OP_ST_STRING:
> >> + loc = *(u32 *)dest;
> >> + if (is_uprobe)
> >> + ret = user_fetch_store_string(val + code->offset, dest, base);
> >> + else
> >> + ret = kern_fetch_store_string(val + code->offset, dest, base);
> >> + break;
> >> + case FETCH_OP_ST_USTRING:
> >> + loc = *(u32 *)dest;
> >> + if (is_uprobe)
> >> + ret = user_fetch_store_string_user(val + code->offset, dest, base);
> >> + else
> >> + ret = kern_fetch_store_string_user(val + code->offset, dest, base);
> >> + break;
> >> + default:
> >> + return -EILSEQ;
> >> + }
> >> + code++;
> >> +
> >> + /* 4th stage: modify stored value if needed */
> >> + if (code->op == FETCH_OP_MOD_BF) {
> >> + fetch_apply_bitfield(code, dest);
> >> + code++;
> >> + }
> >> +
> >> +array:
> >> + /* the last stage: Loop on array */
> >> + if (code->op == FETCH_OP_LP_ARRAY) {
> >> + total += ret;
> >> + if (++i < code->param) {
> >> + code = s3;
> >> + if (s3->op != FETCH_OP_ST_STRING &&
> >> + s3->op != FETCH_OP_ST_USTRING) {
> >> + dest += s3->size;
> >> + val += s3->size;
> >> + goto stage3;
> >> + }
> >> + code--;
> >> + val = lval + sizeof(char *);
> >> + if (dest) {
> >> + dest += sizeof(u32);
> >> + *(u32 *)dest = update_data_loc(loc, ret);
> >> + }
> >> + goto stage2;
> >> + }
> >> + code++;
> >> + ret = total;
> >> + }
> >> +
> >> + return code->op == FETCH_OP_END ? ret : -EILSEQ;
> >> +}
> >> +
> >> +static unsigned long get_event_field(struct fetch_insn *code, void *rec)
> >> +{
> >> + struct ftrace_event_field *field = code->data;
> >> + unsigned long val;
> >> + void *addr;
> >> +
> >> + addr = rec + field->offset;
> >> +
> >> + if (is_string_field(field)) {
> >> + switch (field->filter_type) {
> >> + case FILTER_DYN_STRING:
> >> + val = (unsigned long)(rec + (*(unsigned int *)addr & 0xffff));
> >> + break;
> >> + case FILTER_RDYN_STRING:
> >> + val = (unsigned long)(addr + (*(unsigned int *)addr & 0xffff));
> >> + break;
> >> + case FILTER_STATIC_STRING:
> >> + val = (unsigned long)addr;
> >> + break;
> >> + case FILTER_PTR_STRING:
> >> + val = (unsigned long)(*(char *)addr);
> >> + break;
> >> + default:
> >> + WARN_ON_ONCE(1);
> >> + return 0;
> >> + }
> >> + return val;
> >> + }
> >> +
> >> + switch (field->size) {
> >> + case 1:
> >> + if (field->is_signed)
> >> + val = *(char *)addr;
> >> + else
> >> + val = *(unsigned char *)addr;
> >> + break;
> >> + case 2:
> >> + if (field->is_signed)
> >> + val = *(short *)addr;
> >> + else
> >> + val = *(unsigned short *)addr;
> >> + break;
> >> + case 4:
> >> + if (field->is_signed)
> >> + val = *(int *)addr;
> >> + else
> >> + val = *(unsigned int *)addr;
> >> + break;
> >> + default:
> >> + if (field->is_signed)
> >> + val = *(long *)addr;
> >> + else
> >> + val = *(unsigned long *)addr;
> >> + break;
> >> + }
> >> + return val;
> >> +}
> >> +
> >> +/* Note that we don't verify it, since the code does not come from user space */
> >> +static int
> >> +process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
> >> + void *base, int flags)
> >> +{
> >> + struct pt_regs *regs = rec;
> >> + unsigned long val;
> >> + int is_uprobe = flags & TRACE_EVENT_FL_UPROBE;
> >> +
> >> +retry:
> >> + /* 1st stage: get value from context */
> >> + switch (code->op) {
> >> + case FETCH_OP_REG:
> >> + val = regs_get_register(regs, code->param);
> >> + break;
> >> + case FETCH_OP_STACK:
> >> + if (is_uprobe)
> >> + val = get_user_stack_nth(regs, code->param);
> >> + else
> >> + val = regs_get_kernel_stack_nth(regs, code->param);
> >> + break;
> >
> > Ditto.
>
> will do.
>
> /Song
>
> >
> >> + case FETCH_OP_STACKP:
> >> + if (is_uprobe)
> >> + val = user_stack_pointer(regs);
> >> + else
> >> + val = kernel_stack_pointer(regs);
> >> + break;
> >> + case FETCH_OP_RETVAL:
> >> + val = regs_return_value(regs);
> >> + break;
> >> + case FETCH_OP_IMM:
> >> + val = code->immediate;
> >> + break;
> >> + case FETCH_OP_COMM:
> >> + val = (unsigned long)current->comm;
> >> + break;
> >> + case FETCH_OP_DATA:
> >> + val = (unsigned long)code->data;
> >> + break;
> >> +#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
> >> + case FETCH_OP_ARG:
> >> + val = regs_get_kernel_argument(regs, code->param);
> >> + break;
> >> +#endif
> >> + case FETCH_NOP_SYMBOL: /* Ignore a place holder */
> >> + code++;
> >> + goto retry;
> >> + case FETCH_OP_TP_ARG:
> >> + val = get_event_field(code, rec);
> >> + break;
> >> + default:
> >> + return -EILSEQ;
> >> + }
> >> + code++;
> >> +
> >> + return process_fetch_insn_bottom(code, val, dest, base, flags);
> >> +}
> >> +NOKPROBE_SYMBOL(process_fetch_insn)
> >> +
> >> +/* Sum up total data length for dynamic arrays (strings) */
> >> +int trace_probe_get_data_size(struct trace_probe *tp, struct pt_regs *regs)
> >> +{
> >> + struct probe_arg *arg;
> >> + int i, len, ret = 0;
> >> + struct trace_event_call *call = trace_probe_event_call(tp);
> >> +
> >> + for (i = 0; i < tp->nr_args; i++) {
> >> + arg = tp->args + i;
> >> + if (unlikely(arg->dynamic)) {
> >> + len = process_fetch_insn(arg->code, regs, NULL, NULL, call->flags);
> >> + if (len > 0)
> >> + ret += len;
> >> + }
> >> + }
> >> +
> >> + return ret;
> >> +}
> >> +
> >> +void trace_probe_store_args(void *data, struct trace_probe *tp, void *rec,
> >> + int header_size, int maxlen)
> >> +{
> >> + struct probe_arg *arg;
> >> + void *base = data - header_size;
> >> + void *dyndata = data + tp->size;
> >> + u32 *dl; /* Data location */
> >> + int ret, i;
> >> + struct trace_event_call *call = trace_probe_event_call(tp);
> >> +
> >> + for (i = 0; i < tp->nr_args; i++) {
> >> + arg = tp->args + i;
> >> + dl = data + arg->offset;
> >> + /* Point the dynamic data area if needed */
> >> + if (unlikely(arg->dynamic))
> >> + *dl = make_data_loc(maxlen, dyndata - base);
> >> + ret = process_fetch_insn(arg->code, rec, dl, base, call->flags);
> >> + if (unlikely(ret < 0 && arg->dynamic)) {
> >> + *dl = make_data_loc(0, dyndata - base);
> >> + } else {
> >> + dyndata += ret;
> >> + maxlen -= ret;
> >> + }
> >> + }
> >> +}
> >> +
> >> +int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
> >> + u8 *data, void *field)
> >> +{
> >> + void *p;
> >> + int i, j;
> >> +
> >> + for (i = 0; i < nr_args; i++) {
> >> + struct probe_arg *a = args + i;
> >> +
> >> + trace_seq_printf(s, " %s=", a->name);
> >> + if (likely(!a->count)) {
> >> + if (!a->type->print(s, data + a->offset, field))
> >> + return -ENOMEM;
> >> + continue;
> >> + }
> >> + trace_seq_putc(s, '{');
> >> + p = data + a->offset;
> >> + for (j = 0; j < a->count; j++) {
> >> + if (!a->type->print(s, p, field))
> >> + return -ENOMEM;
> >> + trace_seq_putc(s, j == a->count - 1 ? '}' : ',');
> >> + p += a->type->size;
> >> + }
> >> + }
> >> + return 0;
> >> +}
> >> diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
> >> index de38f1c03776..4f044047b748 100644
> >> --- a/kernel/trace/trace_probe.h
> >> +++ b/kernel/trace/trace_probe.h
> >> @@ -343,6 +343,11 @@ int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b);
> >> bool trace_probe_match_command_args(struct trace_probe *tp,
> >> int argc, const char **argv);
> >> int trace_probe_create(const char *raw_command, int (*createfn)(int, const char **));
> >> +int trace_probe_get_data_size(struct trace_probe *tp, struct pt_regs *regs);
> >> +void trace_probe_store_args(void *data, struct trace_probe *tp, void *rec,
> >> + int header_size, int maxlen);
> >> +int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
> >> + u8 *data, void *field);
> >>
> >> #define trace_probe_for_each_link(pos, tp) \
> >> list_for_each_entry(pos, &(tp)->event->files, list)
> >> diff --git a/kernel/trace/trace_probe_common.h b/kernel/trace/trace_probe_common.h
> >> new file mode 100644
> >> index 000000000000..b8d77447fe0c
> >> --- /dev/null
> >> +++ b/kernel/trace/trace_probe_common.h
> >
> > Please do not add an internal header file only just for 1 file.
>
> Will remove.
>
> /Song
>
> >
> >> @@ -0,0 +1,69 @@
> >> +/* SPDX-License-Identifier: GPL-2.0 */
> >> +#ifndef __TRACE_PROBE_COMMON_H_
> >> +#define __TRACE_PROBE_COMMON_H_
> >> +
> >> +#define FAULT_STRING "(fault)"
> >> +
> >> +static nokprobe_inline void
> >> +fetch_store_raw(unsigned long val, struct fetch_insn *code, void *buf)
> >> +{
> >> + switch (code->size) {
> >> + case 1:
> >> + *(u8 *)buf = (u8)val;
> >> + break;
> >> + case 2:
> >> + *(u16 *)buf = (u16)val;
> >> + break;
> >> + case 4:
> >> + *(u32 *)buf = (u32)val;
> >> + break;
> >> + case 8:
> >> + //TBD: 32bit signed
> >> + *(u64 *)buf = (u64)val;
> >> + break;
> >> + default:
> >> + *(unsigned long *)buf = val;
> >> + }
> >> +}
> >> +
> >> +static nokprobe_inline void
> >> +fetch_apply_bitfield(struct fetch_insn *code, void *buf)
> >> +{
> >> + switch (code->basesize) {
> >> + case 1:
> >> + *(u8 *)buf <<= code->lshift;
> >> + *(u8 *)buf >>= code->rshift;
> >> + break;
> >> + case 2:
> >> + *(u16 *)buf <<= code->lshift;
> >> + *(u16 *)buf >>= code->rshift;
> >> + break;
> >> + case 4:
> >> + *(u32 *)buf <<= code->lshift;
> >> + *(u32 *)buf >>= code->rshift;
> >> + break;
> >> + case 8:
> >> + *(u64 *)buf <<= code->lshift;
> >> + *(u64 *)buf >>= code->rshift;
> >> + break;
> >> + }
> >> +}
> >> +
> >> +static nokprobe_inline int
> >> +probe_mem_read_user(void *dest, void *src, size_t size)
> >> +{
> >> + const void __user *uaddr = (__force const void __user *)src;
> >> +
> >> + return copy_from_user_nofault(dest, uaddr, size);
> >> +}
> >> +
> >> +static nokprobe_inline int
> >> +probe_mem_read(void *dest, void *src, size_t size)
> >> +{
> >> +#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> >> + if ((unsigned long)src < TASK_SIZE)
> >> + return probe_mem_read_user(dest, src, size);
> >> +#endif
> >> + return copy_from_kernel_nofault(dest, src, size);
> >> +}
> >> +#endif /* __TRACE_PROBE_COMMON_H_ */
> >> diff --git a/kernel/trace/trace_probe_user.h b/kernel/trace/trace_probe_user.h
> >> new file mode 100644
> >> index 000000000000..2104ccb44d56
> >> --- /dev/null
> >> +++ b/kernel/trace/trace_probe_user.h
> >
> > Ditto.
> >
> > Thank you,
>
> Ditto, thanks.
>
> /Song
>
> >
> >> @@ -0,0 +1,95 @@
> >> +/* SPDX-License-Identifier: GPL-2.0 */
> >> +#ifndef __TRACE_PROBE_USER_H_
> >> +#define __TRACE_PROBE_USER_H_
> >> +
> >> +#define FAULT_STRING "(fault)"
> >> +
> >> +/*
> >> + * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
> >> + * length and relative data location.
> >> + */
> >> +static nokprobe_inline int
> >> +user_fetch_store_string(unsigned long addr, void *dest, void *base)
> >> +{
> >> + long ret;
> >> + u32 loc = *(u32 *)dest;
> >> + int maxlen = get_loc_len(loc);
> >> + u8 *dst = get_loc_data(dest, base);
> >> + void __user *src = (void __force __user *) addr;
> >> +
> >> + if (unlikely(!maxlen))
> >> + return -ENOMEM;
> >> +
> >> + if (addr == FETCH_TOKEN_COMM)
> >> + ret = strlcpy(dst, current->comm, maxlen);
> >> + else
> >> + ret = strncpy_from_user(dst, src, maxlen);
> >> + if (ret >= 0) {
> >> + if (ret == maxlen)
> >> + dst[ret - 1] = '\0';
> >> + else
> >> + /*
> >> + * Include the terminating null byte. In this case it
> >> + * was copied by strncpy_from_user but not accounted
> >> + * for in ret.
> >> + */
> >> + ret++;
> >> + *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
> >> + }
> >> +
> >> + return ret;
> >> +}
> >> +
> >> +static nokprobe_inline int
> >> +user_fetch_store_string_user(unsigned long addr, void *dest, void *base)
> >> +{
> >> + return user_fetch_store_string(addr, dest, base);
> >> +}
> >> +
> >> +/* Return the length of string -- including null terminal byte */
> >> +static nokprobe_inline int
> >> +user_fetch_store_strlen(unsigned long addr)
> >> +{
> >> + int len;
> >> + void __user *vaddr = (void __force __user *) addr;
> >> +
> >> + if (addr == FETCH_TOKEN_COMM)
> >> + len = strlen(current->comm) + 1;
> >> + else
> >> + len = strnlen_user(vaddr, MAX_STRING_SIZE);
> >> +
> >> + return (len > MAX_STRING_SIZE) ? 0 : len;
> >> +}
> >> +
> >> +static nokprobe_inline int
> >> +user_fetch_store_strlen_user(unsigned long addr)
> >> +{
> >> + return user_fetch_store_strlen(addr);
> >> +}
> >> +
> >> +#ifdef CONFIG_STACK_GROWSUP
> >> +static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
> >> +{
> >> + return addr - (n * sizeof(long));
> >> +}
> >> +#else
> >> +static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
> >> +{
> >> + return addr + (n * sizeof(long));
> >> +}
> >> +#endif
> >> +
> >> +static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n)
> >> +{
> >> + unsigned long ret;
> >> + unsigned long addr = user_stack_pointer(regs);
> >> +
> >> + addr = adjust_stack_addr(addr, n);
> >> +
> >> + if (copy_from_user(&ret, (void __force __user *) addr, sizeof(ret)))
> >> + return 0;
> >> +
> >> + return ret;
> >> +}
> >> +
> >> +#endif /* __TRACE_PROBE_USER_H_ */
> >> --
> >> 2.25.1
> >>
> >
> > Thanks,
> >


--
Masami Hiramatsu (Google) <[email protected]>