2009-03-05 04:43:53

by K.Prasad

Subject: [patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces

This patch adds an ftrace plugin to detect and profile memory access over
kernel variables. It uses HW Breakpoint interfaces to 'watch' memory
addresses.

Signed-off-by: K.Prasad <[email protected]>
---
kernel/trace/Kconfig | 6
kernel/trace/Makefile | 1
kernel/trace/trace.h | 15 +
kernel/trace/trace_ksym.c | 399 ++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 421 insertions(+)

Index: linux-2.6-tip/kernel/trace/Kconfig
===================================================================
--- linux-2.6-tip.orig/kernel/trace/Kconfig
+++ linux-2.6-tip/kernel/trace/Kconfig
@@ -249,6 +249,12 @@ config POWER_TRACER
power management decisions, specifically the C-state and P-state
behavior.

+config KSYM_TRACER
+ bool "Trace read and write access on kernel memory locations"
+ select TRACING
+ help
+ This tracer helps find read and write operations on any given kernel
+ symbol i.e. /proc/kallsyms.

config STACK_TRACER
bool "Trace max stack"
Index: linux-2.6-tip/kernel/trace/Makefile
===================================================================
--- linux-2.6-tip.orig/kernel/trace/Makefile
+++ linux-2.6-tip/kernel/trace/Makefile
@@ -41,5 +41,6 @@ obj-$(CONFIG_WORKQUEUE_TRACER) += trace_
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
obj-$(CONFIG_EVENT_TRACER) += trace_events.o
obj-$(CONFIG_EVENT_TRACER) += events.o
+obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o

libftrace-y := ftrace.o
Index: linux-2.6-tip/kernel/trace/trace.h
===================================================================
--- linux-2.6-tip.orig/kernel/trace/trace.h
+++ linux-2.6-tip/kernel/trace/trace.h
@@ -12,6 +12,8 @@
#include <trace/kmemtrace.h>
#include <trace/power.h>

+#include <asm/hw_breakpoint.h>
+
enum trace_type {
__TRACE_FIRST_TYPE = 0,

@@ -34,6 +36,7 @@ enum trace_type {
TRACE_KMEM_FREE,
TRACE_POWER,
TRACE_BLK,
+ TRACE_KSYM,

__TRACE_LAST_TYPE,
};
@@ -191,6 +194,17 @@ struct kmemtrace_free_entry {
const void *ptr;
};

+struct trace_ksym {
+ struct trace_entry ent;
+ struct hw_breakpoint *ksym_hbkpt;
+ unsigned long ksym_addr;
+ unsigned long ip;
+ pid_t pid;
+ struct hlist_node ksym_hlist;
+ char ksym_name[KSYM_NAME_LEN];
+ char p_name[TASK_COMM_LEN];
+};
+
/*
* trace_flag_type is an enumeration that holds different
* states when a trace occurs. These are:
@@ -302,6 +316,7 @@ extern void __ftrace_bad_type(void);
TRACE_KMEM_ALLOC); \
IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
TRACE_KMEM_FREE); \
+ IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \
__ftrace_bad_type(); \
} while (0)

Index: linux-2.6-tip/kernel/trace/trace_ksym.c
===================================================================
--- /dev/null
+++ linux-2.6-tip/kernel/trace/trace_ksym.c
@@ -0,0 +1,399 @@
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+
+#include "trace.h"
+#include "trace_output.h"
+
+/* For now, let us restrict the no. of symbols traced simultaneously to number
+ * of available hardware breakpoint registers.
+ */
+#define KSYM_TRACER_MAX HB_NUM
+
+#define KSYM_TRACER_OP_LEN 3 /* rw- */
+#define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1)
+
+#define KSYM_DEBUG 1
+
+static struct trace_array *ksym_trace_array;
+
+DEFINE_MUTEX(ksym_tracer_mutex);
+
+static unsigned int ksym_filter_entry_count;
+static unsigned int ksym_tracing_enabled;
+
+static HLIST_HEAD(ksym_filter_head);
+
+/* HW Breakpoint related callback functions */
+void ksym_hbkpt_installed(struct hw_breakpoint *temp, struct pt_regs
+ *temp_regs)
+{
+}
+
+void ksym_hbkpt_uninstalled(struct hw_breakpoint *temp, struct
+ pt_regs * temp_regs)
+{
+}
+
+void ksym_hbkpt_handler(struct hw_breakpoint *hbkpt, struct pt_regs *regs)
+{
+ struct ring_buffer_event *event;
+ struct trace_array *tr;
+ struct trace_ksym *entry;
+ int pc;
+
+ if (!ksym_tracing_enabled)
+ return;
+
+ tr = ksym_trace_array;
+ pc = preempt_count();
+
+ event = trace_buffer_lock_reserve(tr, TRACE_KSYM,
+ sizeof(*entry), 0, pc);
+ if (!event)
+ return;
+
+ entry = ring_buffer_event_data(event);
+ strlcpy(entry->ksym_name, hbkpt->info.name, KSYM_SYMBOL_LEN);
+ entry->ksym_hbkpt = hbkpt;
+ entry->ip = instruction_pointer(regs);
+ strlcpy(entry->p_name, current->comm, TASK_COMM_LEN);
+
+ entry->pid = current->pid;
+ trace_buffer_unlock_commit(tr, event, 0, pc);
+}
+
+/* Valid access types are represented as
+ *
+ * rw- : Set Read/Write Access Breakpoint
+ * -w- : Set Write Access Breakpoint
+ * --- : Clear Breakpoints
+ * --x : Set Execution Break points (Not available yet)
+ *
+ */
+static int ksym_trace_get_access_type(char *access_str)
+{
+ int pos, access = 0;
+
+ for (pos = 0; pos < KSYM_TRACER_OP_LEN; pos++) {
+ switch (access_str[pos]) {
+ case 'r':
+ access += (pos == 0) ? 4 : -1;
+ break;
+ case 'w':
+ access += (pos == 1) ? 2 : -1;
+ break;
+ case '-':
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ switch (access) {
+ case 6:
+ access = HW_BREAKPOINT_RW;
+ break;
+ case 2:
+ access = HW_BREAKPOINT_WRITE;
+ break;
+ case 0:
+ access = 0;
+ }
+
+ return access;
+}
+
+/*
+ * There can be several possible malformed requests and we attempt to capture
+ * all of them. We enumerate some of the rules
+ * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
+ * i.e. multiple ':' symbols disallowed. Possible uses are of the form
+ * <module>:<ksym_name>:<op>.
+ * 2. No delimiter symbol ':' in the input string
+ * 3. Spurious operator symbols or symbols not in their respective positions
+ * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
+ * 5. Kernel symbol not a part of /proc/kallsyms
+ * 6. Duplicate requests
+ */
+static int parse_ksym_trace_str(char *input_string, char **ksymname,
+ unsigned long *addr)
+{
+ char *delimiter = ":";
+ int ret;
+
+ ret = -EINVAL;
+ *ksymname = strsep(&input_string, delimiter);
+ *addr = kallsyms_lookup_name(*ksymname);
+
+ /* Check for malformed request: (2), (1) and (5) */
+ if ((!input_string) ||
+ (strlen(input_string) != KSYM_TRACER_OP_LEN + 1) ||
+ (*addr == 0))
+ goto return_code;
+
+ ret = ksym_trace_get_access_type(input_string);
+
+return_code:
+ return ret;
+}
+
+static int process_new_ksym_entry(struct trace_ksym *entry, char *ksymname,
+ int op, unsigned long addr)
+{
+ if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
+ printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
+ " new requests for tracing can be accepted now.\n",
+ KSYM_TRACER_MAX);
+ return -ENOSPC;
+ }
+
+ entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->ksym_hbkpt = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
+ if (!entry->ksym_hbkpt)
+ return -ENOMEM;
+
+ entry->ksym_hbkpt->info.name = ksymname;
+ entry->ksym_hbkpt->info.type = op;
+ entry->ksym_addr = entry->ksym_hbkpt->info.address = addr;
+ entry->ksym_hbkpt->info.len = HW_BREAKPOINT_LEN_4;
+ entry->ksym_hbkpt->priority = HW_BREAKPOINT_PRIO_NORMAL;
+
+ entry->ksym_hbkpt->installed = (void *)ksym_hbkpt_installed;
+ entry->ksym_hbkpt->uninstalled = (void *)ksym_hbkpt_uninstalled;
+ entry->ksym_hbkpt->triggered = (void *)ksym_hbkpt_handler;
+
+ if ((register_kernel_hw_breakpoint(entry->ksym_hbkpt)) < 0) {
+ printk(KERN_INFO "ksym_tracer request failed. Try again"
+ " later!!\n");
+ kfree(entry);
+ return -EAGAIN;
+ }
+ hlist_add_head(&(entry->ksym_hlist), &ksym_filter_head);
+ printk(KERN_INFO "ksym_tracer changes are now effective\n");
+
+ ksym_filter_entry_count++;
+
+ return 0;
+}
+
+static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node;
+ char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
+ ssize_t ret, cnt = 0;
+
+ mutex_lock(&ksym_tracer_mutex);
+
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
+ entry->ksym_hbkpt->info.name);
+ if (entry->ksym_hbkpt->info.type == HW_BREAKPOINT_WRITE)
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
+ "-w-\n");
+ else if (entry->ksym_hbkpt->info.type == HW_BREAKPOINT_RW)
+ cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
+ "rw-\n");
+ }
+ ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
+ mutex_unlock(&ksym_tracer_mutex);
+
+ return ret;
+}
+
+static ssize_t ksym_trace_filter_write(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node;
+ char *input_string, *ksymname = NULL;
+ unsigned long ksym_addr = 0;
+ int ret, op, changed = 0;
+
+ input_string = kzalloc(count, GFP_KERNEL);
+ if (!input_string)
+ return -ENOMEM;
+
+ /* Ignore echo "" > ksym_trace_filter */
+ if (count == 0)
+ return 0;
+
+ if (copy_from_user(input_string, buffer, count))
+ return -EFAULT;
+
+ ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
+
+ if (ret < 0)
+ goto err_ret;
+ mutex_lock(&ksym_tracer_mutex);
+
+ ret = -EINVAL;
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ if (entry->ksym_addr == ksym_addr) {
+ /* Check for malformed request: (6) */
+ if (entry->ksym_hbkpt->info.type != op)
+ changed = 1;
+ else
+ goto err_ret;
+ break;
+ }
+ }
+ if (changed) {
+ unregister_kernel_hw_breakpoint(entry->ksym_hbkpt);
+ entry->ksym_hbkpt->info.type = op;
+ if (op > 0) {
+ ret = register_kernel_hw_breakpoint(entry->ksym_hbkpt);
+ if (ret > 0) {
+ ret = count;
+ goto unlock_ret_path;
+ }
+ if (ret == 0) {
+ ret = -ENOSPC;
+ unregister_kernel_hw_breakpoint(entry->\
+ ksym_hbkpt);
+ }
+ }
+ ksym_filter_entry_count--;
+ hlist_del(&(entry->ksym_hlist));
+ kfree(entry->ksym_hbkpt);
+ kfree(entry);
+ ret = count;
+ goto err_ret;
+ } else {
+ /* Check for malformed request: (4) */
+ if (op == 0)
+ goto err_ret;
+
+ ret = process_new_ksym_entry(entry, ksymname, op, ksym_addr);
+ if (ret)
+ goto err_ret;
+ }
+ ret = count;
+ goto unlock_ret_path;
+
+err_ret:
+ kfree(input_string);
+
+unlock_ret_path:
+ mutex_unlock(&ksym_tracer_mutex);
+ return ret;
+}
+
+static const struct file_operations ksym_tracing_fops = {
+ .open = tracing_open_generic,
+ .read = ksym_trace_filter_read,
+ .write = ksym_trace_filter_write,
+};
+
+static int ksym_trace_init(struct trace_array *tr)
+{
+ int cpu;
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+ ksym_tracing_enabled = 1;
+ ksym_trace_array = tr;
+
+ return 0;
+}
+
+static void ksym_trace_reset(struct trace_array *tr)
+{
+ ksym_tracing_enabled = 0;
+}
+
+#ifdef CONFIG_FTRACE_SELFTEST
+int trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
+{
+ /* TODO: Will be implemented later */
+ return 0;
+}
+#endif /* CONFIG_FTRACE_SELFTEST */
+
+static void ksym_trace_print_header(struct seq_file *m)
+{
+
+ seq_puts(m,
+ "# TASK-PID CPU# Symbol Type "
+ "Function \n");
+ seq_puts(m,
+ "# | | | | "
+ "| \n");
+}
+
+static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+ struct trace_seq *s = &iter->seq;
+ struct trace_ksym *field;
+ char str[KSYM_SYMBOL_LEN];
+ int ret;
+
+ trace_assign_type(field, entry);
+
+ ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name,
+ field->pid, iter->cpu, field->ksym_name);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ switch (field->ksym_hbkpt->info.type) {
+ case HW_BREAKPOINT_WRITE:
+ ret = trace_seq_printf(s, " W ");
+ break;
+ case HW_BREAKPOINT_RW:
+ ret = trace_seq_printf(s, " RW ");
+ break;
+ default:
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ sprint_symbol(str, field->ip);
+ ret = trace_seq_printf(s, "%-20s\n", str);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+struct tracer ksym_tracer __read_mostly =
+{
+ .name = "ksym_tracer",
+ .init = ksym_trace_init,
+ .reset = ksym_trace_reset,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_ksym,
+#endif
+ .print_header = ksym_trace_print_header,
+ .print_line = ksym_trace_output
+};
+
+__init static int init_ksym_trace(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+ ksym_filter_entry_count = 0;
+
+ entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
+ NULL, &ksym_tracing_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'ksym_trace_filter' file\n");
+
+ return register_tracer(&ksym_tracer);
+
+}
+device_initcall(init_ksym_trace);


2009-03-05 06:37:27

by Frederic Weisbecker

Subject: Re: [patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces

On Thu, Mar 05, 2009 at 10:13:33AM +0530, [email protected] wrote:
> This patch adds an ftrace plugin to detect and profile memory access over
> kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> addresses.
>
> Signed-off-by: K.Prasad <[email protected]>
> ---


Hi,

Nice feature. And moreover the standardized hardware breakpoints could
be helpful for tracing.

Just some comments below.


> kernel/trace/Kconfig | 6
> kernel/trace/Makefile | 1
> kernel/trace/trace.h | 15 +
> kernel/trace/trace_ksym.c | 399 ++++++++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 421 insertions(+)
>
> Index: linux-2.6-tip/kernel/trace/Kconfig
> ===================================================================
> --- linux-2.6-tip.orig/kernel/trace/Kconfig
> +++ linux-2.6-tip/kernel/trace/Kconfig
> @@ -249,6 +249,12 @@ config POWER_TRACER
> power management decisions, specifically the C-state and P-state
> behavior.
>
> +config KSYM_TRACER
> + bool "Trace read and write access on kernel memory locations"
> + select TRACING
> + help
> + This tracer helps find read and write operations on any given kernel
> + symbol i.e. /proc/kallsyms.
>
> config STACK_TRACER
> bool "Trace max stack"
> Index: linux-2.6-tip/kernel/trace/Makefile
> ===================================================================
> --- linux-2.6-tip.orig/kernel/trace/Makefile
> +++ linux-2.6-tip/kernel/trace/Makefile
> @@ -41,5 +41,6 @@ obj-$(CONFIG_WORKQUEUE_TRACER) += trace_
> obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
> obj-$(CONFIG_EVENT_TRACER) += trace_events.o
> obj-$(CONFIG_EVENT_TRACER) += events.o
> +obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
>
> libftrace-y := ftrace.o
> Index: linux-2.6-tip/kernel/trace/trace.h
> ===================================================================
> --- linux-2.6-tip.orig/kernel/trace/trace.h
> +++ linux-2.6-tip/kernel/trace/trace.h
> @@ -12,6 +12,8 @@
> #include <trace/kmemtrace.h>
> #include <trace/power.h>
>
> +#include <asm/hw_breakpoint.h>
> +
> enum trace_type {
> __TRACE_FIRST_TYPE = 0,
>
> @@ -34,6 +36,7 @@ enum trace_type {
> TRACE_KMEM_FREE,
> TRACE_POWER,
> TRACE_BLK,
> + TRACE_KSYM,
>
> __TRACE_LAST_TYPE,
> };
> @@ -191,6 +194,17 @@ struct kmemtrace_free_entry {
> const void *ptr;
> };
>
> +struct trace_ksym {
> + struct trace_entry ent;
> + struct hw_breakpoint *ksym_hbkpt;
> + unsigned long ksym_addr;
> + unsigned long ip;
> + pid_t pid;


Just a doubt here.
The current pid is automatically recorded on trace_buffer_lock_reserve()
(or unlock_commit, I don't remember), so if this pid is the current one, you
don't need to reserve room for it; the current pid is in struct trace_entry.
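
For instance (untested, just to illustrate), the output side could then use
the pid that the generic entry already carries, and the extra field goes away:

	/* in ksym_trace_output(), 'entry' is the generic struct trace_entry */
	ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name,
			       entry->pid, iter->cpu, field->ksym_name);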


> + struct hlist_node ksym_hlist;
> + char ksym_name[KSYM_NAME_LEN];
> + char p_name[TASK_COMM_LEN];
> +};
> +
> /*
> * trace_flag_type is an enumeration that holds different
> * states when a trace occurs. These are:
> @@ -302,6 +316,7 @@ extern void __ftrace_bad_type(void);
> TRACE_KMEM_ALLOC); \
> IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
> TRACE_KMEM_FREE); \
> + IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \
> __ftrace_bad_type(); \
> } while (0)
>
> Index: linux-2.6-tip/kernel/trace/trace_ksym.c
> ===================================================================
> --- /dev/null
> +++ linux-2.6-tip/kernel/trace/trace_ksym.c
> @@ -0,0 +1,399 @@
> +#include <linux/module.h>
> +#include <linux/fs.h>
> +#include <linux/debugfs.h>
> +#include <linux/ftrace.h>
> +#include <linux/kallsyms.h>
> +#include <linux/uaccess.h>
> +
> +#include "trace.h"
> +#include "trace_output.h"
> +
> +/* For now, let us restrict the no. of symbols traced simultaneously to number
> + * of available hardware breakpoint registers.
> + */
> +#define KSYM_TRACER_MAX HB_NUM
> +
> +#define KSYM_TRACER_OP_LEN 3 /* rw- */
> +#define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1)
> +
> +#define KSYM_DEBUG 1
> +
> +static struct trace_array *ksym_trace_array;
> +
> +DEFINE_MUTEX(ksym_tracer_mutex);
> +
> +static unsigned int ksym_filter_entry_count;
> +static unsigned int ksym_tracing_enabled;
> +
> +static HLIST_HEAD(ksym_filter_head);
> +
> +/* HW Breakpoint related callback functions */
> +void ksym_hbkpt_installed(struct hw_breakpoint *temp, struct pt_regs
> + *temp_regs)
> +{
> +}
> +
> +void ksym_hbkpt_uninstalled(struct hw_breakpoint *temp, struct
> + pt_regs * temp_regs)
> +{
> +}
> +
> +void ksym_hbkpt_handler(struct hw_breakpoint *hbkpt, struct pt_regs *regs)
> +{
> + struct ring_buffer_event *event;
> + struct trace_array *tr;
> + struct trace_ksym *entry;
> + int pc;
> +
> + if (!ksym_tracing_enabled)
> + return;
> +
> + tr = ksym_trace_array;
> + pc = preempt_count();
> +
> + event = trace_buffer_lock_reserve(tr, TRACE_KSYM,
> + sizeof(*entry), 0, pc);
> + if (!event)
> + return;
> +
> + entry = ring_buffer_event_data(event);
> + strlcpy(entry->ksym_name, hbkpt->info.name, KSYM_SYMBOL_LEN);
> + entry->ksym_hbkpt = hbkpt;
> + entry->ip = instruction_pointer(regs);
> + strlcpy(entry->p_name, current->comm, TASK_COMM_LEN);
> +
> + entry->pid = current->pid;


Ah, so yes you don't need this field.



> + trace_buffer_unlock_commit(tr, event, 0, pc);
> +}
> +
> +/* Valid access types are represented as
> + *
> + * rw- : Set Read/Write Access Breakpoint
> + * -w- : Set Write Access Breakpoint
> + * --- : Clear Breakpoints
> + * --x : Set Execution Break points (Not available yet)
> + *
> + */
> +static int ksym_trace_get_access_type(char *access_str)
> +{
> + int pos, access = 0;
> +
> + for (pos = 0; pos < KSYM_TRACER_OP_LEN; pos++) {
> + switch (access_str[pos]) {
> + case 'r':
> + access += (pos == 0) ? 4 : -1;
> + break;
> + case 'w':
> + access += (pos == 1) ? 2 : -1;
> + break;
> + case '-':
> + break;
> + default:
> + return -EINVAL;
> + }
> + }
> +
> + switch (access) {
> + case 6:
> + access = HW_BREAKPOINT_RW;
> + break;
> + case 2:
> + access = HW_BREAKPOINT_WRITE;
> + break;
> + case 0:
> + access = 0;
> + }
> +
> + return access;
> +}
> +
> +/*
> + * There can be several possible malformed requests and we attempt to capture
> + * all of them. We enumerate some of the rules
> + * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
> + * i.e. multiple ':' symbols disallowed. Possible uses are of the form
> + * <module>:<ksym_name>:<op>.
> + * 2. No delimiter symbol ':' in the input string
> + * 3. Spurious operator symbols or symbols not in their respective positions
> + * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
> + * 5. Kernel symbol not a part of /proc/kallsyms
> + * 6. Duplicate requests
> + */
> +static int parse_ksym_trace_str(char *input_string, char **ksymname,
> + unsigned long *addr)
> +{
> + char *delimiter = ":";
> + int ret;
> +
> + ret = -EINVAL;
> + *ksymname = strsep(&input_string, delimiter);
> + *addr = kallsyms_lookup_name(*ksymname);
> +
> + /* Check for malformed request: (2), (1) and (5) */
> + if ((!input_string) ||
> + (strlen(input_string) != KSYM_TRACER_OP_LEN + 1) ||
> + (*addr == 0))
> + goto return_code;
> +
> + ret = ksym_trace_get_access_type(input_string);
> +
> +return_code:
> + return ret;
> +}
> +
> +static int process_new_ksym_entry(struct trace_ksym *entry, char *ksymname,
> + int op, unsigned long addr)
> +{
> + if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
> + printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
> + " new requests for tracing can be accepted now.\n",
> + KSYM_TRACER_MAX);
> + return -ENOSPC;
> + }
> +
> + entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);


I'm not sure I understand, you passed an allocated entry to that function, no?
If you are using entry as a local variable, it doesn't make sense to pass it
as a parameter.


> + if (!entry)
> + return -ENOMEM;
>
> + entry->ksym_hbkpt = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
> + if (!entry->ksym_hbkpt)
> + return -ENOMEM;


Ouch, what happens here to the memory pointed to by entry?


> +
> + entry->ksym_hbkpt->info.name = ksymname;
> + entry->ksym_hbkpt->info.type = op;
> + entry->ksym_addr = entry->ksym_hbkpt->info.address = addr;
> + entry->ksym_hbkpt->info.len = HW_BREAKPOINT_LEN_4;
> + entry->ksym_hbkpt->priority = HW_BREAKPOINT_PRIO_NORMAL;
> +
> + entry->ksym_hbkpt->installed = (void *)ksym_hbkpt_installed;
> + entry->ksym_hbkpt->uninstalled = (void *)ksym_hbkpt_uninstalled;
> + entry->ksym_hbkpt->triggered = (void *)ksym_hbkpt_handler;
> +
> + if ((register_kernel_hw_breakpoint(entry->ksym_hbkpt)) < 0) {
> + printk(KERN_INFO "ksym_tracer request failed. Try again"
> + " later!!\n");
> + kfree(entry);
> + return -EAGAIN;


You forgot to free entry->ksym_hbkpt


> + }
> + hlist_add_head(&(entry->ksym_hlist), &ksym_filter_head);
> + printk(KERN_INFO "ksym_tracer changes are now effective\n");
> +
> + ksym_filter_entry_count++;
> +
> + return 0;
> +}
> +
> +static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
> + size_t count, loff_t *ppos)
> +{
> + struct trace_ksym *entry;
> + struct hlist_node *node;
> + char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
> + ssize_t ret, cnt = 0;
> +
> + mutex_lock(&ksym_tracer_mutex);
> +
> + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
> + entry->ksym_hbkpt->info.name);
> + if (entry->ksym_hbkpt->info.type == HW_BREAKPOINT_WRITE)
> + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> + "-w-\n");
> + else if (entry->ksym_hbkpt->info.type == HW_BREAKPOINT_RW)
> + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> + "rw-\n");
> + }
> + ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
> + mutex_unlock(&ksym_tracer_mutex);
> +
> + return ret;
> +}
> +
> +static ssize_t ksym_trace_filter_write(struct file *file,
> + const char __user *buffer,
> + size_t count, loff_t *ppos)
> +{
> + struct trace_ksym *entry;
> + struct hlist_node *node;
> + char *input_string, *ksymname = NULL;
> + unsigned long ksym_addr = 0;
> + int ret, op, changed = 0;
> +
> + input_string = kzalloc(count, GFP_KERNEL);
> + if (!input_string)
> + return -ENOMEM;
> +
> + /* Ignore echo "" > ksym_trace_filter */
> + if (count == 0)
> + return 0;


You forgot to free input_string in the !count case.


> +
> + if (copy_from_user(input_string, buffer, count))
> + return -EFAULT;


Ditto.
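
I.e. something in this direction (just a sketch):

	if (copy_from_user(input_string, buffer, count)) {
		kfree(input_string);
		return -EFAULT;
	}

(and likewise a kfree(input_string) before the early return in the count == 0
case above).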

> + ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
> +
> + if (ret < 0)
> + goto err_ret;


Ah, here you didn't forget.


> + mutex_lock(&ksym_tracer_mutex);
> +
> + ret = -EINVAL;
> + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> + if (entry->ksym_addr == ksym_addr) {
> + /* Check for malformed request: (6) */
> + if (entry->ksym_hbkpt->info.type != op)
> + changed = 1;
> + else
> + goto err_ret;
> + break;
> + }
> + }
> + if (changed) {
> + unregister_kernel_hw_breakpoint(entry->ksym_hbkpt);
> + entry->ksym_hbkpt->info.type = op;
> + if (op > 0) {
> + ret = register_kernel_hw_breakpoint(entry->ksym_hbkpt);
> + if (ret > 0) {
> + ret = count;
> + goto unlock_ret_path;
> + }
> + if (ret == 0) {
> + ret = -ENOSPC;
> + unregister_kernel_hw_breakpoint(entry->\
> + ksym_hbkpt);
> + }
> + }
> + ksym_filter_entry_count--;
> + hlist_del(&(entry->ksym_hlist));
> + kfree(entry->ksym_hbkpt);
> + kfree(entry);
> + ret = count;
> + goto err_ret;
> + } else {
> + /* Check for malformed request: (4) */
> + if (op == 0)
> + goto err_ret;
> +
> + ret = process_new_ksym_entry(entry, ksymname, op, ksym_addr);


You are passing an allocated entry as a parameter, but later, in
process_new_ksym_entry(), you allocate new space for entry.
I'm confused.


> + if (ret)
> + goto err_ret;
> + }
> + ret = count;
> + goto unlock_ret_path;
> +
> +err_ret:
> + kfree(input_string);
> +
> +unlock_ret_path:
> + mutex_unlock(&ksym_tracer_mutex);
> + return ret;
> +}
> +
> +static const struct file_operations ksym_tracing_fops = {
> + .open = tracing_open_generic,
> + .read = ksym_trace_filter_read,
> + .write = ksym_trace_filter_write,
> +};
> +
> +static int ksym_trace_init(struct trace_array *tr)
> +{
> + int cpu;
> +
> + for_each_online_cpu(cpu)
> + tracing_reset(tr, cpu);
> + ksym_tracing_enabled = 1;
> + ksym_trace_array = tr;
> +
> + return 0;
> +}
> +
> +static void ksym_trace_reset(struct trace_array *tr)
> +{
> + ksym_tracing_enabled = 0;
> +}
> +
> +#ifdef CONFIG_FTRACE_SELFTEST
> +int trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
> +{
> + /* TODO: Will be implemented later */
> + return 0;
> +}
> +#endif /* CONFIG_FTRACE_SELFTEST */
> +
> +static void ksym_trace_print_header(struct seq_file *m)
> +{
> +
> + seq_puts(m,
> + "# TASK-PID CPU# Symbol Type "
> + "Function \n");
> + seq_puts(m,
> + "# | | | | "
> + "| \n");
> +}
> +
> +static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
> +{
> + struct trace_entry *entry = iter->ent;
> + struct trace_seq *s = &iter->seq;
> + struct trace_ksym *field;
> + char str[KSYM_SYMBOL_LEN];
> + int ret;
> +
> + trace_assign_type(field, entry);
> +
> + ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name,
> + field->pid, iter->cpu, field->ksym_name);
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + switch (field->ksym_hbkpt->info.type) {
> + case HW_BREAKPOINT_WRITE:
> + ret = trace_seq_printf(s, " W ");
> + break;
> + case HW_BREAKPOINT_RW:
> + ret = trace_seq_printf(s, " RW ");
> + break;
> + default:
> + return TRACE_TYPE_PARTIAL_LINE;
> + }
> +
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + sprint_symbol(str, field->ip);
> + ret = trace_seq_printf(s, "%-20s\n", str);
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + return TRACE_TYPE_HANDLED;
> +}
> +
> +struct tracer ksym_tracer __read_mostly =
> +{
> + .name = "ksym_tracer",
> + .init = ksym_trace_init,
> + .reset = ksym_trace_reset,
> +#ifdef CONFIG_FTRACE_SELFTEST
> + .selftest = trace_selftest_startup_ksym,
> +#endif
> + .print_header = ksym_trace_print_header,
> + .print_line = ksym_trace_output
> +};
> +
> +__init static int init_ksym_trace(void)
> +{
> + struct dentry *d_tracer;
> + struct dentry *entry;
> +
> + d_tracer = tracing_init_dentry();
> + ksym_filter_entry_count = 0;
> +
> + entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
> + NULL, &ksym_tracing_fops);
> + if (!entry)
> + pr_warning("Could not create debugfs "
> + "'ksym_trace_filter' file\n");
> +
> + return register_tracer(&ksym_tracer);
> +
> +}
> +device_initcall(init_ksym_trace);


Well, the rest looks good.



2009-03-05 09:16:52

by Ingo Molnar

Subject: Re: [patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces


* Frederic Weisbecker <[email protected]> wrote:

> On Thu, Mar 05, 2009 at 10:13:33AM +0530, [email protected] wrote:
> > This patch adds an ftrace plugin to detect and profile memory access over
> > kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> > addresses.
> >
> > Signed-off-by: K.Prasad <[email protected]>
> > ---
>
>
> Hi,
>
> Nice feature. And moreover the standardized hardware
> breakpoints could be helpful for tracing.

yeah. The feature is much more alive now.

> Just some comments below.

One other thing:

+#ifdef CONFIG_FTRACE_SELFTEST
+int trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
+{
+ /* TODO: Will be implemented later */
+ return 0;
+}
+#endif /* CONFIG_FTRACE_SELFTEST */

This needs to be implemented before i can pick the code up into
tip:tracing, as otherwise we will not notice it fast enough if
some of this stuff breaks.

Basically the ftrace plugin will be the main usage vector of
this facility, so the self-test is a must-have.

Looks very nice otherwise.

Ingo

2009-03-05 11:34:20

by K.Prasad

Subject: Re: [patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces

On Thu, Mar 05, 2009 at 07:37:04AM +0100, Frederic Weisbecker wrote:
> On Thu, Mar 05, 2009 at 10:13:33AM +0530, [email protected] wrote:
> > This patch adds an ftrace plugin to detect and profile memory access over
> > kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> > addresses.
> >
> > Signed-off-by: K.Prasad <[email protected]>
> > ---
>
>
> Hi,
>
> Nice feature. And moreover the standardized hardware breakpoints could
> be helpful for tracing.
>
> Just some comments below.
>
>

Hi,
Thanks for reviewing the code and pointing out the potential memory
leaks. The next iteration of this code should contain fixes for them.
I've explained the usage of the 'entry' parameter inline.

> > +struct trace_ksym {
> > + struct trace_entry ent;
> > + struct hw_breakpoint *ksym_hbkpt;
> > + unsigned long ksym_addr;
> > + unsigned long ip;
> > + pid_t pid;
>
>
> Just a doubt here.
> The current pid is automatically recorded on trace_buffer_lock_reserve()
> (or unlock_commit, don't remember), so if this pid is the current one, you
> don't need to reserve a room for it, current pid is on struct trace_entry.
>

It's carried over from an old version of the code which used the old
ring-buffer APIs like ring_buffer_lock_reserve(). I will now use the
"pid" field in "struct trace_entry".

> > +static int process_new_ksym_entry(struct trace_ksym *entry, char *ksymname,
> > + int op, unsigned long addr)
> > +{
> > + if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
> > + printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
> > + " new requests for tracing can be accepted now.\n",
> > + KSYM_TRACER_MAX);
> > + return -ENOSPC;
> > + }
> > +
> > + entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
>
>
> I'm not sure I understand, you passed an allocated entry to that function, no?
> If your are using entry as a local variable, it doesn't make sense to pass it
> as a parameter.
>
>
> > + if (!entry)
> > + return -ENOMEM;
> >
> > + entry->ksym_hbkpt = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
> > + if (!entry->ksym_hbkpt)
> > + return -ENOMEM;
>
>
> Ouch, what happens here to the memory pointed by entry?
>
>

A potential leak....will fix this and the others you've pointed out below.

> > +
> > + entry->ksym_hbkpt->info.name = ksymname;
> > + entry->ksym_hbkpt->info.type = op;
> > + entry->ksym_addr = entry->ksym_hbkpt->info.address = addr;
> > + entry->ksym_hbkpt->info.len = HW_BREAKPOINT_LEN_4;
> > + entry->ksym_hbkpt->priority = HW_BREAKPOINT_PRIO_NORMAL;
> > +
> > + entry->ksym_hbkpt->installed = (void *)ksym_hbkpt_installed;
> > + entry->ksym_hbkpt->uninstalled = (void *)ksym_hbkpt_uninstalled;
> > + entry->ksym_hbkpt->triggered = (void *)ksym_hbkpt_handler;
> > +
> > + if ((register_kernel_hw_breakpoint(entry->ksym_hbkpt)) < 0) {
> > + printk(KERN_INFO "ksym_tracer request failed. Try again"
> > + " later!!\n");
> > + kfree(entry);
> > + return -EAGAIN;
>
>
> You forgot to free entry->ksym_hbkpt
>
>
> > + }
> > + hlist_add_head(&(entry->ksym_hlist), &ksym_filter_head);
> > + printk(KERN_INFO "ksym_tracer changes are now effective\n");
> > +
> > + ksym_filter_entry_count++;
> > +
> > + return 0;
> > +}
> > +
> > +static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
> > + size_t count, loff_t *ppos)
> > +{
> > + struct trace_ksym *entry;
> > + struct hlist_node *node;
> > + char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
> > + ssize_t ret, cnt = 0;
> > +
> > + mutex_lock(&ksym_tracer_mutex);
> > +
> > + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> > + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
> > + entry->ksym_hbkpt->info.name);
> > + if (entry->ksym_hbkpt->info.type == HW_BREAKPOINT_WRITE)
> > + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> > + "-w-\n");
> > + else if (entry->ksym_hbkpt->info.type == HW_BREAKPOINT_RW)
> > + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> > + "rw-\n");
> > + }
> > + ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
> > + mutex_unlock(&ksym_tracer_mutex);
> > +
> > + return ret;
> > +}
> > +
> > +static ssize_t ksym_trace_filter_write(struct file *file,
> > + const char __user *buffer,
> > + size_t count, loff_t *ppos)
> > +{
> > + struct trace_ksym *entry;
> > + struct hlist_node *node;
> > + char *input_string, *ksymname = NULL;
> > + unsigned long ksym_addr = 0;
> > + int ret, op, changed = 0;
> > +
> > + input_string = kzalloc(count, GFP_KERNEL);
> > + if (!input_string)
> > + return -ENOMEM;
> > +
> > + /* Ignore echo "" > ksym_trace_filter */
> > + if (count == 0)
> > + return 0;
>
>
> You forgot to free input_string in !count case.
>
>
> > +
> > + if (copy_from_user(input_string, buffer, count))
> > + return -EFAULT;
>
>
> Ditto.
>
> > + ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
> > +
> > + if (ret < 0)
> > + goto err_ret;
>
>
> Ah, here you didn't forget.
>
>
> > + mutex_lock(&ksym_tracer_mutex);
> > +
> > + ret = -EINVAL;
> > + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> > + if (entry->ksym_addr == ksym_addr) {
> > + /* Check for malformed request: (6) */
> > + if (entry->ksym_hbkpt->info.type != op)
> > + changed = 1;
> > + else
> > + goto err_ret;
> > + break;
> > + }
> > + }
> > + if (changed) {
> > + unregister_kernel_hw_breakpoint(entry->ksym_hbkpt);
> > + entry->ksym_hbkpt->info.type = op;
> > + if (op > 0) {
> > + ret = register_kernel_hw_breakpoint(entry->ksym_hbkpt);
> > + if (ret > 0) {
> > + ret = count;
> > + goto unlock_ret_path;
> > + }
> > + if (ret == 0) {
> > + ret = -ENOSPC;
> > + unregister_kernel_hw_breakpoint(entry->\
> > + ksym_hbkpt);
> > + }
> > + }
> > + ksym_filter_entry_count--;
> > + hlist_del(&(entry->ksym_hlist));
> > + kfree(entry->ksym_hbkpt);
> > + kfree(entry);
> > + ret = count;
> > + goto err_ret;
> > + } else {
> > + /* Check for malformed request: (4) */
> > + if (op == 0)
> > + goto err_ret;
> > +
> > + ret = process_new_ksym_entry(entry, ksymname, op, ksym_addr);
>
>
> You are passing an allocated entry as a parameter, but later on process_new_ksym_entry()
> you allocate a new space for entry.
> I'm confused.
>
>

When changed = 1, entry points to the existing instance of 'struct
trace_ksym' and will be used for changing the type of breakpoint. If the
input is a new request to the ksym_trace_filter file, process_new_ksym_entry()
takes a pointer to 'struct trace_ksym', i.e. entry, for
allocation/initialisation rather than using it as a parameter in the true
sense.

This is similar to the usage of parameters 'ksymname and addr' in
parse_ksym_trace_str() where they are used to return multiple values.

I hope you find the usage acceptable.

> > +
> > +__init static int init_ksym_trace(void)
> > +{
> > + struct dentry *d_tracer;
> > + struct dentry *entry;
> > +
> > + d_tracer = tracing_init_dentry();
> > + ksym_filter_entry_count = 0;
> > +
> > + entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
> > + NULL, &ksym_tracing_fops);
> > + if (!entry)
> > + pr_warning("Could not create debugfs "
> > + "'ksym_trace_filter' file\n");
> > +
> > + return register_tracer(&ksym_tracer);
> > +
> > +}
> > +device_initcall(init_ksym_trace);
>
>
> Well, the rest looks good.
>
>

Thanks again for your comments.

-- K.Prasad

2009-03-05 12:19:48

by K.Prasad

Subject: Re: [patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces

On Thu, Mar 05, 2009 at 05:03:59PM +0530, K.Prasad wrote:
> On Thu, Mar 05, 2009 at 07:37:04AM +0100, Frederic Weisbecker wrote:
> > On Thu, Mar 05, 2009 at 10:13:33AM +0530, [email protected] wrote:
> > > This patch adds an ftrace plugin to detect and profile memory access over
> > > kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> > > addresses.
> > >
> > > Signed-off-by: K.Prasad <[email protected]>
> > > ---
> >
> >
> > > +
> > > + ret = process_new_ksym_entry(entry, ksymname, op, ksym_addr);
> >
> >
> > You are passing an allocated entry as a parameter, but later on process_new_ksym_entry()
> > you allocate a new space for entry.
> > I'm confused.
> >
> >
>
> When changed = 1, entry points to the existing instance of 'struct
> trace_ksym' and will be used for changing the type of breakpoint. If the
> input is a new request to ksym_trace_filter file process_new_ksym_entry()
> takes a pointer to 'struct trace_ksym' i.e entry for
> allocation/initialisation rather than use it as a parameter in the true
> sense.
>
> This is similar to the usage of parameters 'ksymname and addr' in
> parse_ksym_trace_str() where they are used to return multiple values.
>
> I hope you find the usage acceptable.
>

aah....but entry isn't used anywhere in
ksym_trace_filter_write() after process_new_ksym_entry(). I was trying
to explain why I used entry as a parameter to let
process_new_ksym_entry() return multiple values, but it isn't used after
that. I will remove it, and thanks for pointing it out.

-- K.Prasad

2009-03-05 12:28:46

by Frederic Weisbecker

Subject: Re: [patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces

On Thu, Mar 05, 2009 at 05:03:59PM +0530, K.Prasad wrote:
> On Thu, Mar 05, 2009 at 07:37:04AM +0100, Frederic Weisbecker wrote:
> > On Thu, Mar 05, 2009 at 10:13:33AM +0530, [email protected] wrote:
> > > This patch adds an ftrace plugin to detect and profile memory access over
> > > kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> > > addresses.
> > >
> > > Signed-off-by: K.Prasad <[email protected]>
> > > ---
> >
> >
> > Hi,
> >
> > Nice feature. And moreover the standardized hardware breakpoints could
> > be helpful for tracing.
> >
> > Just some comments below.
> >
> >
>
> Hi,
> Thanks for reviewing the code and pointing out the potential memory
> leaks. The next iteration of this code should contain fixes for them.
> I've explained the usage of 'entry' field inline.
>
> > > +struct trace_ksym {
> > > + struct trace_entry ent;
> > > + struct hw_breakpoint *ksym_hbkpt;
> > > + unsigned long ksym_addr;
> > > + unsigned long ip;
> > > + pid_t pid;
> >
> >
> > Just a doubt here.
> > The current pid is automatically recorded on trace_buffer_lock_reserve()
> > (or unlock_commit, don't remember), so if this pid is the current one, you
> > don't need to reserve a room for it, current pid is on struct trace_entry.
> >
>
> It's a carriage from an old version of the code which used the old
> ring-buffer APIs like ring_buffer_lock_reserve(). I will now use the
> "pid" field in "struct trace_entry".
>
> > > +static int process_new_ksym_entry(struct trace_ksym *entry, char *ksymname,
> > > + int op, unsigned long addr)
> > > +{
> > > + if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
> > > + printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
> > > + " new requests for tracing can be accepted now.\n",
> > > + KSYM_TRACER_MAX);
> > > + return -ENOSPC;
> > > + }
> > > +
> > > + entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
> >
> >
> > I'm not sure I understand, you passed an allocated entry to that function, no?
> > If your are using entry as a local variable, it doesn't make sense to pass it
> > as a parameter.
> >
> >
> > > + if (!entry)
> > > + return -ENOMEM;
> > >
> > > + entry->ksym_hbkpt = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
> > > + if (!entry->ksym_hbkpt)
> > > + return -ENOMEM;
> >
> >
> > Ouch, what happens here to the memory pointed by entry?
> >
> >
>
> A potential leak....will fix this and the others you've pointed below.
>
> > > +
> > > + entry->ksym_hbkpt->info.name = ksymname;
> > > + entry->ksym_hbkpt->info.type = op;
> > > + entry->ksym_addr = entry->ksym_hbkpt->info.address = addr;
> > > + entry->ksym_hbkpt->info.len = HW_BREAKPOINT_LEN_4;
> > > + entry->ksym_hbkpt->priority = HW_BREAKPOINT_PRIO_NORMAL;
> > > +
> > > + entry->ksym_hbkpt->installed = (void *)ksym_hbkpt_installed;
> > > + entry->ksym_hbkpt->uninstalled = (void *)ksym_hbkpt_uninstalled;
> > > + entry->ksym_hbkpt->triggered = (void *)ksym_hbkpt_handler;
> > > +
> > > + if ((register_kernel_hw_breakpoint(entry->ksym_hbkpt)) < 0) {
> > > + printk(KERN_INFO "ksym_tracer request failed. Try again"
> > > + " later!!\n");
> > > + kfree(entry);
> > > + return -EAGAIN;
> >
> >
> > You forgot to free entry->ksym_hbkpt
> >
> >
> > > + }
> > > + hlist_add_head(&(entry->ksym_hlist), &ksym_filter_head);
> > > + printk(KERN_INFO "ksym_tracer changes are now effective\n");
> > > +
> > > + ksym_filter_entry_count++;
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
> > > + size_t count, loff_t *ppos)
> > > +{
> > > + struct trace_ksym *entry;
> > > + struct hlist_node *node;
> > > + char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
> > > + ssize_t ret, cnt = 0;
> > > +
> > > + mutex_lock(&ksym_tracer_mutex);
> > > +
> > > + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> > > + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
> > > + entry->ksym_hbkpt->info.name);
> > > + if (entry->ksym_hbkpt->info.type == HW_BREAKPOINT_WRITE)
> > > + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> > > + "-w-\n");
> > > + else if (entry->ksym_hbkpt->info.type == HW_BREAKPOINT_RW)
> > > + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> > > + "rw-\n");
> > > + }
> > > + ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
> > > + mutex_unlock(&ksym_tracer_mutex);
> > > +
> > > + return ret;
> > > +}
> > > +
> > > +static ssize_t ksym_trace_filter_write(struct file *file,
> > > + const char __user *buffer,
> > > + size_t count, loff_t *ppos)
> > > +{
> > > + struct trace_ksym *entry;
> > > + struct hlist_node *node;
> > > + char *input_string, *ksymname = NULL;
> > > + unsigned long ksym_addr = 0;
> > > + int ret, op, changed = 0;
> > > +
> > > + input_string = kzalloc(count, GFP_KERNEL);
> > > + if (!input_string)
> > > + return -ENOMEM;
> > > +
> > > + /* Ignore echo "" > ksym_trace_filter */
> > > + if (count == 0)
> > > + return 0;
> >
> >
> > You forgot to free input_string in !count case.
> >
> >
> > > +
> > > + if (copy_from_user(input_string, buffer, count))
> > > + return -EFAULT;
> >
> >
> > Ditto.
> >
> > > + ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
> > > +
> > > + if (ret < 0)
> > > + goto err_ret;
> >
> >
> > Ah, here you didn't forget.
> >
> >
> > > + mutex_lock(&ksym_tracer_mutex);
> > > +
> > > + ret = -EINVAL;
> > > + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> > > + if (entry->ksym_addr == ksym_addr) {
> > > + /* Check for malformed request: (6) */
> > > + if (entry->ksym_hbkpt->info.type != op)
> > > + changed = 1;
> > > + else
> > > + goto err_ret;
> > > + break;
> > > + }
> > > + }
> > > + if (changed) {
> > > + unregister_kernel_hw_breakpoint(entry->ksym_hbkpt);
> > > + entry->ksym_hbkpt->info.type = op;
> > > + if (op > 0) {
> > > + ret = register_kernel_hw_breakpoint(entry->ksym_hbkpt);
> > > + if (ret > 0) {
> > > + ret = count;
> > > + goto unlock_ret_path;
> > > + }
> > > + if (ret == 0) {
> > > + ret = -ENOSPC;
> > > + unregister_kernel_hw_breakpoint(entry->\
> > > + ksym_hbkpt);
> > > + }
> > > + }
> > > + ksym_filter_entry_count--;
> > > + hlist_del(&(entry->ksym_hlist));
> > > + kfree(entry->ksym_hbkpt);
> > > + kfree(entry);
> > > + ret = count;
> > > + goto err_ret;
> > > + } else {
> > > + /* Check for malformed request: (4) */
> > > + if (op == 0)
> > > + goto err_ret;
> > > +
> > > + ret = process_new_ksym_entry(entry, ksymname, op, ksym_addr);
> >
> >
> > You are passing an allocated entry as a parameter, but later on process_new_ksym_entry()
> > you allocate a new space for entry.
> > I'm confused.
> >
> >
>
> When changed = 1, entry points to the existing instance of 'struct
> trace_ksym' and will be used for changing the type of breakpoint. If the
> input is a new request to ksym_trace_filter file process_new_ksym_entry()
> takes a pointer to 'struct trace_ksym' i.e entry for
> allocation/initialisation rather than use it as a parameter in the true
> sense.
>
> This is similar to the usage of parameters 'ksymname and addr' in
> parse_ksym_trace_str() where they are used to return multiple values.
>
> I hope you find the usage acceptable.


Hmm. I understand the case of ksymname and addr in parse_ksym_trace_str().

But I don't understand the case here.
You pass the "entry" pointer to process_new_ksym_entry() but:

- this is only a pointer of type struct trace_ksym * and not
struct trace_ksym **entry.
Once it reaches process_new_ksym_entry() it is no longer the
same variable as the one the caller passed. You overwrite it
with kzalloc(), but this change is not visible to the caller,
which keeps the same address stored in its pointer.

- you are not reusing it in the caller after it has called
process_new_ksym_entry()

But you do use it in the callee, because you insert it into the list.
So the code is not wrong; it's just that such a purely internal
pointer is generally expected to be declared inside the function itself:

static int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
{
	struct trace_ksym *entry;

	entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);

	...
}

Otherwise, when such a parameter is passed, the code reader would expect that:

1) this is a value that we will use inside this function (not the case, the value
is immediately overwritten), or
2) this is a secondary return value (not the case, or we would need a pointer to
a pointer).

Well, sorry, perhaps I'm being a bit annoying with that :-)
It's just for code readability...I mean the code flow for the reader's eyes.
But the code itself is not broken.
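
The call site in ksym_trace_filter_write() would then simply be (sketch):

	ret = process_new_ksym_entry(ksymname, op, ksym_addr);
	if (ret)
		goto err_ret;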


Thanks.
Frederic.


> > > +
> > > +__init static int init_ksym_trace(void)
> > > +{
> > > + struct dentry *d_tracer;
> > > + struct dentry *entry;
> > > +
> > > + d_tracer = tracing_init_dentry();
> > > + ksym_filter_entry_count = 0;
> > > +
> > > + entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
> > > + NULL, &ksym_tracing_fops);
> > > + if (!entry)
> > > + pr_warning("Could not create debugfs "
> > > + "'ksym_trace_filter' file\n");
> > > +
> > > + return register_tracer(&ksym_tracer);
> > > +
> > > +}
> > > +device_initcall(init_ksym_trace);
> >
> >
> > Well, the rest looks good.
> >
> >
>
> Thanks again for your comments.
>
> -- K.Prasad

2009-03-05 12:30:43

by Frederic Weisbecker

Subject: Re: [patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces

On Thu, Mar 05, 2009 at 05:49:29PM +0530, K.Prasad wrote:
> On Thu, Mar 05, 2009 at 05:03:59PM +0530, K.Prasad wrote:
> > On Thu, Mar 05, 2009 at 07:37:04AM +0100, Frederic Weisbecker wrote:
> > > On Thu, Mar 05, 2009 at 10:13:33AM +0530, [email protected] wrote:
> > > > This patch adds an ftrace plugin to detect and profile memory access over
> > > > kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> > > > addresses.
> > > >
> > > > Signed-off-by: K.Prasad <[email protected]>
> > > > ---
> > >
> > >
> > > > +
> > > > + ret = process_new_ksym_entry(entry, ksymname, op, ksym_addr);
> > >
> > >
> > > You are passing an allocated entry as a parameter, but later on process_new_ksym_entry()
> > > you allocate a new space for entry.
> > > I'm confused.
> > >
> > >
> >
> > When changed = 1, entry points to the existing instance of 'struct
> > trace_ksym' and will be used for changing the type of breakpoint. If the
> > input is a new request to ksym_trace_filter file process_new_ksym_entry()
> > takes a pointer to 'struct trace_ksym' i.e entry for
> > allocation/initialisation rather than use it as a parameter in the true
> > sense.
> >
> > This is similar to the usage of parameters 'ksymname and addr' in
> > parse_ksym_trace_str() where they are used to return multiple values.
> >
> > I hope you find the usage acceptable.
> >
>
> aah....but entry isn't used anywhere anywhere in
> ksym_trace_filter_write() after process_new_ksym_entry(). I was trying
> to explain why I used entry as a parameter to let
> process_new_ksym_entry() return multiple values, but it isn't used after
> that. I will remove it, and thanks for pointing it.
>
> -- K.Prasad
>

Ah, I thought I misunderstood something :-)

Thanks.

2009-03-05 13:15:54

by K.Prasad

Subject: Re: [patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces

On Thu, Mar 05, 2009 at 10:16:11AM +0100, Ingo Molnar wrote:
>
> * Frederic Weisbecker <[email protected]> wrote:
>
> > On Thu, Mar 05, 2009 at 10:13:33AM +0530, [email protected] wrote:
> > > This patch adds an ftrace plugin to detect and profile memory access over
> > > kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> > > addresses.
> > >
> > > Signed-off-by: K.Prasad <[email protected]>
> > > ---
> >
> >
> > Hi,
> >
> > Nice feature. And moreover the standardized hardware
> > breakpoints could be helpful for tracing.
>
> yeah. The feature is much more alive now.
>
> > Just some comments below.
>
> One other thing:
>
> +#ifdef CONFIG_FTRACE_SELFTEST
> +int trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
> +{
> + /* TODO: Will be implemented later */
> + return 0;
> +}
> +#endif /* CONFIG_FTRACE_SELFTEST */
>
> This needs to be implemented before i can pick the code up into
> tip:tracing, as otherwise we will not notice it fast enough if
> some of this stuff breaks.
>
> Basically the ftrace plugin will be the main usage vector of
> this facility, so the self-test is a must-have.
>
> Looks very nice otherwise.
>
> Ingo

Thanks for the comments.

Test-cases for the hardware breakpoint interfaces can be the following:

- Basic sanity test to check if the API is intact
- Perform various types of memory accesses, like read and write (I/O and
others when implemented), on a dummy kernel variable and verify that the
exception handler triggers.

While the above can be a part of trace_selftest_startup_ksym(),
rigorous testing would involve:

i) stressing the HW breakpoint infrastructure to confirm sane behaviour
when it interoperates with other users of a) the breakpoint registers and
b) the do_debug() exception. This will involve simultaneous use of kprobes,
the hardware breakpoint interface and requests from user-space (say through
GDB).
ii) Verifying that HB_NUM register_ requests can be satisfied successfully.
iii) Verifying correct priority resolution and handling of user-space
requests.

These, in my opinion, would better fit in a full-featured test-suite
such as LTP, as opposed to startup testing in ftrace.

I will implement trace_selftest_startup_ksym() to contain the first two
test-cases in the next iteration of this code.
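
A rough sketch of what I have in mind for the startup test (untested, exact
helpers may change; it assumes process_new_ksym_entry() is made visible to
trace_selftest.c and loses its 'entry' parameter as discussed earlier in the
thread):

	static int ksym_selftest_dummy;

	int trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
	{
		unsigned long count;
		int ret;

		ret = trace->init(tr);
		if (ret)
			return ret;

		ksym_selftest_dummy = 0;
		/* Set a write breakpoint on the dummy variable */
		ret = process_new_ksym_entry("ksym_selftest_dummy",
					     HW_BREAKPOINT_WRITE,
					     (unsigned long)&ksym_selftest_dummy);
		if (ret < 0)
			goto out;

		/* This write should make the breakpoint handler log one entry */
		ksym_selftest_dummy++;

	out:
		/* Stop tracing and check that at least one entry was recorded */
		tracing_stop();
		if (!ret)
			ret = trace_test_buffer(tr, &count);
		trace->reset(tr);
		tracing_start();

		if (!ret && !count)
			ret = -1;

		/* (a complete test would also remove the filter entry again) */
		return ret;
	}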

Thanks,
K.Prasad



2009-03-05 13:28:47

by Ingo Molnar

Subject: Re: [patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces


* K.Prasad <[email protected]> wrote:

> On Thu, Mar 05, 2009 at 10:16:11AM +0100, Ingo Molnar wrote:
> >
> > * Frederic Weisbecker <[email protected]> wrote:
> >
> > > On Thu, Mar 05, 2009 at 10:13:33AM +0530, [email protected] wrote:
> > > > This patch adds an ftrace plugin to detect and profile memory access over
> > > > kernel variables. It uses HW Breakpoint interfaces to 'watch memory
> > > > addresses.
> > > >
> > > > Signed-off-by: K.Prasad <[email protected]>
> > > > ---
> > >
> > >
> > > Hi,
> > >
> > > Nice feature. And moreover the standardized hardware
> > > breakpoints could be helpful for tracing.
> >
> > yeah. The feature is much more alive now.
> >
> > > Just some comments below.
> >
> > One other thing:
> >
> > +#ifdef CONFIG_FTRACE_SELFTEST
> > +int trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
> > +{
> > + /* TODO: Will be implemented later */
> > + return 0;
> > +}
> > +#endif /* CONFIG_FTRACE_SELFTEST */
> >
> > This needs to be implemented before i can pick the code up into
> > tip:tracing, as otherwise we will not notice it fast enough if
> > some of this stuff breaks.
> >
> > Basically the ftrace plugin will be the main usage vector of
> > this facility, so the self-test is a must-have.
> >
> > Looks very nice otherwise.
> >
> > Ingo
>
> Thanks for the comments.
>
> Test-cases for the hardware breakpoint interfaces can be the following:
>
> - Basic sanity test to check if the API is intact
> - Perform various types of memory accesses, like read, write (I/O and
> others when implemented) on a dummy kernel variable and verify the
> trigger of the exception handler.
>
> While the above can be a part of trace_selftest_startup_ksym(),
> rigorous testing would involve:
>
> i) stressing the HW breakpoint infrastructure to confirm sane behaviour
> when interoperated with other users of a)breakpoint register b)the
> do_debug() exception. This will involve simultaneous use of kprobes,
> hardware breakpoint interface and requests from user-space (say through
> GDB).
> ii) Verifying successful HB_NUM number of register_ requests.
> iii) Verifying right priority resolution, and handling user-space
> requests.
>
> These, in my opinion, would better fit in a full-featured
> test-suite such as LTP, as opposed to startup testing in
> ftrace.

sure. It's just a quick self-test to make sure basic
functionality is ok.

> I will implement trace_selftest_startup_ksym() to contain the
> first two test-cases in the next iteration of this code.

Thanks.

Ingo

2009-03-05 14:54:25

by Steven Rostedt

Subject: Re: [patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces


On Thu, 5 Mar 2009, [email protected] wrote:
> };
> @@ -191,6 +194,17 @@ struct kmemtrace_free_entry {
> const void *ptr;
> };
>
> +struct trace_ksym {
> + struct trace_entry ent;
> + struct hw_breakpoint *ksym_hbkpt;
> + unsigned long ksym_addr;
> + unsigned long ip;
> + pid_t pid;

pid not needed, see below.

> + struct hlist_node ksym_hlist;
> + char ksym_name[KSYM_NAME_LEN];
> + char p_name[TASK_COMM_LEN];
> +};
> +
> /*
> * trace_flag_type is an enumeration that holds different
> * states when a trace occurs. These are:
> @@ -302,6 +316,7 @@ extern void __ftrace_bad_type(void);
> TRACE_KMEM_ALLOC); \
> IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
> TRACE_KMEM_FREE); \
> + IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \
> __ftrace_bad_type(); \
> } while (0)
>
> Index: linux-2.6-tip/kernel/trace/trace_ksym.c
> ===================================================================
> --- /dev/null
> +++ linux-2.6-tip/kernel/trace/trace_ksym.c
> @@ -0,0 +1,399 @@
> +#include <linux/module.h>
> +#include <linux/fs.h>
> +#include <linux/debugfs.h>
> +#include <linux/ftrace.h>
> +#include <linux/kallsyms.h>
> +#include <linux/uaccess.h>
> +
> +#include "trace.h"
> +#include "trace_output.h"
> +
> +/* For now, let us restrict the no. of symbols traced simultaneously to number
> + * of available hardware breakpoint registers.
> + */
> +#define KSYM_TRACER_MAX HB_NUM
> +
> +#define KSYM_TRACER_OP_LEN 3 /* rw- */
> +#define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1)
> +
> +#define KSYM_DEBUG 1
> +
> +static struct trace_array *ksym_trace_array;
> +
> +DEFINE_MUTEX(ksym_tracer_mutex);
> +
> +static unsigned int ksym_filter_entry_count;
> +static unsigned int ksym_tracing_enabled;
> +
> +static HLIST_HEAD(ksym_filter_head);
> +
> +/* HW Breakpoint related callback functions */
> +void ksym_hbkpt_installed(struct hw_breakpoint *temp, struct pt_regs
> + *temp_regs)
> +{
> +}
> +
> +void ksym_hbkpt_uninstalled(struct hw_breakpoint *temp, struct
> + pt_regs * temp_regs)
> +{
> +}
> +
> +void ksym_hbkpt_handler(struct hw_breakpoint *hbkpt, struct pt_regs *regs)
> +{
> + struct ring_buffer_event *event;
> + struct trace_array *tr;
> + struct trace_ksym *entry;
> + int pc;
> +
> + if (!ksym_tracing_enabled)
> + return;
> +
> + tr = ksym_trace_array;
> + pc = preempt_count();
> +
> + event = trace_buffer_lock_reserve(tr, TRACE_KSYM,
> + sizeof(*entry), 0, pc);
> + if (!event)
> + return;
> +
> + entry = ring_buffer_event_data(event);
> + strlcpy(entry->ksym_name, hbkpt->info.name, KSYM_SYMBOL_LEN);
> + entry->ksym_hbkpt = hbkpt;
> + entry->ip = instruction_pointer(regs);
> + strlcpy(entry->p_name, current->comm, TASK_COMM_LEN);
> +
> + entry->pid = current->pid;

You just duplicated the pid. In trace_buffer_lock_reserve we record
the pid in entry->ent.pid.
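
[Editor's note: in other words, with the duplicate field dropped from
struct trace_ksym, the tail of ksym_hbkpt_handler() would reduce to roughly
the following sketch, based on the code quoted above. The KSYM_NAME_LEN
bound also matches the size of the ksym_name buffer in the entry, unlike
the KSYM_SYMBOL_LEN used a few lines up.]

	entry = ring_buffer_event_data(event);
	/* destination buffer is KSYM_NAME_LEN bytes */
	strlcpy(entry->ksym_name, hbkpt->info.name, KSYM_NAME_LEN);
	entry->ksym_hbkpt = hbkpt;
	entry->ip = instruction_pointer(regs);
	strlcpy(entry->p_name, current->comm, TASK_COMM_LEN);
	/* no entry->pid assignment: trace_buffer_lock_reserve() has already
	 * stored current->pid in the generic header, entry->ent.pid */
	trace_buffer_unlock_commit(tr, event, 0, pc);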


> + trace_buffer_unlock_commit(tr, event, 0, pc);
> +}
> +
> +/* Valid access types are represented as
> + *
> + * rw- : Set Read/Write Access Breakpoint
> + * -w- : Set Write Access Breakpoint
> + * --- : Clear Breakpoints
> + * --x : Set Execution Break points (Not available yet)
> + *
> + */
> +static int ksym_trace_get_access_type(char *access_str)
> +{
> + int pos, access = 0;
> +
> + for (pos = 0; pos < KSYM_TRACER_OP_LEN; pos++) {
> + switch (access_str[pos]) {
> + case 'r':
> + access += (pos == 0) ? 4 : -1;
> + break;
> + case 'w':
> + access += (pos == 1) ? 2 : -1;
> + break;
> + case '-':
> + break;
> + default:
> + return -EINVAL;
> + }
> + }
> +
> + switch (access) {
> + case 6:
> + access = HW_BREAKPOINT_RW;
> + break;
> + case 2:
> + access = HW_BREAKPOINT_WRITE;
> + break;
> + case 0:
> + access = 0;
> + }
> +
> + return access;
> +}
> +
> +/*
> + * There can be several possible malformed requests and we attempt to capture
> + * all of them. We enumerate some of the rules
> + * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
> + * i.e. multiple ':' symbols disallowed. Possible uses are of the form
> + * <module>:<ksym_name>:<op>.
> + * 2. No delimiter symbol ':' in the input string
> + * 3. Spurious operator symbols or symbols not in their respective positions
> + * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
> + * 5. Kernel symbol not a part of /proc/kallsyms
> + * 6. Duplicate requests
> + */
> +static int parse_ksym_trace_str(char *input_string, char **ksymname,
> + unsigned long *addr)
> +{
> + char *delimiter = ":";
> + int ret;
> +
> + ret = -EINVAL;
> + *ksymname = strsep(&input_string, delimiter);
> + *addr = kallsyms_lookup_name(*ksymname);
> +
> + /* Check for malformed request: (2), (1) and (5) */
> + if ((!input_string) ||
> + (strlen(input_string) != KSYM_TRACER_OP_LEN + 1) ||
> + (*addr == 0))
> + goto return_code;
> +
> + ret = ksym_trace_get_access_type(input_string);
> +
> +return_code:
> + return ret;
> +}
> +
> +static int process_new_ksym_entry(struct trace_ksym *entry, char *ksymname,
> + int op, unsigned long addr)
> +{
> + if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
> + printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
> + " new requests for tracing can be accepted now.\n",
> + KSYM_TRACER_MAX);
> + return -ENOSPC;
> + }
> +
> + entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
> + if (!entry)
> + return -ENOMEM;
> +
> + entry->ksym_hbkpt = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
> + if (!entry->ksym_hbkpt)
> + return -ENOMEM;
> +
> + entry->ksym_hbkpt->info.name = ksymname;
> + entry->ksym_hbkpt->info.type = op;
> + entry->ksym_addr = entry->ksym_hbkpt->info.address = addr;
> + entry->ksym_hbkpt->info.len = HW_BREAKPOINT_LEN_4;
> + entry->ksym_hbkpt->priority = HW_BREAKPOINT_PRIO_NORMAL;
> +
> + entry->ksym_hbkpt->installed = (void *)ksym_hbkpt_installed;
> + entry->ksym_hbkpt->uninstalled = (void *)ksym_hbkpt_uninstalled;
> + entry->ksym_hbkpt->triggered = (void *)ksym_hbkpt_handler;
> +
> + if ((register_kernel_hw_breakpoint(entry->ksym_hbkpt)) < 0) {
> + printk(KERN_INFO "ksym_tracer request failed. Try again"
> + " later!!\n");
> + kfree(entry);
> + return -EAGAIN;
> + }
> + hlist_add_head(&(entry->ksym_hlist), &ksym_filter_head);
> + printk(KERN_INFO "ksym_tracer changes are now effective\n");
> +
> + ksym_filter_entry_count++;
> +
> + return 0;
> +}
> +
> +static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
> + size_t count, loff_t *ppos)
> +{
> + struct trace_ksym *entry;
> + struct hlist_node *node;
> + char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
> + ssize_t ret, cnt = 0;
> +
> + mutex_lock(&ksym_tracer_mutex);
> +
> + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
> + entry->ksym_hbkpt->info.name);
> + if (entry->ksym_hbkpt->info.type == HW_BREAKPOINT_WRITE)
> + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> + "-w-\n");
> + else if (entry->ksym_hbkpt->info.type == HW_BREAKPOINT_RW)
> + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> + "rw-\n");
> + }
> + ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
> + mutex_unlock(&ksym_tracer_mutex);
> +
> + return ret;
> +}
> +
> +static ssize_t ksym_trace_filter_write(struct file *file,
> + const char __user *buffer,
> + size_t count, loff_t *ppos)
> +{
> + struct trace_ksym *entry;
> + struct hlist_node *node;
> + char *input_string, *ksymname = NULL;
> + unsigned long ksym_addr = 0;
> + int ret, op, changed = 0;
> +
> + input_string = kzalloc(count, GFP_KERNEL);
> + if (!input_string)
> + return -ENOMEM;
> +
> + /* Ignore echo "" > ksym_trace_filter */
> + if (count == 0)
> + return 0;
> +
> + if (copy_from_user(input_string, buffer, count))
> + return -EFAULT;
> +
> + ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
> +
> + if (ret < 0)
> + goto err_ret;
> + mutex_lock(&ksym_tracer_mutex);
> +
> + ret = -EINVAL;
> + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> + if (entry->ksym_addr == ksym_addr) {
> + /* Check for malformed request: (6) */
> + if (entry->ksym_hbkpt->info.type != op)
> + changed = 1;
> + else
> + goto err_ret;
> + break;
> + }
> + }
> + if (changed) {
> + unregister_kernel_hw_breakpoint(entry->ksym_hbkpt);
> + entry->ksym_hbkpt->info.type = op;
> + if (op > 0) {
> + ret = register_kernel_hw_breakpoint(entry->ksym_hbkpt);
> + if (ret > 0) {
> + ret = count;
> + goto unlock_ret_path;
> + }
> + if (ret == 0) {
> + ret = -ENOSPC;
> + unregister_kernel_hw_breakpoint(entry->\
> + ksym_hbkpt);
> + }
> + }
> + ksym_filter_entry_count--;
> + hlist_del(&(entry->ksym_hlist));
> + kfree(entry->ksym_hbkpt);
> + kfree(entry);
> + ret = count;
> + goto err_ret;
> + } else {
> + /* Check for malformed request: (4) */
> + if (op == 0)
> + goto err_ret;
> +
> + ret = process_new_ksym_entry(entry, ksymname, op, ksym_addr);
> + if (ret)
> + goto err_ret;
> + }
> + ret = count;
> + goto unlock_ret_path;
> +
> +err_ret:
> + kfree(input_string);
> +
> +unlock_ret_path:
> + mutex_unlock(&ksym_tracer_mutex);
> + return ret;
> +}
> +
> +static const struct file_operations ksym_tracing_fops = {
> + .open = tracing_open_generic,
> + .read = ksym_trace_filter_read,
> + .write = ksym_trace_filter_write,
> +};
> +
> +static int ksym_trace_init(struct trace_array *tr)
> +{
> + int cpu;
> +
> + for_each_online_cpu(cpu)
> + tracing_reset(tr, cpu);
> + ksym_tracing_enabled = 1;
> + ksym_trace_array = tr;
> +
> + return 0;
> +}
> +
> +static void ksym_trace_reset(struct trace_array *tr)
> +{
> + ksym_tracing_enabled = 0;
> +}
> +
> +#ifdef CONFIG_FTRACE_SELFTEST
> +int trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
> +{
> + /* TODO: Will be implemented later */
> + return 0;
> +}
> +#endif /* CONFIG_FTRACE_SELFTEST */
> +
> +static void ksym_trace_print_header(struct seq_file *m)
> +{
> +
> + seq_puts(m,
> + "# TASK-PID CPU# Symbol Type "
> + "Function \n");
> + seq_puts(m,
> + "# | | | | "
> + "| \n");
> +}
> +
> +static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
> +{
> + struct trace_entry *entry = iter->ent;
> + struct trace_seq *s = &iter->seq;
> + struct trace_ksym *field;
> + char str[KSYM_SYMBOL_LEN];
> + int ret;
> +
> + trace_assign_type(field, entry);
> +
> + ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name,
> + field->pid, iter->cpu, field->ksym_name);

s/field->pid/entry->pid/
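
[Editor's note: that is, once the duplicate field is gone, the output path
reads the pid from the generic trace_entry header that ksym_trace_output()
already has as "entry", roughly:]

	ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name,
			       entry->pid, iter->cpu, field->ksym_name);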

-- Steve

> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + switch (field->ksym_hbkpt->info.type) {
> + case HW_BREAKPOINT_WRITE:
> + ret = trace_seq_printf(s, " W ");
> + break;
> + case HW_BREAKPOINT_RW:
> + ret = trace_seq_printf(s, " RW ");
> + break;
> + default:
> + return TRACE_TYPE_PARTIAL_LINE;
> + }
> +
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + sprint_symbol(str, field->ip);
> + ret = trace_seq_printf(s, "%-20s\n", str);
> + if (!ret)
> + return TRACE_TYPE_PARTIAL_LINE;
> +
> + return TRACE_TYPE_HANDLED;
> +}
> +
> +struct tracer ksym_tracer __read_mostly =
> +{
> + .name = "ksym_tracer",
> + .init = ksym_trace_init,
> + .reset = ksym_trace_reset,
> +#ifdef CONFIG_FTRACE_SELFTEST
> + .selftest = trace_selftest_startup_ksym,
> +#endif
> + .print_header = ksym_trace_print_header,
> + .print_line = ksym_trace_output
> +};
> +
> +__init static int init_ksym_trace(void)
> +{
> + struct dentry *d_tracer;
> + struct dentry *entry;
> +
> + d_tracer = tracing_init_dentry();
> + ksym_filter_entry_count = 0;
> +
> + entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
> + NULL, &ksym_tracing_fops);
> + if (!entry)
> + pr_warning("Could not create debugfs "
> + "'ksym_trace_filter' file\n");
> +
> + return register_tracer(&ksym_tracer);
> +
> +}
> +device_initcall(init_ksym_trace);
>

2009-03-05 15:00:23

by Steven Rostedt

[permalink] [raw]
Subject: Re: [patch 11/11] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces


On Thu, 5 Mar 2009, Frederic Weisbecker wrote:
> >
> > +struct trace_ksym {
> > + struct trace_entry ent;
> > + struct hw_breakpoint *ksym_hbkpt;
> > + unsigned long ksym_addr;
> > + unsigned long ip;
> > + pid_t pid;
>
>
> Just a doubt here.
> The current pid is automatically recorded on trace_buffer_lock_reserve()
> (or unlock_commit, don't remember), so if this pid is the current one, you
> don't need to reserve a room for it, current pid is on struct trace_entry.

Heh, I guess I should have read the rest of the thread before replying.
Frederic beat me to it ;-)

-- Steve
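
[Editor's note: for reference, the generic header both replies point to is
the one every ring-buffer event begins with. Its layout in this era of the
tree is approximately the following sketch; field order is from memory, so
treat it as illustrative rather than authoritative.]

/* kernel/trace/trace.h -- every traced event starts with this header, so
 * the pid of current is always available as entry->ent.pid without
 * duplicating it in per-tracer entry structures.
 */
struct trace_entry {
	unsigned char		type;
	unsigned char		flags;
	unsigned char		preempt_count;
	int			pid;
	int			tgid;
};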