2009-03-22 08:32:01

by Tom Zanussi

[permalink] [raw]
Subject: [PATCH 4/4] tracing: add per-subsystem filtering

This patch adds per-subsystem filtering to the event tracing subsystem.

It adds a 'filter' debugfs file to each subsystem directory. This file
can be written to to set filters; reading from it will display the
current set of filters set for that subsystem.

Basically what it does is propagate the filter down to each event
contained in the subsystem. If a particular event doesn't have a field
with the name specified in the filter, it simply doesn't get set for
that event. You can verify whether or not the filter was set for a
particular event by looking at the filter file for that event.

As with per-event filters, compound expressions are supported, echoing
'0' to the subsystem's filter file clears all filters in the subsystem,
etc.

Signed-off-by: Tom Zanussi <[email protected]>

---
kernel/trace/trace.h | 15 ++++++
kernel/trace/trace_events.c | 86 +++++++++++++++++++++++++++++++++---
kernel/trace/trace_events_filter.c | 80 +++++++++++++++++++++++++++++++++
3 files changed, 175 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d9eb39e..f267723 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -804,6 +804,18 @@ struct ftrace_event_call {
#endif
};

+struct event_subsystem {
+ struct list_head list;
+ const char *name;
+ struct dentry *entry;
+ struct filter_pred **preds;
+};
+
+#define events_for_each(event) \
+ for (event = __start_ftrace_events; \
+ (unsigned long)event < (unsigned long)__stop_ftrace_events; \
+ event++)
+
#define MAX_FILTER_PRED 8

struct filter_pred;
@@ -832,6 +844,9 @@ extern int filter_add_pred(struct ftrace_event_call *call,
struct filter_pred *pred);
extern void filter_free_preds(struct ftrace_event_call *call);
extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
+extern void filter_free_subsystem_preds(struct event_subsystem *system);
+extern int filter_add_subsystem_pred(struct event_subsystem *system,
+ struct filter_pred *pred);

void event_trace_printk(unsigned long ip, const char *fmt, ...);
extern struct ftrace_event_call __start_ftrace_events[];
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 97470c4..97d4daa 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -524,6 +524,71 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
return cnt;
}

+static ssize_t
+subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+{
+ struct event_subsystem *system = filp->private_data;
+ struct trace_seq *s;
+ int r;
+
+ if (*ppos)
+ return 0;
+
+ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ trace_seq_init(s);
+
+ r = filter_print_preds(system->preds, s->buffer);
+ r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, r);
+
+ kfree(s);
+
+ return r;
+}
+
+static ssize_t
+subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+{
+ struct event_subsystem *system = filp->private_data;
+ char buf[64], *pbuf = buf;
+ struct filter_pred *pred;
+ int err;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+ if (!pred)
+ return -ENOMEM;
+
+ err = filter_parse(&pbuf, pred);
+ if (err < 0) {
+ filter_free_pred(pred);
+ return err;
+ }
+
+ if (pred->clear) {
+ filter_free_subsystem_preds(system);
+ return cnt;
+ }
+
+ if (filter_add_subsystem_pred(system, pred)) {
+ filter_free_pred(pred);
+ return -EINVAL;
+ }
+
+ *ppos += cnt;
+
+ return cnt;
+}
+
static const struct seq_operations show_event_seq_ops = {
.start = t_start,
.next = t_next,
@@ -575,6 +640,12 @@ static const struct file_operations ftrace_event_filter_fops = {
.write = event_filter_write,
};

+static const struct file_operations ftrace_subsystem_filter_fops = {
+ .open = tracing_open_generic,
+ .read = subsystem_filter_read,
+ .write = subsystem_filter_write,
+};
+
static struct dentry *event_trace_events_dir(void)
{
static struct dentry *d_tracer;
@@ -595,18 +666,13 @@ static struct dentry *event_trace_events_dir(void)
return d_events;
}

-struct event_subsystem {
- struct list_head list;
- const char *name;
- struct dentry *entry;
-};
-
static LIST_HEAD(event_subsystems);

static struct dentry *
event_subsystem_dir(const char *name, struct dentry *d_events)
{
struct event_subsystem *system;
+ struct dentry *entry;

/* First see if we did not already create this dir */
list_for_each_entry(system, &event_subsystems, list) {
@@ -633,6 +699,14 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
system->name = name;
list_add(&system->list, &event_subsystems);

+ system->preds = NULL;
+
+ entry = debugfs_create_file("filter", 0444, system->entry, system,
+ &ftrace_subsystem_filter_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'%s/filter' entry\n", name);
+
return system->entry;
}

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 8e8c5fa..1ab20ce 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -181,6 +181,27 @@ void filter_free_preds(struct ftrace_event_call *call)
}
}

+void filter_free_subsystem_preds(struct event_subsystem *system)
+{
+ struct ftrace_event_call *call = __start_ftrace_events;
+ int i;
+
+ if (system->preds) {
+ for (i = 0; i < MAX_FILTER_PRED; i++)
+ filter_free_pred(system->preds[i]);
+ kfree(system->preds);
+ system->preds = NULL;
+ }
+
+ events_for_each(call) {
+ if (!call->name || !call->regfunc)
+ continue;
+
+ if (!strcmp(call->system, system->name))
+ filter_free_preds(call);
+ }
+}
+
static int __filter_add_pred(struct ftrace_event_call *call,
struct filter_pred *pred)
{
@@ -250,6 +271,65 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
return __filter_add_pred(call, pred);
}

+static struct filter_pred *copy_pred(struct filter_pred *pred)
+{
+ struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL);
+ if (!new_pred)
+ return NULL;
+
+ memcpy(new_pred, pred, sizeof(*pred));
+ if (pred->str_val) {
+ new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL);
+ new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
+ if (!new_pred->str_val) {
+ kfree(new_pred);
+ return NULL;
+ }
+ }
+
+ return new_pred;
+}
+
+int filter_add_subsystem_pred(struct event_subsystem *system,
+ struct filter_pred *pred)
+{
+ struct ftrace_event_call *call = __start_ftrace_events;
+ struct filter_pred *event_pred;
+ int i;
+
+ if (system->preds && !pred->compound)
+ filter_free_subsystem_preds(system);
+
+ if (!system->preds) {
+ system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
+ GFP_KERNEL);
+ if (!system->preds)
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < MAX_FILTER_PRED; i++) {
+ if (!system->preds[i]) {
+ system->preds[i] = pred;
+ break;
+ }
+ if (i == MAX_FILTER_PRED - 1)
+ return -EINVAL;
+ }
+
+ events_for_each(call) {
+ if (!call->name || !call->regfunc)
+ continue;
+
+ if (!strcmp(call->system, system->name)) {
+ event_pred = copy_pred(pred);
+ if (event_pred)
+ filter_add_pred(call, event_pred);
+ }
+ }
+
+ return 0;
+}
+
int filter_parse(char **pbuf, struct filter_pred *pred)
{
char *tmp, *tok, *val_str = NULL;
--
1.5.6.3



2009-03-22 19:02:20

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH 4/4] tracing: add per-subsystem filtering

On Sun, Mar 22, 2009 at 03:31:17AM -0500, Tom Zanussi wrote:
> This patch adds per-subsystem filtering to the event tracing subsystem.
>
> It adds a 'filter' debugfs file to each subsystem directory. This file
> can be written to to set filters; reading from it will display the
> current set of filters set for that subsystem.
>
> Basically what it does is propagate the filter down to each event
> contained in the subsystem. If a particular event doesn't have a field
> with the name specified in the filter, it simply doesn't get set for
> that event. You can verify whether or not the filter was set for a
> particular event by looking at the filter file for that event.
>
> As with per-event filters, compound expressions are supported, echoing
> '0' to the subsystem's filter file clears all filters in the subsystem,
> etc.
>
> Signed-off-by: Tom Zanussi <[email protected]>
>
> ---
> kernel/trace/trace.h | 15 ++++++
> kernel/trace/trace_events.c | 86 +++++++++++++++++++++++++++++++++---
> kernel/trace/trace_events_filter.c | 80 +++++++++++++++++++++++++++++++++
> 3 files changed, 175 insertions(+), 6 deletions(-)
>
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index d9eb39e..f267723 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -804,6 +804,18 @@ struct ftrace_event_call {
> #endif
> };
>
> +struct event_subsystem {
> + struct list_head list;
> + const char *name;
> + struct dentry *entry;
> + struct filter_pred **preds;
> +};
> +
> +#define events_for_each(event) \
> + for (event = __start_ftrace_events; \
> + (unsigned long)event < (unsigned long)__stop_ftrace_events; \
> + event++)
> +
> #define MAX_FILTER_PRED 8
>
> struct filter_pred;
> @@ -832,6 +844,9 @@ extern int filter_add_pred(struct ftrace_event_call *call,
> struct filter_pred *pred);
> extern void filter_free_preds(struct ftrace_event_call *call);
> extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
> +extern void filter_free_subsystem_preds(struct event_subsystem *system);
> +extern int filter_add_subsystem_pred(struct event_subsystem *system,
> + struct filter_pred *pred);
>
> void event_trace_printk(unsigned long ip, const char *fmt, ...);
> extern struct ftrace_event_call __start_ftrace_events[];
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index 97470c4..97d4daa 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -524,6 +524,71 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
> return cnt;
> }
>
> +static ssize_t
> +subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
> + loff_t *ppos)
> +{
> + struct event_subsystem *system = filp->private_data;
> + struct trace_seq *s;
> + int r;
> +
> + if (*ppos)
> + return 0;
> +
> + s = kmalloc(sizeof(*s), GFP_KERNEL);
> + if (!s)
> + return -ENOMEM;
> +
> + trace_seq_init(s);
> +
> + r = filter_print_preds(system->preds, s->buffer);
> + r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, r);
> +
> + kfree(s);
> +
> + return r;
> +}
> +
> +static ssize_t
> +subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
> + loff_t *ppos)
> +{
> + struct event_subsystem *system = filp->private_data;
> + char buf[64], *pbuf = buf;
> + struct filter_pred *pred;
> + int err;
> +
> + if (cnt >= sizeof(buf))
> + return -EINVAL;
> +
> + if (copy_from_user(&buf, ubuf, cnt))
> + return -EFAULT;
> +
> + pred = kzalloc(sizeof(*pred), GFP_KERNEL);
> + if (!pred)
> + return -ENOMEM;
> +
> + err = filter_parse(&pbuf, pred);
> + if (err < 0) {
> + filter_free_pred(pred);
> + return err;
> + }
> +
> + if (pred->clear) {
> + filter_free_subsystem_preds(system);
> + return cnt;
> + }
> +
> + if (filter_add_subsystem_pred(system, pred)) {
> + filter_free_pred(pred);
> + return -EINVAL;
> + }
> +
> + *ppos += cnt;
> +
> + return cnt;
> +}
> +
> static const struct seq_operations show_event_seq_ops = {
> .start = t_start,
> .next = t_next,
> @@ -575,6 +640,12 @@ static const struct file_operations ftrace_event_filter_fops = {
> .write = event_filter_write,
> };
>
> +static const struct file_operations ftrace_subsystem_filter_fops = {
> + .open = tracing_open_generic,
> + .read = subsystem_filter_read,
> + .write = subsystem_filter_write,
> +};
> +
> static struct dentry *event_trace_events_dir(void)
> {
> static struct dentry *d_tracer;
> @@ -595,18 +666,13 @@ static struct dentry *event_trace_events_dir(void)
> return d_events;
> }
>
> -struct event_subsystem {
> - struct list_head list;
> - const char *name;
> - struct dentry *entry;
> -};
> -
> static LIST_HEAD(event_subsystems);
>
> static struct dentry *
> event_subsystem_dir(const char *name, struct dentry *d_events)
> {
> struct event_subsystem *system;
> + struct dentry *entry;
>
> /* First see if we did not already create this dir */
> list_for_each_entry(system, &event_subsystems, list) {
> @@ -633,6 +699,14 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
> system->name = name;
> list_add(&system->list, &event_subsystems);
>
> + system->preds = NULL;
> +
> + entry = debugfs_create_file("filter", 0444, system->entry, system,


Should be 0644.


> + &ftrace_subsystem_filter_fops);
> + if (!entry)
> + pr_warning("Could not create debugfs "
> + "'%s/filter' entry\n", name);
> +
> return system->entry;
> }
>
> diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
> index 8e8c5fa..1ab20ce 100644
> --- a/kernel/trace/trace_events_filter.c
> +++ b/kernel/trace/trace_events_filter.c
> @@ -181,6 +181,27 @@ void filter_free_preds(struct ftrace_event_call *call)
> }
> }
>
> +void filter_free_subsystem_preds(struct event_subsystem *system)
> +{
> + struct ftrace_event_call *call = __start_ftrace_events;
> + int i;
> +
> + if (system->preds) {
> + for (i = 0; i < MAX_FILTER_PRED; i++)
> + filter_free_pred(system->preds[i]);
> + kfree(system->preds);
> + system->preds = NULL;
> + }
> +
> + events_for_each(call) {
> + if (!call->name || !call->regfunc)
> + continue;
> +
> + if (!strcmp(call->system, system->name))
> + filter_free_preds(call);
> + }
> +}
> +
> static int __filter_add_pred(struct ftrace_event_call *call,
> struct filter_pred *pred)
> {
> @@ -250,6 +271,65 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
> return __filter_add_pred(call, pred);
> }
>
> +static struct filter_pred *copy_pred(struct filter_pred *pred)
> +{
> + struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL);
> + if (!new_pred)
> + return NULL;
> +
> + memcpy(new_pred, pred, sizeof(*pred));
> + if (pred->str_val) {
> + new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL);
> + new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
> + if (!new_pred->str_val) {
> + kfree(new_pred);
> + return NULL;
> + }
> + }
> +
> + return new_pred;
> +}
> +
> +int filter_add_subsystem_pred(struct event_subsystem *system,
> + struct filter_pred *pred)
> +{
> + struct ftrace_event_call *call = __start_ftrace_events;
> + struct filter_pred *event_pred;
> + int i;
> +
> + if (system->preds && !pred->compound)
> + filter_free_subsystem_preds(system);
> +
> + if (!system->preds) {
> + system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
> + GFP_KERNEL);
> + if (!system->preds)
> + return -ENOMEM;
> + }
> +
> + for (i = 0; i < MAX_FILTER_PRED; i++) {
> + if (!system->preds[i]) {
> + system->preds[i] = pred;
> + break;
> + }
> + if (i == MAX_FILTER_PRED - 1)
> + return -EINVAL;


Shouldn't it be i == MAX_FILTER_PRED ?


> + }
> +
> + events_for_each(call) {
> + if (!call->name || !call->regfunc)
> + continue;
> +
> + if (!strcmp(call->system, system->name)) {
> + event_pred = copy_pred(pred);
> + if (event_pred)
> + filter_add_pred(call, event_pred);
> + }
> + }
> +
> + return 0;
> +}
> +
> int filter_parse(char **pbuf, struct filter_pred *pred)
> {
> char *tmp, *tok, *val_str = NULL;
> --
> 1.5.6.3
>
>
>


Looks good too.

Acked-by: Frederic Weisbecker <[email protected]>

2009-03-22 19:42:58

by Tom Zanussi

[permalink] [raw]
Subject: [tip:tracing/filters] tracing: add per-subsystem filtering

Commit-ID: cfb180f3e71b2a280a254c8646a9ab1beab63f84
Gitweb: http://git.kernel.org/tip/cfb180f3e71b2a280a254c8646a9ab1beab63f84
Author: Tom Zanussi <[email protected]>
AuthorDate: Sun, 22 Mar 2009 03:31:17 -0500
Committer: Ingo Molnar <[email protected]>
CommitDate: Sun, 22 Mar 2009 18:38:47 +0100

tracing: add per-subsystem filtering

This patch adds per-subsystem filtering to the event tracing subsystem.

It adds a 'filter' debugfs file to each subsystem directory. This file
can be written to to set filters; reading from it will display the
current set of filters set for that subsystem.

Basically what it does is propagate the filter down to each event
contained in the subsystem. If a particular event doesn't have a field
with the name specified in the filter, it simply doesn't get set for
that event. You can verify whether or not the filter was set for a
particular event by looking at the filter file for that event.

As with per-event filters, compound expressions are supported, echoing
'0' to the subsystem's filter file clears all filters in the subsystem,
etc.

Signed-off-by: Tom Zanussi <[email protected]>
Acked-by: Frederic Weisbecker <[email protected]>
LKML-Reference: <1237710677.7703.49.camel@charm-linux>
Signed-off-by: Ingo Molnar <[email protected]>


---
kernel/trace/trace.h | 15 ++++++
kernel/trace/trace_events.c | 86 +++++++++++++++++++++++++++++++++---
kernel/trace/trace_events_filter.c | 80 +++++++++++++++++++++++++++++++++
3 files changed, 175 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d9eb39e..f267723 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -804,6 +804,18 @@ struct ftrace_event_call {
#endif
};

+struct event_subsystem {
+ struct list_head list;
+ const char *name;
+ struct dentry *entry;
+ struct filter_pred **preds;
+};
+
+#define events_for_each(event) \
+ for (event = __start_ftrace_events; \
+ (unsigned long)event < (unsigned long)__stop_ftrace_events; \
+ event++)
+
#define MAX_FILTER_PRED 8

struct filter_pred;
@@ -832,6 +844,9 @@ extern int filter_add_pred(struct ftrace_event_call *call,
struct filter_pred *pred);
extern void filter_free_preds(struct ftrace_event_call *call);
extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
+extern void filter_free_subsystem_preds(struct event_subsystem *system);
+extern int filter_add_subsystem_pred(struct event_subsystem *system,
+ struct filter_pred *pred);

void event_trace_printk(unsigned long ip, const char *fmt, ...);
extern struct ftrace_event_call __start_ftrace_events[];
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 97470c4..97d4daa 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -524,6 +524,71 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
return cnt;
}

+static ssize_t
+subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+{
+ struct event_subsystem *system = filp->private_data;
+ struct trace_seq *s;
+ int r;
+
+ if (*ppos)
+ return 0;
+
+ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ trace_seq_init(s);
+
+ r = filter_print_preds(system->preds, s->buffer);
+ r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, r);
+
+ kfree(s);
+
+ return r;
+}
+
+static ssize_t
+subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+{
+ struct event_subsystem *system = filp->private_data;
+ char buf[64], *pbuf = buf;
+ struct filter_pred *pred;
+ int err;
+
+ if (cnt >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+ if (!pred)
+ return -ENOMEM;
+
+ err = filter_parse(&pbuf, pred);
+ if (err < 0) {
+ filter_free_pred(pred);
+ return err;
+ }
+
+ if (pred->clear) {
+ filter_free_subsystem_preds(system);
+ return cnt;
+ }
+
+ if (filter_add_subsystem_pred(system, pred)) {
+ filter_free_pred(pred);
+ return -EINVAL;
+ }
+
+ *ppos += cnt;
+
+ return cnt;
+}
+
static const struct seq_operations show_event_seq_ops = {
.start = t_start,
.next = t_next,
@@ -575,6 +640,12 @@ static const struct file_operations ftrace_event_filter_fops = {
.write = event_filter_write,
};

+static const struct file_operations ftrace_subsystem_filter_fops = {
+ .open = tracing_open_generic,
+ .read = subsystem_filter_read,
+ .write = subsystem_filter_write,
+};
+
static struct dentry *event_trace_events_dir(void)
{
static struct dentry *d_tracer;
@@ -595,18 +666,13 @@ static struct dentry *event_trace_events_dir(void)
return d_events;
}

-struct event_subsystem {
- struct list_head list;
- const char *name;
- struct dentry *entry;
-};
-
static LIST_HEAD(event_subsystems);

static struct dentry *
event_subsystem_dir(const char *name, struct dentry *d_events)
{
struct event_subsystem *system;
+ struct dentry *entry;

/* First see if we did not already create this dir */
list_for_each_entry(system, &event_subsystems, list) {
@@ -633,6 +699,14 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
system->name = name;
list_add(&system->list, &event_subsystems);

+ system->preds = NULL;
+
+ entry = debugfs_create_file("filter", 0444, system->entry, system,
+ &ftrace_subsystem_filter_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'%s/filter' entry\n", name);
+
return system->entry;
}

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 8e8c5fa..1ab20ce 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -181,6 +181,27 @@ void filter_free_preds(struct ftrace_event_call *call)
}
}

+void filter_free_subsystem_preds(struct event_subsystem *system)
+{
+ struct ftrace_event_call *call = __start_ftrace_events;
+ int i;
+
+ if (system->preds) {
+ for (i = 0; i < MAX_FILTER_PRED; i++)
+ filter_free_pred(system->preds[i]);
+ kfree(system->preds);
+ system->preds = NULL;
+ }
+
+ events_for_each(call) {
+ if (!call->name || !call->regfunc)
+ continue;
+
+ if (!strcmp(call->system, system->name))
+ filter_free_preds(call);
+ }
+}
+
static int __filter_add_pred(struct ftrace_event_call *call,
struct filter_pred *pred)
{
@@ -250,6 +271,65 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
return __filter_add_pred(call, pred);
}

+static struct filter_pred *copy_pred(struct filter_pred *pred)
+{
+ struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL);
+ if (!new_pred)
+ return NULL;
+
+ memcpy(new_pred, pred, sizeof(*pred));
+ if (pred->str_val) {
+ new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL);
+ new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
+ if (!new_pred->str_val) {
+ kfree(new_pred);
+ return NULL;
+ }
+ }
+
+ return new_pred;
+}
+
+int filter_add_subsystem_pred(struct event_subsystem *system,
+ struct filter_pred *pred)
+{
+ struct ftrace_event_call *call = __start_ftrace_events;
+ struct filter_pred *event_pred;
+ int i;
+
+ if (system->preds && !pred->compound)
+ filter_free_subsystem_preds(system);
+
+ if (!system->preds) {
+ system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
+ GFP_KERNEL);
+ if (!system->preds)
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < MAX_FILTER_PRED; i++) {
+ if (!system->preds[i]) {
+ system->preds[i] = pred;
+ break;
+ }
+ if (i == MAX_FILTER_PRED - 1)
+ return -EINVAL;
+ }
+
+ events_for_each(call) {
+ if (!call->name || !call->regfunc)
+ continue;
+
+ if (!strcmp(call->system, system->name)) {
+ event_pred = copy_pred(pred);
+ if (event_pred)
+ filter_add_pred(call, event_pred);
+ }
+ }
+
+ return 0;
+}
+
int filter_parse(char **pbuf, struct filter_pred *pred)
{
char *tmp, *tok, *val_str = NULL;

2009-03-22 19:50:57

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 4/4] tracing: add per-subsystem filtering


* Frederic Weisbecker <[email protected]> wrote:

> > +int filter_add_subsystem_pred(struct event_subsystem *system,
> > + struct filter_pred *pred)
> > +{
> > + struct ftrace_event_call *call = __start_ftrace_events;
> > + struct filter_pred *event_pred;
> > + int i;
> > +
> > + if (system->preds && !pred->compound)
> > + filter_free_subsystem_preds(system);
> > +
> > + if (!system->preds) {
> > + system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
> > + GFP_KERNEL);
> > + if (!system->preds)
> > + return -ENOMEM;
> > + }
> > +
> > + for (i = 0; i < MAX_FILTER_PRED; i++) {
> > + if (!system->preds[i]) {
> > + system->preds[i] = pred;
> > + break;
> > + }
> > + if (i == MAX_FILTER_PRED - 1)
> > + return -EINVAL;
>
>
> Shouldn't it be i == MAX_FILTER_PRED ?

Here we search for a free slot in the array of sub-expressions of
the subsystem level filters. That condition cannot even be true
inside a 'i < MAX_FILTER_PRED' loop.

Checking for i==MAX would be fine if done outside of the loop - and
should probably be done that way. But the code is correct this way
too i think.

Ingo

2009-03-22 19:54:42

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH 4/4] tracing: add per-subsystem filtering

On Sun, Mar 22, 2009 at 08:50:33PM +0100, Ingo Molnar wrote:
>
> * Frederic Weisbecker <[email protected]> wrote:
>
> > > +int filter_add_subsystem_pred(struct event_subsystem *system,
> > > + struct filter_pred *pred)
> > > +{
> > > + struct ftrace_event_call *call = __start_ftrace_events;
> > > + struct filter_pred *event_pred;
> > > + int i;
> > > +
> > > + if (system->preds && !pred->compound)
> > > + filter_free_subsystem_preds(system);
> > > +
> > > + if (!system->preds) {
> > > + system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
> > > + GFP_KERNEL);
> > > + if (!system->preds)
> > > + return -ENOMEM;
> > > + }
> > > +
> > > + for (i = 0; i < MAX_FILTER_PRED; i++) {
> > > + if (!system->preds[i]) {
> > > + system->preds[i] = pred;
> > > + break;
> > > + }
> > > + if (i == MAX_FILTER_PRED - 1)
> > > + return -EINVAL;
> >
> >
> > Shouldn't it be i == MAX_FILTER_PRED ?
>
> Here we search for a free slot in the array of sub-expressions of
> the subsystem level filters. That condition cannot even be true
> inside a 'i < MAX_FILTER_PRED' loop.


Darn, I should sleep more!


> Checking for i==MAX would be fine if done outside of the loop - and
> should probably be done that way. But the code is correct this way
> too i think.
>


Yes, at least it's harmless.

2009-03-23 18:24:35

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 4/4] tracing: add per-subsystem filtering


On Sun, 22 Mar 2009, Tom Zanussi wrote:
>
> +struct event_subsystem {
> + struct list_head list;
> + const char *name;
> + struct dentry *entry;
> + struct filter_pred **preds;
> +};
> +
> +#define events_for_each(event) \
> + for (event = __start_ftrace_events; \
> + (unsigned long)event < (unsigned long)__stop_ftrace_events; \
> + event++)
> +
> #define MAX_FILTER_PRED 8
>
> struct filter_pred;
> @@ -832,6 +844,9 @@ extern int filter_add_pred(struct ftrace_event_call *call,
> struct filter_pred *pred);
> extern void filter_free_preds(struct ftrace_event_call *call);
> extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
> +extern void filter_free_subsystem_preds(struct event_subsystem *system);
> +extern int filter_add_subsystem_pred(struct event_subsystem *system,
> + struct filter_pred *pred);
>
> void event_trace_printk(unsigned long ip, const char *fmt, ...);
> extern struct ftrace_event_call __start_ftrace_events[];
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index 97470c4..97d4daa 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -524,6 +524,71 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
> return cnt;
> }
>
> +static ssize_t
> +subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
> + loff_t *ppos)
> +{
> + struct event_subsystem *system = filp->private_data;
> + struct trace_seq *s;

Again, trace_seq is not used, might as well use your own buffer.

> + int r;
> +
> + if (*ppos)
> + return 0;
> +
> + s = kmalloc(sizeof(*s), GFP_KERNEL);
> + if (!s)
> + return -ENOMEM;
> +
> + trace_seq_init(s);
> +
> + r = filter_print_preds(system->preds, s->buffer);
> + r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, r);
> +
> + kfree(s);
> +
> + return r;
> +}
> +
> +static ssize_t
> +subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
> + loff_t *ppos)
> +{
> + struct event_subsystem *system = filp->private_data;
> + char buf[64], *pbuf = buf;
> + struct filter_pred *pred;
> + int err;
> +
> + if (cnt >= sizeof(buf))
> + return -EINVAL;
> +
> + if (copy_from_user(&buf, ubuf, cnt))
> + return -EFAULT;
> +
> + pred = kzalloc(sizeof(*pred), GFP_KERNEL);
> + if (!pred)
> + return -ENOMEM;
> +
> + err = filter_parse(&pbuf, pred);
> + if (err < 0) {
> + filter_free_pred(pred);
> + return err;
> + }
> +
> + if (pred->clear) {
> + filter_free_subsystem_preds(system);

is "system" correct here?

> + return cnt;
> + }
> +
> + if (filter_add_subsystem_pred(system, pred)) {
> + filter_free_pred(pred);
> + return -EINVAL;
> + }
> +
> + *ppos += cnt;
> +
> + return cnt;
> +}
> +
> static const struct seq_operations show_event_seq_ops = {
> .start = t_start,
> .next = t_next,
> @@ -575,6 +640,12 @@ static const struct file_operations ftrace_event_filter_fops = {
> .write = event_filter_write,
> };
>
> +static const struct file_operations ftrace_subsystem_filter_fops = {
> + .open = tracing_open_generic,
> + .read = subsystem_filter_read,
> + .write = subsystem_filter_write,
> +};
> +
> static struct dentry *event_trace_events_dir(void)
> {
> static struct dentry *d_tracer;
> @@ -595,18 +666,13 @@ static struct dentry *event_trace_events_dir(void)
> return d_events;
> }
>
> -struct event_subsystem {
> - struct list_head list;
> - const char *name;
> - struct dentry *entry;
> -};
> -
> static LIST_HEAD(event_subsystems);
>
> static struct dentry *
> event_subsystem_dir(const char *name, struct dentry *d_events)
> {
> struct event_subsystem *system;
> + struct dentry *entry;
>
> /* First see if we did not already create this dir */
> list_for_each_entry(system, &event_subsystems, list) {
> @@ -633,6 +699,14 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
> system->name = name;
> list_add(&system->list, &event_subsystems);
>
> + system->preds = NULL;
> +
> + entry = debugfs_create_file("filter", 0444, system->entry, system,
> + &ftrace_subsystem_filter_fops);
> + if (!entry)
> + pr_warning("Could not create debugfs "
> + "'%s/filter' entry\n", name);
> +
> return system->entry;
> }
>
> diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
> index 8e8c5fa..1ab20ce 100644
> --- a/kernel/trace/trace_events_filter.c
> +++ b/kernel/trace/trace_events_filter.c
> @@ -181,6 +181,27 @@ void filter_free_preds(struct ftrace_event_call *call)
> }
> }
>
> +void filter_free_subsystem_preds(struct event_subsystem *system)
> +{
> + struct ftrace_event_call *call = __start_ftrace_events;
> + int i;
> +
> + if (system->preds) {
> + for (i = 0; i < MAX_FILTER_PRED; i++)
> + filter_free_pred(system->preds[i]);
> + kfree(system->preds);
> + system->preds = NULL;
> + }
> +
> + events_for_each(call) {
> + if (!call->name || !call->regfunc)
> + continue;
> +
> + if (!strcmp(call->system, system->name))
> + filter_free_preds(call);
> + }
> +}
> +
> static int __filter_add_pred(struct ftrace_event_call *call,
> struct filter_pred *pred)
> {
> @@ -250,6 +271,65 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
> return __filter_add_pred(call, pred);
> }
>
> +static struct filter_pred *copy_pred(struct filter_pred *pred)
> +{
> + struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL);
> + if (!new_pred)
> + return NULL;
> +
> + memcpy(new_pred, pred, sizeof(*pred));
> + if (pred->str_val) {
> + new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL);
> + new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
> + if (!new_pred->str_val) {
> + kfree(new_pred);

Shouldn't there be a check for field_name too?

-- Steve

> + return NULL;
> + }
> + }
> +
> + return new_pred;
> +}
> +

2009-03-24 07:19:54

by Tom Zanussi

[permalink] [raw]
Subject: Re: [PATCH 4/4] tracing: add per-subsystem filtering

Hi,

On Mon, 2009-03-23 at 14:24 -0400, Steven Rostedt wrote:
> On Sun, 22 Mar 2009, Tom Zanussi wrote:
> >
> > +struct event_subsystem {
> > + struct list_head list;
> > + const char *name;
> > + struct dentry *entry;
> > + struct filter_pred **preds;
> > +};
> > +
> > +#define events_for_each(event) \
> > + for (event = __start_ftrace_events; \
> > + (unsigned long)event < (unsigned long)__stop_ftrace_events; \
> > + event++)
> > +
> > #define MAX_FILTER_PRED 8
> >
> > struct filter_pred;
> > @@ -832,6 +844,9 @@ extern int filter_add_pred(struct ftrace_event_call *call,
> > struct filter_pred *pred);
> > extern void filter_free_preds(struct ftrace_event_call *call);
> > extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
> > +extern void filter_free_subsystem_preds(struct event_subsystem *system);
> > +extern int filter_add_subsystem_pred(struct event_subsystem *system,
> > + struct filter_pred *pred);
> >
> > void event_trace_printk(unsigned long ip, const char *fmt, ...);
> > extern struct ftrace_event_call __start_ftrace_events[];
> > diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> > index 97470c4..97d4daa 100644
> > --- a/kernel/trace/trace_events.c
> > +++ b/kernel/trace/trace_events.c
> > @@ -524,6 +524,71 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
> > return cnt;
> > }
> >
> > +static ssize_t
> > +subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
> > + loff_t *ppos)
> > +{
> > + struct event_subsystem *system = filp->private_data;
> > + struct trace_seq *s;
>
> Again, trace_seq is not used, might as well use your own buffer.
>
> > + int r;
> > +
> > + if (*ppos)
> > + return 0;
> > +
> > + s = kmalloc(sizeof(*s), GFP_KERNEL);
> > + if (!s)
> > + return -ENOMEM;
> > +
> > + trace_seq_init(s);
> > +
> > + r = filter_print_preds(system->preds, s->buffer);
> > + r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, r);
> > +
> > + kfree(s);
> > +
> > + return r;
> > +}
> > +
> > +static ssize_t
> > +subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
> > + loff_t *ppos)
> > +{
> > + struct event_subsystem *system = filp->private_data;
> > + char buf[64], *pbuf = buf;
> > + struct filter_pred *pred;
> > + int err;
> > +
> > + if (cnt >= sizeof(buf))
> > + return -EINVAL;
> > +
> > + if (copy_from_user(&buf, ubuf, cnt))
> > + return -EFAULT;
> > +
> > + pred = kzalloc(sizeof(*pred), GFP_KERNEL);
> > + if (!pred)
> > + return -ENOMEM;
> > +
> > + err = filter_parse(&pbuf, pred);
> > + if (err < 0) {
> > + filter_free_pred(pred);
> > + return err;
> > + }
> > +
> > + if (pred->clear) {
> > + filter_free_subsystem_preds(system);
>
> is "system" correct here?

Do you mean the naming i.e. would be better as "subsystem"? Otherwise,
I think it's correct.

>
> > + return cnt;
> > + }
> > +
> > + if (filter_add_subsystem_pred(system, pred)) {
> > + filter_free_pred(pred);
> > + return -EINVAL;
> > + }
> > +
> > + *ppos += cnt;
> > +
> > + return cnt;
> > +}
> > +
> > static const struct seq_operations show_event_seq_ops = {
> > .start = t_start,
> > .next = t_next,
> > @@ -575,6 +640,12 @@ static const struct file_operations ftrace_event_filter_fops = {
> > .write = event_filter_write,
> > };
> >
> > +static const struct file_operations ftrace_subsystem_filter_fops = {
> > + .open = tracing_open_generic,
> > + .read = subsystem_filter_read,
> > + .write = subsystem_filter_write,
> > +};
> > +
> > static struct dentry *event_trace_events_dir(void)
> > {
> > static struct dentry *d_tracer;
> > @@ -595,18 +666,13 @@ static struct dentry *event_trace_events_dir(void)
> > return d_events;
> > }
> >
> > -struct event_subsystem {
> > - struct list_head list;
> > - const char *name;
> > - struct dentry *entry;
> > -};
> > -
> > static LIST_HEAD(event_subsystems);
> >
> > static struct dentry *
> > event_subsystem_dir(const char *name, struct dentry *d_events)
> > {
> > struct event_subsystem *system;
> > + struct dentry *entry;
> >
> > /* First see if we did not already create this dir */
> > list_for_each_entry(system, &event_subsystems, list) {
> > @@ -633,6 +699,14 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
> > system->name = name;
> > list_add(&system->list, &event_subsystems);
> >
> > + system->preds = NULL;
> > +
> > + entry = debugfs_create_file("filter", 0444, system->entry, system,
> > + &ftrace_subsystem_filter_fops);
> > + if (!entry)
> > + pr_warning("Could not create debugfs "
> > + "'%s/filter' entry\n", name);
> > +
> > return system->entry;
> > }
> >
> > diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
> > index 8e8c5fa..1ab20ce 100644
> > --- a/kernel/trace/trace_events_filter.c
> > +++ b/kernel/trace/trace_events_filter.c
> > @@ -181,6 +181,27 @@ void filter_free_preds(struct ftrace_event_call *call)
> > }
> > }
> >
> > +void filter_free_subsystem_preds(struct event_subsystem *system)
> > +{
> > + struct ftrace_event_call *call = __start_ftrace_events;
> > + int i;
> > +
> > + if (system->preds) {
> > + for (i = 0; i < MAX_FILTER_PRED; i++)
> > + filter_free_pred(system->preds[i]);
> > + kfree(system->preds);
> > + system->preds = NULL;
> > + }
> > +
> > + events_for_each(call) {
> > + if (!call->name || !call->regfunc)
> > + continue;
> > +
> > + if (!strcmp(call->system, system->name))
> > + filter_free_preds(call);
> > + }
> > +}
> > +
> > static int __filter_add_pred(struct ftrace_event_call *call,
> > struct filter_pred *pred)
> > {
> > @@ -250,6 +271,65 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
> > return __filter_add_pred(call, pred);
> > }
> >
> > +static struct filter_pred *copy_pred(struct filter_pred *pred)
> > +{
> > + struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL);
> > + if (!new_pred)
> > + return NULL;
> > +
> > + memcpy(new_pred, pred, sizeof(*pred));
> > + if (pred->str_val) {
> > + new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL);
> > + new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
> > + if (!new_pred->str_val) {
> > + kfree(new_pred);
>
> Shouldn't there be a check for field_name too?
>

Yes - I posted a patch to copy_pred() yesterday to fix that.

Tom

> -- Steve
>
> > + return NULL;
> > + }
> > + }
> > +
> > + return new_pred;
> > +}
> > +

2009-03-24 16:30:46

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 4/4] tracing: add per-subsystem filtering


On Tue, 24 Mar 2009, Tom Zanussi wrote:
> > > +
> > > +static ssize_t
> > > +subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
> > > + loff_t *ppos)
> > > +{
> > > + struct event_subsystem *system = filp->private_data;
> > > + char buf[64], *pbuf = buf;
> > > + struct filter_pred *pred;
> > > + int err;
> > > +
> > > + if (cnt >= sizeof(buf))
> > > + return -EINVAL;
> > > +
> > > + if (copy_from_user(&buf, ubuf, cnt))
> > > + return -EFAULT;
> > > +
> > > + pred = kzalloc(sizeof(*pred), GFP_KERNEL);
> > > + if (!pred)
> > > + return -ENOMEM;
> > > +
> > > + err = filter_parse(&pbuf, pred);
> > > + if (err < 0) {
> > > + filter_free_pred(pred);
> > > + return err;
> > > + }
> > > +
> > > + if (pred->clear) {
> > > + filter_free_subsystem_preds(system);
> >
> > is "system" correct here?
>
> Do you mean the naming i.e. would be better as "subsystem"? Otherwise,
> I think it's correct.

No, I'm just confused how the system could see the pred before it was
added below.

-- Steve

>
> >
> > > + return cnt;
> > > + }
> > > +
> > > + if (filter_add_subsystem_pred(system, pred)) {
> > > + filter_free_pred(pred);
> > > + return -EINVAL;
> > > + }
> > > +
> > > + *ppos += cnt;
> > > +
> > > + return cnt;
> > > +}
> > > +

2009-03-25 05:26:22

by Tom Zanussi

[permalink] [raw]
Subject: Re: [PATCH 4/4] tracing: add per-subsystem filtering

On Tue, 2009-03-24 at 12:29 -0400, Steven Rostedt wrote:
> On Tue, 24 Mar 2009, Tom Zanussi wrote:
> > > > +
> > > > +static ssize_t
> > > > +subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
> > > > + loff_t *ppos)
> > > > +{
> > > > + struct event_subsystem *system = filp->private_data;
> > > > + char buf[64], *pbuf = buf;
> > > > + struct filter_pred *pred;
> > > > + int err;
> > > > +
> > > > + if (cnt >= sizeof(buf))
> > > > + return -EINVAL;
> > > > +
> > > > + if (copy_from_user(&buf, ubuf, cnt))
> > > > + return -EFAULT;
> > > > +
> > > > + pred = kzalloc(sizeof(*pred), GFP_KERNEL);
> > > > + if (!pred)
> > > > + return -ENOMEM;
> > > > +
> > > > + err = filter_parse(&pbuf, pred);
> > > > + if (err < 0) {
> > > > + filter_free_pred(pred);
> > > > + return err;
> > > > + }
> > > > +
> > > > + if (pred->clear) {
> > > > + filter_free_subsystem_preds(system);
> > >
> > > is "system" correct here?
> >
> > Do you mean the naming i.e. would be better as "subsystem"? Otherwise,
> > I think it's correct.
>
> No, I'm just confused how the system could see the pred before it was
> added below.

Again, in this case the pred is only used to flag clearing and never
gets added.

Tom

>
> -- Steve
>
> >
> > >
> > > > + return cnt;
> > > > + }
> > > > +
> > > > + if (filter_add_subsystem_pred(system, pred)) {
> > > > + filter_free_pred(pred);
> > > > + return -EINVAL;
> > > > + }
> > > > +
> > > > + *ppos += cnt;
> > > > +
> > > > + return cnt;
> > > > +}
> > > > +