2010-06-03 19:41:48

by Steven Rostedt

[permalink] [raw]
Subject: [PATCH][GIT PULL][v2.6.35] tracing/events: Convert format output to seq_file


Ingo,

Please pull the latest tip/perf/urgent tree, which can be found at:

git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace.git
tip/perf/urgent


Steven Rostedt (1):
tracing/events: Convert format output to seq_file

----
kernel/trace/trace_events.c | 208 +++++++++++++++++++++++++++++--------------
1 files changed, 142 insertions(+), 66 deletions(-)
---------------------------
commit 0953fa1f774105fcd6150282b7b9ea7763090f64
Author: Steven Rostedt <[email protected]>
Date: Thu Jun 3 15:21:34 2010 -0400

tracing/events: Convert format output to seq_file

Two new events were added that broke the current format output.

Both from the SCSI system: scsi_dispatch_cmd_done and scsi_dispatch_cmd_timeout

The reason is that their print_fmt exceeded a page size. Since the output
of the format used simple_read_from_buffer and trace_seq, it was limited
to a page size in output.

This patch converts the printing of the format of an event into seq_file,
which allows greater than a page size to be shown.

I diffed all event formats comparing the output with and without this
patch. All matched except for the above two, which showed just:

FORMAT TOO BIG

without this patch, but now properly displays the output with this patch.

Cc: Martin K. Petersen <[email protected]>
Cc: Kei Tokunaga <[email protected]>
Cc: James Bottomley <[email protected]>
Cc: Tomohiro Kusumi <[email protected]>
Cc: Xiao Guangrong <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 53cffc0..b1b5093 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -29,6 +29,8 @@ DEFINE_MUTEX(event_mutex);

LIST_HEAD(ftrace_events);

+#define COMMON_FIELD_COUNT 5
+
struct list_head *
trace_get_fields(struct ftrace_event_call *event_call)
{
@@ -544,85 +546,157 @@ out:
return ret;
}

-static ssize_t
-event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
- loff_t *ppos)
+enum {
+ FORMAT_HEADER = 1,
+ FORMAT_PRINTFMT = 2,
+};
+
+static void *f_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct ftrace_event_call *call = filp->private_data;
+ struct ftrace_event_call *call = m->private;
struct ftrace_event_field *field;
struct list_head *head;
- struct trace_seq *s;
- int common_field_count = 5;
- char *buf;
- int r = 0;
+ loff_t index = *pos;

- if (*ppos)
- return 0;
+ (*pos)++;

- s = kmalloc(sizeof(*s), GFP_KERNEL);
- if (!s)
- return -ENOMEM;
+ head = trace_get_fields(call);

- trace_seq_init(s);
+ switch ((unsigned long)v) {
+ case FORMAT_HEADER:

- trace_seq_printf(s, "name: %s\n", call->name);
- trace_seq_printf(s, "ID: %d\n", call->event.type);
- trace_seq_printf(s, "format:\n");
+ if (unlikely(list_empty(head)))
+ return NULL;

- head = trace_get_fields(call);
- list_for_each_entry_reverse(field, head, link) {
- /*
- * Smartly shows the array type(except dynamic array).
- * Normal:
- * field:TYPE VAR
- * If TYPE := TYPE[LEN], it is shown:
- * field:TYPE VAR[LEN]
- */
- const char *array_descriptor = strchr(field->type, '[');
-
- if (!strncmp(field->type, "__data_loc", 10))
- array_descriptor = NULL;
-
- if (!array_descriptor) {
- r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
- "\tsize:%u;\tsigned:%d;\n",
- field->type, field->name, field->offset,
- field->size, !!field->is_signed);
- } else {
- r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
- "\tsize:%u;\tsigned:%d;\n",
- (int)(array_descriptor - field->type),
- field->type, field->name,
- array_descriptor, field->offset,
- field->size, !!field->is_signed);
- }
+ field = list_entry(head->prev, struct ftrace_event_field, link);
+ return field;

- if (--common_field_count == 0)
- r = trace_seq_printf(s, "\n");
+ case FORMAT_PRINTFMT:
+ /* all done */
+ return NULL;
+ }

- if (!r)
- break;
+ /*
+ * To separate common fields from event fields, the
+ * LSB is set on the first event field. Clear it in case.
+ */
+ v = (void *)((unsigned long)v & ~1L);
+
+ field = v;
+ if (field->link.prev == head)
+ return (void *)FORMAT_PRINTFMT;
+
+ field = list_entry(field->link.prev, struct ftrace_event_field, link);
+
+ /* Set the LSB to notify f_show to print an extra newline */
+ if (index == (COMMON_FIELD_COUNT + 1))
+ field = (struct ftrace_event_field *)
+ ((unsigned long)field | 1);
+
+ return field;
+}
+
+static void *f_start(struct seq_file *m, loff_t *pos)
+{
+ loff_t l = 1;
+ void *p;
+
+ /* Start by showing the header */
+ if (!*pos) {
+ (*pos)++;
+ return (void *)FORMAT_HEADER;
}

- if (r)
- r = trace_seq_printf(s, "\nprint fmt: %s\n",
- call->print_fmt);
+ p = (void *)FORMAT_HEADER;
+ do {
+ p = f_next(m, p, &l);
+ } while (p && l < *pos);

- if (!r) {
- /*
- * ug! The format output is bigger than a PAGE!!
- */
- buf = "FORMAT TOO BIG\n";
- r = simple_read_from_buffer(ubuf, cnt, ppos,
- buf, strlen(buf));
- goto out;
+ return p;
+}
+
+static int f_show(struct seq_file *m, void *v)
+{
+ struct ftrace_event_call *call = m->private;
+ struct ftrace_event_field *field;
+ const char *array_descriptor;
+
+ switch ((unsigned long)v) {
+ case FORMAT_HEADER:
+ seq_printf(m, "name: %s\n", call->name);
+ seq_printf(m, "ID: %d\n", call->event.type);
+ seq_printf(m, "format:\n");
+ return 0;
+
+ case FORMAT_PRINTFMT:
+ seq_printf(m, "\nprint fmt: %s\n",
+ call->print_fmt);
+ return 0;
}

- r = simple_read_from_buffer(ubuf, cnt, ppos,
- s->buffer, s->len);
- out:
- kfree(s);
- return r;
+ /*
+ * To separate common fields from event fields, the
+ * LSB is set on the first event field. Clear it and
+ * print a newline if it is set.
+ */
+ if ((unsigned long)v & 1) {
+ seq_putc(m, '\n');
+ v = (void *)((unsigned long)v & ~1L);
+ }
+
+ field = v;
+
+ /*
+ * Smartly shows the array type(except dynamic array).
+ * Normal:
+ * field:TYPE VAR
+ * If TYPE := TYPE[LEN], it is shown:
+ * field:TYPE VAR[LEN]
+ */
+ array_descriptor = strchr(field->type, '[');
+
+ if (!strncmp(field->type, "__data_loc", 10))
+ array_descriptor = NULL;
+
+ if (!array_descriptor)
+ seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+ field->type, field->name, field->offset,
+ field->size, !!field->is_signed);
+ else
+ seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+ (int)(array_descriptor - field->type),
+ field->type, field->name,
+ array_descriptor, field->offset,
+ field->size, !!field->is_signed);
+
+ return 0;
+}
+
+static void f_stop(struct seq_file *m, void *p)
+{
+}
+
+static const struct seq_operations trace_format_seq_ops = {
+ .start = f_start,
+ .next = f_next,
+ .stop = f_stop,
+ .show = f_show,
+};
+
+static int trace_format_open(struct inode *inode, struct file *file)
+{
+ struct ftrace_event_call *call = inode->i_private;
+ struct seq_file *m;
+ int ret;
+
+ ret = seq_open(file, &trace_format_seq_ops);
+ if (ret < 0)
+ return ret;
+
+ m = file->private_data;
+ m->private = call;
+
+ return 0;
}

static ssize_t
@@ -820,8 +894,10 @@ static const struct file_operations ftrace_enable_fops = {
};

static const struct file_operations ftrace_event_format_fops = {
- .open = tracing_open_generic,
- .read = event_format_read,
+ .open = trace_format_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
};

static const struct file_operations ftrace_event_id_fops = {


2010-06-04 01:36:59

by Li Zefan

[permalink] [raw]
Subject: Re: [PATCH][GIT PULL][v2.6.35] tracing/events: Convert format output to seq_file

Steven Rostedt wrote:
> Ingo,
>
> Please pull the latest tip/perf/urgent tree, which can be found at:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace.git
> tip/perf/urgent
>
>
> Steven Rostedt (1):
> tracing/events: Convert format output to seq_file
>
> ----
> kernel/trace/trace_events.c | 208 +++++++++++++++++++++++++++++--------------
> 1 files changed, 142 insertions(+), 66 deletions(-)
> ---------------------------
> commit 0953fa1f774105fcd6150282b7b9ea7763090f64
> Author: Steven Rostedt <[email protected]>
> Date: Thu Jun 3 15:21:34 2010 -0400
>
> tracing/events: Convert format output to seq_file
>

This patch seriously conflicts with my patch that extracts common fields
from every trace event to a global list, which is already in your perf/core2
branch. Should I rebase and resend it?

2010-06-04 02:09:14

by Li Zefan

[permalink] [raw]
Subject: Re: [PATCH][GIT PULL][v2.6.35] tracing/events: Convert format output to seq_file

> +static void *f_start(struct seq_file *m, loff_t *pos)
> +{
> + loff_t l = 1;
> + void *p;
> +
> + /* Start by showing the header */
> + if (!*pos) {
> + (*pos)++;

We shouldn't increment *pos in the start() handler. It's a common mistake when
using seqfile.

What we need to do in start() is move the pointer to position *pos.

> + return (void *)FORMAT_HEADER;
> }
>
> - if (r)
> - r = trace_seq_printf(s, "\nprint fmt: %s\n",
> - call->print_fmt);
> + p = (void *)FORMAT_HEADER;
> + do {
> + p = f_next(m, p, &l);
> + } while (p && l < *pos);
>
> - if (!r) {
> - /*
> - * ug! The format output is bigger than a PAGE!!
> - */
> - buf = "FORMAT TOO BIG\n";
> - r = simple_read_from_buffer(ubuf, cnt, ppos,
> - buf, strlen(buf));
> - goto out;
> + return p;
> +}

2010-06-04 02:11:12

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH][GIT PULL][v2.6.35] tracing/events: Convert format output to seq_file

On Fri, 2010-06-04 at 09:39 +0800, Li Zefan wrote:
> Steven Rostedt wrote:
> > Ingo,
> >
> > Please pull the latest tip/perf/urgent tree, which can be found at:
> >
> > git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace.git
> > tip/perf/urgent
> >
> >
> > Steven Rostedt (1):
> > tracing/events: Convert format output to seq_file
> >
> > ----
> > kernel/trace/trace_events.c | 208 +++++++++++++++++++++++++++++--------------
> > 1 files changed, 142 insertions(+), 66 deletions(-)
> > ---------------------------
> > commit 0953fa1f774105fcd6150282b7b9ea7763090f64
> > Author: Steven Rostedt <[email protected]>
> > Date: Thu Jun 3 15:21:34 2010 -0400
> >
> > tracing/events: Convert format output to seq_file
> >
>
> This patch seriously conflicts with my patch that extracts common fields
> from every trace event to a global list, which is already in your perf/core2
> branch. Should I rebase and resend it?
>

Ah sorry. No don't.

We held off your patches to after the merge window. I even reminded Ingo
to pull them in earlier today. But I forgot that these changes would
conflict with yours.

Ingo and I were not sure if your patches were too much for the end of
the merge window, so we held off on them.

This patch set was created when I noticed that the SCSI tracepoints
failed the size limit in the format, and I worked to fix them.

We may still try to push your patches in -rc. I'll do the conflict
resolution, and make it perf/core-4.

IOW, don't worry about it, I'll do the worrying ;-)

Thanks!

-- Steve

2010-06-04 02:13:15

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH][GIT PULL][v2.6.35] tracing/events: Convert format output to seq_file

On Fri, 2010-06-04 at 10:11 +0800, Li Zefan wrote:
> > +static void *f_start(struct seq_file *m, loff_t *pos)
> > +{
> > + loff_t l = 1;
> > + void *p;
> > +
> > + /* Start by showing the header */
> > + if (!*pos) {
> > + (*pos)++;
>
> We shoudn't increment *pos in start() handler. It's a common mistake when
> using seqfile.

And a common mistake I make :-p

I'll rebase it with the fix.

>
> What we need to do in start() is move the pointer to postion *pos.

So the rest is OK then? All that is needed is the removal of *pos++ ?

Thanks,

-- Steve

>
> > + return (void *)FORMAT_HEADER;
> > }
> >
> > - if (r)
> > - r = trace_seq_printf(s, "\nprint fmt: %s\n",
> > - call->print_fmt);
> > + p = (void *)FORMAT_HEADER;
> > + do {
> > + p = f_next(m, p, &l);
> > + } while (p && l < *pos);
> >
> > - if (!r) {
> > - /*
> > - * ug! The format output is bigger than a PAGE!!
> > - */
> > - buf = "FORMAT TOO BIG\n";
> > - r = simple_read_from_buffer(ubuf, cnt, ppos,
> > - buf, strlen(buf));
> > - goto out;
> > + return p;
> > +}

2010-06-04 02:23:59

by Li Zefan

[permalink] [raw]
Subject: Re: [PATCH][GIT PULL][v2.6.35] tracing/events: Convert format output to seq_file

Steven Rostedt wrote:
> On Fri, 2010-06-04 at 10:11 +0800, Li Zefan wrote:
>>> +static void *f_start(struct seq_file *m, loff_t *pos)
>>> +{
>>> + loff_t l = 1;
>>> + void *p;
>>> +
>>> + /* Start by showing the header */
>>> + if (!*pos) {
>>> + (*pos)++;
>> We shoudn't increment *pos in start() handler. It's a common mistake when
>> using seqfile.
>
> And a common mistake I do :-p
>
> I'll rebase it with the fix.
>
>> What we need to do in start() is move the pointer to postion *pos.
>
> So the rest is OK then? All that is needed is the removal of *pos++ ?
>

I think you should also change "loff_t l = 1" to "loff_t l = 0". :)

Otherwise:

Reviewed-by: Li Zefan <[email protected]>

2010-06-04 03:27:10

by Steven Rostedt

[permalink] [raw]
Subject: [PATCH v2][GIT PULL][v2.6.35] tracing/events: Convert format output to seq_file


Ingo,

Please pull the latest tip/perf/urgent-2 tree, which can be found at:

git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace.git
tip/perf/urgent-2


Steven Rostedt (1):
tracing/events: Convert format output to seq_file

----
kernel/trace/trace_events.c | 208 +++++++++++++++++++++++++++++--------------
1 files changed, 141 insertions(+), 67 deletions(-)
---------------------------
commit bcaa10360f3b2a623453a9fb10ef77aafeef8bb6
Author: Steven Rostedt <[email protected]>
Date: Thu Jun 3 15:21:34 2010 -0400

tracing/events: Convert format output to seq_file

Two new events were added that broke the current format output.

Both from the SCSI system: scsi_dispatch_cmd_done and scsi_dispatch_cmd_timeout

The reason is that their print_fmt exceeded a page size. Since the output
of the format used simple_read_from_buffer and trace_seq, it was limited
to a page size in output.

This patch converts the printing of the format of an event into seq_file,
which allows greater than a page size to be shown.

I diffed all event formats comparing the output with and without this
patch. All matched except for the above two, which showed just:

FORMAT TOO BIG

without this patch, but now properly displays the output with this patch.

v2: Remove updating *pos in seq start function.
[ Thanks to Li Zefan for pointing that out ]

Reviewed-by: Li Zefan <[email protected]>
Cc: Martin K. Petersen <[email protected]>
Cc: Kei Tokunaga <[email protected]>
Cc: James Bottomley <[email protected]>
Cc: Tomohiro Kusumi <[email protected]>
Cc: Xiao Guangrong <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 53cffc0..45a8968 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -29,6 +29,8 @@ DEFINE_MUTEX(event_mutex);

LIST_HEAD(ftrace_events);

+#define COMMON_FIELD_COUNT 5
+
struct list_head *
trace_get_fields(struct ftrace_event_call *event_call)
{
@@ -544,85 +546,155 @@ out:
return ret;
}

-static ssize_t
-event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
- loff_t *ppos)
+enum {
+ FORMAT_HEADER = 1,
+ FORMAT_PRINTFMT = 2,
+};
+
+static void *f_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct ftrace_event_call *call = filp->private_data;
+ struct ftrace_event_call *call = m->private;
struct ftrace_event_field *field;
struct list_head *head;
- struct trace_seq *s;
- int common_field_count = 5;
- char *buf;
- int r = 0;
-
- if (*ppos)
- return 0;
+ loff_t index = *pos;

- s = kmalloc(sizeof(*s), GFP_KERNEL);
- if (!s)
- return -ENOMEM;
+ (*pos)++;

- trace_seq_init(s);
+ head = trace_get_fields(call);

- trace_seq_printf(s, "name: %s\n", call->name);
- trace_seq_printf(s, "ID: %d\n", call->event.type);
- trace_seq_printf(s, "format:\n");
+ switch ((unsigned long)v) {
+ case FORMAT_HEADER:

- head = trace_get_fields(call);
- list_for_each_entry_reverse(field, head, link) {
- /*
- * Smartly shows the array type(except dynamic array).
- * Normal:
- * field:TYPE VAR
- * If TYPE := TYPE[LEN], it is shown:
- * field:TYPE VAR[LEN]
- */
- const char *array_descriptor = strchr(field->type, '[');
-
- if (!strncmp(field->type, "__data_loc", 10))
- array_descriptor = NULL;
-
- if (!array_descriptor) {
- r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
- "\tsize:%u;\tsigned:%d;\n",
- field->type, field->name, field->offset,
- field->size, !!field->is_signed);
- } else {
- r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
- "\tsize:%u;\tsigned:%d;\n",
- (int)(array_descriptor - field->type),
- field->type, field->name,
- array_descriptor, field->offset,
- field->size, !!field->is_signed);
- }
+ if (unlikely(list_empty(head)))
+ return NULL;

- if (--common_field_count == 0)
- r = trace_seq_printf(s, "\n");
+ field = list_entry(head->prev, struct ftrace_event_field, link);
+ return field;

- if (!r)
- break;
+ case FORMAT_PRINTFMT:
+ /* all done */
+ return NULL;
}

- if (r)
- r = trace_seq_printf(s, "\nprint fmt: %s\n",
- call->print_fmt);
+ /*
+ * To separate common fields from event fields, the
+ * LSB is set on the first event field. Clear it in case.
+ */
+ v = (void *)((unsigned long)v & ~1L);

- if (!r) {
- /*
- * ug! The format output is bigger than a PAGE!!
- */
- buf = "FORMAT TOO BIG\n";
- r = simple_read_from_buffer(ubuf, cnt, ppos,
- buf, strlen(buf));
- goto out;
+ field = v;
+ if (field->link.prev == head)
+ return (void *)FORMAT_PRINTFMT;
+
+ field = list_entry(field->link.prev, struct ftrace_event_field, link);
+
+ /* Set the LSB to notify f_show to print an extra newline */
+ if (index == COMMON_FIELD_COUNT)
+ field = (struct ftrace_event_field *)
+ ((unsigned long)field | 1);
+
+ return field;
+}
+
+static void *f_start(struct seq_file *m, loff_t *pos)
+{
+ loff_t l = 0;
+ void *p;
+
+ /* Start by showing the header */
+ if (!*pos)
+ return (void *)FORMAT_HEADER;
+
+ p = (void *)FORMAT_HEADER;
+ do {
+ p = f_next(m, p, &l);
+ } while (p && l < *pos);
+
+ return p;
+}
+
+static int f_show(struct seq_file *m, void *v)
+{
+ struct ftrace_event_call *call = m->private;
+ struct ftrace_event_field *field;
+ const char *array_descriptor;
+
+ switch ((unsigned long)v) {
+ case FORMAT_HEADER:
+ seq_printf(m, "name: %s\n", call->name);
+ seq_printf(m, "ID: %d\n", call->event.type);
+ seq_printf(m, "format:\n");
+ return 0;
+
+ case FORMAT_PRINTFMT:
+ seq_printf(m, "\nprint fmt: %s\n",
+ call->print_fmt);
+ return 0;
}

- r = simple_read_from_buffer(ubuf, cnt, ppos,
- s->buffer, s->len);
- out:
- kfree(s);
- return r;
+ /*
+ * To separate common fields from event fields, the
+ * LSB is set on the first event field. Clear it and
+ * print a newline if it is set.
+ */
+ if ((unsigned long)v & 1) {
+ seq_putc(m, '\n');
+ v = (void *)((unsigned long)v & ~1L);
+ }
+
+ field = v;
+
+ /*
+ * Smartly shows the array type(except dynamic array).
+ * Normal:
+ * field:TYPE VAR
+ * If TYPE := TYPE[LEN], it is shown:
+ * field:TYPE VAR[LEN]
+ */
+ array_descriptor = strchr(field->type, '[');
+
+ if (!strncmp(field->type, "__data_loc", 10))
+ array_descriptor = NULL;
+
+ if (!array_descriptor)
+ seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+ field->type, field->name, field->offset,
+ field->size, !!field->is_signed);
+ else
+ seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+ (int)(array_descriptor - field->type),
+ field->type, field->name,
+ array_descriptor, field->offset,
+ field->size, !!field->is_signed);
+
+ return 0;
+}
+
+static void f_stop(struct seq_file *m, void *p)
+{
+}
+
+static const struct seq_operations trace_format_seq_ops = {
+ .start = f_start,
+ .next = f_next,
+ .stop = f_stop,
+ .show = f_show,
+};
+
+static int trace_format_open(struct inode *inode, struct file *file)
+{
+ struct ftrace_event_call *call = inode->i_private;
+ struct seq_file *m;
+ int ret;
+
+ ret = seq_open(file, &trace_format_seq_ops);
+ if (ret < 0)
+ return ret;
+
+ m = file->private_data;
+ m->private = call;
+
+ return 0;
}

static ssize_t
@@ -820,8 +892,10 @@ static const struct file_operations ftrace_enable_fops = {
};

static const struct file_operations ftrace_event_format_fops = {
- .open = tracing_open_generic,
- .read = event_format_read,
+ .open = trace_format_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
};

static const struct file_operations ftrace_event_id_fops = {

2010-06-04 04:15:01

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH][GIT PULL][v2.6.35] tracing/events: Convert format output to seq_file

On Fri, 2010-06-04 at 09:39 +0800, Li Zefan wrote:

> This patch seriously conflicts with my patch that extracts common fields
> from every trace event to a global list, which is already in your perf/core2
> branch. Should I rebase and resend it?

I pushed out a conflict resolution of pulling tip/perf/urgent-2 into
tip/perf/core-2 (and pushing that as tip/perf/core-4).

I did change a bit.

-- Steve

Here's the diff:

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f4dbce1..5b08ff6 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -30,6 +30,8 @@ DEFINE_MUTEX(event_mutex);
LIST_HEAD(ftrace_events);
LIST_HEAD(ftrace_common_fields);

+#define COMMON_FIELD_COUNT 5
+
struct list_head *
trace_get_fields(struct ftrace_event_call *event_call)
{
@@ -555,88 +557,157 @@ out:
return ret;
}

-static void print_event_fields(struct trace_seq *s, struct list_head *head)
+enum {
+ FORMAT_HEADER = 1,
+ FORMAT_PRINTFMT = 2,
+};
+
+static void *f_next(struct seq_file *m, void *v, loff_t *pos)
{
+ struct ftrace_event_call *call = m->private;
struct ftrace_event_field *field;
+ struct list_head *head;

- list_for_each_entry_reverse(field, head, link) {
- /*
- * Smartly shows the array type(except dynamic array).
- * Normal:
- * field:TYPE VAR
- * If TYPE := TYPE[LEN], it is shown:
- * field:TYPE VAR[LEN]
- */
- const char *array_descriptor = strchr(field->type, '[');
-
- if (!strncmp(field->type, "__data_loc", 10))
- array_descriptor = NULL;
-
- if (!array_descriptor) {
- trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
- "\tsize:%u;\tsigned:%d;\n",
- field->type, field->name, field->offset,
- field->size, !!field->is_signed);
- } else {
- trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
- "\tsize:%u;\tsigned:%d;\n",
- (int)(array_descriptor - field->type),
- field->type, field->name,
- array_descriptor, field->offset,
- field->size, !!field->is_signed);
- }
+ (*pos)++;
+
+ head = &ftrace_common_fields;
+
+ switch ((unsigned long)v) {
+ case FORMAT_HEADER:
+ field = list_entry(head->prev, struct ftrace_event_field, link);
+ return field;
+
+ case FORMAT_PRINTFMT:
+ /* all done */
+ return NULL;
+ }
+
+ /*
+ * To separate common fields from event fields, the
+ * LSB is set on the first event field. Clear it in case.
+ */
+ v = (void *)((unsigned long)v & ~1L);
+ field = v;
+
+ head = trace_get_fields(call);
+
+ /*
+ * If this is the last common field, then set the field to
+ * the first event field and also set the LSB to tell f_show()
+ * to print a newline.
+ */
+ if (field->link.prev == &ftrace_common_fields) {
+ field = list_entry(head->prev, struct ftrace_event_field, link);
+ return (void *)((unsigned long)field | 1);
}
+
+ if (field->link.prev == head)
+ return (void *)FORMAT_PRINTFMT;
+
+ field = list_entry(field->link.prev, struct ftrace_event_field, link);
+
+ return field;
}

-static ssize_t
-event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
- loff_t *ppos)
+static void *f_start(struct seq_file *m, loff_t *pos)
{
- struct ftrace_event_call *call = filp->private_data;
- struct list_head *head;
- struct trace_seq *s;
- char *buf;
- int r;
+ loff_t l = 0;
+ void *p;

- if (*ppos)
+ /* Start by showing the header */
+ if (!*pos)
+ return (void *)FORMAT_HEADER;
+
+ p = (void *)FORMAT_HEADER;
+ do {
+ p = f_next(m, p, &l);
+ } while (p && l < *pos);
+
+ return p;
+}
+
+static int f_show(struct seq_file *m, void *v)
+{
+ struct ftrace_event_call *call = m->private;
+ struct ftrace_event_field *field;
+ const char *array_descriptor;
+
+ switch ((unsigned long)v) {
+ case FORMAT_HEADER:
+ seq_printf(m, "name: %s\n", call->name);
+ seq_printf(m, "ID: %d\n", call->event.type);
+ seq_printf(m, "format:\n");
return 0;

- s = kmalloc(sizeof(*s), GFP_KERNEL);
- if (!s)
- return -ENOMEM;
+ case FORMAT_PRINTFMT:
+ seq_printf(m, "\nprint fmt: %s\n",
+ call->print_fmt);
+ return 0;
+ }

- trace_seq_init(s);
+ /*
+ * To separate common fields from event fields, the
+ * LSB is set on the first event field. Clear it and
+ * print a newline if it is set.
+ */
+ if ((unsigned long)v & 1) {
+ seq_putc(m, '\n');
+ v = (void *)((unsigned long)v & ~1L);
+ }

- trace_seq_printf(s, "name: %s\n", call->name);
- trace_seq_printf(s, "ID: %d\n", call->event.type);
- trace_seq_printf(s, "format:\n");
+ field = v;

- /* print common fields */
- print_event_fields(s, &ftrace_common_fields);
+ /*
+ * Smartly shows the array type(except dynamic array).
+ * Normal:
+ * field:TYPE VAR
+ * If TYPE := TYPE[LEN], it is shown:
+ * field:TYPE VAR[LEN]
+ */
+ array_descriptor = strchr(field->type, '[');

- trace_seq_putc(s, '\n');
+ if (!strncmp(field->type, "__data_loc", 10))
+ array_descriptor = NULL;

- /* print event specific fields */
- head = trace_get_fields(call);
- print_event_fields(s, head);
+ if (!array_descriptor)
+ seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+ field->type, field->name, field->offset,
+ field->size, !!field->is_signed);
+ else
+ seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+ (int)(array_descriptor - field->type),
+ field->type, field->name,
+ array_descriptor, field->offset,
+ field->size, !!field->is_signed);

- r = trace_seq_printf(s, "\nprint fmt: %s\n", call->print_fmt);
+ return 0;
+}

- if (!r) {
- /*
- * ug! The format output is bigger than a PAGE!!
- */
- buf = "FORMAT TOO BIG\n";
- r = simple_read_from_buffer(ubuf, cnt, ppos,
- buf, strlen(buf));
- goto out;
- }
+static void f_stop(struct seq_file *m, void *p)
+{
+}

- r = simple_read_from_buffer(ubuf, cnt, ppos,
- s->buffer, s->len);
- out:
- kfree(s);
- return r;
+static const struct seq_operations trace_format_seq_ops = {
+ .start = f_start,
+ .next = f_next,
+ .stop = f_stop,
+ .show = f_show,
+};
+
+static int trace_format_open(struct inode *inode, struct file *file)
+{
+ struct ftrace_event_call *call = inode->i_private;
+ struct seq_file *m;
+ int ret;
+
+ ret = seq_open(file, &trace_format_seq_ops);
+ if (ret < 0)
+ return ret;
+
+ m = file->private_data;
+ m->private = call;
+
+ return 0;
}

static ssize_t
@@ -834,8 +905,10 @@ static const struct file_operations ftrace_enable_fops = {
};

static const struct file_operations ftrace_event_format_fops = {
- .open = tracing_open_generic,
- .read = event_format_read,
+ .open = trace_format_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
};

static const struct file_operations ftrace_event_id_fops = {


2010-06-04 05:59:24

by Li Zefan

[permalink] [raw]
Subject: Re: [PATCH][GIT PULL][v2.6.35] tracing/events: Convert format output to seq_file

Steven Rostedt wrote:
> On Fri, 2010-06-04 at 09:39 +0800, Li Zefan wrote:
>
>> This patch seriously conflicts with my patch that extracts common fields
>> from every trace event to a global list, which is already in your perf/core2
>> branch. Should I rebase and resend it?
>
> I pushed out a conflict resolution of pulling tip/perf/urgent-2 into
> tip/perf/core-2 (and pushing that as tip/perf/core-4).
>
> I did change a bit.
>

The code looks ok.

But we can simplify the seq_file code, since the common fields are now
kept in a global list. I can make an incremental patch.

2010-06-07 07:59:03

by Kei Tokunaga

[permalink] [raw]
Subject: Re: [PATCH v2][GIT PULL][v2.6.35] tracing/events: Convert format output to seq_file

Steven Rostedt wrote:
> Ingo,
>
> Please pull the latest tip/perf/urgent-2 tree, which can be found at:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace.git
> tip/perf/urgent-2
>
>
> Steven Rostedt (1):
> tracing/events: Convert format output to seq_file
>
> ----
> kernel/trace/trace_events.c | 208 +++++++++++++++++++++++++++++--------------
> 1 files changed, 141 insertions(+), 67 deletions(-)
> ---------------------------
> commit bcaa10360f3b2a623453a9fb10ef77aafeef8bb6
> Author: Steven Rostedt <[email protected]>
> Date: Thu Jun 3 15:21:34 2010 -0400
>
> tracing/events: Convert format output to seq_file
>
> Two new events were added that broke the current format output.
>
> Both from the SCSI system: scsi_dispatch_cmd_done and scsi_dispatch_cmd_timeout
>
> The reason is that their print_fmt exceeded a page size. Since the output
> of the format used simple_read_from_buffer and trace_seq, it was limited
> to a page size in output.
>
> This patch converts the printing of the format of an event into seq_file,
> which allows greater than a page size to be shown.
>
> I diffed all event formats comparing the output with and without this
> patch. All matched except for the above two, which showed just:
>
> FORMAT TOO BIG
>
> without this patch, but now properly displays the output with this patch.
>
> v2: Remove updating *pos in seq start function.
> [ Thanks to Li Zefan for pointing that out ]

There's been an oversight... Thanks a lot for finding and fixing
this! I built a kernel with the patch applied and confirmed the
issue is fixed on my box.

Tested-by: Kei Tokunaga <[email protected]>

> Reviewed-by: Li Zefan <[email protected]>
> Cc: Martin K. Petersen <[email protected]>
> Cc: Kei Tokunaga <[email protected]>
> Cc: James Bottomley <[email protected]>
> Cc: Tomohiro Kusumi <[email protected]>
> Cc: Xiao Guangrong <[email protected]>
> Signed-off-by: Steven Rostedt <[email protected]>
>
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index 53cffc0..45a8968 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -29,6 +29,8 @@ DEFINE_MUTEX(event_mutex);
>
> LIST_HEAD(ftrace_events);
>
> +#define COMMON_FIELD_COUNT 5
> +
> struct list_head *
> trace_get_fields(struct ftrace_event_call *event_call)
> {
> @@ -544,85 +546,155 @@ out:
> return ret;
> }
>
> -static ssize_t
> -event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
> - loff_t *ppos)
> +enum {
> + FORMAT_HEADER = 1,
> + FORMAT_PRINTFMT = 2,
> +};
> +
> +static void *f_next(struct seq_file *m, void *v, loff_t *pos)
> {
> - struct ftrace_event_call *call = filp->private_data;
> + struct ftrace_event_call *call = m->private;
> struct ftrace_event_field *field;
> struct list_head *head;
> - struct trace_seq *s;
> - int common_field_count = 5;
> - char *buf;
> - int r = 0;
> -
> - if (*ppos)
> - return 0;
> + loff_t index = *pos;
>
> - s = kmalloc(sizeof(*s), GFP_KERNEL);
> - if (!s)
> - return -ENOMEM;
> + (*pos)++;
>
> - trace_seq_init(s);
> + head = trace_get_fields(call);
>
> - trace_seq_printf(s, "name: %s\n", call->name);
> - trace_seq_printf(s, "ID: %d\n", call->event.type);
> - trace_seq_printf(s, "format:\n");
> + switch ((unsigned long)v) {
> + case FORMAT_HEADER:
>
> - head = trace_get_fields(call);
> - list_for_each_entry_reverse(field, head, link) {
> - /*
> - * Smartly shows the array type(except dynamic array).
> - * Normal:
> - * field:TYPE VAR
> - * If TYPE := TYPE[LEN], it is shown:
> - * field:TYPE VAR[LEN]
> - */
> - const char *array_descriptor = strchr(field->type, '[');
> -
> - if (!strncmp(field->type, "__data_loc", 10))
> - array_descriptor = NULL;
> -
> - if (!array_descriptor) {
> - r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
> - "\tsize:%u;\tsigned:%d;\n",
> - field->type, field->name, field->offset,
> - field->size, !!field->is_signed);
> - } else {
> - r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
> - "\tsize:%u;\tsigned:%d;\n",
> - (int)(array_descriptor - field->type),
> - field->type, field->name,
> - array_descriptor, field->offset,
> - field->size, !!field->is_signed);
> - }
> + if (unlikely(list_empty(head)))
> + return NULL;
>
> - if (--common_field_count == 0)
> - r = trace_seq_printf(s, "\n");
> + field = list_entry(head->prev, struct ftrace_event_field, link);
> + return field;
>
> - if (!r)
> - break;
> + case FORMAT_PRINTFMT:
> + /* all done */
> + return NULL;
> }
>
> - if (r)
> - r = trace_seq_printf(s, "\nprint fmt: %s\n",
> - call->print_fmt);
> + /*
> + * To separate common fields from event fields, the
> + * LSB is set on the first event field. Clear it in case.
> + */
> + v = (void *)((unsigned long)v & ~1L);
>
> - if (!r) {
> - /*
> - * ug! The format output is bigger than a PAGE!!
> - */
> - buf = "FORMAT TOO BIG\n";
> - r = simple_read_from_buffer(ubuf, cnt, ppos,
> - buf, strlen(buf));
> - goto out;
> + field = v;
> + if (field->link.prev == head)
> + return (void *)FORMAT_PRINTFMT;
> +
> + field = list_entry(field->link.prev, struct ftrace_event_field, link);
> +
> + /* Set the LSB to notify f_show to print an extra newline */
> + if (index == COMMON_FIELD_COUNT)
> + field = (struct ftrace_event_field *)
> + ((unsigned long)field | 1);
> +
> + return field;
> +}
> +
> +static void *f_start(struct seq_file *m, loff_t *pos)
> +{
> + loff_t l = 0;
> + void *p;
> +
> + /* Start by showing the header */
> + if (!*pos)
> + return (void *)FORMAT_HEADER;
> +
> + p = (void *)FORMAT_HEADER;
> + do {
> + p = f_next(m, p, &l);
> + } while (p && l < *pos);
> +
> + return p;
> +}
> +
> +static int f_show(struct seq_file *m, void *v)
> +{
> + struct ftrace_event_call *call = m->private;
> + struct ftrace_event_field *field;
> + const char *array_descriptor;
> +
> + switch ((unsigned long)v) {
> + case FORMAT_HEADER:
> + seq_printf(m, "name: %s\n", call->name);
> + seq_printf(m, "ID: %d\n", call->event.type);
> + seq_printf(m, "format:\n");
> + return 0;
> +
> + case FORMAT_PRINTFMT:
> + seq_printf(m, "\nprint fmt: %s\n",
> + call->print_fmt);
> + return 0;
> }
>
> - r = simple_read_from_buffer(ubuf, cnt, ppos,
> - s->buffer, s->len);
> - out:
> - kfree(s);
> - return r;
> + /*
> + * To separate common fields from event fields, the
> + * LSB is set on the first event field. Clear it and
> + * print a newline if it is set.
> + */
> + if ((unsigned long)v & 1) {
> + seq_putc(m, '\n');
> + v = (void *)((unsigned long)v & ~1L);
> + }
> +
> + field = v;
> +
> + /*
> + * Smartly shows the array type(except dynamic array).
> + * Normal:
> + * field:TYPE VAR
> + * If TYPE := TYPE[LEN], it is shown:
> + * field:TYPE VAR[LEN]
> + */
> + array_descriptor = strchr(field->type, '[');
> +
> + if (!strncmp(field->type, "__data_loc", 10))
> + array_descriptor = NULL;
> +
> + if (!array_descriptor)
> + seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
> + field->type, field->name, field->offset,
> + field->size, !!field->is_signed);
> + else
> + seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
> + (int)(array_descriptor - field->type),
> + field->type, field->name,
> + array_descriptor, field->offset,
> + field->size, !!field->is_signed);
> +
> + return 0;
> +}
> +
> +static void f_stop(struct seq_file *m, void *p)
> +{
> +}
> +
> +static const struct seq_operations trace_format_seq_ops = {
> + .start = f_start,
> + .next = f_next,
> + .stop = f_stop,
> + .show = f_show,
> +};
> +
> +static int trace_format_open(struct inode *inode, struct file *file)
> +{
> + struct ftrace_event_call *call = inode->i_private;
> + struct seq_file *m;
> + int ret;
> +
> + ret = seq_open(file, &trace_format_seq_ops);
> + if (ret < 0)
> + return ret;
> +
> + m = file->private_data;
> + m->private = call;
> +
> + return 0;
> }
>
> static ssize_t
> @@ -820,8 +892,10 @@ static const struct file_operations ftrace_enable_fops = {
> };
>
> static const struct file_operations ftrace_event_format_fops = {
> - .open = tracing_open_generic,
> - .read = event_format_read,
> + .open = trace_format_open,
> + .read = seq_read,
> + .llseek = seq_lseek,
> + .release = seq_release,
> };
>
> static const struct file_operations ftrace_event_id_fops = {
>
>
>
>

2010-06-07 21:00:42

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH v2][GIT PULL][v2.6.35] tracing/events: Convert format output to seq_file

On Mon, 2010-06-07 at 16:57 +0900, Kei Tokunaga wrote:

> There's been an oversight... Thanks a lot for finding and fixing
> this! I built a kernel with the patch applied and confirmed the
> issue is fixed on my box.
>
> Tested-by: Kei Tokunaga <[email protected]>

Thanks Kei!

>
> > Reviewed-by: Li Zefan <[email protected]>
> > Cc: Martin K. Petersen <[email protected]>
> > Cc: Kei Tokunaga <[email protected]>
> > Cc: James Bottomley <[email protected]>
> > Cc: Tomohiro Kusumi <[email protected]>
> > Cc: Xiao Guangrong <[email protected]>
> > Signed-off-by: Steven Rostedt <[email protected]>
> >
> >

Ingo,

Can we get this into 2.6.35? Without this patch, the two new SCSI
tracepoints are useless for tracing (this includes perf trace).
Actually, the in-kernel pretty-printing will still work fine without
this patch.

If you want, you can pull this patch in from email, and add Kei's
Tested-by tag too.

Thanks,

-- Steve