LinuxLists.cc - [PATCH 2/3] ftrace: use struct pid

[permalink] [raw]

Subject: Re: [PATCH 2/3] ftrace: use struct pid

Steven Rostedt <[email protected]> writes:

> From: Steven Rostedt <[email protected]>
>
> Impact: clean up
>
> Eric Biederman suggested using the struct pid for filtering on
> pids in the kernel. This patch is based off of a demonstration
> of an implementation that Eric sent me in an email.

A little nit: I forgot to mention in that email that rcu_read_lock() is
still needed.

> Signed-off-by: Steven Rostedt <[email protected]>
> ---
> kernel/trace/ftrace.c | 76 ++++++++++++++++++++++++++++--------------------
> kernel/trace/trace.h | 4 +-
> 2 files changed, 46 insertions(+), 34 deletions(-)
>
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 57592a9..10b1d7c 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -48,7 +48,7 @@ int ftrace_enabled __read_mostly;
> static int last_ftrace_enabled;
>
> /* set when tracing only a pid */
> -int ftrace_pid_trace = -1;
> +struct pid *ftrace_pid_trace;
>
> /* Quick disabling of function tracer. */
> int function_trace_stop;
> @@ -153,7 +153,7 @@ static int __register_ftrace_function(struct ftrace_ops
> *ops)
> else
> func = ftrace_list_func;
>
> - if (ftrace_pid_trace >= 0) {
> + if (ftrace_pid_trace) {
> set_ftrace_pid_function(func);
> func = ftrace_pid_func;
> }
> @@ -209,7 +209,7 @@ static int __unregister_ftrace_function(struct ftrace_ops
> *ops)
> if (ftrace_list->next == &ftrace_list_end) {
> ftrace_func_t func = ftrace_list->func;
>
> - if (ftrace_pid_trace >= 0) {
> + if (ftrace_pid_trace) {
> set_ftrace_pid_function(func);
> func = ftrace_pid_func;
> }
> @@ -239,7 +239,7 @@ static void ftrace_update_pid_func(void)
>
> func = ftrace_trace_function;
>
> - if (ftrace_pid_trace >= 0) {
> + if (ftrace_pid_trace) {
> set_ftrace_pid_function(func);
> func = ftrace_pid_func;
> } else {
> @@ -1678,18 +1678,40 @@ ftrace_pid_read(struct file *file, char __user *ubuf,
> char buf[64];
> int r;
>
> - if (ftrace_pid_trace >= 0)
> - r = sprintf(buf, "%u\n", ftrace_pid_trace);
> + if (ftrace_pid_trace)
> + r = sprintf(buf, "%u\n", pid_nr(ftrace_pid_trace));
> else
> r = sprintf(buf, "no pid\n");
>
> return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
> }
>
> +static void clear_ftrace_pid_task(struct pid **pid)
> +{
> + struct task_struct *p;
> +
rcu_read_lock();

> + do_each_pid_task(*pid, PIDTYPE_PID, p) {
> + clear_tsk_trace_trace(p);
> + } while_each_pid_task(*pid, PIDTYPE_PID, p);
rcu_read_unlock()

> + put_pid(*pid);
> +
> + *pid = NULL;
> +}
> +
> +static void set_ftrace_pid_task(struct pid *pid)
> +{
> + struct task_struct *p;
> +
rcu_read_lock();
> + do_each_pid_task(pid, PIDTYPE_PID, p) {
> + set_tsk_trace_trace(p);
> + } while_each_pid_task(pid, PIDTYPE_PID, p);
rcu_read_unlock();
> +}
> +
> static ssize_t
> ftrace_pid_write(struct file *filp, const char __user *ubuf,
> size_t cnt, loff_t *ppos)
> {
> + struct pid *pid;
> char buf[64];
> long val;
> int ret;
> @@ -1707,40 +1729,30 @@ ftrace_pid_write(struct file *filp, const char __user
> *ubuf,
> return ret;
>
> mutex_lock(&ftrace_start_lock);
> - if (ret < 0) {
> + if (val < 0) {
> /* disable pid tracing */
> - if (ftrace_pid_trace < 0)
> + if (!ftrace_pid_trace)
> goto out;
> - ftrace_pid_trace = -1;
> +
> + clear_ftrace_pid_task(&ftrace_pid_trace);
>
> } else {
> - struct task_struct *p;
> - int found = 0;
> + pid = find_get_pid(val);
>
> - if (ftrace_pid_trace == val)
> + if (pid == ftrace_pid_trace) {
> + put_pid(pid);
> goto out;
> -
> - /*
> - * Find the task that matches this pid.
> - * TODO: use pid namespaces instead.
> - */
> - rcu_read_lock();
> - for_each_process(p) {
> - if (p->pid == val) {
> - found = 1;
> - set_tsk_trace_trace(p);
> - } else if (test_tsk_trace_trace(p))
> - clear_tsk_trace_trace(p);
> }
> - rcu_read_unlock();
>
> - if (found)
> - ftrace_pid_trace = val;
> - else {
> - if (ftrace_pid_trace < 0)
> - goto out;
> - ftrace_pid_trace = -1;
> - }
> + if (ftrace_pid_trace)
> + clear_ftrace_pid_task(&ftrace_pid_trace);
> +
> + if (!pid)
> + goto out;
> +
> + ftrace_pid_trace = pid;
> +
> + set_ftrace_pid_task(ftrace_pid_trace);
> }
>
> /* update the function call */
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 95fff37..8b81b4d 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -541,11 +541,11 @@ print_graph_function(struct trace_iterator *iter)
> }
> #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
>
> -extern int ftrace_pid_trace;
> +extern struct pid *ftrace_pid_trace;
>
> static inline int ftrace_trace_task(struct task_struct *task)
> {
> - if (ftrace_pid_trace < 0)
> + if (ftrace_pid_trace)
> return 1;
>
> return test_tsk_trace_trace(task);
> --
> 1.5.6.5
>
> --

2008-12-04 12:56:32

[permalink] [raw]

Subject: Re: [PATCH 2/3] ftrace: use struct pid

On Thu, 2008-12-04 at 04:42 -0800, Eric W. Biederman wrote:
>
> > +static void clear_ftrace_pid_task(struct pid **pid)
> > +{
> > + struct task_struct *p;
> > +
> rcu_read_lock();
>
> > + do_each_pid_task(*pid, PIDTYPE_PID, p) {
> > + clear_tsk_trace_trace(p);
> > + } while_each_pid_task(*pid, PIDTYPE_PID, p);
> rcu_read_unlock()
>
> > + put_pid(*pid);
> > +
> > + *pid = NULL;
> > +}

Could we get away with sticking the rcu_read_{un}lock() inside those
macros? Those are going to get used in pretty high level code and we're
allowed to nest rcu_read_lock(). No danger of deadlocks or lock
inversions.

-- Dave

2008-12-04 13:06:35

[permalink] [raw]

Subject: Re: [PATCH 2/3] ftrace: use struct pid

2008-12-04 13:07:41

[permalink] [raw]

Subject: Re: [PATCH 2/3] ftrace: use struct pid

On Thu, 2008-12-04 at 04:56 -0800, Dave Hansen wrote:
> On Thu, 2008-12-04 at 04:42 -0800, Eric W. Biederman wrote:
> >
> > > +static void clear_ftrace_pid_task(struct pid **pid)
> > > +{
> > > + struct task_struct *p;
> > > +
> > rcu_read_lock();
> >
> > > + do_each_pid_task(*pid, PIDTYPE_PID, p) {
> > > + clear_tsk_trace_trace(p);
> > > + } while_each_pid_task(*pid, PIDTYPE_PID, p);
> > rcu_read_unlock()
> >
> > > + put_pid(*pid);
> > > +
> > > + *pid = NULL;
> > > +}
>
> Could we get away with sticking the rcu_read_{un}lock() inside those
> macros? Those are going to get used in pretty high level code and we're
> allowed to nest rcu_read_lock(). No danger of deadlocks or lock
> inversions.

Why don't any of the other users of do_each_pid_task() use
rcu_read_lock()? They all seem to be under read_lock(&tasklist_lock)
(except one is under a write lock of the same).

-- Dave

2008-12-04 13:46:33

[permalink] [raw]

Subject: Re: [PATCH 2/3] ftrace: use struct pid

Dave Hansen <[email protected]> writes:

> On Thu, 2008-12-04 at 04:56 -0800, Dave Hansen wrote:
>> On Thu, 2008-12-04 at 04:42 -0800, Eric W. Biederman wrote:
>> >
>> > > +static void clear_ftrace_pid_task(struct pid **pid)
>> > > +{
>> > > + struct task_struct *p;
>> > > +
>> > rcu_read_lock();
>> >
>> > > + do_each_pid_task(*pid, PIDTYPE_PID, p) {
>> > > + clear_tsk_trace_trace(p);
>> > > + } while_each_pid_task(*pid, PIDTYPE_PID, p);
>> > rcu_read_unlock()
>> >
>> > > + put_pid(*pid);
>> > > +
>> > > + *pid = NULL;
>> > > +}
>>
>> Could we get away with sticking the rcu_read_{un}lock() inside those
>> macros? Those are going to get used in pretty high level code and we're
>> allowed to nest rcu_read_lock(). No danger of deadlocks or lock
>> inversions.
>
> Why don't any of the other users of do_each_pid_task() use
> rcu_read_lock()? They all seem to be under read_lock(&tasklist_lock)
> (except one is under a write lock of the same).

We probably should. Historically read_lock(&tasklist_lock) implies
rcu_read_lock(). And the tasklist lock is what we hold when it is safe.

But if you look at find_vpid we should be holding just the rcu lock there.

Eric

2008-12-04 14:23:20

[permalink] [raw]

Subject: Re: [PATCH 2/3] ftrace: use struct pid

On Thu, 4 Dec 2008, Eric W. Biederman wrote:

> Steven Rostedt <[email protected]> writes:
>
> > From: Steven Rostedt <[email protected]>
> >
> > Impact: clean up
> >
> > Eric Biederman suggested using the struct pid for filtering on
> > pids in the kernel. This patch is based off of a demonstration
> > of an implementation that Eric sent me in an email.
>
> Please use find_get_vpid and pid_vnr.

I could not find a "find_get_vpid".

-- Steve

2008-12-04 14:29:22

[permalink] [raw]

Subject: Re: [PATCH 2/3] ftrace: use struct pid

On Thu, 4 Dec 2008, Dave Hansen wrote:

> On Thu, 2008-12-04 at 04:56 -0800, Dave Hansen wrote:
> > On Thu, 2008-12-04 at 04:42 -0800, Eric W. Biederman wrote:
> > >
> > > > +static void clear_ftrace_pid_task(struct pid **pid)
> > > > +{
> > > > + struct task_struct *p;
> > > > +
> > > rcu_read_lock();
> > >
> > > > + do_each_pid_task(*pid, PIDTYPE_PID, p) {
> > > > + clear_tsk_trace_trace(p);
> > > > + } while_each_pid_task(*pid, PIDTYPE_PID, p);
> > > rcu_read_unlock()
> > >
> > > > + put_pid(*pid);
> > > > +
> > > > + *pid = NULL;
> > > > +}
> >
> > Could we get away with sticking the rcu_read_{un}lock() inside those
> > macros? Those are going to get used in pretty high level code and we're
> > allowed to nest rcu_read_lock(). No danger of deadlocks or lock
> > inversions.
>
> Why don't any of the other users of do_each_pid_task() use
> rcu_read_lock()? They all seem to be under read_lock(&tasklist_lock)
> (except one is under a write lock of the same).

Well, if the pid hashes are traversal safe (rcu style), then we only worry
about a node or task being freed. Assuming that the node is protected
via RCU as tasks are, using only rcu_read_lock should be sufficient.

-- Steve

2008-12-04 14:36:36

[permalink] [raw]

Subject: Re: [PATCH 2/3] ftrace: use struct pid

Steven Rostedt <[email protected]> writes:

> On Thu, 4 Dec 2008, Eric W. Biederman wrote:
>
>> Steven Rostedt <[email protected]> writes:
>>
>> > From: Steven Rostedt <[email protected]>
>> >
>> > Impact: clean up
>> >
>> > Eric Biederman suggested using the struct pid for filtering on
>> > pids in the kernel. This patch is based off of a demonstration
>> > of an implementation that Eric sent me in an email.
>>
>> Please use find_get_vpid and pid_vnr.
>
> I could not find a "find_get_vpid".

Doh. Grumble Pavel Grumble.

find_get_pid is the right one.

Sorry. We have a stupid inconsistency in the naming here.
If we were consistent it would be find_get_vpid.

In that case, pid_vnr is definitely what you want
to use when talking to user space.

Eric

2008-12-04 15:13:04

[permalink] [raw]

Subject: Re: [PATCH 2/3] ftrace: use struct pid

On Thu, 2008-12-04 at 05:40 -0800, Eric W. Biederman wrote:
> Dave Hansen <[email protected]> writes:
> > On Thu, 2008-12-04 at 04:56 -0800, Dave Hansen wrote:
> >> On Thu, 2008-12-04 at 04:42 -0800, Eric W. Biederman wrote:
> >> >
> >> > > +static void clear_ftrace_pid_task(struct pid **pid)
> >> > > +{
> >> > > + struct task_struct *p;
> >> > > +
> >> > rcu_read_lock();
> >> >
> >> > > + do_each_pid_task(*pid, PIDTYPE_PID, p) {
> >> > > + clear_tsk_trace_trace(p);
> >> > > + } while_each_pid_task(*pid, PIDTYPE_PID, p);
> >> > rcu_read_unlock()
> >> >
> >> > > + put_pid(*pid);
> >> > > +
> >> > > + *pid = NULL;
> >> > > +}
> >>
> >> Could we get away with sticking the rcu_read_{un}lock() inside those
> >> macros? Those are going to get used in pretty high level code and we're
> >> allowed to nest rcu_read_lock(). No danger of deadlocks or lock
> >> inversions.
> >
> > Why don't any of the other users of do_each_pid_task() use
> > rcu_read_lock()? They all seem to be under read_lock(&tasklist_lock)
> > (except one is under a write lock of the same).
>
> We probably should. Historically read_lock(&tasklist_lock) implies
> rcu_read_lock().

You mean because the current task can't go through a quiescent period
until it hits userspace, and we can't go to userspace while holding
read_lock()? Nah, that's not subtle. ;)

> And the tasklist lock is what we hold when it is safe.
>
> But if you look at find_vpid we should be holding just the rcu lock there.

Yup, I see it there.

So, any reason not to do this? Brown-bag compile tested.

Signed-off-by: Dave Hansen <[email protected]>

---

linux-2.6.git-dave/include/linux/pid.h | 2 ++
1 file changed, 2 insertions(+)

diff -puN include/linux/pid.h~put-rcu-ops-in-do_each_pid_task include/linux/pid.h
--- linux-2.6.git/include/linux/pid.h~put-rcu-ops-in-do_each_pid_task 2008-12-04 06:03:09.000000000 -0800
+++ linux-2.6.git-dave/include/linux/pid.h 2008-12-04 06:19:35.000000000 -0800
@@ -147,6 +147,7 @@ pid_t pid_vnr(struct pid *pid);
#define do_each_pid_task(pid, type, task) \
do { \
struct hlist_node *pos___; \
+ rcu_read_lock(); \
if (pid != NULL) \
hlist_for_each_entry_rcu((task), pos___, \
&pid->tasks[type], pids[type].node) {
@@ -159,6 +160,7 @@ pid_t pid_vnr(struct pid *pid);
if (type == PIDTYPE_PID) \
break; \
} \
+ rcu_read_unlock(); \
} while (0)

#define do_each_pid_thread(pid, type, task) \
_

-- Dave

2008-12-04 15:35:51

[permalink] [raw]

Subject: Re: [PATCH 2/3] ftrace: use struct pid

On Thu, 4 Dec 2008, Dave Hansen wrote:
> > >>
> > >> Could we get away with sticking the rcu_read_{un}lock() inside those
> > >> macros? Those are going to get used in pretty high level code and we're
> > >> allowed to nest rcu_read_lock(). No danger of deadlocks or lock
> > >> inversions.
> > >
> > > Why don't any of the other users of do_each_pid_task() use
> > > rcu_read_lock()? They all seem to be under read_lock(&tasklist_lock)
> > > (except one is under a write lock of the same).
> >
> > We probably should. Historically read_lock(&tasklist_lock) implies
> > rcu_read_lock().
>
> You mean because the current task can't go through a quiescent period
> until it hits userspace, and we can't go to userspace while holding
> read_lock()? Nah, that's not subtle. ;)

It has nothing to do with userspace. We cannot go through a quiescent period
while holding rcu_read_lock(), or while preemption is disabled. read_lock()
prevents preemption, as do spin locks.

>
> > And the tasklist lock is what we hold when it is safe.
> >
> > But if you look at find_vpid we should be holding just the rcu lock there.
>
> Yup, I see it there.
>
> So, any reason not to do this? Brown-bag compile tested.
>
> Signed-off-by: Dave Hansen <[email protected]>
>
> ---
>
> linux-2.6.git-dave/include/linux/pid.h | 2 ++
> 1 file changed, 2 insertions(+)
>
> diff -puN include/linux/pid.h~put-rcu-ops-in-do_each_pid_task include/linux/pid.h
> --- linux-2.6.git/include/linux/pid.h~put-rcu-ops-in-do_each_pid_task 2008-12-04 06:03:09.000000000 -0800
> +++ linux-2.6.git-dave/include/linux/pid.h 2008-12-04 06:19:35.000000000 -0800
> @@ -147,6 +147,7 @@ pid_t pid_vnr(struct pid *pid);
> #define do_each_pid_task(pid, type, task) \
> do { \
> struct hlist_node *pos___; \
> + rcu_read_lock(); \
> if (pid != NULL) \
> hlist_for_each_entry_rcu((task), pos___, \
> &pid->tasks[type], pids[type].node) {
> @@ -159,6 +160,7 @@ pid_t pid_vnr(struct pid *pid);
> if (type == PIDTYPE_PID) \
> break; \
> } \
> + rcu_read_unlock(); \
> } while (0)

That probably could work.

-- Steve

2008-12-04 15:43:53

[permalink] [raw]

Subject: Re: [PATCH 2/3] ftrace: use struct pid

2008-12-04 15:45:05