1)Iterate throw all of threads in the system.
Check for all threads, not only for group leaders.
2)Check for p->on_rq instead of p->state and cputime.
Preempted task in !TASK_RUNNING state OR just
created task may be queued, that we want to be
reported too.
3)Use read_lock() instead of write_lock().
This function does not change any structures, and
read_lock() is enough.
Signed-off-by: Kirill Tkhai <[email protected]>
CC: Srikar Dronamraju <[email protected]>
CC: Mike Galbraith <[email protected]>
CC: Peter Zijlstra <[email protected]>
CC: Ingo Molnar <[email protected]>
---
kernel/cpu.c | 33 ++++++++++++++++++++-------------
1 file changed, 20 insertions(+), 13 deletions(-)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a343bde..81e2a38 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -274,21 +274,28 @@ void clear_tasks_mm_cpumask(int cpu)
rcu_read_unlock();
}
-static inline void check_for_tasks(int cpu)
+static inline void check_for_tasks(int dead_cpu)
{
- struct task_struct *p;
- cputime_t utime, stime;
+ struct task_struct *g, *p;
- write_lock_irq(&tasklist_lock);
- for_each_process(p) {
- task_cputime(p, &utime, &stime);
- if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
- (utime || stime))
- pr_warn("Task %s (pid = %d) is on cpu %d (state = %ld, flags = %x)\n",
- p->comm, task_pid_nr(p), cpu,
- p->state, p->flags);
- }
- write_unlock_irq(&tasklist_lock);
+ read_lock_irq(&tasklist_lock);
+ do_each_thread(g, p) {
+ if (!p->on_rq)
+ continue;
+ /*
+ * We do the check with unlocked task_rq(p)->lock.
+ * Order the reading to do not warn about a task,
+ * which was running on this cpu in the past, and
+ * it's just been woken on another cpu.
+ */
+ rmb();
+ if (task_cpu(p) != dead_cpu)
+ continue;
+
+ pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
+ p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
+ } while_each_thread(g, p);
+ read_unlock_irq(&tasklist_lock);
}
struct take_cpu_down_param {
On Tue, Jun 17, 2014 at 05:24:22PM +0400, Kirill Tkhai wrote:
>
> 1)Iterate throw all of threads in the system.
thru
> Check for all threads, not only for group leaders.
>
> 2)Check for p->on_rq instead of p->state and cputime.
> Preempted task in !TASK_RUNNING state OR just
> created task may be queued, that we want to be
> reported too.
>
> 3)Use read_lock() instead of write_lock().
> This function does not change any structures, and
> read_lock() is enough.
>
> Signed-off-by: Kirill Tkhai <[email protected]>
> CC: Srikar Dronamraju <[email protected]>
> CC: Mike Galbraith <[email protected]>
> CC: Peter Zijlstra <[email protected]>
> CC: Ingo Molnar <[email protected]>
> ---
> kernel/cpu.c | 33 ++++++++++++++++++++-------------
> 1 file changed, 20 insertions(+), 13 deletions(-)
>
> diff --git a/kernel/cpu.c b/kernel/cpu.c
> index a343bde..81e2a38 100644
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -274,21 +274,28 @@ void clear_tasks_mm_cpumask(int cpu)
> rcu_read_unlock();
> }
>
> -static inline void check_for_tasks(int cpu)
> +static inline void check_for_tasks(int dead_cpu)
> {
> - struct task_struct *p;
> - cputime_t utime, stime;
> + struct task_struct *g, *p;
>
> - write_lock_irq(&tasklist_lock);
> - for_each_process(p) {
> - task_cputime(p, &utime, &stime);
> - if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
> - (utime || stime))
> - pr_warn("Task %s (pid = %d) is on cpu %d (state = %ld, flags = %x)\n",
> - p->comm, task_pid_nr(p), cpu,
> - p->state, p->flags);
> - }
> - write_unlock_irq(&tasklist_lock);
> + read_lock_irq(&tasklist_lock);
> + do_each_thread(g, p) {
> + if (!p->on_rq)
> + continue;
> + /*
> + * We do the check with unlocked task_rq(p)->lock.
> + * Order the reading to do not warn about a task,
> + * which was running on this cpu in the past, and
> + * it's just been woken on another cpu.
> + */
> + rmb();
smp_rmb();
> + if (task_cpu(p) != dead_cpu)
> + continue;
But because we don't have rq->lock held, we can be in the middle of a
wakeup and miss a task.
Then again, I suppose anything without rq->lock can and will miss tasks.
> + pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
> + p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
> + } while_each_thread(g, p);
> + read_unlock_irq(&tasklist_lock);
> }
>
> struct take_cpu_down_param {
>
>
>
В Пн, 23/06/2014 в 12:24 +0200, Peter Zijlstra пишет:
> On Tue, Jun 17, 2014 at 05:24:22PM +0400, Kirill Tkhai wrote:
> >
> > 1)Iterate throw all of threads in the system.
>
> thru
Thanks :)
>
> > Check for all threads, not only for group leaders.
> >
> > 2)Check for p->on_rq instead of p->state and cputime.
> > Preempted task in !TASK_RUNNING state OR just
> > created task may be queued, that we want to be
> > reported too.
> >
> > 3)Use read_lock() instead of write_lock().
> > This function does not change any structures, and
> > read_lock() is enough.
> >
> > Signed-off-by: Kirill Tkhai <[email protected]>
> > CC: Srikar Dronamraju <[email protected]>
> > CC: Mike Galbraith <[email protected]>
> > CC: Peter Zijlstra <[email protected]>
> > CC: Ingo Molnar <[email protected]>
> > ---
> > kernel/cpu.c | 33 ++++++++++++++++++++-------------
> > 1 file changed, 20 insertions(+), 13 deletions(-)
> >
> > diff --git a/kernel/cpu.c b/kernel/cpu.c
> > index a343bde..81e2a38 100644
> > --- a/kernel/cpu.c
> > +++ b/kernel/cpu.c
> > @@ -274,21 +274,28 @@ void clear_tasks_mm_cpumask(int cpu)
> > rcu_read_unlock();
> > }
> >
> > -static inline void check_for_tasks(int cpu)
> > +static inline void check_for_tasks(int dead_cpu)
> > {
> > - struct task_struct *p;
> > - cputime_t utime, stime;
> > + struct task_struct *g, *p;
> >
> > - write_lock_irq(&tasklist_lock);
> > - for_each_process(p) {
> > - task_cputime(p, &utime, &stime);
> > - if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
> > - (utime || stime))
> > - pr_warn("Task %s (pid = %d) is on cpu %d (state = %ld, flags = %x)\n",
> > - p->comm, task_pid_nr(p), cpu,
> > - p->state, p->flags);
> > - }
> > - write_unlock_irq(&tasklist_lock);
> > + read_lock_irq(&tasklist_lock);
> > + do_each_thread(g, p) {
> > + if (!p->on_rq)
> > + continue;
> > + /*
> > + * We do the check with unlocked task_rq(p)->lock.
> > + * Order the reading to do not warn about a task,
> > + * which was running on this cpu in the past, and
> > + * it's just been woken on another cpu.
> > + */
> > + rmb();
>
> smp_rmb();
>
> > + if (task_cpu(p) != dead_cpu)
> > + continue;
>
> But because we don't have rq->lock held, we can be in the middle of a
> wakeup and miss a task.
>
> Then again, I suppose anything without rq->lock can and will miss tasks.
If we use rq->lock it's possible to move check_for_tasks() to kernel/sched/core.c.
And we can leave TASK_RUNNING check for waking tasks. Maybe something like this?
static inline void check_for_tasks(int dead_cpu)
{
struct task_struct *g, *p;
struct rq *rq = cpu_rq(dead_cpu);
read_lock_irq(&tasklist_lock);
raw_spin_lock(&rq->lock)
do_each_thread(g, p) {
if (!p->on_rq && p->state != TASK_RUNNING)
continue;
if (task_cpu(p) != dead_cpu)
continue;
pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
} while_each_thread(g, p);
raw_spin_unlock(&rq->lock)
read_unlock_irq(&tasklist_lock);
}
It still does not give a 100% guarantee... Should we take p->pi_lock for every task?
> > + pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
> > + p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
> > + } while_each_thread(g, p);
> > + read_unlock_irq(&tasklist_lock);
> > }
> >
> > struct take_cpu_down_param {
> >
> >
> >
Regards,
Kirill
On Mon, Jun 23, 2014 at 02:52:18PM +0400, Kirill Tkhai wrote:
> > Then again, I suppose anything without rq->lock can and will miss tasks.
>
> If we use rq->lock it's possible to move check_for_tasks() to kernel/sched/core.c.
>
> And we can leave TASK_RUNNING check for waking tasks. Maybe something like this?
>
> static inline void check_for_tasks(int dead_cpu)
> {
> struct task_struct *g, *p;
> struct rq *rq = cpu_rq(dead_cpu);
>
> read_lock_irq(&tasklist_lock);
> raw_spin_lock(&rq->lock)
>
> do_each_thread(g, p) {
> if (!p->on_rq && p->state != TASK_RUNNING)
> continue;
> if (task_cpu(p) != dead_cpu)
> continue;
>
> pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
> p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
> } while_each_thread(g, p);
>
> raw_spin_unlock(&rq->lock)
> read_unlock_irq(&tasklist_lock);
> }
>
> It still does not give a 100% guarantee... Should we take p->pi_lock for every task?
seeing how rq->lock nests inside of ->pi_lock that's going to be
somewhat icky.
I think we can live with a false negative, given how much people run
this nonsense it'll trigger eventually.
False positives would be bad though :-)
So I think we can keep your original (lock-free) proposal.