LinuxLists.cc - [RFC PATCH] workqueue: Introduce __show_worker_pool_state and __show_workqueue

2021-09-28 10:32:05

Subject: [RFC PATCH] workqueue: Introduce __show_worker_pool_state and __show_workqueue_state.

Currently show_workqueue_state shows the state of all workqueues and of
all worker pools.
Divide it into more granular functions (__show_workqueue_state and
__show_worker_pool_state), that would show states of individual workqueues
and worker pools.

Signed-off-by: Imran Khan <[email protected]>
---
kernel/workqueue.c | 139 ++++++++++++++++++++++++++-------------------
1 file changed, 79 insertions(+), 60 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9a042a449002..cfae0c8d5e2e 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4796,6 +4796,81 @@ static void show_pwq(struct pool_workqueue *pwq)
}
}

+/**
+ * __show_workqueue_state - dump state of specified workqueue
+ * @wq: workqueue whose state will be printed
+ */
+static void __show_workqueue_state(struct workqueue_struct *wq)
+{
+ struct pool_workqueue *pwq;
+ bool idle = true;
+ unsigned long flags;
+
+ for_each_pwq(pwq, wq) {
+ if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
+ idle = false;
+ break;
+ }
+ }
+ if (idle) /* Nothing to print for idle workqueue */
+ return;
+
+ pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
+
+ for_each_pwq(pwq, wq) {
+ raw_spin_lock_irqsave(&pwq->pool->lock, flags);
+ if (pwq->nr_active || !list_empty(&pwq->inactive_works))
+ show_pwq(pwq);
+ raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
+ /*
+ * We could be printing a lot from atomic context, e.g.
+ * sysrq-t -> show_workqueue_state(). Avoid triggering
+ * hard lockup.
+ */
+ touch_nmi_watchdog();
+ }
+
+}
+
+/**
+ * __show_worker_pool_state - dump state of specified worker pool
+ * @pool: worker pool whose state will be printed
+ */
+static void __show_worker_pool_state(struct worker_pool *pool)
+{
+ struct worker *worker;
+ bool first = true;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&pool->lock, flags);
+ if (pool->nr_workers == pool->nr_idle)
+ goto next_pool;
+
+ pr_info("pool %d:", pool->id);
+ pr_cont_pool_info(pool);
+ pr_cont(" hung=%us workers=%d",
+ jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
+ pool->nr_workers);
+ if (pool->manager)
+ pr_cont(" manager: %d",
+ task_pid_nr(pool->manager->task));
+ list_for_each_entry(worker, &pool->idle_list, entry) {
+ pr_cont(" %s%d", first ? "idle: " : "",
+ task_pid_nr(worker->task));
+ first = false;
+ }
+ pr_cont("\n");
+next_pool:
+ raw_spin_unlock_irqrestore(&pool->lock, flags);
+ /*
+ * We could be printing a lot from atomic context, e.g.
+ * sysrq-t -> show_workqueue_state(). Avoid triggering
+ * hard lockup.
+ */
+ touch_nmi_watchdog();
+
+}
+
/**
* show_workqueue_state - dump workqueue state
*
@@ -4806,73 +4881,17 @@ void show_workqueue_state(void)
{
struct workqueue_struct *wq;
struct worker_pool *pool;
- unsigned long flags;
int pi;

rcu_read_lock();

pr_info("Showing busy workqueues and worker pools:\n");

- list_for_each_entry_rcu(wq, &workqueues, list) {
- struct pool_workqueue *pwq;
- bool idle = true;
-
- for_each_pwq(pwq, wq) {
- if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
- idle = false;
- break;
- }
- }
- if (idle)
- continue;
-
- pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
-
- for_each_pwq(pwq, wq) {
- raw_spin_lock_irqsave(&pwq->pool->lock, flags);
- if (pwq->nr_active || !list_empty(&pwq->inactive_works))
- show_pwq(pwq);
- raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
- /*
- * We could be printing a lot from atomic context, e.g.
- * sysrq-t -> show_workqueue_state(). Avoid triggering
- * hard lockup.
- */
- touch_nmi_watchdog();
- }
- }
-
- for_each_pool(pool, pi) {
- struct worker *worker;
- bool first = true;
+ list_for_each_entry_rcu(wq, &workqueues, list)
+ __show_workqueue_state(wq);

- raw_spin_lock_irqsave(&pool->lock, flags);
- if (pool->nr_workers == pool->nr_idle)
- goto next_pool;
-
- pr_info("pool %d:", pool->id);
- pr_cont_pool_info(pool);
- pr_cont(" hung=%us workers=%d",
- jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
- pool->nr_workers);
- if (pool->manager)
- pr_cont(" manager: %d",
- task_pid_nr(pool->manager->task));
- list_for_each_entry(worker, &pool->idle_list, entry) {
- pr_cont(" %s%d", first ? "idle: " : "",
- task_pid_nr(worker->task));
- first = false;
- }
- pr_cont("\n");
- next_pool:
- raw_spin_unlock_irqrestore(&pool->lock, flags);
- /*
- * We could be printing a lot from atomic context, e.g.
- * sysrq-t -> show_workqueue_state(). Avoid triggering
- * hard lockup.
- */
- touch_nmi_watchdog();
- }
+ for_each_pool(pool, pi)
+ __show_worker_pool_state(pool);

rcu_read_unlock();
}

base-commit: 64f28502801aad0cb4f97102b4b5a81df315c2aa
--
2.30.2

2021-10-05 00:11:13

by Tejun Heo

[permalink] [raw]

Subject: Re: [RFC PATCH] workqueue: Introduce __show_worker_pool_state and __show_workqueue_state.

On Tue, Sep 28, 2021 at 08:31:06PM +1000, Imran Khan wrote:
> Currently show_workqueue_state shows the state of all workqueues and of
> all worker pools.
> Divide it into more granular functions (__show_workqueue_state and
> __show_worker_pool_state), that would show states of individual workqueues
> and worker pools.

But why is this change good? Are you building something on top later?

Thanks.

--
tejun

2021-10-05 09:58:08

by Imran Khan

[permalink] [raw]

Subject: Re: [RFC PATCH] workqueue: Introduce __show_worker_pool_state and __show_workqueue_state.

On 5/10/21 3:42 am, Tejun Heo wrote:
> On Tue, Sep 28, 2021 at 08:31:06PM +1000, Imran Khan wrote:
>> Currently show_workqueue_state shows the state of all workqueues and of
>> all worker pools.
>> Divide it into more granular functions (__show_workqueue_state and
>> __show_worker_pool_state), that would show states of individual workqueues
>> and worker pools.
>
> But why is this change good? Are you building something on top later?
>

The main motive was to dump data pertaining to only that workqueue which
is being destroyed in destroy_workqueue.

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index cfae0c8d5e2e..e191646dd3e4 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4447,7 +4447,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
raw_spin_unlock_irq(&pwq->pool->lock);
mutex_unlock(&wq->mutex);
mutex_unlock(&wq_pool_mutex);
- show_workqueue_state();
+ __show_workqueue_state(wq);
return;
}
raw_spin_unlock_irq(&pwq->pool->lock);

Please let me know if I am missing some information that may be helpful
here and can be obtained only by dumping the states of other workqueues.

Then while going through the code, it seemed better to break
show_workqueue_state into more granular entities which can be used later
for cases such as one mentioned above.

Thanks for reviewing this.

-- Imran

> Thanks.
>

2021-10-05 16:11:42

by Tejun Heo

[permalink] [raw]

Subject: Re: [RFC PATCH] workqueue: Introduce __show_worker_pool_state and __show_workqueue_state.

Hello,

On Tue, Oct 05, 2021 at 08:55:47PM +1100, [email protected] wrote:
> Please let me know if I am missing some information that may be helpful here
> and can be obtained only by dumping the states of other workqueues.
>
> Then while going through the code, it seemed better to break
> show_workqueue_state into more granular entities which can be used later for
> cases such as one mentioned above.

Both sound reasonable to me. Do you mind updating the description?

Thanks.

--
tejun

2021-10-05 21:47:50

by Imran Khan

[permalink] [raw]

Subject: Re: [RFC PATCH] workqueue: Introduce __show_worker_pool_state and __show_workqueue_state.

Hi Tejun,

On 6/10/21 3:09 am, Tejun Heo wrote:
> Hello,
>
> On Tue, Oct 05, 2021 at 08:55:47PM +1100, [email protected] wrote:
>> Please let me know if I am missing some information that may be helpful here
>> and can be obtained only by dumping the states of other workqueues.
>>
>> Then while going through the code, it seemed better to break
>> show_workqueue_state into more granular entities which can be used later for
>> cases such as one mentioned above.
>
> Both sound reasonable to me. Do you mind updating the description?
>
I have updated the description in v2 of patch [1].

[1]
https://lore.kernel.org/lkml/[email protected]/

Thanks,
Imran

> Thanks.
>