2021-10-05 21:40:02

by Imran Khan

Subject: [RFC PATCH v2] workqueue: Introduce __show_worker_pool_state and __show_workqueue_state.

Currently, show_workqueue_state() shows the state of all workqueues and of
all worker pools. In certain cases we may need to dump the state of only a
specific workqueue or worker pool. For example, in destroy_workqueue() we
only need to show the state of the workqueue that is getting destroyed.

So divide show_workqueue_state() into more granular functions
(__show_workqueue_state() and __show_worker_pool_state()) that show the
state of an individual workqueue or worker pool and can be used in cases
such as the one mentioned above.

Also, as mentioned earlier, make destroy_workqueue() dump data pertaining
only to the workqueue that is being destroyed.
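
As an illustration of the intended usage (the driver, its workqueue and the
timeout path below are hypothetical and not part of this patch), a caller
that already holds a pointer to a specific workqueue could then dump only
that workqueue:

#include <linux/workqueue.h>
#include <linux/printk.h>

/* Hypothetical driver-private workqueue, shown for illustration only. */
static struct workqueue_struct *example_wq;

static void example_flush_timed_out(void)
{
	/*
	 * Dump only the workqueue we care about, rather than every
	 * workqueue in the system via show_workqueue_state().
	 */
	pr_warn("example: flush timed out, dumping workqueue state\n");
	__show_workqueue_state(example_wq);
}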

Signed-off-by: Imran Khan <[email protected]>
---
Changes in v2:
- Update patch description
- Dump state of only relevant workqueue in destroy_workqueue

include/linux/workqueue.h | 1 +
kernel/workqueue.c | 142 ++++++++++++++++++++++----------------
2 files changed, 82 insertions(+), 61 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 2ebef6b1a3d6..e7b7c3f231e5 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -471,6 +471,7 @@ extern unsigned int work_busy(struct work_struct *work);
extern __printf(1, 2) void set_worker_desc(const char *fmt, ...);
extern void print_worker_info(const char *log_lvl, struct task_struct *task);
extern void show_workqueue_state(void);
+extern void __show_workqueue_state(struct workqueue_struct *wq);
extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task);

/**
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9a042a449002..21f9b615c255 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -375,6 +375,7 @@ EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
static int worker_thread(void *__worker);
static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
static void show_pwq(struct pool_workqueue *pwq);
+static void __show_worker_pool_state(struct worker_pool *pool);

#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>
@@ -4447,7 +4448,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
			raw_spin_unlock_irq(&pwq->pool->lock);
			mutex_unlock(&wq->mutex);
			mutex_unlock(&wq_pool_mutex);
-			show_workqueue_state();
+			__show_workqueue_state(wq);
			return;
		}
		raw_spin_unlock_irq(&pwq->pool->lock);
@@ -4796,6 +4797,81 @@ static void show_pwq(struct pool_workqueue *pwq)
}
}

+/**
+ * __show_workqueue_state - dump state of specified workqueue
+ * @wq: workqueue whose state will be printed
+ */
+void __show_workqueue_state(struct workqueue_struct *wq)
+{
+	struct pool_workqueue *pwq;
+	bool idle = true;
+	unsigned long flags;
+
+	for_each_pwq(pwq, wq) {
+		if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
+			idle = false;
+			break;
+		}
+	}
+	if (idle) /* Nothing to print for idle workqueue */
+		return;
+
+	pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
+
+	for_each_pwq(pwq, wq) {
+		raw_spin_lock_irqsave(&pwq->pool->lock, flags);
+		if (pwq->nr_active || !list_empty(&pwq->inactive_works))
+			show_pwq(pwq);
+		raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
+		/*
+		 * We could be printing a lot from atomic context, e.g.
+		 * sysrq-t -> show_workqueue_state(). Avoid triggering
+		 * hard lockup.
+		 */
+		touch_nmi_watchdog();
+	}
+
+}
+
+/**
+ * __show_worker_pool_state - dump state of specified worker pool
+ * @pool: worker pool whose state will be printed
+ */
+static void __show_worker_pool_state(struct worker_pool *pool)
+{
+	struct worker *worker;
+	bool first = true;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&pool->lock, flags);
+	if (pool->nr_workers == pool->nr_idle)
+		goto next_pool;
+
+	pr_info("pool %d:", pool->id);
+	pr_cont_pool_info(pool);
+	pr_cont(" hung=%us workers=%d",
+		jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
+		pool->nr_workers);
+	if (pool->manager)
+		pr_cont(" manager: %d",
+			task_pid_nr(pool->manager->task));
+	list_for_each_entry(worker, &pool->idle_list, entry) {
+		pr_cont(" %s%d", first ? "idle: " : "",
+			task_pid_nr(worker->task));
+		first = false;
+	}
+	pr_cont("\n");
+next_pool:
+	raw_spin_unlock_irqrestore(&pool->lock, flags);
+	/*
+	 * We could be printing a lot from atomic context, e.g.
+	 * sysrq-t -> show_workqueue_state(). Avoid triggering
+	 * hard lockup.
+	 */
+	touch_nmi_watchdog();
+
+}
+
/**
* show_workqueue_state - dump workqueue state
*
@@ -4806,73 +4882,17 @@ void show_workqueue_state(void)
{
	struct workqueue_struct *wq;
	struct worker_pool *pool;
-	unsigned long flags;
	int pi;

	rcu_read_lock();

	pr_info("Showing busy workqueues and worker pools:\n");

-	list_for_each_entry_rcu(wq, &workqueues, list) {
-		struct pool_workqueue *pwq;
-		bool idle = true;
-
-		for_each_pwq(pwq, wq) {
-			if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
-				idle = false;
-				break;
-			}
-		}
-		if (idle)
-			continue;
-
-		pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
-
-		for_each_pwq(pwq, wq) {
-			raw_spin_lock_irqsave(&pwq->pool->lock, flags);
-			if (pwq->nr_active || !list_empty(&pwq->inactive_works))
-				show_pwq(pwq);
-			raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
-			/*
-			 * We could be printing a lot from atomic context, e.g.
-			 * sysrq-t -> show_workqueue_state(). Avoid triggering
-			 * hard lockup.
-			 */
-			touch_nmi_watchdog();
-		}
-	}
-
-	for_each_pool(pool, pi) {
-		struct worker *worker;
-		bool first = true;
+	list_for_each_entry_rcu(wq, &workqueues, list)
+		__show_workqueue_state(wq);

-		raw_spin_lock_irqsave(&pool->lock, flags);
-		if (pool->nr_workers == pool->nr_idle)
-			goto next_pool;
-
-		pr_info("pool %d:", pool->id);
-		pr_cont_pool_info(pool);
-		pr_cont(" hung=%us workers=%d",
-			jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
-			pool->nr_workers);
-		if (pool->manager)
-			pr_cont(" manager: %d",
-				task_pid_nr(pool->manager->task));
-		list_for_each_entry(worker, &pool->idle_list, entry) {
-			pr_cont(" %s%d", first ? "idle: " : "",
-				task_pid_nr(worker->task));
-			first = false;
-		}
-		pr_cont("\n");
-	next_pool:
-		raw_spin_unlock_irqrestore(&pool->lock, flags);
-		/*
-		 * We could be printing a lot from atomic context, e.g.
-		 * sysrq-t -> show_workqueue_state(). Avoid triggering
-		 * hard lockup.
-		 */
-		touch_nmi_watchdog();
-	}
+	for_each_pool(pool, pi)
+		__show_worker_pool_state(pool);

	rcu_read_unlock();
}

base-commit: 29616f67fcbd6b26242440372a3b0f0a085109c6
--
2.30.2


2021-10-11 16:50:49

by Tejun Heo

Subject: Re: [RFC PATCH v2] workqueue: Introduce __show_worker_pool_state and __show_workqueue_state.

On Wed, Oct 06, 2021 at 08:38:41AM +1100, Imran Khan wrote:
> So divide show_workqueue_state() into more granular functions
> (__show_workqueue_state() and __show_worker_pool_state()) that show the

This is nit-picky but can we please name them sth like
show_one_workqueue_state() and show_one_worker_pool_state() or just
show_one_workqueue() and show_one_worker_pool() with the corresponding
versions renamed to show_all_workqueues() and show_all_worker_pools()?

Thanks.

--
tejun

2021-10-18 03:25:06

by Imran Khan

Subject: Re: [RFC PATCH v2] workqueue: Introduce __show_worker_pool_state and __show_workqueue_state.

Hi Tejun,
Sorry for getting back late on this.

On 12/10/21 3:48 am, Tejun Heo wrote:
> On Wed, Oct 06, 2021 at 08:38:41AM +1100, Imran Khan wrote:
>> So divide show_workqueue_state() into more granular functions
>> (__show_workqueue_state() and __show_worker_pool_state()) that show the
>
> This is nit-picky but can we please name them sth like
> show_one_workqueue_state() and show_one_worker_pool_state() or just
> show_one_workqueue() and show_one_worker_pool() with the corresponding
> versions renamed to show_all_workqueues() and show_all_worker_pools()?
>

Yes, this sounds good to me. I have renamed the functions to
show_one_workqueue() and show_one_worker_pool(). Along the same lines, I
have renamed show_workqueue_state() to show_all_workqueues(). These
changes are available in v3 of the patch [1].
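
For reference, the expected shape of show_all_workqueues() after the rename
is roughly the following (a sketch based on the names above, not a quote
from the v3 patch):

void show_all_workqueues(void)
{
	struct workqueue_struct *wq;
	struct worker_pool *pool;
	int pi;

	rcu_read_lock();

	pr_info("Showing busy workqueues and worker pools:\n");

	/* Per-workqueue dump, previously __show_workqueue_state(). */
	list_for_each_entry_rcu(wq, &workqueues, list)
		show_one_workqueue(wq);

	/* Per-pool dump, previously __show_worker_pool_state(). */
	for_each_pool(pool, pi)
		show_one_worker_pool(pool);

	rcu_read_unlock();
}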

[1]
https://lore.kernel.org/lkml/[email protected]/

Thanks again for reviewing this.

-- Imran
> Thanks.
>