LinuxLists.cc - [RFC] workqueue: remove manual lockdep uses to detect deadlocks

2017-08-25 08:41:11

Subject: [RFC] workqueue: remove manual lockdep uses to detect deadlocks

Hello all,

This is _RFC_.

I want to request for comments about if it's reasonable conceptually. If
yes, I want to resend after working it more carefully.

Could you let me know your opinions about this?

----->8-----
>From 448360c343477fff63df766544eec4620657a59e Mon Sep 17 00:00:00 2001
From: Byungchul Park <[email protected]>
Date: Fri, 25 Aug 2017 17:35:07 +0900
Subject: [RFC] workqueue: remove manual lockdep uses to detect deadlocks

We introduced the following commit to detect deadlocks caused by
wait_for_completion() in flush_{workqueue, work}() and other locks. But
now LOCKDEP_COMPLETIONS is introduced, such works are automatically done
by LOCKDEP_COMPLETIONS. So it doesn't have to be done manually anymore.
Removed it.

commit 4e6045f134784f4b158b3c0f7a282b04bd816887
workqueue: debug flushing deadlocks with lockdep

Signed-off-by: Byungchul Park <[email protected]>
---
include/linux/workqueue.h | 43 -------------------------------------------
kernel/workqueue.c | 38 --------------------------------------
2 files changed, 81 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index db6dc9d..91d0e14 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -8,7 +8,6 @@
#include <linux/timer.h>
#include <linux/linkage.h>
#include <linux/bitops.h>
-#include <linux/lockdep.h>
#include <linux/threads.h>
#include <linux/atomic.h>
#include <linux/cpumask.h>
@@ -101,9 +100,6 @@ struct work_struct {
atomic_long_t data;
struct list_head entry;
work_func_t func;
-#ifdef CONFIG_LOCKDEP
- struct lockdep_map lockdep_map;
-#endif
};

#define WORK_DATA_INIT() ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL)
@@ -154,23 +150,10 @@ struct execute_work {
struct work_struct work;
};

-#ifdef CONFIG_LOCKDEP
-/*
- * NB: because we have to copy the lockdep_map, setting _key
- * here is required, otherwise it could get initialised to the
- * copy of the lockdep_map!
- */
-#define __WORK_INIT_LOCKDEP_MAP(n, k) \
- .lockdep_map = STATIC_LOCKDEP_MAP_INIT(n, k),
-#else
-#define __WORK_INIT_LOCKDEP_MAP(n, k)
-#endif
-
#define __WORK_INITIALIZER(n, f) { \
.data = WORK_DATA_STATIC_INIT(), \
.entry = { &(n).entry, &(n).entry }, \
.func = (f), \
- __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \
}

#define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \
@@ -211,26 +194,13 @@ static inline void destroy_delayed_work_on_stack(struct delayed_work *work) { }
* assignment of the work data initializer allows the compiler
* to generate better code.
*/
-#ifdef CONFIG_LOCKDEP
#define __INIT_WORK(_work, _func, _onstack) \
do { \
- static struct lock_class_key __key; \
- \
__init_work((_work), _onstack); \
(_work)->data = (atomic_long_t) WORK_DATA_INIT(); \
- lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0); \
INIT_LIST_HEAD(&(_work)->entry); \
(_work)->func = (_func); \
} while (0)
-#else
-#define __INIT_WORK(_work, _func, _onstack) \
- do { \
- __init_work((_work), _onstack); \
- (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \
- INIT_LIST_HEAD(&(_work)->entry); \
- (_work)->func = (_func); \
- } while (0)
-#endif

#define INIT_WORK(_work, _func) \
__INIT_WORK((_work), (_func), 0)
@@ -392,22 +362,9 @@ enum {
* RETURNS:
* Pointer to the allocated workqueue on success, %NULL on failure.
*/
-#ifdef CONFIG_LOCKDEP
-#define alloc_workqueue(fmt, flags, max_active, args...) \
-({ \
- static struct lock_class_key __key; \
- const char *__lock_name; \
- \
- __lock_name = #fmt#args; \
- \
- __alloc_workqueue_key((fmt), (flags), (max_active), \
- &__key, __lock_name, ##args); \
-})
-#else
#define alloc_workqueue(fmt, flags, max_active, args...) \
__alloc_workqueue_key((fmt), (flags), (max_active), \
NULL, NULL, ##args)
-#endif

/**
* alloc_ordered_workqueue - allocate an ordered workqueue
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f128b3b..87d4bc2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -40,7 +40,6 @@
#include <linux/freezer.h>
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
-#include <linux/lockdep.h>
#include <linux/idr.h>
#include <linux/jhash.h>
#include <linux/hashtable.h>
@@ -260,9 +259,6 @@ struct workqueue_struct {
#ifdef CONFIG_SYSFS
struct wq_device *wq_dev; /* I: for sysfs interface */
#endif
-#ifdef CONFIG_LOCKDEP
- struct lockdep_map lockdep_map;
-#endif
char name[WQ_NAME_LEN]; /* I: workqueue name */

/*
@@ -2024,18 +2020,7 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
int work_color;
struct worker *collision;
-#ifdef CONFIG_LOCKDEP
- /*
- * It is permissible to free the struct work_struct from
- * inside the function that is called from it, this we need to
- * take into account for lockdep too. To avoid bogus "held
- * lock freed" warnings as well as problems when looking into
- * work->lockdep_map, make a copy and use that here.
- */
- struct lockdep_map lockdep_map;

- lockdep_copy_map(&lockdep_map, &work->lockdep_map);
-#endif
/* ensure we're on the correct CPU */
WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
raw_smp_processor_id() != pool->cpu);
@@ -2091,8 +2076,6 @@ static void process_one_work(struct worker *worker, struct work_struct *work)

spin_unlock_irq(&pool->lock);

- lock_map_acquire_read(&pwq->wq->lockdep_map);
- lock_map_acquire(&lockdep_map);
crossrelease_hist_start(XHLOCK_PROC);
trace_workqueue_execute_start(work);
worker->current_func(work);
@@ -2102,8 +2085,6 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
*/
trace_workqueue_execute_end(work);
crossrelease_hist_end(XHLOCK_PROC);
- lock_map_release(&lockdep_map);
- lock_map_release(&pwq->wq->lockdep_map);

if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
@@ -2598,9 +2579,6 @@ void flush_workqueue(struct workqueue_struct *wq)
if (WARN_ON(!wq_online))
return;

- lock_map_acquire(&wq->lockdep_map);
- lock_map_release(&wq->lockdep_map);
-
mutex_lock(&wq->mutex);

/*
@@ -2825,18 +2803,6 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
insert_wq_barrier(pwq, barr, work, worker);
spin_unlock_irq(&pool->lock);

- /*
- * If @max_active is 1 or rescuer is in use, flushing another work
- * item on the same workqueue may lead to deadlock. Make sure the
- * flusher is not running on the same workqueue by verifying write
- * access.
- */
- if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)
- lock_map_acquire(&pwq->wq->lockdep_map);
- else
- lock_map_acquire_read(&pwq->wq->lockdep_map);
- lock_map_release(&pwq->wq->lockdep_map);
-
return true;
already_gone:
spin_unlock_irq(&pool->lock);
@@ -2861,9 +2827,6 @@ bool flush_work(struct work_struct *work)
if (WARN_ON(!wq_online))
return false;

- lock_map_acquire(&work->lockdep_map);
- lock_map_release(&work->lockdep_map);
-
if (start_flush_work(work, &barr)) {
wait_for_completion(&barr.done);
destroy_work_on_stack(&barr.work);
@@ -3996,7 +3959,6 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
INIT_LIST_HEAD(&wq->flusher_overflow);
INIT_LIST_HEAD(&wq->maydays);

- lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
INIT_LIST_HEAD(&wq->list);

if (alloc_and_link_pwqs(wq) < 0)
--
1.9.1

2017-08-25 08:52:53

by Byungchul Park

[permalink] [raw]

Subject: Re: [RFC] workqueue: remove manual lockdep uses to detect deadlocks

On Fri, Aug 25, 2017 at 05:41:03PM +0900, Byungchul Park wrote:
> Hello all,
>
> This is _RFC_.
>
> I want to request for comments about if it's reasonable conceptually. If
> yes, I want to resend after working it more carefully.
>
> Could you let me know your opinions about this?

+cc [email protected]
+cc [email protected]

> ----->8-----
> >From 448360c343477fff63df766544eec4620657a59e Mon Sep 17 00:00:00 2001
> From: Byungchul Park <[email protected]>
> Date: Fri, 25 Aug 2017 17:35:07 +0900
> Subject: [RFC] workqueue: remove manual lockdep uses to detect deadlocks
>
> We introduced the following commit to detect deadlocks caused by
> wait_for_completion() in flush_{workqueue, work}() and other locks. But
> now LOCKDEP_COMPLETIONS is introduced, such works are automatically done
> by LOCKDEP_COMPLETIONS. So it doesn't have to be done manually anymore.
> Removed it.
>
> commit 4e6045f134784f4b158b3c0f7a282b04bd816887
> workqueue: debug flushing deadlocks with lockdep
>
> Signed-off-by: Byungchul Park <[email protected]>
> ---
> include/linux/workqueue.h | 43 -------------------------------------------
> kernel/workqueue.c | 38 --------------------------------------
> 2 files changed, 81 deletions(-)
>
> diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
> index db6dc9d..91d0e14 100644
> --- a/include/linux/workqueue.h
> +++ b/include/linux/workqueue.h
> @@ -8,7 +8,6 @@
> #include <linux/timer.h>
> #include <linux/linkage.h>
> #include <linux/bitops.h>
> -#include <linux/lockdep.h>
> #include <linux/threads.h>
> #include <linux/atomic.h>
> #include <linux/cpumask.h>
> @@ -101,9 +100,6 @@ struct work_struct {
> atomic_long_t data;
> struct list_head entry;
> work_func_t func;
> -#ifdef CONFIG_LOCKDEP
> - struct lockdep_map lockdep_map;
> -#endif
> };
>
> #define WORK_DATA_INIT() ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL)
> @@ -154,23 +150,10 @@ struct execute_work {
> struct work_struct work;
> };
>
> -#ifdef CONFIG_LOCKDEP
> -/*
> - * NB: because we have to copy the lockdep_map, setting _key
> - * here is required, otherwise it could get initialised to the
> - * copy of the lockdep_map!
> - */
> -#define __WORK_INIT_LOCKDEP_MAP(n, k) \
> - .lockdep_map = STATIC_LOCKDEP_MAP_INIT(n, k),
> -#else
> -#define __WORK_INIT_LOCKDEP_MAP(n, k)
> -#endif
> -
> #define __WORK_INITIALIZER(n, f) { \
> .data = WORK_DATA_STATIC_INIT(), \
> .entry = { &(n).entry, &(n).entry }, \
> .func = (f), \
> - __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \
> }
>
> #define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \
> @@ -211,26 +194,13 @@ static inline void destroy_delayed_work_on_stack(struct delayed_work *work) { }
> * assignment of the work data initializer allows the compiler
> * to generate better code.
> */
> -#ifdef CONFIG_LOCKDEP
> #define __INIT_WORK(_work, _func, _onstack) \
> do { \
> - static struct lock_class_key __key; \
> - \
> __init_work((_work), _onstack); \
> (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \
> - lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0); \
> INIT_LIST_HEAD(&(_work)->entry); \
> (_work)->func = (_func); \
> } while (0)
> -#else
> -#define __INIT_WORK(_work, _func, _onstack) \
> - do { \
> - __init_work((_work), _onstack); \
> - (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \
> - INIT_LIST_HEAD(&(_work)->entry); \
> - (_work)->func = (_func); \
> - } while (0)
> -#endif
>
> #define INIT_WORK(_work, _func) \
> __INIT_WORK((_work), (_func), 0)
> @@ -392,22 +362,9 @@ enum {
> * RETURNS:
> * Pointer to the allocated workqueue on success, %NULL on failure.
> */
> -#ifdef CONFIG_LOCKDEP
> -#define alloc_workqueue(fmt, flags, max_active, args...) \
> -({ \
> - static struct lock_class_key __key; \
> - const char *__lock_name; \
> - \
> - __lock_name = #fmt#args; \
> - \
> - __alloc_workqueue_key((fmt), (flags), (max_active), \
> - &__key, __lock_name, ##args); \
> -})
> -#else
> #define alloc_workqueue(fmt, flags, max_active, args...) \
> __alloc_workqueue_key((fmt), (flags), (max_active), \
> NULL, NULL, ##args)
> -#endif
>
> /**
> * alloc_ordered_workqueue - allocate an ordered workqueue
> diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> index f128b3b..87d4bc2 100644
> --- a/kernel/workqueue.c
> +++ b/kernel/workqueue.c
> @@ -40,7 +40,6 @@
> #include <linux/freezer.h>
> #include <linux/kallsyms.h>
> #include <linux/debug_locks.h>
> -#include <linux/lockdep.h>
> #include <linux/idr.h>
> #include <linux/jhash.h>
> #include <linux/hashtable.h>
> @@ -260,9 +259,6 @@ struct workqueue_struct {
> #ifdef CONFIG_SYSFS
> struct wq_device *wq_dev; /* I: for sysfs interface */
> #endif
> -#ifdef CONFIG_LOCKDEP
> - struct lockdep_map lockdep_map;
> -#endif
> char name[WQ_NAME_LEN]; /* I: workqueue name */
>
> /*
> @@ -2024,18 +2020,7 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
> bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
> int work_color;
> struct worker *collision;
> -#ifdef CONFIG_LOCKDEP
> - /*
> - * It is permissible to free the struct work_struct from
> - * inside the function that is called from it, this we need to
> - * take into account for lockdep too. To avoid bogus "held
> - * lock freed" warnings as well as problems when looking into
> - * work->lockdep_map, make a copy and use that here.
> - */
> - struct lockdep_map lockdep_map;
>
> - lockdep_copy_map(&lockdep_map, &work->lockdep_map);
> -#endif
> /* ensure we're on the correct CPU */
> WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
> raw_smp_processor_id() != pool->cpu);
> @@ -2091,8 +2076,6 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
>
> spin_unlock_irq(&pool->lock);
>
> - lock_map_acquire_read(&pwq->wq->lockdep_map);
> - lock_map_acquire(&lockdep_map);
> crossrelease_hist_start(XHLOCK_PROC);
> trace_workqueue_execute_start(work);
> worker->current_func(work);
> @@ -2102,8 +2085,6 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
> */
> trace_workqueue_execute_end(work);
> crossrelease_hist_end(XHLOCK_PROC);
> - lock_map_release(&lockdep_map);
> - lock_map_release(&pwq->wq->lockdep_map);
>
> if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
> pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
> @@ -2598,9 +2579,6 @@ void flush_workqueue(struct workqueue_struct *wq)
> if (WARN_ON(!wq_online))
> return;
>
> - lock_map_acquire(&wq->lockdep_map);
> - lock_map_release(&wq->lockdep_map);
> -
> mutex_lock(&wq->mutex);
>
> /*
> @@ -2825,18 +2803,6 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
> insert_wq_barrier(pwq, barr, work, worker);
> spin_unlock_irq(&pool->lock);
>
> - /*
> - * If @max_active is 1 or rescuer is in use, flushing another work
> - * item on the same workqueue may lead to deadlock. Make sure the
> - * flusher is not running on the same workqueue by verifying write
> - * access.
> - */
> - if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)
> - lock_map_acquire(&pwq->wq->lockdep_map);
> - else
> - lock_map_acquire_read(&pwq->wq->lockdep_map);
> - lock_map_release(&pwq->wq->lockdep_map);
> -
> return true;
> already_gone:
> spin_unlock_irq(&pool->lock);
> @@ -2861,9 +2827,6 @@ bool flush_work(struct work_struct *work)
> if (WARN_ON(!wq_online))
> return false;
>
> - lock_map_acquire(&work->lockdep_map);
> - lock_map_release(&work->lockdep_map);
> -
> if (start_flush_work(work, &barr)) {
> wait_for_completion(&barr.done);
> destroy_work_on_stack(&barr.work);
> @@ -3996,7 +3959,6 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
> INIT_LIST_HEAD(&wq->flusher_overflow);
> INIT_LIST_HEAD(&wq->maydays);
>
> - lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
> INIT_LIST_HEAD(&wq->list);
>
> if (alloc_and_link_pwqs(wq) < 0)
> --
> 1.9.1

2017-08-25 13:34:50

by Tejun Heo

[permalink] [raw]

Subject: Re: [RFC] workqueue: remove manual lockdep uses to detect deadlocks

2017-08-25 15:49:30

by Byungchul Park

[permalink] [raw]

Subject: Re: [RFC] workqueue: remove manual lockdep uses to detect deadlocks

On Fri, Aug 25, 2017 at 10:34 PM, Tejun Heo <[email protected]> wrote:
> On Fri, Aug 25, 2017 at 05:41:03PM +0900, Byungchul Park wrote:
>> Hello all,
>>
>> This is _RFC_.
>>
>> I want to request for comments about if it's reasonable conceptually. If
>> yes, I want to resend after working it more carefully.
>>
>> Could you let me know your opinions about this?
>>
>> ----->8-----
>> From 448360c343477fff63df766544eec4620657a59e Mon Sep 17 00:00:00 2001
>> From: Byungchul Park <[email protected]>
>> Date: Fri, 25 Aug 2017 17:35:07 +0900
>> Subject: [RFC] workqueue: remove manual lockdep uses to detect deadlocks
>>
>> We introduced the following commit to detect deadlocks caused by
>> wait_for_completion() in flush_{workqueue, work}() and other locks. But
>> now LOCKDEP_COMPLETIONS is introduced, such works are automatically done
>> by LOCKDEP_COMPLETIONS. So it doesn't have to be done manually anymore.
>> Removed it.
>
> I'm not following lockdep development, so can't really comment but if
> you're saying that wq can retain the same level of protection while
> not having explicit annotations, conceptually, it's of course great.

Well.. I don't think it's the same level currently. But, I can make it with some
modification. I expect the wq code to become much simpler.

> However, how would it distinguish things like flushing another work

I think it must be distinguished with what it actually waits for, e.i.
completion
variables instead of work or wq. I will make it next week and let you know.

> item on a workqueue w/ max_active of 1?

I will answer it wrt max_active == 1 next week. I need to review wq code.

--
Thanks,
Byungchul

2017-08-28 06:56:03

by Peter Zijlstra

[permalink] [raw]

Subject: Re: [RFC] workqueue: remove manual lockdep uses to detect deadlocks

2017-08-28 10:53:05

by Byungchul Park

[permalink] [raw]

Subject: Re: [RFC] workqueue: remove manual lockdep uses to detect deadlocks

On Mon, Aug 28, 2017 at 3:55 PM, Peter Zijlstra <[email protected]> wrote:
> On Fri, Aug 25, 2017 at 05:41:03PM +0900, Byungchul Park wrote:
>> Hello all,
>>
>> This is _RFC_.
>>
>> I want to request for comments about if it's reasonable conceptually. If
>> yes, I want to resend after working it more carefully.
>>
>> Could you let me know your opinions about this?
>>
>> ----->8-----
>> From 448360c343477fff63df766544eec4620657a59e Mon Sep 17 00:00:00 2001
>> From: Byungchul Park <[email protected]>
>> Date: Fri, 25 Aug 2017 17:35:07 +0900
>> Subject: [RFC] workqueue: remove manual lockdep uses to detect deadlocks
>>
>> We introduced the following commit to detect deadlocks caused by
>> wait_for_completion() in flush_{workqueue, work}() and other locks. But
>> now LOCKDEP_COMPLETIONS is introduced, such works are automatically done
>> by LOCKDEP_COMPLETIONS. So it doesn't have to be done manually anymore.
>> Removed it.
>>
>
> No.. the existing annotation is strictly better because it will _always_
> warn. It doesn't need to first observe things just right.

Right. That is exactly why I replied to TJ like:

https://lkml.org/lkml/2017/8/25/490

--
Thanks,
Byungchul

2017-08-29 00:23:36

by Byungchul Park

[permalink] [raw]

Subject: Re: [RFC] workqueue: remove manual lockdep uses to detect deadlocks

On Fri, Aug 25, 2017 at 06:34:43AM -0700, Tejun Heo wrote:
> On Fri, Aug 25, 2017 at 05:41:03PM +0900, Byungchul Park wrote:
> > Hello all,
> >
> > This is _RFC_.
> >
> > I want to request for comments about if it's reasonable conceptually. If
> > yes, I want to resend after working it more carefully.
> >
> > Could you let me know your opinions about this?
> >
> > ----->8-----
> > From 448360c343477fff63df766544eec4620657a59e Mon Sep 17 00:00:00 2001
> > From: Byungchul Park <[email protected]>
> > Date: Fri, 25 Aug 2017 17:35:07 +0900
> > Subject: [RFC] workqueue: remove manual lockdep uses to detect deadlocks
> >
> > We introduced the following commit to detect deadlocks caused by
> > wait_for_completion() in flush_{workqueue, work}() and other locks. But
> > now LOCKDEP_COMPLETIONS is introduced, such works are automatically done
> > by LOCKDEP_COMPLETIONS. So it doesn't have to be done manually anymore.
> > Removed it.
>
> I'm not following lockdep development, so can't really comment but if
> you're saying that wq can retain the same level of protection while
> not having explicit annotations, conceptually, it's of course great.
> However, how would it distinguish things like flushing another work
> item on a workqueue w/ max_active of 1?

Do you mean the following?

process_one_work()
acquire(W1) <---------+- distinguishable?
work->fn() |
flush_work(W2) |
acquire(W2) <---+
release(W2)
release(W1)

2017-08-29 00:55:23

by Byungchul Park

[permalink] [raw]

Subject: Re: [RFC] workqueue: remove manual lockdep uses to detect deadlocks

On Mon, Aug 28, 2017 at 08:55:53AM +0200, Peter Zijlstra wrote:
> On Fri, Aug 25, 2017 at 05:41:03PM +0900, Byungchul Park wrote:
> > Hello all,
> >
> > This is _RFC_.
> >
> > I want to request for comments about if it's reasonable conceptually. If
> > yes, I want to resend after working it more carefully.
> >
> > Could you let me know your opinions about this?
> >
> > ----->8-----
> > From 448360c343477fff63df766544eec4620657a59e Mon Sep 17 00:00:00 2001
> > From: Byungchul Park <[email protected]>
> > Date: Fri, 25 Aug 2017 17:35:07 +0900
> > Subject: [RFC] workqueue: remove manual lockdep uses to detect deadlocks
> >
> > We introduced the following commit to detect deadlocks caused by
> > wait_for_completion() in flush_{workqueue, work}() and other locks. But
> > now LOCKDEP_COMPLETIONS is introduced, such works are automatically done
> > by LOCKDEP_COMPLETIONS. So it doesn't have to be done manually anymore.
> > Removed it.
> >
>
> No.. the existing annotation is strictly better because it will _always_
> warn. It doesn't need to first observe things just right.

In addition, the existing annotation is never good, but just able to
detect deadlocks aggresively. However, it's inevitable to create false
dependencies. I mean some dependencies between work/wq and any locks
inside of each work might be false ones sometimes.

2017-08-29 18:57:32

by Peter Zijlstra

[permalink] [raw]

Subject: Re: [RFC] workqueue: remove manual lockdep uses to detect deadlocks

On Sat, Aug 26, 2017 at 12:49:26AM +0900, Byungchul Park wrote:
> > However, how would it distinguish things like flushing another work
>
> I think it must be distinguished with what it actually waits for, e.i.
> completion
> variables instead of work or wq. I will make it next week and let you know.

So no. The existing annotations are strictly better than relying on
cross-release.

As you know the problem with cross-release is that it is timing
dependent. You need to actually observe the problematic sequence before
it can warn, and only the whole instance->class mapping saves us from
actually hitting the deadlock.

Cross-release can result in deadlocks without warnings. If you were to
run:

mutex_lock(A);
mutex_lock(A);
complete(C);
wait_for_completion(C);

You'd deadlock without issue. Only if we observe this:

mutex_lock(A);
wait_for_completion(C);
mutex_lock(A);
complete(C);

Where we acquire A after wait_for_completion() but before complete()
will we observe the deadlock.

The same would be true for using cross-release for workqueues as well,
something like:

W:
mutex_lock(A)

mutex_lock(A)
flush_work(W)

would go unreported whereas the current workqueue annotation will
generate a splat.

This does not mean cross-release isn't worth it, its better than nothing,
but its strictly weaker than traditional annotations.

So where a traditional annotation is possible, we should use them.

2017-08-30 01:53:43

by Byungchul Park

[permalink] [raw]

Subject: Re: [RFC] workqueue: remove manual lockdep uses to detect deadlocks

On Tue, Aug 29, 2017 at 08:57:27PM +0200, Peter Zijlstra wrote:
> On Sat, Aug 26, 2017 at 12:49:26AM +0900, Byungchul Park wrote:
> > > However, how would it distinguish things like flushing another work
> >
> > I think it must be distinguished with what it actually waits for, e.i.
> > completion
> > variables instead of work or wq. I will make it next week and let you know.
>
> So no. The existing annotations are strictly better than relying on
> cross-release.

Thank you for exaplanation but, as I already said, this is why I said
"I don't think it's the same level currently. But, I can make it with
some modification." to TJ:

https://www.mail-archive.com/[email protected]/msg1479560.html

And also I mentioned we might need the current code inevitably but, the
existing annotations are never good and why here:

https://www.mail-archive.com/[email protected]/msg1480173.html

> As you know the problem with cross-release is that it is timing
> dependent. You need to actually observe the problematic sequence before
> it can warn, and only the whole instance->class mapping saves us from
> actually hitting the deadlock.

Of course.

> The same would be true for using cross-release for workqueues as well,
> something like:
>
> W:
> mutex_lock(A)
>
> mutex_lock(A)
> flush_work(W)
>
> would go unreported whereas the current workqueue annotation will
> generate a splat.

Of course.

That's why I said we need to work on it. But it should be modified so
that the wq code becomes more clear instead of abusing weird acquire()s.

2017-08-30 06:23:55

by Peter Zijlstra

[permalink] [raw]

Subject: Re: [RFC] workqueue: remove manual lockdep uses to detect deadlocks

On Wed, Aug 30, 2017 at 10:53:39AM +0900, Byungchul Park wrote:
> On Tue, Aug 29, 2017 at 08:57:27PM +0200, Peter Zijlstra wrote:
> > On Sat, Aug 26, 2017 at 12:49:26AM +0900, Byungchul Park wrote:
> > > > However, how would it distinguish things like flushing another work
> > >
> > > I think it must be distinguished with what it actually waits for, e.i.
> > > completion
> > > variables instead of work or wq. I will make it next week and let you know.
> >
> > So no. The existing annotations are strictly better than relying on
> > cross-release.
>
> Thank you for exaplanation but, as I already said, this is why I said
> "I don't think it's the same level currently. But, I can make it with
> some modification." to TJ:
>
> https://www.mail-archive.com/[email protected]/msg1479560.html
>
> And also I mentioned we might need the current code inevitably but, the
> existing annotations are never good and why here:
>
> https://www.mail-archive.com/[email protected]/msg1480173.html

I can read the words, but have no idea what you're trying to say.