2022-10-19 15:14:45

by John Ogness

[permalink] [raw]
Subject: [PATCH printk v2 27/38] printk: console_flush_all: use srcu console list iterator

Guarantee safe iteration of the console list by using SRCU.

Note that in the case of a handover, the SRCU read lock is also
released. This is documented in the function description and as
comments in the code. It is a bit tricky, but this preserves the
lockdep lock ordering for the context handing over the
console_lock:

console_lock()
| mutex_acquire(&console_lock_dep_map) <-- console lock
|
console_unlock()
| console_flush_all()
| | srcu_read_lock(&console_srcu) <-- srcu lock
| | console_emit_next_record()
| | | console_lock_spinning_disable_and_check()
| | | | srcu_read_unlock(&console_srcu) <-- srcu unlock
| | | | mutex_release(&console_lock_dep_map) <-- console unlock

Signed-off-by: John Ogness <[email protected]>
---
kernel/printk/printk.c | 38 ++++++++++++++++++++++++++++----------
1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 7ff2fc75fc3b..c4d5d58b5977 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1851,7 +1851,7 @@ static void console_lock_spinning_enable(void)
*
* Return: 1 if the lock rights were passed, 0 otherwise.
*/
-static int console_lock_spinning_disable_and_check(void)
+static int console_lock_spinning_disable_and_check(int cookie)
{
int waiter;

@@ -1870,6 +1870,12 @@ static int console_lock_spinning_disable_and_check(void)

spin_release(&console_owner_dep_map, _THIS_IP_);

+ /*
+ * Preserve lockdep lock ordering. Release the SRCU read lock before
+ * releasing the console_lock.
+ */
+ console_srcu_read_unlock(cookie);
+
/*
* Hand off console_lock to waiter. The waiter will perform
* the up(). After this, the waiter is the console_lock owner.
@@ -2353,7 +2359,7 @@ static ssize_t msg_print_ext_body(char *buf, size_t size,
char *text, size_t text_len,
struct dev_printk_info *dev_info) { return 0; }
static void console_lock_spinning_enable(void) { }
-static int console_lock_spinning_disable_and_check(void) { return 0; }
+static int console_lock_spinning_disable_and_check(int cookie) { return 0; }
static void call_console_driver(struct console *con, const char *text, size_t len,
char *dropped_text)
{
@@ -2695,8 +2701,8 @@ static void __console_unlock(void)
* DROPPED_TEXT_MAX. Otherwise @dropped_text must be NULL.
*
* @handover will be set to true if a printk waiter has taken over the
- * console_lock, in which case the caller is no longer holding the
- * console_lock. Otherwise it is set to false.
+ * console_lock, in which case the caller is no longer holding both the
+ * console_lock and the SRCU read lock. Otherwise it is set to false.
*
* Returns false if the given console has no next record to print, otherwise
* true.
@@ -2704,7 +2710,7 @@ static void __console_unlock(void)
* Requires the console_lock.
*/
static bool console_emit_next_record(struct console *con, char *text, char *ext_text,
- char *dropped_text, bool *handover)
+ char *dropped_text, bool *handover, int cookie)
{
static int panic_console_dropped;
struct printk_info info;
@@ -2764,7 +2770,7 @@ static bool console_emit_next_record(struct console *con, char *text, char *ext_

con->seq++;

- *handover = console_lock_spinning_disable_and_check();
+ *handover = console_lock_spinning_disable_and_check(cookie);
printk_safe_exit_irqrestore(flags);
skip:
return true;
@@ -2801,6 +2807,7 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
bool any_usable = false;
struct console *con;
bool any_progress;
+ int cookie;

*next_seq = 0;
*handover = false;
@@ -2808,7 +2815,8 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
do {
any_progress = false;

- for_each_console(con) {
+ cookie = console_srcu_read_lock();
+ for_each_console_srcu(con) {
bool progress;

if (!console_is_usable(con))
@@ -2819,12 +2827,17 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
/* Extended consoles do not print "dropped messages". */
progress = console_emit_next_record(con, &text[0],
&ext_text[0], NULL,
- handover);
+ handover, cookie);
} else {
progress = console_emit_next_record(con, &text[0],
NULL, &dropped_text[0],
- handover);
+ handover, cookie);
}
+
+ /*
+ * If a handover has occurred, the SRCU read lock
+ * is already released.
+ */
if (*handover)
return false;

@@ -2838,14 +2851,19 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove

/* Allow panic_cpu to take over the consoles safely. */
if (abandon_console_lock_in_panic())
- return false;
+ goto abandon;

if (do_cond_resched)
cond_resched();
}
+ console_srcu_read_unlock(cookie);
} while (any_progress);

return any_usable;
+
+abandon:
+ console_srcu_read_unlock(cookie);
+ return false;
}

/**
--
2.30.2


2022-10-25 15:21:52

by Petr Mladek

[permalink] [raw]
Subject: Re: [PATCH printk v2 27/38] printk: console_flush_all: use srcu console list iterator

On Wed 2022-10-19 17:01:49, John Ogness wrote:
> Guarantee safe iteration of the console list by using SRCU.
>
> Note that in the case of a handover, the SRCU read lock is also
> released. This is documented in the function description and as
> comments in the code. It is a bit tricky, but this preserves the
> lockdep lock ordering for the context handing over the
> console_lock:
>
> console_lock()
> | mutex_acquire(&console_lock_dep_map) <-- console lock
> |
> console_unlock()
> | console_flush_all()
> | | srcu_read_lock(&console_srcu) <-- srcu lock
> | | console_emit_next_record()
> | | | console_lock_spinning_disable_and_check()
> | | | | srcu_read_unlock(&console_srcu) <-- srcu unlock
> | | | | mutex_release(&console_lock_dep_map) <-- console unlock

I believe that we could avoid this complexity, see below.

> Signed-off-by: John Ogness <[email protected]>
> ---
> kernel/printk/printk.c | 38 ++++++++++++++++++++++++++++----------
> 1 file changed, 28 insertions(+), 10 deletions(-)
>
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index 7ff2fc75fc3b..c4d5d58b5977 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -2808,7 +2815,8 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
> do {
> any_progress = false;
>
> - for_each_console(con) {
> + cookie = console_srcu_read_lock();
> + for_each_console_srcu(con) {
> bool progress;
>
> if (!console_is_usable(con))
> @@ -2819,12 +2827,17 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
> /* Extended consoles do not print "dropped messages". */
> progress = console_emit_next_record(con, &text[0],
> &ext_text[0], NULL,
> - handover);
> + handover, cookie);
> } else {
> progress = console_emit_next_record(con, &text[0],
> NULL, &dropped_text[0],
> - handover);
> + handover, cookie);
> }
> +
> + /*
> + * If a handover has occurred, the SRCU read lock
> + * is already released.
> + */
> if (*handover)
> return false;

Please, release the SRCU read lock here:

if (*handover) {
console_srcu_read_unlock(cookie);
return false;
}

The lock should be released in the same function where it was taken.
It does not require passing the cookie and looks more straightforward.

We actually do the same when abandon_console_lock_in_panic()
returns true. We could share the code:

handover_abandon:
console_srcu_read_unlock(cookie);
return false;


Or am I missing anything?

> @@ -2838,14 +2851,19 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
>
> /* Allow panic_cpu to take over the consoles safely. */
> if (abandon_console_lock_in_panic())
> - return false;
> + goto abandon;
>
> if (do_cond_resched)
> cond_resched();
> }
> + console_srcu_read_unlock(cookie);
> } while (any_progress);
>
> return any_usable;
> +
> +abandon:
> + console_srcu_read_unlock(cookie);
> + return false;
> }

Best Regards,
Petr

2022-11-07 00:18:54

by John Ogness

[permalink] [raw]
Subject: Re: [PATCH printk v2 27/38] printk: console_flush_all: use srcu console list iterator

On 2022-10-25, Petr Mladek <[email protected]> wrote:
>> console_lock()
>> | mutex_acquire(&console_lock_dep_map) <-- console lock
>> |
>> console_unlock()
>> | console_flush_all()
>> | | srcu_read_lock(&console_srcu) <-- srcu lock
>> | | console_emit_next_record()
>> | | | console_lock_spinning_disable_and_check()
>> | | | | srcu_read_unlock(&console_srcu) <-- srcu unlock
>> | | | | mutex_release(&console_lock_dep_map) <-- console unlock
>>
>> @@ -2819,12 +2827,17 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
>> /* Extended consoles do not print "dropped messages". */
>> progress = console_emit_next_record(con, &text[0],
>> &ext_text[0], NULL,
>> - handover);
>> + handover, cookie);
>> } else {
>> progress = console_emit_next_record(con, &text[0],
>> NULL, &dropped_text[0],
>> - handover);
>> + handover, cookie);
>> }
>> +
>> + /*
>> + * If a handover has occurred, the SRCU read lock
>> + * is already released.
>> + */
>> if (*handover)
>> return false;
>
> Please, release the SRCU read lock here:
>
> if (*handover) {
> console_srcu_read_unlock(cookie);
> return false;
> }
>
> The lock should be released in the same function where it was taken.
> It does not require passing the cookie and looks more straightforward.

It looks more straightforward, but it is incorrect from a locking
perspective.

The locking order was:

console_lock()
console_srcu_read_lock()

But for a handover at this point in code, console_emit_next_record() has
already released the console_lock (to the spinning context). The
console_srcu_read_lock should have been released first.

> We actually do the same when abandon_console_lock_in_panic()
> returns true. We could share the code:
>
> handover_abandon:
> console_srcu_read_unlock(cookie);
> return false;

This case is different. Here the console_lock was not released yet so it
is fine to perform the console_srcu_read_unlock() here.

John Ogness

2022-11-07 13:09:20

by Petr Mladek

[permalink] [raw]
Subject: Re: [PATCH printk v2 27/38] printk: console_flush_all: use srcu console list iterator

On Mon 2022-11-07 01:06:02, John Ogness wrote:
> On 2022-10-25, Petr Mladek <[email protected]> wrote:
> >> console_lock()
> >> | mutex_acquire(&console_lock_dep_map) <-- console lock
> >> |
> >> console_unlock()
> >> | console_flush_all()
> >> | | srcu_read_lock(&console_srcu) <-- srcu lock
> >> | | console_emit_next_record()
> >> | | | console_lock_spinning_disable_and_check()
> >> | | | | srcu_read_unlock(&console_srcu) <-- srcu unlock
> >> | | | | mutex_release(&console_lock_dep_map) <-- console unlock
> >>
> >> @@ -2819,12 +2827,17 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
> >> /* Extended consoles do not print "dropped messages". */
> >> progress = console_emit_next_record(con, &text[0],
> >> &ext_text[0], NULL,
> >> - handover);
> >> + handover, cookie);
> >> } else {
> >> progress = console_emit_next_record(con, &text[0],
> >> NULL, &dropped_text[0],
> >> - handover);
> >> + handover, cookie);
> >> }
> >> +
> >> + /*
> >> + * If a handover has occurred, the SRCU read lock
> >> + * is already released.
> >> + */
> >> if (*handover)
> >> return false;
> >
> > Please, release the SRCU read lock here:
> >
> > if (*handover) {
> > console_srcu_read_unlock(cookie);
> > return false;
> > }
> >
> > The lock should be released in the same function where it was taken.
> > It does not require passing the cookie and looks more straightforward.
>
> It looks more straightforward, but it is incorrect from a locking
> perspective.
>
> The locking order was:
>
> console_lock()
> console_srcu_read_lock()
>
> But for a handover at this point in code, console_emit_next_record() has
> already released the console_lock (to the spinning context). The
> console_srcu_read_lock should have been released first.

Ah, I see. I should have read all the comments more carefully.
I do not know about any better solution. Feel free to use:

Reviewed-by: Petr Mladek <[email protected]>


Note:

The complexity is caused by calling
mutex_release() in console_lock_spinning_disable_and_check() and
mutex_acquire() in console_trylock_spinning().

I wondered if we really need to do so. These functions actually do not
release or acquire the console_lock. But it seems that it is necessary
because lockdep is not able to track the lock when it was moved into
another process.

I even tried to replace mutex_acquire()/mutex_release() with
rwsem_acquire()/rwsem_release(). But it did not help. lockdep
still complained when I removed these calls from
the _spinning_() API. Sigh.

Best Regards,
Petr