2024-02-18 18:58:28

by John Ogness

[permalink] [raw]
Subject: [PATCH printk v2 08/26] printk: nbcon: Implement processing in port->lock wrapper

Currently the port->lock wrappers uart_port_lock(),
uart_port_unlock() (and their variants) only lock/unlock
the spin_lock.

If the port is an nbcon console, the wrappers must also
acquire/release the console and mark the region as unsafe. This
allows general port->lock synchronization to be synchronized
with the nbcon console ownership.

Add a flag to struct uart_port to track nbcon console ownership.

Signed-off-by: John Ogness <[email protected]>
---
drivers/tty/serial/8250/8250_port.c | 1 +
include/linux/printk.h | 13 +++++
include/linux/serial_core.h | 19 ++++++-
kernel/printk/nbcon.c | 77 +++++++++++++++++++++++++++++
4 files changed, 108 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 141627370aab..16e2705b4867 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -3284,6 +3284,7 @@ void serial8250_init_port(struct uart_8250_port *up)
struct uart_port *port = &up->port;

spin_lock_init(&port->lock);
+ port->nbcon_locked_port = false;
port->ctrl_id = 0;
port->pm = NULL;
port->ops = &serial8250_pops;
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 8d5c5588eec9..ef57a4d93ae2 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -9,6 +9,8 @@
#include <linux/ratelimit_types.h>
#include <linux/once_lite.h>

+struct uart_port;
+
extern const char linux_banner[];
extern const char linux_proc_banner[];

@@ -195,6 +197,8 @@ void show_regs_print_info(const char *log_lvl);
extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold;
extern asmlinkage void dump_stack(void) __cold;
void printk_trigger_flush(void);
+extern void uart_nbcon_acquire(struct uart_port *up);
+extern void uart_nbcon_release(struct uart_port *up);
#else
static inline __printf(1, 0)
int vprintk(const char *s, va_list args)
@@ -274,6 +278,15 @@ static inline void dump_stack(void)
static inline void printk_trigger_flush(void)
{
}
+
+static inline void uart_nbcon_acquire(struct uart_port *up)
+{
+}
+
+static inline void uart_nbcon_release(struct uart_port *up)
+{
+}
+
#endif

bool this_cpu_in_panic(void);
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 89f7b6c63598..d4b93d721715 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -488,6 +488,7 @@ struct uart_port {
struct uart_icount icount; /* statistics */

struct console *cons; /* struct console, if any */
+ bool nbcon_locked_port; /* True, if the port is locked by nbcon */
/* flags must be updated while holding port mutex */
upf_t flags;

@@ -595,6 +596,7 @@ struct uart_port {
static inline void uart_port_lock(struct uart_port *up)
{
spin_lock(&up->lock);
+ uart_nbcon_acquire(up);
}

/**
@@ -604,6 +606,7 @@ static inline void uart_port_lock(struct uart_port *up)
static inline void uart_port_lock_irq(struct uart_port *up)
{
spin_lock_irq(&up->lock);
+ uart_nbcon_acquire(up);
}

/**
@@ -614,6 +617,7 @@ static inline void uart_port_lock_irq(struct uart_port *up)
static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags)
{
spin_lock_irqsave(&up->lock, *flags);
+ uart_nbcon_acquire(up);
}

/**
@@ -624,7 +628,11 @@ static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *f
*/
static inline bool uart_port_trylock(struct uart_port *up)
{
- return spin_trylock(&up->lock);
+ if (!spin_trylock(&up->lock))
+ return false;
+
+ uart_nbcon_acquire(up);
+ return true;
}

/**
@@ -636,7 +644,11 @@ static inline bool uart_port_trylock(struct uart_port *up)
*/
static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags)
{
- return spin_trylock_irqsave(&up->lock, *flags);
+ if (!spin_trylock_irqsave(&up->lock, *flags))
+ return false;
+
+ uart_nbcon_acquire(up);
+ return true;
}

/**
@@ -645,6 +657,7 @@ static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long
*/
static inline void uart_port_unlock(struct uart_port *up)
{
+ uart_nbcon_release(up);
spin_unlock(&up->lock);
}

@@ -654,6 +667,7 @@ static inline void uart_port_unlock(struct uart_port *up)
*/
static inline void uart_port_unlock_irq(struct uart_port *up)
{
+ uart_nbcon_release(up);
spin_unlock_irq(&up->lock);
}

@@ -664,6 +678,7 @@ static inline void uart_port_unlock_irq(struct uart_port *up)
*/
static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags)
{
+ uart_nbcon_release(up);
spin_unlock_irqrestore(&up->lock, flags);
}

diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c
index 8ecd76aa22e6..02e8fdc1ea43 100644
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -6,6 +6,7 @@
#include <linux/console.h>
#include <linux/delay.h>
#include <linux/slab.h>
+#include <linux/serial_core.h>
#include "internal.h"
/*
* Printk console printing implementation for consoles which does not depend
@@ -995,3 +996,79 @@ void nbcon_free(struct console *con)

con->pbufs = NULL;
}
+
+static inline bool uart_is_nbcon(struct uart_port *up)
+{
+ int cookie;
+ bool ret;
+
+ if (!uart_console(up))
+ return false;
+
+ cookie = console_srcu_read_lock();
+ ret = (console_srcu_read_flags(up->cons) & CON_NBCON);
+ console_srcu_read_unlock(cookie);
+ return ret;
+}
+
+/**
+ * uart_nbcon_acquire - The second half of the port locking wrapper
+ * @up: The uart port whose @lock was locked
+ *
+ * The uart_port_lock() wrappers will first lock the spin_lock @up->lock.
+ * Then this function is called to implement nbcon-specific processing.
+ *
+ * If @up is an nbcon console, this console will be acquired and marked as
+ * unsafe. Otherwise this function does nothing.
+ */
+void uart_nbcon_acquire(struct uart_port *up)
+{
+ struct console *con = up->cons;
+ struct nbcon_context ctxt;
+
+ if (!uart_is_nbcon(up))
+ return;
+
+ WARN_ON_ONCE(up->nbcon_locked_port);
+
+ do {
+ do {
+ memset(&ctxt, 0, sizeof(ctxt));
+ ctxt.console = con;
+ ctxt.prio = NBCON_PRIO_NORMAL;
+ } while (!nbcon_context_try_acquire(&ctxt));
+
+ } while (!nbcon_context_enter_unsafe(&ctxt));
+
+ up->nbcon_locked_port = true;
+}
+EXPORT_SYMBOL_GPL(uart_nbcon_acquire);
+
+/**
+ * uart_nbcon_release - The first half of the port unlocking wrapper
+ * @up: The uart port whose @lock is about to be unlocked
+ *
+ * The uart_port_unlock() wrappers will first call this function to implement
+ * nbcon-specific processing. Then afterwards the uart_port_unlock() wrappers
+ * will unlock the spin_lock @up->lock.
+ *
+ * If @up is an nbcon console, the console will be marked as safe and
+ * released. Otherwise this function does nothing.
+ */
+void uart_nbcon_release(struct uart_port *up)
+{
+ struct console *con = up->cons;
+ struct nbcon_context ctxt = {
+ .console = con,
+ .prio = NBCON_PRIO_NORMAL,
+ };
+
+ if (!up->nbcon_locked_port)
+ return;
+
+ if (nbcon_context_exit_unsafe(&ctxt))
+ nbcon_context_release(&ctxt);
+
+ up->nbcon_locked_port = false;
+}
+EXPORT_SYMBOL_GPL(uart_nbcon_release);
--
2.39.2



2024-02-19 12:16:49

by Andy Shevchenko

[permalink] [raw]
Subject: Re: [PATCH printk v2 08/26] printk: nbcon: Implement processing in port->lock wrapper

On Sun, Feb 18, 2024 at 08:03:08PM +0106, John Ogness wrote:
> Currently the port->lock wrappers uart_port_lock(),
> uart_port_unlock() (and their variants) only lock/unlock
> the spin_lock.
>
> If the port is an nbcon console, the wrappers must also
> acquire/release the console and mark the region as unsafe. This
> allows general port->lock synchronization to be synchronized
> with the nbcon console ownership.
>
> Add a flag to struct uart_port to track nbcon console ownership.

..

> --- a/kernel/printk/nbcon.c
> +++ b/kernel/printk/nbcon.c
> @@ -6,6 +6,7 @@
> #include <linux/console.h>
> #include <linux/delay.h>
> #include <linux/slab.h>
> +#include <linux/serial_core.h>

The headers in this file is a mess. But here you can at least keep the piece
ordered, can you?

..

> +static inline bool uart_is_nbcon(struct uart_port *up)
> +{
> + int cookie;
> + bool ret;
> +
> + if (!uart_console(up))
> + return false;
> +
> + cookie = console_srcu_read_lock();
> + ret = (console_srcu_read_flags(up->cons) & CON_NBCON);

The outer parentheses are redundant.

> + console_srcu_read_unlock(cookie);
> + return ret;
> +}

..

> +void uart_nbcon_acquire(struct uart_port *up)
> +{
> + struct console *con = up->cons;
> + struct nbcon_context ctxt;
> +
> + if (!uart_is_nbcon(up))
> + return;

> + WARN_ON_ONCE(up->nbcon_locked_port);

+ include linux/bug.h

> + do {
> + do {
> + memset(&ctxt, 0, sizeof(ctxt));

+ include linux/string.h

> + ctxt.console = con;
> + ctxt.prio = NBCON_PRIO_NORMAL;
> + } while (!nbcon_context_try_acquire(&ctxt));
> +
> + } while (!nbcon_context_enter_unsafe(&ctxt));
> +
> + up->nbcon_locked_port = true;
> +}
> +EXPORT_SYMBOL_GPL(uart_nbcon_acquire);

+ include linux/export.h

--
With Best Regards,
Andy Shevchenko



2024-02-19 14:19:24

by John Ogness

[permalink] [raw]
Subject: Re: [PATCH printk v2 08/26] printk: nbcon: Implement processing in port->lock wrapper

On 2024-02-19, Andy Shevchenko <[email protected]> wrote:
>> --- a/kernel/printk/nbcon.c
>> +++ b/kernel/printk/nbcon.c
>> @@ -6,6 +6,7 @@
>> #include <linux/console.h>
>> #include <linux/delay.h>
>> #include <linux/slab.h>
>> +#include <linux/serial_core.h>
>
> The headers in this file is a mess. But here you can at least keep the
> piece ordered, can you?

Just to clarify, you would like to see this ordering and inclusion?

#include <linux/bug.h>
#include <linux/console.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/serial_core.h>
#include <linux/slab.h>
#include <linux/string.h>
#include "internal.h"

>> + ret = (console_srcu_read_flags(up->cons) & CON_NBCON);
>
> The outer parentheses are redundant.

Ack.

Thanks.

John

2024-02-19 14:36:42

by Andy Shevchenko

[permalink] [raw]
Subject: Re: [PATCH printk v2 08/26] printk: nbcon: Implement processing in port->lock wrapper

On Mon, Feb 19, 2024 at 03:24:47PM +0106, John Ogness wrote:
> On 2024-02-19, Andy Shevchenko <[email protected]> wrote:

..

> >> #include <linux/console.h>
> >> #include <linux/delay.h>
> >> #include <linux/slab.h>
> >> +#include <linux/serial_core.h>
> >
> > The headers in this file is a mess. But here you can at least keep the
> > piece ordered, can you?
>
> Just to clarify, you would like to see this ordering and inclusion?

Roughly, yes. Ideally it is quite likely that kernel.h is being used as
a 'proxy' header. Nowadays, it's rare the code needs kernel.h.

> #include <linux/bug.h>
> #include <linux/console.h>
> #include <linux/delay.h>
> #include <linux/export.h>
> #include <linux/kernel.h>
> #include <linux/serial_core.h>
> #include <linux/slab.h>
> #include <linux/string.h>

--
With Best Regards,
Andy Shevchenko



2024-02-19 16:53:30

by John Ogness

[permalink] [raw]
Subject: Re: [PATCH printk v2 08/26] printk: nbcon: Implement processing in port->lock wrapper

On 2024-02-19, Andy Shevchenko <[email protected]> wrote:
>>> The headers in this file is a mess. But here you can at least keep the
>>> piece ordered, can you?
>>
>> Just to clarify, you would like to see this ordering and inclusion?
>
> Roughly, yes. Ideally it is quite likely that kernel.h is being used as
> a 'proxy' header. Nowadays, it's rare the code needs kernel.h.

So I took the time to painfully discover every header that is required
for nbcon.c without any proxy usage. It came down to this:

#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/console.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/irqflags.h>
#include <linux/minmax.h>
#include <linux/percpu-defs.h>
#include <linux/preempt.h>
#include <linux/serial_core.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
#include "internal.h"

For the next version of this series I will only add the includes you
suggested, but will follow-up with a patch that fixes all proxy headers
for nbcon.c. As a separate patch it will help with bisecting in case the
ordering causes an explosion on some config/architecture.

John

2024-02-19 17:14:43

by Andy Shevchenko

[permalink] [raw]
Subject: Re: [PATCH printk v2 08/26] printk: nbcon: Implement processing in port->lock wrapper

On Mon, Feb 19, 2024 at 05:58:41PM +0106, John Ogness wrote:
> On 2024-02-19, Andy Shevchenko <[email protected]> wrote:
> >>> The headers in this file is a mess. But here you can at least keep the
> >>> piece ordered, can you?
> >>
> >> Just to clarify, you would like to see this ordering and inclusion?
> >
> > Roughly, yes. Ideally it is quite likely that kernel.h is being used as
> > a 'proxy' header. Nowadays, it's rare the code needs kernel.h.
>
> So I took the time to painfully discover every header that is required
> for nbcon.c without any proxy usage. It came down to this:
>
> #include <linux/atomic.h>
> #include <linux/bug.h>

> #include <linux/compiler.h>

This is guaranteed to be included by types.h, can be dropped.

> #include <linux/console.h>
> #include <linux/delay.h>
> #include <linux/errno.h>
> #include <linux/export.h>
> #include <linux/init.h>
> #include <linux/irqflags.h>
> #include <linux/minmax.h>

> #include <linux/percpu-defs.h>

This...

> #include <linux/preempt.h>
> #include <linux/serial_core.h>
> #include <linux/slab.h>

> #include <linux/smp.h>

..and this I believe can be represented by percpu.h as most likely that is the
"main" library you are using.

> #include <linux/stddef.h>
> #include <linux/string.h>
> #include <linux/types.h>
> #include "internal.h"
>
> For the next version of this series I will only add the includes you
> suggested, but will follow-up with a patch that fixes all proxy headers
> for nbcon.c. As a separate patch it will help with bisecting in case the
> ordering causes an explosion on some config/architecture.

Sure, thanks!

--
With Best Regards,
Andy Shevchenko



2024-02-23 10:51:21

by Petr Mladek

[permalink] [raw]
Subject: Re: [PATCH printk v2 08/26] printk: nbcon: Implement processing in port->lock wrapper

On Sun 2024-02-18 20:03:08, John Ogness wrote:
> Currently the port->lock wrappers uart_port_lock(),
> uart_port_unlock() (and their variants) only lock/unlock
> the spin_lock.
>
> If the port is an nbcon console, the wrappers must also
> acquire/release the console and mark the region as unsafe. This
> allows general port->lock synchronization to be synchronized
> with the nbcon console ownership.
>
> Add a flag to struct uart_port to track nbcon console ownership.

The patch makes sense.

My main (only) concern was the synchronization of the various accessed
variables, especially, port->cons.

Note: I am not completely sure how the early and valid console drivers
share the same struct uart_port. So, maybe I miss some important
guarantee.

Anyway. synchronization of port->cons looks like a shade area.
IMHO, the existing code expects that it is used _only when the console
is registered_. But this patch wants to access it _even before
the console is registered_!

For example, it took me quite a lot of time to shake my head around:

#define uart_console(port) \
((port)->cons && (port)->cons->index == (port)->line)

+ port->cons and port->line are updated in the uart code.
It seems that the update is not serialized by port->lock.
Something might be done under port->mutex.

+ cons->index is updated in register_console() under
console_list_lock.

Spoiler: I propose a solution which does not use uart_console().

See below for more details.

> --- a/drivers/tty/serial/8250/8250_port.c
> +++ b/drivers/tty/serial/8250/8250_port.c
> @@ -3284,6 +3284,7 @@ void serial8250_init_port(struct uart_8250_port *up)
> struct uart_port *port = &up->port;
>
> spin_lock_init(&port->lock);
> + port->nbcon_locked_port = false;

I am not sure if the variable really helps anything:

+ nbcon_context release() must handle the case when it
lost the ownership anyway.

It is the same as if it did not acquire the context
in the first place [*].


+ nbcon_acquire() is called under port->lock. It means that
nbcon_release() should always be called when the acquire
succeeded earlier.

We just need to make sure that port->cons can be cleared only
under port->lock. But this would be required even with
port->nbcon_locked_port.

[*] I am not super-happy with this semantic because it prevents
catching bugs by lockdep. But it is how nbcon_acquire/release
work and it is important part of the design.


Apology: It is possible that I suggested to add this variable. I just
see now that it does not really help much. It rather makes
a false feeling about that nbcon acquire/release are always
paired.

> port->ctrl_id = 0;
> port->pm = NULL;
> port->ops = &serial8250_pops;
> --- a/kernel/printk/nbcon.c
> +++ b/kernel/printk/nbcon.c
> @@ -995,3 +996,79 @@ void nbcon_free(struct console *con)
>
> con->pbufs = NULL;
> }
> +
> +static inline bool uart_is_nbcon(struct uart_port *up)
> +{
> + int cookie;
> + bool ret;
> +
> + if (!uart_console(up))

This function accesses up->cons. I am not completely sure how
this value is synchronized, for example:

+ serial_core_add_one_port() sets uport->cons under port->mutex.
The function is called uart_add_one_port() in various probe()
or init() calls.

+ univ8250_console_setup() sets and clears port->cons with
no explicit synchronization. The function is called from
try_enable_preferred_console() under console_list_lock.

IMHO, the assignment is done when the drivers are being initialized.
It is done when the port might already be used by early consoles.

Especially, according to my understanding, newcon->setup() callbacks
are responsible for using the same port by early and real console drivers.

I guess that uart_port_lock() API is used by early consoles as well.
It means that they might access up->cons here while it is being
manipulated by the proper driver.

A minimal solution would be access/modify port->cons using
READ_ONCE()/WRITE_ONCE().

But I think that we do not need to call uart_console() at all.
We do not really need to synchronize the consistency of
con->index and port->line.

Instead, we could read up->cons using READ_ONCE() and
check if it is defined. Or even better would be to always
set/read port->cons under the port->lock.


> + return false;
> +
> + cookie = console_srcu_read_lock();
> + ret = (console_srcu_read_flags(up->cons) & CON_NBCON);
> + console_srcu_read_unlock(cookie);

Hmm, console_srcu_read_flags(con) is called under
console_srcu_read_lock() to make sure that it does not
disappear. It makes sense when it is used by registered consoles.

But uart_port_lock() might be called even when the console
is not registered.

I suggest to remove the SRCU lock here. In this code path,
it does not guarantee anything and is rather misleading.

I would use a READ_ONCE(), for example by splitting:

/*
* Locklessly reading console->flags provides a consistent
* read value because there is at most one CPU modifying
* console->flags and that CPU is using only read-modify-write
* operations to do so.
*
* The caller must make sure that @con does not disappear.
* It can be done by console_srcu_read_lock() when used
* only for registered consoles.
*/
static inline short console_read_flags(const struct console *con)
{
return data_race(READ_ONCE(con->flags));
}

/* Locklessly reading console->flags for registered consoles */
static inline short console_srcu_read_flags(const struct console *con)
{
WARN_ON_ONCE(!console_srcu_read_lock_is_held());

console_read_flags();
}

> + return ret;
> +}
> +
> +/**
> + * uart_nbcon_acquire - The second half of the port locking wrapper
> + * @up: The uart port whose @lock was locked
> + *
> + * The uart_port_lock() wrappers will first lock the spin_lock @up->lock.
> + * Then this function is called to implement nbcon-specific processing.
> + *
> + * If @up is an nbcon console, this console will be acquired and marked as
> + * unsafe. Otherwise this function does nothing.
> + */
> +void uart_nbcon_acquire(struct uart_port *up)
> +{
> + struct console *con = up->cons;
> + struct nbcon_context ctxt;

I would add:

lockdep_assert_held(&up->lock);
> +
> + if (!uart_is_nbcon(up))
> + return;
> +
> + WARN_ON_ONCE(up->nbcon_locked_port);
> +
> + do {
> + do {
> + memset(&ctxt, 0, sizeof(ctxt));
> + ctxt.console = con;
> + ctxt.prio = NBCON_PRIO_NORMAL;
> + } while (!nbcon_context_try_acquire(&ctxt));
> +
> + } while (!nbcon_context_enter_unsafe(&ctxt));
> +
> + up->nbcon_locked_port = true;
> +}
> +EXPORT_SYMBOL_GPL(uart_nbcon_acquire);

I would prefer to split the uart and nbcon specific code, for example:

/**
* nbcon_normal_context_acquire - Acquire a generic context with
* the normal priority for nbcon console
* @con: nbcon console
*
* Context: Any context which could not be migrated to another CPU.
*
* Acquire a generic context with the normal priority for the given console.
* Prevent the release by entering the unsafe state.
*
* Note: The console might still loose the ownership by a hostile takeover.
* But it can be done only by the final flush in panic() when other
* CPUs should be stopped and other contexts interrupted.
*/
static void nbcon_normal_context_acquire(struct console *con)
{
struct nbcon_context ctxt;

do {
do {
memset(&ctxt, 0, sizeof(ctxt));
ctxt.console = con;
ctxt.prio = NBCON_PRIO_NORMAL;
} while (!nbcon_context_try_acquire(&ctxt));

} while (!nbcon_context_enter_unsafe(&ctxt));
}

/**
* uart_nbcon_acquire - Acquire nbcon console associated with the gived port.
* @up: uart port
*
* Context: Must be called under up->lock to prevent manipulating
* up->cons and migrating to another CPU.
*
* Note: The console might still loose the ownership by a hostile takeover.
* But it can be done only by the final flush in panic() when other
* CPUs should be stopped and other contexts interrupted.
*/
void uart_nbcon_acquire(struct uart_port *up)
{
struct console *con; = up->cons;

lockdep_assert_held(&up->lock);

/*
* FIXME: This would require adding WRITE_ONCE()
* on the write part.
*
* Or even better, the value should be modified under
* port->lock so that simple read would be enough here.
*/
con = data_race(READ_ONCE(up->cons));

if (!con)
return;

if (!console_read_flags(con) & CON_NBCON)
return;

nbcon_normal_context_acquire(con);
}

Note that I did not use up->nbcon_locked_port as explained above.


> +
> +/**
> + * uart_nbcon_release - The first half of the port unlocking wrapper
> + * @up: The uart port whose @lock is about to be unlocked
> + *
> + * The uart_port_unlock() wrappers will first call this function to implement
> + * nbcon-specific processing. Then afterwards the uart_port_unlock() wrappers
> + * will unlock the spin_lock @up->lock.
> + *
> + * If @up is an nbcon console, the console will be marked as safe and
> + * released. Otherwise this function does nothing.
> + */
> +void uart_nbcon_release(struct uart_port *up)
> +{
> + struct console *con = up->cons;
> + struct nbcon_context ctxt = {
> + .console = con,
> + .prio = NBCON_PRIO_NORMAL,
> + };
> +

I would add here as well.

lockdep_assert_held(&up->lock);


This deserves a comment why we do not complain when this function
is called for nbcon and it is not locked. Something like:

/*
* Even port used by nbcon console might be seen unlocked
* when it was locked and the console has been registered
* at the same time.
*/
> + if (!up->nbcon_locked_port)
> + return;

Wait, if up->cons might be set asynchronously then it might also
disapper assynchronously. Which would be really bad because we would
not be able to release the normal context.

IMHO, we really need to synchronize up->cons acceess using up->lock.

> +
> + if (nbcon_context_exit_unsafe(&ctxt))
> + nbcon_context_release(&ctxt);
> +
> + up->nbcon_locked_port = false;
> +}

Again I would better split the nbcon and uart part and create:

/**
* nbcon_normal_context_release - Release a generic context with
* the normal priority.
* @con: nbcon console
*
* Context: Any context which could not be migrated to another CPU.
*
* Release a generic context with the normal priority for the given console.
* Prevent the release by entering the unsafe state.
*
* Note: The console might have lost the ownership by a hostile takeover.
* But it should not happen in reality because the hostile
* takeover is allowed only for the final flush in panic()
* when other CPUs should be stopped and other contexts interrupted.
*/
static void nbcon_normal_context_release(struct console *con)
{
struct nbcon_context ctxt = {
.console = con,
.prio = NBCON_PRIO_NORMAL,
};

if (nbcon_context_exit_unsafe(&ctxt))
nbcon_context_release(&ctxt);
}

/**
* uart_nbcon_acquire - Release nbcon console associated with the given port.
* @up: uart port
*
* Context: Must be called under up->lock to prevent manipulating
* up->cons and migrating to another CPU.
*/
void uart_nbcon_release(struct uart_port *up)
{
struct console *con;

lockdep_assert_held(&up->lock);

/*
* FIXME: This would require adding WRITE_ONCE()
* on the write part.
*
* Or even better, the value should be modified under
* port->lock so that simple read would be enough here.
*/
con = data_race(READ_ONCE(up->cons));

if (!con)
return;

if (!console_read_flags(con) & CON_NBCON)
return;

nbcon_normal_context_release(con);
}

Best Regards,
Petr

2024-03-11 17:09:17

by John Ogness

[permalink] [raw]
Subject: Re: [PATCH printk v2 08/26] printk: nbcon: Implement processing in port->lock wrapper

Hi Petr,

Thanks for the detailed feedback. Here is a lengthy response. I hope it
clarifies the uart port and console fields. And I think you identified a
bug relating to the setup() callback.

On 2024-02-23, Petr Mladek <[email protected]> wrote:
> My main (only) concern was the synchronization of the various accessed
> variables, especially, port->cons.

The only issue is if port->cons disappears between lock and unlock. I
see there is code setting port->cons to NULL, although I do not know
why. Once port->cons is set, there is never a reason to unset it.

Regardless, I will add port->lock synchronization when modifying
port->cons. There are only a few code sites and they are all during
driver setup.

> Note: I am not completely sure how the early and valid console drivers
> share the same struct uart_port. So, maybe I miss some important
> guarantee.

The struct uart_port is _not_ shared between the early and normal
consoles. However, the struct console is shared for normal consoles
amoung various ports of a particular driver.

> Anyway. synchronization of port->cons looks like a shade area.
> IMHO, the existing code expects that it is used _only when the console
> is registered_. But this patch wants to access it _even before
> the console is registered_!

Indeed. It is not enough for uart_is_nbcon() to check if it is an
NBCON. It must also check if it is registered, locklessly:

hlist_unhashed_lockless(&con->node);

Most importantly to be sure that nbcon_init() has already been called.
register_console() clears the nbcon state after cons->index has been
set, but before the console has been added to the list.

> For example, it took me quite a lot of time to shake my head around:
>
> #define uart_console(port) \
> ((port)->cons && (port)->cons->index == (port)->line)
>
> + port->cons and port->line are updated in the uart code.
> It seems that the update is not serialized by port->lock.
> Something might be done under port->mutex.
>
> + cons->index is updated in register_console() under
> console_list_lock.
>
> Spoiler: I propose a solution which does not use uart_console().

This check is necessary because multiple ports of a driver will set and
share the same port->cons value, even if they are not really the
console. I looked into enforcing that port->cons is NULL if it is not a
registered console, but this is tricky. port->cons is driver-internal
and hidden from printk. The driver will set port->cons in case it
becomes the console and printk will set cons->index once it has chosen
which port will be the actual console. But there is no way to unset
port->cons if a port was not chosen by printk.

The various fields have the following meaning (AFAICT):

port->line: An identifier to represent a particular port supported by a
driver.

port->cons: The struct console to use if this port is chosen to be a
console.

port->console: Boolean, true if this port was chosen to be a
console. (Used only by the tty layer.)

cons->index: The port chosen by printk to be a console.

None of those fields specify if the port is currently registered as a
console. For that you would need to check if port->cons->node is hashed
and then verify that port->line matches port->cons->index. This is what
uart_nbcon_acquire() needs to do (as well as check if it is an nbcon
console).

>> --- a/drivers/tty/serial/8250/8250_port.c
>> +++ b/drivers/tty/serial/8250/8250_port.c
>> @@ -3284,6 +3284,7 @@ void serial8250_init_port(struct uart_8250_port *up)
>> struct uart_port *port = &up->port;
>>
>> spin_lock_init(&port->lock);
>> + port->nbcon_locked_port = false;
>
> I am not sure if the variable really helps anything:
>
> + nbcon_context release() must handle the case when it
> lost the ownership anyway.

uart_nbcon_release() must first check if the provided port is a
registered nbcon console. A simple boolean check is certainly faster
than the 4 checks mentioned above. After all, if it was never locked,
there is no reason to unlock it.

> + nbcon_acquire() is called under port->lock. It means that
> nbcon_release() should always be called when the acquire
> succeeded earlier.

Same answer as above.

> We just need to make sure that port->cons can be cleared only
> under port->lock. But this would be required even with
> port->nbcon_locked_port.

Agreed. I will add this.

>> --- a/kernel/printk/nbcon.c
>> +++ b/kernel/printk/nbcon.c
>> @@ -995,3 +996,79 @@ void nbcon_free(struct console *con)
>>
>> con->pbufs = NULL;
>> }
>> +
>> +static inline bool uart_is_nbcon(struct uart_port *up)
>> +{
>> + int cookie;
>> + bool ret;
>> +
>> + if (!uart_console(up))
>
> This function accesses up->cons. I am not completely sure how
> this value is synchronized, for example:
>
> + serial_core_add_one_port() sets uport->cons under port->mutex.
> The function is called uart_add_one_port() in various probe()
> or init() calls.

I will add port->lock synchronization.

> + univ8250_console_setup() sets and clears port->cons with
> no explicit synchronization. The function is called from
> try_enable_preferred_console() under console_list_lock.

I will add port->lock synchronization.

> IMHO, the assignment is done when the drivers are being initialized.
> It is done when the port might already be used by early consoles.
>
> Especially, according to my understanding, newcon->setup() callbacks
> are responsible for using the same port by early and real console drivers.
>
> I guess that uart_port_lock() API is used by early consoles as well.
> It means that they might access up->cons here while it is being
> manipulated by the proper driver.

Note that port->lock does not synchronize early and normal
consoles. Only the console lock can do that. But you bring up a very
good point with setup(). serial8250_console_setup() can call
probe_baud(), which will write to the hardware.

I think that con->setup() needs to be called under the console lock
(just as already with unblank() and device()).

>> + return false;
>> +
>> + cookie = console_srcu_read_lock();
>> + ret = (console_srcu_read_flags(up->cons) & CON_NBCON);
>> + console_srcu_read_unlock(cookie);
>
> Hmm, console_srcu_read_flags(con) is called under
> console_srcu_read_lock() to make sure that it does not
> disappear. It makes sense when it is used by registered consoles.
>
> But uart_port_lock() might be called even when the console
> is not registered.
>
> I suggest to remove the SRCU lock here. In this code path,
> it does not guarantee anything and is rather misleading.

Agreed.

> I would use a READ_ONCE(), for example by splitting:
>
> /*
> * Locklessly reading console->flags provides a consistent
> * read value because there is at most one CPU modifying
> * console->flags and that CPU is using only read-modify-write
> * operations to do so.
> *
> * The caller must make sure that @con does not disappear.
> * It can be done by console_srcu_read_lock() when used
> * only for registered consoles.
> */
> static inline short console_read_flags(const struct console *con)
> {
> return data_race(READ_ONCE(con->flags));
> }
>
> /* Locklessly reading console->flags for registered consoles */
> static inline short console_srcu_read_flags(const struct console *con)
> {
> WARN_ON_ONCE(!console_srcu_read_lock_is_held());
>
> console_read_flags();
> }

OK.

>> +void uart_nbcon_acquire(struct uart_port *up)
>> +{
>> + struct console *con = up->cons;
>> + struct nbcon_context ctxt;
>
> I would add:
>
> lockdep_assert_held(&up->lock);

OK.

>> +
>> + if (!uart_is_nbcon(up))
>> + return;
>> +
>> + WARN_ON_ONCE(up->nbcon_locked_port);
>> +
>> + do {
>> + do {
>> + memset(&ctxt, 0, sizeof(ctxt));
>> + ctxt.console = con;
>> + ctxt.prio = NBCON_PRIO_NORMAL;
>> + } while (!nbcon_context_try_acquire(&ctxt));
>> +
>> + } while (!nbcon_context_enter_unsafe(&ctxt));
>> +
>> + up->nbcon_locked_port = true;
>> +}
>> +EXPORT_SYMBOL_GPL(uart_nbcon_acquire);
>
> I would prefer to split the uart and nbcon specific code, for example:

Can you explain why? This code will not be used anywhere else.

> /**
> * nbcon_normal_context_acquire - Acquire a generic context with
> * the normal priority for nbcon console
> * @con: nbcon console
> *
> * Context: Any context which could not be migrated to another CPU.
> *
> * Acquire a generic context with the normal priority for the given console.
> * Prevent the release by entering the unsafe state.
> *
> * Note: The console might still loose the ownership by a hostile takeover.
> * But it can be done only by the final flush in panic() when other
> * CPUs should be stopped and other contexts interrupted.
> */
> static void nbcon_normal_context_acquire(struct console *con)
> {
> struct nbcon_context ctxt;
>
> do {
> do {
> memset(&ctxt, 0, sizeof(ctxt));
> ctxt.console = con;
> ctxt.prio = NBCON_PRIO_NORMAL;
> } while (!nbcon_context_try_acquire(&ctxt));
>
> } while (!nbcon_context_enter_unsafe(&ctxt));
> }
>
> /**
> * uart_nbcon_acquire - Acquire nbcon console associated with the gived port.
> * @up: uart port
> *
> * Context: Must be called under up->lock to prevent manipulating
> * up->cons and migrating to another CPU.
> *
> * Note: The console might still loose the ownership by a hostile takeover.
> * But it can be done only by the final flush in panic() when other
> * CPUs should be stopped and other contexts interrupted.
> */
> void uart_nbcon_acquire(struct uart_port *up)
> {
> struct console *con; = up->cons;
>
> lockdep_assert_held(&up->lock);
>
> /*
> * FIXME: This would require adding WRITE_ONCE()
> * on the write part.
> *
> * Or even better, the value should be modified under
> * port->lock so that simple read would be enough here.
> */
> con = data_race(READ_ONCE(up->cons));
>
> if (!con)
> return;
>
> if (!console_read_flags(con) & CON_NBCON)
> return;
>
> nbcon_normal_context_acquire(con);
> }
>
> Note that I did not use up->nbcon_locked_port as explained above.

Note that it will not work because other ports will share the same
up->cons value even though they are not consoles. up->cons only
specifies which struct console to use _if_ printk chooses that port as a
console. It does _not_ mean that printk has chosen that port.

>> +void uart_nbcon_release(struct uart_port *up)
>> +{
>> + struct console *con = up->cons;
>> + struct nbcon_context ctxt = {
>> + .console = con,
>> + .prio = NBCON_PRIO_NORMAL,
>> + };
>> +
>
> I would add here as well.
>
> lockdep_assert_held(&up->lock);

OK.

> This deserves a comment why we do not complain when this function
> is called for nbcon and it is not locked. Something like:
>
> /*
> * Even port used by nbcon console might be seen unlocked
> * when it was locked and the console has been registered
> * at the same time.
> */

I think a more appropriate comment would be:

/*
* This function is called for ports that are not consoles
* and for ports that may be consoles but are not nbcon
* consoles. In those the cases the nbcon console was
* never locked and this context must not unlock.
*/

>> + if (!up->nbcon_locked_port)
>> + return;
>> +
>> + if (nbcon_context_exit_unsafe(&ctxt))
>> + nbcon_context_release(&ctxt);
>> +
>> + up->nbcon_locked_port = false;
>> +}
>
> Again I would better split the nbcon and uart part and create:

I can do the split, but I do not see the reason for it.

John

2024-03-13 09:51:47

by John Ogness

[permalink] [raw]
Subject: Re: [PATCH printk v2 08/26] printk: nbcon: Implement processing in port->lock wrapper

On 2024-03-11, John Ogness <[email protected]> wrote:
> And I think you identified a bug relating to the setup() callback.

Actually this bug was recently fixed by Peter Collingbourne:

https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/kernel/printk/printk.c?h=next-20240313&id=801410b26a0e8b8a16f7915b2b55c9528b69ca87

One nice thing that has risen from this is we are starting to see
exactly what the console lock is needed for. At this point I would say
its main function is synchronizing boot consoles with real
drivers. Which means we will not be able to remove the console lock
until we find a real solution to match boot consoles (which circumvent
the Linux driver model) with the real drivers.

John

2024-03-14 11:38:08

by John Ogness

[permalink] [raw]
Subject: Re: [PATCH printk v2 08/26] printk: nbcon: Implement processing in port->lock wrapper

On 2024-03-11, John Ogness <[email protected]> wrote:
>>> + if (!uart_is_nbcon(up))
>>> + return;
>>> +
>>> + WARN_ON_ONCE(up->nbcon_locked_port);
>>> +
>>> + do {
>>> + do {
>>> + memset(&ctxt, 0, sizeof(ctxt));
>>> + ctxt.console = con;
>>> + ctxt.prio = NBCON_PRIO_NORMAL;
>>> + } while (!nbcon_context_try_acquire(&ctxt));
>>> +
>>> + } while (!nbcon_context_enter_unsafe(&ctxt));
>>> +
>>> + up->nbcon_locked_port = true;
>>> +}
>>> +EXPORT_SYMBOL_GPL(uart_nbcon_acquire);
>>
>> I would prefer to split the uart and nbcon specific code, for
>> example:
>
> Can you explain why? This code will not be used anywhere else.

No need to respond to this point. The v3 will be quite different here,
but will follow your suggestion. I am splitting the uart-specific code
into serial_core.h and calling a generic nbcon function for the nbcon
locking.

John

2024-03-14 14:26:15

by Petr Mladek

[permalink] [raw]
Subject: Re: [PATCH printk v2 08/26] printk: nbcon: Implement processing in port->lock wrapper

On Mon 2024-03-11 18:14:19, John Ogness wrote:
> Hi Petr,
>
> Thanks for the detailed feedback. Here is a lengthy response. I hope it
> clarifies the uart port and console fields. And I think you identified a
> bug relating to the setup() callback.
>
> On 2024-02-23, Petr Mladek <[email protected]> wrote:
> > My main (only) concern was the synchronization of the various accessed
> > variables, especially, port->cons.
>
> The only issue is if port->cons disappears between lock and unlock. I
> see there is code setting port->cons to NULL, although I do not know
> why. Once port->cons is set, there is never a reason to unset it.

I wonder if it might be needed for hotplugging of the device
or the driver. Well, I would expect that the structures won't exist
when the driver is not loaded and/or the device providing
the port does not exist.

But maybe, it is just a defensive programming style where unused
pointers are cleared.

> Regardless, I will add port->lock synchronization when modifying
> port->cons. There are only a few code sites and they are all during
> driver setup.
>
> > Note: I am not completely sure how the early and valid console drivers
> > share the same struct uart_port. So, maybe I miss some important
> > guarantee.
>
> The struct uart_port is _not_ shared between the early and normal
> consoles. However, the struct console is shared for normal consoles
> amoung various ports of a particular driver.

I see.

> > Anyway. synchronization of port->cons looks like a shade area.
> > IMHO, the existing code expects that it is used _only when the console
> > is registered_. But this patch wants to access it _even before
> > the console is registered_!
>
> Indeed. It is not enough for uart_is_nbcon() to check if it is an
> NBCON. It must also check if it is registered, locklessly:
>
> hlist_unhashed_lockless(&con->node);
>
> Most importantly to be sure that nbcon_init() has already been called.
> register_console() clears the nbcon state after cons->index has been
> set, but before the console has been added to the list.

Makes sense.

Well, it brings another question. Does this allow to have
the following situation?

CPU0 CPU1

some_function()
uart_port_lock()
// locked just with up->lock
// doing something with the port

register_console()
// add struct console using the same
// port as CPU0
printk()
console_try_lock()
console_unlock()
console_flush_all()
// acquire context for the newly
// registered nbcon
nbcon_context_try_acquire(ctxt)
con->write()

BANG: Both CPU0 and CPU1 are writing to the same port.

Reason: CPU0 locked only via port->lock.
CPU1 locked only by acquiring nbcon context.

Maybe, this is not possible because the console is registered when
the struct uart_port is being initialized and nobody could
use the same port in parallel, except for the early console.
Where the early console is serialized using the console_lock().

Hmm, if the parallel use of struct port_lock is not possible
during register_console then we probably do not even need
to serialize setting and clearing of port->cons.

By other words, I wonder if printk() is the only nasty user of
the uart ports. By "nasty user" I mean:

+ Using the same port even without struct uart_port by
early console.

+ Using the port via struct uart_port even before the device
is completely initialized. I assume that register_console()
is called from the driver init call.

For example, the device/port gets visible in sysfs. I wonder
if anyone could trigger an operation on the port which
it is being registered as a console.

Sigh, if we agree that the above race is possible then I can't think
of any elegant solution.

Well, it might be better to be on the safe side:

One solution would be to add nbcon consoles into the console_list
under uart_port_lock().

Another solution would be to make sure that any code serialized
by uart_port_lock() will be already synchronized by nbcon context
while the nbcon is added into the console_list. Maybe, we
could do this in con->setup() callback. Something like:

* @need_nbcon_context: true when uart_port_lock() has to acquire
nbcon context as well

struct uart_port {
bool need_nbcon_context;

int uart_console_setup(struct console *con, char *options)
{
struct uart_8250_port *up = &serial8250_ports[co->index];

spin_lock_irq(&up->lock);
up->need_nbcon_context=true;
spin_unlock_irq(&up->lock);

// do whatever the original uart_console_setup did
}

int uart_console_exit(struct console *con, char *options)
{
struct uart_8250_port *up = &serial8250_ports[co->index];

// do whatever the original uart_console_exit did

spin_lock_irq(&up->lock);
up->need_nbcon_context=false;
spin_unlock_irq(&up->lock);
}

We might even use this variable in uart_nbcon_acquire() to
decide whether we need to acquire the context or not.


> > For example, it took me quite a lot of time to shake my head around:
> >
> > #define uart_console(port) \
> > ((port)->cons && (port)->cons->index == (port)->line)
> >
> > + port->cons and port->line are updated in the uart code.
> > It seems that the update is not serialized by port->lock.
> > Something might be done under port->mutex.
> >
> > + cons->index is updated in register_console() under
> > console_list_lock.
> >
> > Spoiler: I propose a solution which does not use uart_console().
>
> This check is necessary because multiple ports of a driver will set and
> share the same port->cons value, even if they are not really the
> console. I looked into enforcing that port->cons is NULL if it is not a
> registered console, but this is tricky. port->cons is driver-internal
> and hidden from printk. The driver will set port->cons in case it
> becomes the console and printk will set cons->index once it has chosen
> which port will be the actual console. But there is no way to unset
> port->cons if a port was not chosen by printk.
>
> The various fields have the following meaning (AFAICT):
>
> port->line: An identifier to represent a particular port supported by a
> driver.
>
> port->cons: The struct console to use if this port is chosen to be a
> console.
>
> port->console: Boolean, true if this port was chosen to be a
> console. (Used only by the tty layer.)
>
> cons->index: The port chosen by printk to be a console.
>
> None of those fields specify if the port is currently registered as a
> console. For that you would need to check if port->cons->node is hashed
> and then verify that port->line matches port->cons->index. This is what
> uart_nbcon_acquire() needs to do (as well as check if it is an nbcon
> console).

This is a great description. It would be great to have it somewhere in
the sources. Maybe, above the locking/acquire functions.

> >> --- a/drivers/tty/serial/8250/8250_port.c
> >> +++ b/drivers/tty/serial/8250/8250_port.c
> >> @@ -3284,6 +3284,7 @@ void serial8250_init_port(struct uart_8250_port *up)
> >> struct uart_port *port = &up->port;
> >>
> >> spin_lock_init(&port->lock);
> >> + port->nbcon_locked_port = false;
> >
> > I am not sure if the variable really helps anything:
> >
> > + nbcon_context release() must handle the case when it
> > lost the ownership anyway.
>
> uart_nbcon_release() must first check if the provided port is a
> registered nbcon console. A simple boolean check is certainly faster
> than the 4 checks mentioned above. After all, if it was never locked,
> there is no reason to unlock it.

Fair enough.

> > We just need to make sure that port->cons can be cleared only
> > under port->lock. But this would be required even with
> > port->nbcon_locked_port.
>
> Agreed. I will add this.
>
> >> --- a/kernel/printk/nbcon.c
> >> +++ b/kernel/printk/nbcon.c
> >> +void uart_nbcon_acquire(struct uart_port *up)
> >> +{
> >> + struct console *con = up->cons;
> >> + struct nbcon_context ctxt;
> >
> > I would add:
> >
> > lockdep_assert_held(&up->lock);
>
> OK.
>
> >> +
> >> + if (!uart_is_nbcon(up))
> >> + return;
> >> +
> >> + WARN_ON_ONCE(up->nbcon_locked_port);
> >> +
> >> + do {
> >> + do {
> >> + memset(&ctxt, 0, sizeof(ctxt));
> >> + ctxt.console = con;
> >> + ctxt.prio = NBCON_PRIO_NORMAL;
> >> + } while (!nbcon_context_try_acquire(&ctxt));
> >> +
> >> + } while (!nbcon_context_enter_unsafe(&ctxt));
> >> +
> >> + up->nbcon_locked_port = true;
> >> +}
> >> +EXPORT_SYMBOL_GPL(uart_nbcon_acquire);
> >
> > I would prefer to split the uart and nbcon specific code, for example:
>
> Can you explain why? This code will not be used anywhere else.

It would have helped me with the review. The function is short
but it touches internals from both uart_port and nbcon words:

+ Implements new variant of nbcon_ctxt_acquire() API (busy loop, no timeout)

+ Checks and modifies struct uart_port details which affect
uart_port_lock() API.

IMHO, there is too much to think about in a single function ;-)


Best Regards,
Petr

2024-03-15 15:05:18

by John Ogness

[permalink] [raw]
Subject: Re: [PATCH printk v2 08/26] printk: nbcon: Implement processing in port->lock wrapper

On 2024-03-14, Petr Mladek <[email protected]> wrote:
> Well, it brings another question. Does this allow to have
> the following situation?
>
> CPU0 CPU1
>
> some_function()
> uart_port_lock()
> // locked just with up->lock
> // doing something with the port
>
> register_console()
> // add struct console using the same
> // port as CPU0
> printk()
> console_try_lock()
> console_unlock()
> console_flush_all()
> // acquire context for the newly
> // registered nbcon
> nbcon_context_try_acquire(ctxt)
> con->write()
>
> BANG: Both CPU0 and CPU1 are writing to the same port.
>
> Reason: CPU0 locked only via port->lock.
> CPU1 locked only by acquiring nbcon context.

Great catch! Yes, this is possible. :-/

When the kthread series part is introduced, there will be additional
callbacks that nbcon consoles must implement
(driver_enter()/driver_exit()). These provide driver-level
synchronization. In the case of serial uarts, the callbacks map to
locking/unlocking the port lock.

If I were to introduce those callbacks in _this_ series, they can be
used when adding a console to the list in register_console(). This
changes your example to:

CPU0 CPU1

some_function()
uart_port_lock()
// locked just with up->lock
// doing something with the port

register_console()
// add struct console using the same
// port as CPU0
newcon->driver_enter()
spin_lock(port_lock)
// spin on CPU0
uart_port_unlock()
// add new console to console list
newcon->driver_exit()
spin_unlock(port_lock)
...

If any other CPUs come in and call uart_port_lock(), they will see the
console as registered and will acquire the nbcon to avoid the BANG.

> Maybe, this is not possible because the console is registered when
> the struct uart_port is being initialized and nobody could
> use the same port in parallel, except for the early console.
> Where the early console is serialized using the console_lock().

Yes, it is possible. Just check out:

find /sys/ -name console -type f

If you echo 'Y' or 'N' into any of those files, you can dynamically
register and unregister those consoles, respectively.

I just ran some tests to verify this and was even able to trigger a
mainline bug because probe_baud() of the 8250 driver is not called under
the port lock. This is essentially the same scenario you
illustrated. But the 8250 probe_baud() issue is a driver bug and not
related to this series.

Getting back to this series, my proposal would change register_console()
like this:

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 68657d4d6649..25a0a81e8397 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3733,6 +3733,7 @@ void register_console(struct console *newcon)
struct console *con;
bool bootcon_registered = false;
bool realcon_registered = false;
+ unsigned long flags;
int err;

console_list_lock();
@@ -3831,6 +3832,19 @@ void register_console(struct console *newcon)
if (newcon->flags & CON_BOOT)
have_boot_console = true;

+ /*
+ * If another context is actively using the hardware of this new
+ * console, it will not be aware of the nbcon synchronization. This
+ * is a risk that two contexts could access the hardware
+ * simultaneously if this new console is used for atomic printing
+ * and the other context is still using the hardware.
+ *
+ * Use the driver synchronization to ensure that the hardware is not
+ * in use while this new console transitions to being registered.
+ */
+ if ((newcon->flags & CON_NBCON) && newcon->write_atomic)
+ newcon->driver_enter(newcon, &flags);
+
/*
* Put this console in the list - keep the
* preferred driver at the head of the list.
@@ -3855,6 +3869,10 @@ void register_console(struct console *newcon)
* register_console() completes.
*/

+ /* This new console is now registered. */
+ if ((newcon->flags & CON_NBCON) && newcon->write_atomic)
+ newcon->driver_exit(newcon, flags);
+
console_sysfs_notify();

/*

> One solution would be to add nbcon consoles into the console_list
> under uart_port_lock().

This is what I have proposed and I think it is the most straight forward
solution.

> Another solution would be to make sure that any code serialized
> by uart_port_lock() will be already synchronized by nbcon context
> while the nbcon is added into the console_list.

I do not think this would be acceptable. It would mean that non-console
ports would need to lock the nbcon. Not only will that slow down the
non-console ports, but it will also cause serious contention between the
ports. (Remember, all the ports share the same struct console.)

> Maybe, we could do this in con->setup() callback. Something like:

This proposal would work, but IMHO it adds too much complexity by
requiring console drivers to implement the callbacks and do special
things in those callbacks.

>> The various fields have the following meaning (AFAICT):
>>
>> port->line: An identifier to represent a particular port supported by a
>> driver.
>>
>> port->cons: The struct console to use if this port is chosen to be a
>> console.
>>
>> port->console: Boolean, true if this port was chosen to be a
>> console. (Used only by the tty layer.)
>>
>> cons->index: The port chosen by printk to be a console.
>>
> This is a great description. It would be great to have it somewhere in
> the sources. Maybe, above the locking/acquire functions.

OK.

John

2024-03-18 15:42:59

by Petr Mladek

[permalink] [raw]
Subject: Re: [PATCH printk v2 08/26] printk: nbcon: Implement processing in port->lock wrapper

On Fri 2024-03-15 16:10:18, John Ogness wrote:
> On 2024-03-14, Petr Mladek <[email protected]> wrote:
> > Well, it brings another question. Does this allow to have
> > the following situation?
> >
> > CPU0 CPU1
> >
> > some_function()
> > uart_port_lock()
> > // locked just with up->lock
> > // doing something with the port
> >
> > register_console()
> > // add struct console using the same
> > // port as CPU0
> > printk()
> > console_try_lock()
> > console_unlock()
> > console_flush_all()
> > // acquire context for the newly
> > // registered nbcon
> > nbcon_context_try_acquire(ctxt)
> > con->write()
> >
> > BANG: Both CPU0 and CPU1 are writing to the same port.
> >
> > Reason: CPU0 locked only via port->lock.
> > CPU1 locked only by acquiring nbcon context.
>
> Great catch! Yes, this is possible. :-/
>
> When the kthread series part is introduced, there will be additional
> callbacks that nbcon consoles must implement
> (driver_enter()/driver_exit()). These provide driver-level
> synchronization. In the case of serial uarts, the callbacks map to
> locking/unlocking the port lock.
>
> If I were to introduce those callbacks in _this_ series, they can be
> used when adding a console to the list in register_console(). This
> changes your example to:
>
> CPU0 CPU1
>
> some_function()
> uart_port_lock()
> // locked just with up->lock
> // doing something with the port
>
> register_console()
> // add struct console using the same
> // port as CPU0
> newcon->driver_enter()
> spin_lock(port_lock)
> // spin on CPU0
> uart_port_unlock()
> // add new console to console list
> newcon->driver_exit()
> spin_unlock(port_lock)
> ...
>
> If any other CPUs come in and call uart_port_lock(), they will see the
> console as registered and will acquire the nbcon to avoid the BANG.

Looks good. See below.

> > Maybe, this is not possible because the console is registered when
> > the struct uart_port is being initialized and nobody could
> > use the same port in parallel, except for the early console.
> > Where the early console is serialized using the console_lock().
>
> Yes, it is possible. Just check out:
>
> find /sys/ -name console -type f
>
> If you echo 'Y' or 'N' into any of those files, you can dynamically
> register and unregister those consoles, respectively.
>
> I just ran some tests to verify this and was even able to trigger a
> mainline bug because probe_baud() of the 8250 driver is not called under
> the port lock. This is essentially the same scenario you
> illustrated. But the 8250 probe_baud() issue is a driver bug and not
> related to this series.

Thanks a lot for checking it.

> Getting back to this series, my proposal would change register_console()
> like this:
>
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index 68657d4d6649..25a0a81e8397 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -3733,6 +3733,7 @@ void register_console(struct console *newcon)
> struct console *con;
> bool bootcon_registered = false;
> bool realcon_registered = false;
> + unsigned long flags;
> int err;
>
> console_list_lock();
> @@ -3831,6 +3832,19 @@ void register_console(struct console *newcon)
> if (newcon->flags & CON_BOOT)
> have_boot_console = true;
>
> + /*
> + * If another context is actively using the hardware of this new
> + * console, it will not be aware of the nbcon synchronization. This
> + * is a risk that two contexts could access the hardware
> + * simultaneously if this new console is used for atomic printing
> + * and the other context is still using the hardware.
> + *
> + * Use the driver synchronization to ensure that the hardware is not
> + * in use while this new console transitions to being registered.
> + */
> + if ((newcon->flags & CON_NBCON) && newcon->write_atomic)
> + newcon->driver_enter(newcon, &flags);
> +
> /*
> * Put this console in the list - keep the
> * preferred driver at the head of the list.
> @@ -3855,6 +3869,10 @@ void register_console(struct console *newcon)
> * register_console() completes.
> */
>
> + /* This new console is now registered. */
> + if ((newcon->flags & CON_NBCON) && newcon->write_atomic)
> + newcon->driver_exit(newcon, flags);
> +
> console_sysfs_notify();
>
> /*
>
> > One solution would be to add nbcon consoles into the console_list
> > under uart_port_lock().
>
> This is what I have proposed and I think it is the most straight forward
> solution.
>
> > Another solution would be to make sure that any code serialized
> > by uart_port_lock() will be already synchronized by nbcon context
> > while the nbcon is added into the console_list.
>
> I do not think this would be acceptable. It would mean that non-console
> ports would need to lock the nbcon. Not only will that slow down the
> non-console ports, but it will also cause serious contention between the
> ports. (Remember, all the ports share the same struct console.)

I actually did not want to lock the nbcon for all ports. This is why
I proposed to do it in con->setup() where con->index is already set.
It might solve the problem without adding yet another callbacks.

That said, I like your solution with newcon->driver_enter()/exit()
callbacks. It seems to have an easier and more straightforward semantic.

Go for it, especially when you need these callbacks later in
the printk kthread.

Nit: I think about renaming the callbacks to"device_lock()
and device_unlock().

"(un)lock" probably better describes what the callbacks do.
register_console() does not want to do any operations
on the serial port. It just needs to serialize adding
the console into the list.

I suggest "device" because the callbacks will lock/unlock
the tty_driver pointed by "con->device".

>
> > Maybe, we could do this in con->setup() callback. Something like:
>
> This proposal would work, but IMHO it adds too much complexity by
> requiring console drivers to implement the callbacks and do special
> things in those callbacks.

Fair enough.

Best Regards,
Petr

2024-03-22 06:23:48

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH printk v2 08/26] printk: nbcon: Implement processing in port->lock wrapper

* John Ogness <[email protected]> [240313 09:50]:
> One nice thing that has risen from this is we are starting to see
> exactly what the console lock is needed for. At this point I would say
> its main function is synchronizing boot consoles with real
> drivers. Which means we will not be able to remove the console lock
> until we find a real solution to match boot consoles (which circumvent
> the Linux driver model) with the real drivers.

Would it help if earlycon handles all the boot consoles?
Then just have the serial driver take over when it probes?

Regards,

Tony




2024-03-27 09:35:36

by John Ogness

[permalink] [raw]
Subject: Re: [PATCH printk v2 08/26] printk: nbcon: Implement processing in port->lock wrapper

On 2024-03-22, Tony Lindgren <[email protected]> wrote:
> * John Ogness <[email protected]> [240313 09:50]:
>> One nice thing that has risen from this is we are starting to see
>> exactly what the console lock is needed for. At this point I would say
>> its main function is synchronizing boot consoles with real
>> drivers. Which means we will not be able to remove the console lock
>> until we find a real solution to match boot consoles (which circumvent
>> the Linux driver model) with the real drivers.
>
> Would it help if earlycon handles all the boot consoles?
> Then just have the serial driver take over when it probes?

I think this would be very helpful. And it would also cleanup the boot
arguments. For example, we would no longer need the
architecture-specific arguments/options (such as "early_printk" and
"keep"). These architecture-specific arguments can be really
confusing. There have been so many times I see a developer cursing that
they can't get early debugging working, when I notice they are trying to
use "early_printk" on an arm64 system.

Browsing through

arch/x86/kernel/early_printk.c
arch/x86/boot/early_serial_console.c

you can see lots of examples of various early consoles and their special
tricks/hacks (such as pretending not to be a boot console when it really
is).

And pretty much every architecture has these. (git grep CON_BOOT)

Ideally it would be great if a console could register and say "taking
over for console X". Maybe with a new field:

struct console {
...
struct console *console_to_replace;
...
};

John Ogness

2024-03-27 18:09:19

by Andy Shevchenko

[permalink] [raw]
Subject: Re: [PATCH printk v2 08/26] printk: nbcon: Implement processing in port->lock wrapper

On Wed, Mar 27, 2024 at 10:38:15AM +0106, John Ogness wrote:
> On 2024-03-22, Tony Lindgren <[email protected]> wrote:
> > * John Ogness <[email protected]> [240313 09:50]:
> >> One nice thing that has risen from this is we are starting to see
> >> exactly what the console lock is needed for. At this point I would say
> >> its main function is synchronizing boot consoles with real
> >> drivers. Which means we will not be able to remove the console lock
> >> until we find a real solution to match boot consoles (which circumvent
> >> the Linux driver model) with the real drivers.
> >
> > Would it help if earlycon handles all the boot consoles?
> > Then just have the serial driver take over when it probes?
>
> I think this would be very helpful. And it would also cleanup the boot
> arguments. For example, we would no longer need the
> architecture-specific arguments/options (such as "early_printk" and
> "keep"). These architecture-specific arguments can be really
> confusing.

You may not get rid of earlyprintk as it affects *very* early at boot,
earlycon is simply not and may not be available at these stages.

--
With Best Regards,
Andy Shevchenko