2022-09-10 22:58:20

by Thomas Gleixner

Subject: [patch RFC 28/29] printk: Provide functions for atomic write enforcement

From: John Ogness <[email protected]>

Threaded printk is the preferred mechanism to tame the noisiness of
printk, but WARN/OOPS/PANIC require printing out immediately, as the
printer threads might not be able to run.

Add per CPU state which denotes the priority/urgency of the output and
provide functions which flush the printk backlog during early boot and in
priority-elevated contexts.
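
A sketch of the intended use at an elevated-priority call site
(hypothetical caller; the actual WARN/OOPS/PANIC wiring is not part of
this patch, and CONS_PRIO_EMERGENCY is the priority level introduced
earlier in this series):

	static void example_emergency_report(void)
	{
		enum cons_prio prev_prio;

		/* Raise the printk output priority of this CPU */
		prev_prio = cons_atomic_enter(CONS_PRIO_EMERGENCY);

		/* vprintk_emit() now flushes via cons_atomic_flush() */
		pr_emerg("example emergency condition\n");

		/* Restore the previous priority */
		cons_atomic_exit(CONS_PRIO_EMERGENCY, prev_prio);
	}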

Signed-off-by: John Ogness <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
---
include/linux/console.h | 6 +
kernel/printk/printk.c | 26 +++--
kernel/printk/printk_nobkl.c | 217 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 240 insertions(+), 9 deletions(-)

--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -16,6 +16,7 @@

#include <linux/atomic.h>
#include <linux/bits.h>
+#include <linux/irq_work.h>
#include <linux/rculist.h>
#include <linux/rcuwait.h>
#include <linux/types.h>
@@ -367,6 +368,7 @@ struct cons_context_data {
* @atomic_seq: Sequence for record tracking (32bit only)
* @kthread: Pointer to kernel thread
* @rcuwait: RCU wait for the kernel thread
+ * @irq_work: IRQ work for thread wakeup
* @kthread_running: Indicator whether the kthread is running
* @thread_txtbuf: Pointer to thread private buffer
* @write_atomic: Write callback for atomic context
@@ -400,6 +402,7 @@ struct console {
#endif
struct task_struct *kthread;
struct rcuwait rcuwait;
+ struct irq_work irq_work;
atomic_t kthread_running;
struct cons_text_buf *thread_txtbuf;

@@ -471,6 +474,9 @@ extern bool console_can_proceed(struct c
extern bool console_enter_unsafe(struct cons_write_context *wctxt);
extern bool console_exit_unsafe(struct cons_write_context *wctxt);

+extern enum cons_prio cons_atomic_enter(enum cons_prio prio);
+extern void cons_atomic_exit(enum cons_prio prio, enum cons_prio prev_prio);
+
extern int console_set_on_cmdline;
extern struct console *early_console;

--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1072,6 +1072,8 @@ static inline void log_buf_add_cpu(void)
#endif /* CONFIG_SMP */

static void cons_alloc_percpu_data(struct console *con);
+static void cons_atomic_flush(void);
+static void cons_wake_threads(void);

static void __init set_percpu_data_ready(void)
{
@@ -2270,17 +2272,21 @@ asmlinkage int vprintk_emit(int facility

printed_len = vprintk_store(facility, level, dev_info, fmt, args);

+ /*
+ * The caller may be holding system-critical or
+ * timing-sensitive locks. Disable preemption during
+ * printing of all remaining records to all consoles so that
+ * this context can return as soon as possible. Hopefully
+ * another printk() caller will take over the printing.
+ */
+ preempt_disable();
+
+ /* Flush the non-BKL consoles if required */
+ cons_atomic_flush();
+
/* If called from the scheduler, we can not call up(). */
if (!in_sched) {
/*
- * The caller may be holding system-critical or
- * timing-sensitive locks. Disable preemption during
- * printing of all remaining records to all consoles so that
- * this context can return as soon as possible. Hopefully
- * another printk() caller will take over the printing.
- */
- preempt_disable();
- /*
* Try to acquire and then immediately release the console
* semaphore. The release will print out buffers. With the
* spinning variant, this context tries to take over the
@@ -2288,9 +2294,11 @@ asmlinkage int vprintk_emit(int facility
*/
if (console_trylock_spinning())
console_unlock();
- preempt_enable();
}

+ preempt_enable();
+
+ cons_wake_threads();
wake_up_klogd();
return printed_len;
}
--- a/kernel/printk/printk_nobkl.c
+++ b/kernel/printk/printk_nobkl.c
@@ -1231,6 +1231,222 @@ static inline void cons_kthread_wake(str
}

/**
+ * cons_irq_work - irq work to wake printk thread
+ * @irq_work: The irq work to operate on
+ */
+static void cons_irq_work(struct irq_work *irq_work)
+{
+ struct console *con = container_of(irq_work, struct console, irq_work);
+
+ cons_kthread_wake(con);
+}
+
+/**
+ * cons_wake_threads - Wake up printing threads
+ */
+static void cons_wake_threads(void)
+{
+ struct console *con;
+ int cookie;
+
+ cookie = srcu_read_lock(&console_srcu);
+ for_each_console_srcu(con) {
+ if (con->kthread && !atomic_read(&con->kthread_running))
+ irq_work_queue(&con->irq_work);
+ }
+ srcu_read_unlock(&console_srcu, cookie);
+}
+
+/**
+ * struct cons_cpu_state - Per CPU printk context state
+ * @prio: The current context priority level
+ * @nesting: Per priority nest counter
+ */
+struct cons_cpu_state {
+ enum cons_prio prio;
+ int nesting[CONS_PRIO_MAX];
+};
+
+static DEFINE_PER_CPU(struct cons_cpu_state, cons_pcpu_state);
+static struct cons_cpu_state early_cons_pcpu_state __initdata;
+
+/**
+ * cons_get_cpu_state - Get the per CPU console state pointer
+ *
+ * Returns either a pointer to the per CPU state of the current CPU or to
+ * the init data state during early boot.
+ */
+static __ref struct cons_cpu_state *cons_get_cpu_state(void)
+{
+ if (!printk_percpu_data_ready())
+ return &early_cons_pcpu_state;
+ else
+ return this_cpu_ptr(&cons_pcpu_state);
+}
+
+/**
+ * cons_get_wctxt - Get the write context for atomic printing
+ * @con: Console to operate on
+ * @prio: Priority of the context
+ *
+ * Returns either the per CPU context or the builtin context for
+ * early boot.
+ */
+static struct cons_write_context *cons_get_wctxt(struct console *con,
+ enum cons_prio prio)
+{
+ if (!con->pcpu_data)
+ return &con->ctxt_data.wctxt[prio];
+
+ return &this_cpu_ptr(con->pcpu_data)->wctxt[prio];
+}
+
+/**
+ * cons_atomic_try_acquire - Try to acquire the console for atomic printing
+ * @con: The console to acquire
+ * @ctxt: The console context instance to work on
+ * @prio: The priority of the current context
+ */
+static bool cons_atomic_try_acquire(struct console *con, struct cons_context *ctxt,
+ enum cons_prio prio)
+{
+ memset(ctxt, 0, sizeof(*ctxt));
+ ctxt->console = con;
+ ctxt->spinwait_max_us = 2000;
+ ctxt->prio = prio;
+ ctxt->spinwait = 1;
+
+ /* Try to acquire it directly or via a friendly handover */
+ if (cons_try_acquire(ctxt))
+ return true;
+
+ /* Investigate whether a hostile takeover is due */
+ if (ctxt->old_state.cur_prio >= prio)
+ return false;
+
+ ctxt->hostile = 1;
+ return cons_try_acquire(ctxt);
+}
+
+/**
+ * cons_atomic_flush_one - Flush one console in atomic mode
+ * @con: The console to flush
+ * @prio: The priority of the current context
+ */
+static void cons_atomic_flush_one(struct console *con, enum cons_prio prio)
+{
+ struct cons_write_context *wctxt = cons_get_wctxt(con, prio);
+ struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
+
+ if (!cons_atomic_try_acquire(con, ctxt, prio))
+ return;
+
+ do {
+ /*
+ * cons_emit_record() returns false when the console was
+ * handed over or taken over. In both cases the context is
+ * no longer valid.
+ */
+ if (!cons_emit_record(wctxt))
+ return;
+ } while (ctxt->backlog);
+
+ cons_release(ctxt);
+}
+
+/**
+ * cons_atomic_flush - Flush consoles in atomic mode if required
+ */
+static void cons_atomic_flush(void)
+{
+ struct cons_cpu_state *cpu_state;
+ struct console *con;
+ int cookie;
+
+ cpu_state = cons_get_cpu_state();
+
+ /*
+ * Let the outermost write of this priority print. This avoids
+ * nasty hackery for nested WARN() where the printing itself
+ * generates one.
+ *
+ * cpu_state->prio <= CONS_PRIO_NORMAL is not subject to nesting
+ * and it can fall through for early boot and for consoles which do
+ * not have a kthread (yet). For simplicity's sake just fall through.
+ */
+ if (cpu_state->prio > CONS_PRIO_NORMAL &&
+ cpu_state->nesting[cpu_state->prio] != 1)
+ return;
+
+ cookie = srcu_read_lock(&console_srcu);
+ for_each_console_srcu(con) {
+ if (!con->write_atomic)
+ continue;
+
+ if (cpu_state->prio > CONS_PRIO_NORMAL || !con->kthread)
+ cons_atomic_flush_one(con, cpu_state->prio);
+ }
+ srcu_read_unlock(&console_srcu, cookie);
+}
+
+/**
+ * cons_atomic_enter - Enter a context which enforces atomic printing
+ * @prio: Priority of the context
+ *
+ * Returns: The previous priority which needs to be fed into
+ * the corresponding cons_atomic_exit()
+ */
+enum cons_prio cons_atomic_enter(enum cons_prio prio)
+{
+ struct cons_cpu_state *cpu_state;
+ enum cons_prio prev_prio;
+
+ migrate_disable();
+ cpu_state = cons_get_cpu_state();
+
+ prev_prio = cpu_state->prio;
+ if (prev_prio < prio)
+ cpu_state->prio = prio;
+
+ /*
+ * Increment the nesting on @cpu_state->prio so a WARN()
+ * nested into a panic printout does not attempt to
+ * scribble state.
+ */
+ cpu_state->nesting[cpu_state->prio]++;
+
+ return prev_prio;
+}
+
+/**
+ * cons_atomic_exit - Exit a context which enforces atomic printing
+ * @prio: Priority of the context to leave
+ * @prev_prio: Priority of the previous context for restore
+ *
+ * @prev_prio is the priority returned by the corresponding cons_atomic_enter().
+ */
+void cons_atomic_exit(enum cons_prio prio, enum cons_prio prev_prio)
+{
+ struct cons_cpu_state *cpu_state;
+
+ cpu_state = cons_get_cpu_state();
+
+ /*
+ * Undo the nesting of cons_atomic_enter() at the CPU state
+ * priority.
+ */
+ cpu_state->nesting[cpu_state->prio]--;
+
+ /*
+ * Restore the previous priority which was returned by
+ * cons_atomic_enter().
+ */
+ cpu_state->prio = prev_prio;
+
+ migrate_enable();
+}
+
+/**
* cons_kthread_stop - Stop a printk thread
* @con: Console to operate on
*/
@@ -1321,6 +1537,7 @@ static void cons_nobkl_init(struct conso
cons_alloc_percpu_data(con);
cons_forward_sequence(con);
rcuwait_init(&con->rcuwait);
+ init_irq_work(&con->irq_work, cons_irq_work);
cons_kthread_create(con);
cons_state_set(con, STATE_REAL, &state);
cons_kthread_wake(con);


2022-09-27 14:56:17

by John Ogness

Subject: Re: [patch RFC 28/29] printk: Provide functions for atomic write enforcement

Below is a fix that was used for the LPC2022 demo so that atomic
printing occurs in NMI context.

On 2022-09-11, Thomas Gleixner <[email protected]> wrote:
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -1072,6 +1072,8 @@ static inline void log_buf_add_cpu(void)
> #endif /* CONFIG_SMP */
>
> static void cons_alloc_percpu_data(struct console *con);
> +static void cons_atomic_flush(void);
> +static void cons_wake_threads(void);
>
> static void __init set_percpu_data_ready(void)
> {
> @@ -2270,17 +2272,21 @@ asmlinkage int vprintk_emit(int facility
>
> printed_len = vprintk_store(facility, level, dev_info, fmt, args);
>
> + /*
> + * The caller may be holding system-critical or
> + * timing-sensitive locks. Disable preemption during
> + * printing of all remaining records to all consoles so that
> + * this context can return as soon as possible. Hopefully
> + * another printk() caller will take over the printing.
> + */
> + preempt_disable();
> +
> + /* Flush the non-BKL consoles if required */
> + cons_atomic_flush();
> +
> /* If called from the scheduler, we can not call up(). */
> if (!in_sched) {
> /*
> - * The caller may be holding system-critical or
> - * timing-sensitive locks. Disable preemption during
> - * printing of all remaining records to all consoles so that
> - * this context can return as soon as possible. Hopefully
> - * another printk() caller will take over the printing.
> - */
> - preempt_disable();
> - /*
> * Try to acquire and then immediately release the console
> * semaphore. The release will print out buffers. With the
> * spinning variant, this context tries to take over the
> @@ -2288,9 +2294,11 @@ asmlinkage int vprintk_emit(int facility
> */
> if (console_trylock_spinning())
> console_unlock();
> - preempt_enable();
> }
>
> + preempt_enable();
> +
> + cons_wake_threads();
> wake_up_klogd();
> return printed_len;
> }

Atomic flushing also needs to occur for the cases where deferred
printing is used (NMI or explicit defer). The following hunk will take
care of that.

@@ -3648,6 +3658,8 @@ void wake_up_klogd(void)

void defer_console_output(void)
{
+ cons_atomic_flush();
+
/*
* New messages may have been added directly to the ringbuffer
* using vprintk_store(), so wake any waiters as well.
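
For reference, the NMI path that ends up in defer_console_output()
looks roughly like this (simplified from kernel/printk/printk_safe.c
of that era; shown only for context, not part of this series):

	asmlinkage int vprintk(const char *fmt, va_list args)
	{
		/*
		 * Use the main logbuf even in NMI, but avoid calling
		 * console drivers that might have their own locks.
		 */
		if (this_cpu_read(printk_context) || in_nmi()) {
			int len;

			len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL,
					    fmt, args);
			defer_console_output();	/* now flushes atomically */
			return len;
		}

		/* No obstacles, print directly */
		return vprintk_default(fmt, args);
	}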

John Ogness

2022-09-27 15:03:37

by John Ogness

Subject: Re: [patch RFC 28/29] printk: Provide functions for atomic write enforcement

Below is a fix that was used for the LPC2022 demo so that an
interrupting context does not clobber the console state when the
interrupted context was the console owner.

On 2022-09-11, Thomas Gleixner <[email protected]> wrote:
> --- a/kernel/printk/printk_nobkl.c
> +++ b/kernel/printk/printk_nobkl.c
> +/**
> + * cons_atomic_flush_one - Flush one console in atomic mode
> + * @con: The console to flush
> + * @prio: The priority of the current context
> + */
> +static void cons_atomic_flush_one(struct console *con, enum cons_prio prio)
> +{
> + struct cons_write_context *wctxt = cons_get_wctxt(con, prio);
> + struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);

@ctxt is per-console, per-cpu, and per-prio. But it is *not*
per-context. If a CPU at EMERGENCY priority is interrupted by another
context that calls into printk() while the first context holds the
console lock, the following cons_atomic_try_acquire() will use the
same ctxt and clobber the values used by the first context. This
corrupts the state information of the first context and results in
situations where the console is never unlocked because the owner's
state was corrupted.
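
To illustrate, a hypothetical interleaving on one CPU, with both
contexts at the same elevated priority and therefore mapping to the
same wctxt:

	task context                      interrupting context
	------------                      --------------------
	cons_atomic_flush_one()
	  cons_atomic_try_acquire()
	    memset(ctxt, 0, ...)
	    acquires console
	                                  cons_atomic_flush_one()
	                                    cons_atomic_try_acquire()
	                                      memset(ctxt, 0, ...)
	                                      clobbers the owner's ctxt
	  cons_emit_record()
	    operates on corrupted state,
	    console may never be unlocked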

> + if (!cons_atomic_try_acquire(con, ctxt, prio))
> + return;
> +
> + do {
> + /*
> + * cons_emit_record() returns false when the console was
> + * handed over or taken over. In both cases the context is
> + * no longer valid.
> + */
> + if (!cons_emit_record(wctxt))
> + return;
> + } while (ctxt->backlog);
> +
> + cons_release(ctxt);
> +}

Since it is not desirable to actively print from nested contexts
of the same priority, add an @in_use flag to wctxt. This allows
detecting the situation and avoiding active printing that would
corrupt the state.

Applying the following patch on top does just that:

diff --git a/include/linux/console.h b/include/linux/console.h
index e6bde0e879fc..1b3028cce3f3 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -317,6 +317,7 @@ struct cons_context {
* @len: Length to write
* @pos: Current write position in @outbuf
* @unsafe: Invoked in unsafe state due to force takeover
+ * @in_use: The context is in use
*/
struct cons_write_context {
struct cons_context __private ctxt;
@@ -324,6 +325,7 @@ struct cons_write_context {
unsigned int len;
unsigned int pos;
bool unsafe;
+ bool in_use;
};

#define CONS_MAX_NEST_LVL 8
diff --git a/kernel/printk/printk_nobkl.c b/kernel/printk/printk_nobkl.c
index 736f71361799..b513b2fb2c2d 100644
--- a/kernel/printk/printk_nobkl.c
+++ b/kernel/printk/printk_nobkl.c
@@ -1338,8 +1338,12 @@ static void cons_atomic_flush_one(struct console *con, enum cons_prio prio)
struct cons_write_context *wctxt = cons_get_wctxt(con, prio);
struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);

- if (!cons_atomic_try_acquire(con, ctxt, prio))
+ if (wctxt->in_use)
return;
+ wctxt->in_use = true;
+
+ if (!cons_atomic_try_acquire(con, ctxt, prio))
+ goto out;

do {
/*
@@ -1348,10 +1352,12 @@ static void cons_atomic_flush_one(struct console *con, enum cons_prio prio)
* no longer valid.
*/
if (!cons_emit_record(wctxt))
- return;
+ goto out;
} while (ctxt->backlog);

cons_release(ctxt);
+out:
+ wctxt->in_use = false;
}

/**

John Ogness

2022-09-27 15:45:46

by John Ogness

Subject: Re: [patch RFC 28/29] printk: Provide functions for atomic write enforcement

Below is a fix that was used for the LPC2022 demo to avoid unnecessarily
performing the console_lock/console_unlock dance.

Add a new global boolean @have_bkl_console to quickly identify
whether any legacy (BKL) consoles are registered. If there are none,
the console_lock/console_unlock dance can be skipped. The following
patch does this and can be applied on top.

--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1830,6 +1830,7 @@ static struct lockdep_map console_owner_dep_map = {
static DEFINE_RAW_SPINLOCK(console_owner_lock);
static struct task_struct *console_owner;
static bool console_waiter;
+static bool have_bkl_console;

/**
* console_lock_spinning_enable - mark beginning of code where another
@@ -2285,7 +2286,7 @@ asmlinkage int vprintk_emit(int facility, int level,
cons_atomic_flush();

/* If called from the scheduler, we can not call up(). */
- if (!in_sched) {
+ if (!in_sched && have_bkl_console) {
/*
* Try to acquire and then immediately release the console
* semaphore. The release will print out buffers. With the
@@ -2575,7 +2576,7 @@ void resume_console(void)
*/
static int console_cpu_notify(unsigned int cpu)
{
- if (!cpuhp_tasks_frozen) {
+ if (!cpuhp_tasks_frozen && have_bkl_console) {
/* If trylock fails, someone else is doing the printing */
if (console_trylock())
console_unlock();
@@ -3023,6 +3024,9 @@ void console_unblank(void)
{
struct console *c;

+ if (!have_bkl_console)
+ return;
+
/*
* console_unblank can no longer be called in interrupt context unless
* oops_in_progress is set to 1..
@@ -3052,6 +3056,9 @@ void console_unblank(void)
*/
void console_flush_on_panic(enum con_flush_mode mode)
{
+ if (!have_bkl_console)
+ return;
+
/*
* If someone else is holding the console lock, trylock will fail
* and may_schedule may be set. Ignore and proceed to unlock so
@@ -3311,6 +3318,10 @@ void register_console(struct console *newcon)
/* Initialize the nobkl data in @newcon */
cons_nobkl_init(newcon);

+ /* Is a legacy (BKL) console being registered? */
+ if (!(newcon->flags & CON_NO_BKL))
+ have_bkl_console = true;
+
/*
* Put this console in the list and keep the referred driver at the
* head of the list.
@@ -3603,7 +3613,7 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work)
{
int pending = this_cpu_xchg(printk_pending, 0);

- if (pending & PRINTK_PENDING_OUTPUT) {
+ if (have_bkl_console && (pending & PRINTK_PENDING_OUTPUT)) {
/* If trylock fails, someone else is doing the printing */
if (console_trylock())
console_unlock();

John Ogness

2022-09-27 15:49:13

by John Ogness

Subject: Re: [patch RFC 28/29] printk: Provide functions for atomic write enforcement

Below is a fix that was used for the LPC2022 demo so that after a
warning, the atomic printing context is not responsible for printing any
non-emergency backlog that came after the warning.

On 2022-09-11, Thomas Gleixner <[email protected]> wrote:
> --- a/kernel/printk/printk_nobkl.c
> +++ b/kernel/printk/printk_nobkl.c
> + * cons_atomic_flush_one - Flush one console in atomic mode
> + * @con: The console to flush
> + * @prio: The priority of the current context
> + */
> +static void cons_atomic_flush_one(struct console *con, enum cons_prio prio)
> +{
> + struct cons_write_context *wctxt = cons_get_wctxt(con, prio);
> + struct cons_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
> +
> + if (!cons_atomic_try_acquire(con, ctxt, prio))
> + return;
> +
> + do {
> + /*
> + * cons_emit_record() returns false when the console was
> + * handed over or taken over. In both cases the context is
> + * no longer valid.
> + */
> + if (!cons_emit_record(wctxt))
> + return;

If the CPU is no longer in an elevated priority and kthreads are
available, abort the atomic printing and let the kthreads take over. Add
the following break condition here:

/*
* If the CPU is no longer in an elevated priority, let the
* kthreads take over, if they are available.
*/
if (prio <= CONS_PRIO_NORMAL && con->kthread)
break;


> + } while (ctxt->backlog);
> +
> + cons_release(ctxt);
> +}
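
For reference, the resulting print loop would then read roughly as
follows (a reconstruction combining the quoted code with the added
break condition, on top of the @in_use fix from the sibling reply;
not a posted hunk):

	do {
		/*
		 * cons_emit_record() returns false when the console was
		 * handed over or taken over. In both cases the context is
		 * no longer valid.
		 */
		if (!cons_emit_record(wctxt))
			goto out;

		/*
		 * If the CPU is no longer in an elevated priority, let
		 * the kthreads take over, if they are available.
		 */
		if (prio <= CONS_PRIO_NORMAL && con->kthread)
			break;
	} while (ctxt->backlog);

	cons_release(ctxt);
out:
	wctxt->in_use = false;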

John Ogness