Hello,
This series removes @logbuf_lock, exposing the ringbuffer locklessly
to both readers and writers. And since @logbuf_lock was protecting
much more than just the ringbuffer, this series clarifies and cleans
up the various protections using comments, lockless accessors,
atomic types, and a new finer-grained @syslog_lock.
This series takes into account the feedback from [0], which was a
series that was partially merged in 5.11.
The first few patches (1-5) are general cleanups to help simplify
later patches. They are probably important enough to stand alone.
John Ogness
[0] https://lkml.kernel.org/r/[email protected]
John Ogness (12):
printk: kmsg_dump: remove unused fields
printk: refactor kmsg_dump_get_buffer()
printk: consolidate kmsg_dump_get_buffer/syslog_print_all code
printk: define CONSOLE_LOG_MAX in printk.h
printk: use seqcount_latch for clear_seq
printk: use atomic64_t for devkmsg_user.seq
printk: add syslog_lock
printk: introduce a kmsg_dump iterator
um: synchronize kmsg_dumper
hv: synchronize kmsg_dumper
printk: remove logbuf_lock
printk: kmsg_dump: remove _nolock() variants
arch/powerpc/kernel/nvram_64.c | 12 +-
arch/powerpc/platforms/powernv/opal-kmsg.c | 3 +-
arch/powerpc/xmon/xmon.c | 6 +-
arch/um/kernel/kmsg_dump.c | 13 +-
drivers/hv/vmbus_drv.c | 17 +-
drivers/mtd/mtdoops.c | 5 +-
fs/pstore/platform.c | 5 +-
include/linux/kmsg_dump.h | 52 +--
include/linux/printk.h | 1 +
kernel/debug/kdb/kdb_main.c | 10 +-
kernel/printk/internal.h | 4 +-
kernel/printk/printk.c | 427 ++++++++++-----------
kernel/printk/printk_safe.c | 29 +-
13 files changed, 281 insertions(+), 303 deletions(-)
--
2.20.1
struct kmsg_dumper still contains some fields that were used to
iterate the old ringbuffer. They are no longer used. Remove them
and update the struct documentation.
Signed-off-by: John Ogness <[email protected]>
---
include/linux/kmsg_dump.h | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index 3378bcbe585e..da98846098b5 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -36,6 +36,9 @@ enum kmsg_dump_reason {
* through the record iterator
* @max_reason: filter for highest reason number that should be dumped
* @registered: Flag that specifies if this is already registered
+ * @active: Flag that specifies if this is currently dumping
+ * @cur_seq: The record to dump (private)
+ * @next_seq: The first record of the next block (private)
*/
struct kmsg_dumper {
struct list_head list;
@@ -45,8 +48,6 @@ struct kmsg_dumper {
bool registered;
/* private state of the kmsg iterator */
- u32 cur_idx;
- u32 next_idx;
u64 cur_seq;
u64 next_seq;
};
--
2.20.1
kmsg_dump_get_buffer() requires nearly the same logic as
syslog_print_all(), but uses different variable names and
does not make use of the ringbuffer loop macros. Modify
kmsg_dump_get_buffer() so that the implementation is as similar
to syslog_print_all() as possible.
A follow-up commit will move this common logic into a
separate helper function.
Signed-off-by: John Ogness <[email protected]>
---
include/linux/kmsg_dump.h | 2 +-
kernel/printk/printk.c | 57 ++++++++++++++++++++-------------------
2 files changed, 30 insertions(+), 29 deletions(-)
diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index da98846098b5..76cc4122d08e 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -62,7 +62,7 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
char *line, size_t size, size_t *len);
bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
- char *buf, size_t size, size_t *len);
+ char *buf, size_t size, size_t *len_out);
void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index c55cd1820689..1929aa372e7f 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3407,7 +3407,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
* read.
*/
bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
- char *buf, size_t size, size_t *len)
+ char *buf, size_t size, size_t *len_out)
{
struct printk_info info;
unsigned int line_count;
@@ -3415,12 +3415,10 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
unsigned long flags;
u64 seq;
u64 next_seq;
- size_t l = 0;
+ size_t len = 0;
bool ret = false;
bool time = printk_time;
- prb_rec_init_rd(&r, &info, buf, size);
-
if (!dumper->active || !buf || !size)
goto out;
@@ -3436,48 +3434,51 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
goto out;
}
- /* calculate length of entire buffer */
- seq = dumper->cur_seq;
- while (prb_read_valid_info(prb, seq, &info, &line_count)) {
- if (r.info->seq >= dumper->next_seq)
+ /*
+ * Find first record that fits, including all following records,
+ * into the user-provided buffer for this dump.
+ */
+
+ prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) {
+ if (info.seq >= dumper->next_seq)
break;
- l += get_record_print_text_size(&info, line_count, syslog, time);
- seq = r.info->seq + 1;
+ len += get_record_print_text_size(&info, line_count, syslog, time);
}
- /* move first record forward until length fits into the buffer */
- seq = dumper->cur_seq;
- while (l >= size && prb_read_valid_info(prb, seq,
- &info, &line_count)) {
- if (r.info->seq >= dumper->next_seq)
+ /*
+ * Move first record forward until length fits into the buffer. Ignore
+ * newest messages that were not counted in the above cycle. Messages
+ * might appear and get lost in the meantime. This is the best effort
+ * that prevents an infinite loop.
+ */
+ prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) {
+ if (len < size || info.seq >= dumper->next_seq)
break;
- l -= get_record_print_text_size(&info, line_count, syslog, time);
- seq = r.info->seq + 1;
+ len -= get_record_print_text_size(&info, line_count, syslog, time);
}
- /* last message in next interation */
+ /* Keep track of the last message for the next iteration. */
next_seq = seq;
- /* actually read text into the buffer now */
- l = 0;
- while (prb_read_valid(prb, seq, &r)) {
+ prb_rec_init_rd(&r, &info, buf, size);
+
+ len = 0;
+ prb_for_each_record(seq, prb, seq, &r) {
if (r.info->seq >= dumper->next_seq)
break;
- l += record_print_text(&r, syslog, time);
-
- /* adjust record to store to remaining buffer space */
- prb_rec_init_rd(&r, &info, buf + l, size - l);
+ len += record_print_text(&r, syslog, time);
- seq = r.info->seq + 1;
+ /* Adjust record to store to remaining buffer space. */
+ prb_rec_init_rd(&r, &info, buf + len, size - len);
}
dumper->next_seq = next_seq;
ret = true;
logbuf_unlock_irqrestore(flags);
out:
- if (len)
- *len = l;
+ if (len_out)
+ *len_out = len;
return ret;
}
EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
--
2.20.1
kmsg_dump_rewind_nolock() locklessly reads @clear_seq. However,
this is not done atomically. Since @clear_seq is 64-bit, this
cannot be an atomic operation for all platforms. Therefore, use
a seqcount_latch to allow readers to always read a consistent
value.
Signed-off-by: John Ogness <[email protected]>
---
kernel/printk/printk.c | 58 ++++++++++++++++++++++++++++++++++++------
1 file changed, 50 insertions(+), 8 deletions(-)
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 5faf9c0db171..d41fd3306f4e 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -402,8 +402,21 @@ static u64 console_seq;
static u64 exclusive_console_stop_seq;
static unsigned long console_dropped;
-/* the next printk record to read after the last 'clear' command */
-static u64 clear_seq;
+struct latched_seq {
+ seqcount_latch_t latch;
+ u64 val[2];
+};
+
+/*
+ * The next printk record to read after the last 'clear' command. There are
+ * two copies (updated with seqcount_latch) so that reads can locklessly
+ * access a valid value. Writers are synchronized by @logbuf_lock.
+ */
+static struct latched_seq clear_seq = {
+ .latch = SEQCNT_LATCH_ZERO(clear_seq.latch),
+ .val[0] = 0,
+ .val[1] = 0,
+};
#ifdef CONFIG_PRINTK_CALLER
#define PREFIX_MAX 48
@@ -452,6 +465,31 @@ bool printk_percpu_data_ready(void)
return __printk_percpu_data_ready;
}
+/* Must be called under logbuf_lock. */
+static void latched_seq_write(struct latched_seq *ls, u64 val)
+{
+ raw_write_seqcount_latch(&ls->latch);
+ ls->val[0] = val;
+ raw_write_seqcount_latch(&ls->latch);
+ ls->val[1] = val;
+}
+
+/* Can be called from any context. */
+static u64 latched_seq_read_nolock(struct latched_seq *ls)
+{
+ unsigned int seq;
+ unsigned int idx;
+ u64 val;
+
+ do {
+ seq = raw_read_seqcount_latch(&ls->latch);
+ idx = seq & 0x1;
+ val = ls->val[idx];
+ } while (read_seqcount_latch_retry(&ls->latch, seq));
+
+ return val;
+}
+
/* Return log buffer address */
char *log_buf_addr_get(void)
{
@@ -797,7 +835,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
* like issued by 'dmesg -c'. Reading /dev/kmsg itself
* changes no global state, and does not clear anything.
*/
- user->seq = clear_seq;
+ user->seq = latched_seq_read_nolock(&clear_seq);
break;
case SEEK_END:
/* after the last record */
@@ -955,6 +993,9 @@ void log_buf_vmcoreinfo_setup(void)
VMCOREINFO_SIZE(atomic_long_t);
VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter);
+
+ VMCOREINFO_STRUCT_SIZE(latched_seq);
+ VMCOREINFO_OFFSET(latched_seq, val);
}
#endif
@@ -1543,7 +1584,8 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
* Find first record that fits, including all following records,
* into the user-provided buffer for this dump.
*/
- seq = find_first_fitting_seq(clear_seq, -1, size, &info, true, time);
+ seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1,
+ size, &info, true, time);
prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
@@ -1570,7 +1612,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
}
if (clear)
- clear_seq = seq;
+ latched_seq_write(&clear_seq, seq);
logbuf_unlock_irq();
kfree(text);
@@ -1580,7 +1622,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
static void syslog_clear(void)
{
logbuf_lock_irq();
- clear_seq = prb_next_seq(prb);
+ latched_seq_write(&clear_seq, prb_next_seq(prb));
logbuf_unlock_irq();
}
@@ -3311,7 +3353,7 @@ void kmsg_dump(enum kmsg_dump_reason reason)
dumper->active = true;
logbuf_lock_irqsave(flags);
- dumper->cur_seq = clear_seq;
+ dumper->cur_seq = latched_seq_read_nolock(&clear_seq);
dumper->next_seq = prb_next_seq(prb);
logbuf_unlock_irqrestore(flags);
@@ -3504,7 +3546,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
*/
void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper)
{
- dumper->cur_seq = clear_seq;
+ dumper->cur_seq = latched_seq_read_nolock(&clear_seq);
dumper->next_seq = prb_next_seq(prb);
}
--
2.20.1
@user->seq is indirectly protected by @logbuf_lock. Once @logbuf_lock
is removed, @user->seq will no longer be safe from an atomicity point
of view.
In preparation for the removal of @logbuf_lock, change it to
atomic64_t to provide this safety.
Signed-off-by: John Ogness <[email protected]>
---
kernel/printk/printk.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index d41fd3306f4e..fe3df16315c0 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -657,7 +657,7 @@ static ssize_t msg_print_ext_body(char *buf, size_t size,
/* /dev/kmsg - userspace message inject/listen interface */
struct devkmsg_user {
- u64 seq;
+ atomic64_t seq;
struct ratelimit_state rs;
struct mutex lock;
char buf[CONSOLE_EXT_LOG_MAX];
@@ -759,7 +759,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
return ret;
logbuf_lock_irq();
- if (!prb_read_valid(prb, user->seq, r)) {
+ if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) {
if (file->f_flags & O_NONBLOCK) {
ret = -EAGAIN;
logbuf_unlock_irq();
@@ -768,15 +768,15 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
logbuf_unlock_irq();
ret = wait_event_interruptible(log_wait,
- prb_read_valid(prb, user->seq, r));
+ prb_read_valid(prb, atomic64_read(&user->seq), r));
if (ret)
goto out;
logbuf_lock_irq();
}
- if (user->seq < prb_first_valid_seq(prb)) {
+ if (atomic64_read(&user->seq) < prb_first_valid_seq(prb)) {
/* our last seen message is gone, return error and reset */
- user->seq = prb_first_valid_seq(prb);
+ atomic64_set(&user->seq, prb_first_valid_seq(prb));
ret = -EPIPE;
logbuf_unlock_irq();
goto out;
@@ -787,7 +787,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
&r->text_buf[0], r->info->text_len,
&r->info->dev_info);
- user->seq = r->info->seq + 1;
+ atomic64_set(&user->seq, r->info->seq + 1);
logbuf_unlock_irq();
if (len > count) {
@@ -827,7 +827,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
switch (whence) {
case SEEK_SET:
/* the first record */
- user->seq = prb_first_valid_seq(prb);
+ atomic64_set(&user->seq, prb_first_valid_seq(prb));
break;
case SEEK_DATA:
/*
@@ -835,11 +835,11 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
* like issued by 'dmesg -c'. Reading /dev/kmsg itself
* changes no global state, and does not clear anything.
*/
- user->seq = latched_seq_read_nolock(&clear_seq);
+ atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq));
break;
case SEEK_END:
/* after the last record */
- user->seq = prb_next_seq(prb);
+ atomic64_set(&user->seq, prb_next_seq(prb));
break;
default:
ret = -EINVAL;
@@ -859,9 +859,9 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait)
poll_wait(file, &log_wait, wait);
logbuf_lock_irq();
- if (prb_read_valid(prb, user->seq, NULL)) {
+ if (prb_read_valid(prb, atomic64_read(&user->seq), NULL)) {
/* return error when data has vanished underneath us */
- if (user->seq < prb_first_valid_seq(prb))
+ if (atomic64_read(&user->seq) < prb_first_valid_seq(prb))
ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
else
ret = EPOLLIN|EPOLLRDNORM;
@@ -900,7 +900,7 @@ static int devkmsg_open(struct inode *inode, struct file *file)
&user->text_buf[0], sizeof(user->text_buf));
logbuf_lock_irq();
- user->seq = prb_first_valid_seq(prb);
+ atomic64_set(&user->seq, prb_first_valid_seq(prb));
logbuf_unlock_irq();
file->private_data = user;
--
2.20.1
The global variables @syslog_seq, @syslog_partial, @syslog_time
and write access to @clear_seq are protected by @logbuf_lock.
Once @logbuf_lock is removed, these variables will need their
own synchronization method. Introduce @syslog_lock for this
purpose.
Signed-off-by: John Ogness <[email protected]>
---
kernel/printk/printk.c | 34 ++++++++++++++++++++++++++++++----
1 file changed, 30 insertions(+), 4 deletions(-)
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index fe3df16315c0..506d334c065e 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -390,8 +390,12 @@ DEFINE_RAW_SPINLOCK(logbuf_lock);
printk_safe_exit_irqrestore(flags); \
} while (0)
+/* syslog_lock protects syslog_* variables and write access to clear_seq. */
+static DEFINE_RAW_SPINLOCK(syslog_lock);
+
#ifdef CONFIG_PRINTK
DECLARE_WAIT_QUEUE_HEAD(log_wait);
+/* All 3 protected by @syslog_lock. */
/* the next printk record to read by syslog(READ) or /proc/kmsg */
static u64 syslog_seq;
static size_t syslog_partial;
@@ -410,7 +414,7 @@ struct latched_seq {
/*
* The next printk record to read after the last 'clear' command. There are
* two copies (updated with seqcount_latch) so that reads can locklessly
- * access a valid value. Writers are synchronized by @logbuf_lock.
+ * access a valid value. Writers are synchronized by @syslog_lock.
*/
static struct latched_seq clear_seq = {
.latch = SEQCNT_LATCH_ZERO(clear_seq.latch),
@@ -465,7 +469,7 @@ bool printk_percpu_data_ready(void)
return __printk_percpu_data_ready;
}
-/* Must be called under logbuf_lock. */
+/* Must be called under syslog_lock. */
static void latched_seq_write(struct latched_seq *ls, u64 val)
{
raw_write_seqcount_latch(&ls->latch);
@@ -1515,7 +1519,9 @@ static int syslog_print(char __user *buf, int size)
size_t skip;
logbuf_lock_irq();
+ raw_spin_lock(&syslog_lock);
if (!prb_read_valid(prb, syslog_seq, &r)) {
+ raw_spin_unlock(&syslog_lock);
logbuf_unlock_irq();
break;
}
@@ -1545,6 +1551,7 @@ static int syslog_print(char __user *buf, int size)
syslog_partial += n;
} else
n = 0;
+ raw_spin_unlock(&syslog_lock);
logbuf_unlock_irq();
if (!n)
@@ -1611,8 +1618,11 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
break;
}
- if (clear)
+ if (clear) {
+ raw_spin_lock(&syslog_lock);
latched_seq_write(&clear_seq, seq);
+ raw_spin_unlock(&syslog_lock);
+ }
logbuf_unlock_irq();
kfree(text);
@@ -1622,7 +1632,9 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
static void syslog_clear(void)
{
logbuf_lock_irq();
+ raw_spin_lock(&syslog_lock);
latched_seq_write(&clear_seq, prb_next_seq(prb));
+ raw_spin_unlock(&syslog_lock);
logbuf_unlock_irq();
}
@@ -1631,6 +1643,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
bool clear = false;
static int saved_console_loglevel = LOGLEVEL_DEFAULT;
int error;
+ u64 seq;
error = check_syslog_permissions(type, source);
if (error)
@@ -1648,8 +1661,14 @@ int do_syslog(int type, char __user *buf, int len, int source)
return 0;
if (!access_ok(buf, len))
return -EFAULT;
+
+ /* Get a consistent copy of @syslog_seq. */
+ raw_spin_lock_irq(&syslog_lock);
+ seq = syslog_seq;
+ raw_spin_unlock_irq(&syslog_lock);
+
error = wait_event_interruptible(log_wait,
- prb_read_valid(prb, syslog_seq, NULL));
+ prb_read_valid(prb, seq, NULL));
if (error)
return error;
error = syslog_print(buf, len);
@@ -1698,6 +1717,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
/* Number of chars in the log buffer */
case SYSLOG_ACTION_SIZE_UNREAD:
logbuf_lock_irq();
+ raw_spin_lock(&syslog_lock);
if (syslog_seq < prb_first_valid_seq(prb)) {
/* messages are gone, move to first one */
syslog_seq = prb_first_valid_seq(prb);
@@ -1724,6 +1744,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
}
error -= syslog_partial;
}
+ raw_spin_unlock(&syslog_lock);
logbuf_unlock_irq();
break;
/* Size of the log buffer */
@@ -2965,7 +2986,12 @@ void register_console(struct console *newcon)
*/
exclusive_console = newcon;
exclusive_console_stop_seq = console_seq;
+
+ /* Get a consistent copy of @syslog_seq. */
+ raw_spin_lock(&syslog_lock);
console_seq = syslog_seq;
+ raw_spin_unlock(&syslog_lock);
+
logbuf_unlock_irqrestore(flags);
}
console_unlock();
--
2.20.1
CONSOLE_EXT_LOG_MAX for extended console messages is already defined
in printk.h. Define CONSOLE_LOG_MAX there as well so that future
changes can make use of the constant for non-extended console
messages.
Use CONSOLE_LOG_MAX instead of LOG_LINE_MAX + PREFIX_MAX.
Signed-off-by: John Ogness <[email protected]>
---
include/linux/printk.h | 1 +
kernel/printk/printk.c | 14 ++++++--------
2 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/include/linux/printk.h b/include/linux/printk.h
index fe7eb2351610..6d8f844bfdff 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -45,6 +45,7 @@ static inline const char *printk_skip_headers(const char *buffer)
}
#define CONSOLE_EXT_LOG_MAX 8192
+#define CONSOLE_LOG_MAX 1024
/* printk's without a loglevel use this.. */
#define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index ec2174882b8e..5faf9c0db171 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -410,7 +410,7 @@ static u64 clear_seq;
#else
#define PREFIX_MAX 32
#endif
-#define LOG_LINE_MAX (1024 - PREFIX_MAX)
+#define LOG_LINE_MAX (CONSOLE_LOG_MAX - PREFIX_MAX)
#define LOG_LEVEL(v) ((v) & 0x07)
#define LOG_FACILITY(v) ((v) >> 3 & 0xff)
@@ -1463,11 +1463,11 @@ static int syslog_print(char __user *buf, int size)
char *text;
int len = 0;
- text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
+ text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
if (!text)
return -ENOMEM;
- prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX);
+ prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
while (size > 0) {
size_t n;
@@ -1533,7 +1533,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
u64 seq;
bool time;
- text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
+ text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
if (!text)
return -ENOMEM;
@@ -1545,7 +1545,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
*/
seq = find_first_fitting_seq(clear_seq, -1, size, &info, true, time);
- prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX);
+ prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
len = 0;
prb_for_each_record(seq, prb, seq, &r) {
@@ -2173,8 +2173,6 @@ EXPORT_SYMBOL(printk);
#else /* CONFIG_PRINTK */
-#define LOG_LINE_MAX 0
-#define PREFIX_MAX 0
#define printk_time false
#define prb_read_valid(rb, seq, r) false
@@ -2485,7 +2483,7 @@ static inline int can_use_console(void)
void console_unlock(void)
{
static char ext_text[CONSOLE_EXT_LOG_MAX];
- static char text[LOG_LINE_MAX + PREFIX_MAX];
+ static char text[CONSOLE_LOG_MAX];
unsigned long flags;
bool do_cond_resched, retry;
struct printk_info info;
--
2.20.1
Rather than store the iterator information into the registered
kmsg_dump structure, create a separate iterator structure. The
kmsg_dump_iter structure can reside on the stack of the caller,
thus allowing lockless use of the kmsg_dump functions.
This is in preparation for removal of @logbuf_lock.
Signed-off-by: John Ogness <[email protected]>
---
arch/powerpc/kernel/nvram_64.c | 12 +++--
arch/powerpc/platforms/powernv/opal-kmsg.c | 3 +-
arch/powerpc/xmon/xmon.c | 6 +--
arch/um/kernel/kmsg_dump.c | 5 +-
drivers/hv/vmbus_drv.c | 5 +-
drivers/mtd/mtdoops.c | 5 +-
fs/pstore/platform.c | 5 +-
include/linux/kmsg_dump.h | 43 ++++++++-------
kernel/debug/kdb/kdb_main.c | 10 ++--
kernel/printk/printk.c | 63 +++++++++++-----------
10 files changed, 83 insertions(+), 74 deletions(-)
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 532f22637783..1ef55f4b389a 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -73,7 +73,8 @@ static const char *nvram_os_partitions[] = {
};
static void oops_to_nvram(struct kmsg_dumper *dumper,
- enum kmsg_dump_reason reason);
+ enum kmsg_dump_reason reason,
+ struct kmsg_dumper_iter *iter);
static struct kmsg_dumper nvram_kmsg_dumper = {
.dump = oops_to_nvram
@@ -643,7 +644,8 @@ void __init nvram_init_oops_partition(int rtas_partition_exists)
* partition. If that's too much, go back and capture uncompressed text.
*/
static void oops_to_nvram(struct kmsg_dumper *dumper,
- enum kmsg_dump_reason reason)
+ enum kmsg_dump_reason reason,
+ struct kmsg_dumper_iter *iter)
{
struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
static unsigned int oops_count = 0;
@@ -681,13 +683,13 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
return;
if (big_oops_buf) {
- kmsg_dump_get_buffer(dumper, false,
+ kmsg_dump_get_buffer(iter, false,
big_oops_buf, big_oops_buf_sz, &text_len);
rc = zip_oops(text_len);
}
if (rc != 0) {
- kmsg_dump_rewind(dumper);
- kmsg_dump_get_buffer(dumper, false,
+ kmsg_dump_rewind(iter);
+ kmsg_dump_get_buffer(iter, false,
oops_data, oops_data_sz, &text_len);
err_type = ERR_TYPE_KERNEL_PANIC;
oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
diff --git a/arch/powerpc/platforms/powernv/opal-kmsg.c b/arch/powerpc/platforms/powernv/opal-kmsg.c
index 6c3bc4b4da98..ec862846bc82 100644
--- a/arch/powerpc/platforms/powernv/opal-kmsg.c
+++ b/arch/powerpc/platforms/powernv/opal-kmsg.c
@@ -20,7 +20,8 @@
* message, it just ensures that OPAL completely flushes the console buffer.
*/
static void kmsg_dump_opal_console_flush(struct kmsg_dumper *dumper,
- enum kmsg_dump_reason reason)
+ enum kmsg_dump_reason reason,
+ struct kmsg_dumper_iter *iter)
{
/*
* Outside of a panic context the pollers will continue to run,
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 55c43a6c9111..43162b885259 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -3003,7 +3003,7 @@ print_address(unsigned long addr)
static void
dump_log_buf(void)
{
- struct kmsg_dumper dumper = { .active = 1 };
+ struct kmsg_dumper_iter iter = { .active = 1 };
unsigned char buf[128];
size_t len;
@@ -3015,9 +3015,9 @@ dump_log_buf(void)
catch_memory_errors = 1;
sync();
- kmsg_dump_rewind_nolock(&dumper);
+ kmsg_dump_rewind_nolock(&iter);
xmon_start_pagination();
- while (kmsg_dump_get_line_nolock(&dumper, false, buf, sizeof(buf), &len)) {
+ while (kmsg_dump_get_line_nolock(&iter, false, buf, sizeof(buf), &len)) {
buf[len] = '\0';
printf("%s", buf);
}
diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c
index e4abac6c9727..f38349ad00ea 100644
--- a/arch/um/kernel/kmsg_dump.c
+++ b/arch/um/kernel/kmsg_dump.c
@@ -6,7 +6,8 @@
#include <os.h>
static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
- enum kmsg_dump_reason reason)
+ enum kmsg_dump_reason reason,
+ struct kmsg_dumper_iter *iter)
{
static char line[1024];
struct console *con;
@@ -25,7 +26,7 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
return;
printf("kmsg_dump:\n");
- while (kmsg_dump_get_line(dumper, true, line, sizeof(line), &len)) {
+ while (kmsg_dump_get_line(iter, true, line, sizeof(line), &len)) {
line[len] = '\0';
printf("%s", line);
}
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 4fad3e6745e5..fbeddef90941 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1359,7 +1359,8 @@ static void vmbus_isr(void)
* buffer and call into Hyper-V to transfer the data.
*/
static void hv_kmsg_dump(struct kmsg_dumper *dumper,
- enum kmsg_dump_reason reason)
+ enum kmsg_dump_reason reason,
+ struct kmsg_dumper_iter *iter)
{
size_t bytes_written;
phys_addr_t panic_pa;
@@ -1374,7 +1375,7 @@ static void hv_kmsg_dump(struct kmsg_dumper *dumper,
* Write dump contents to the page. No need to synchronize; panic should
* be single-threaded.
*/
- kmsg_dump_get_buffer(dumper, false, hv_panic_page, HV_HYP_PAGE_SIZE,
+ kmsg_dump_get_buffer(iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
&bytes_written);
if (bytes_written)
hyperv_report_panic_msg(panic_pa, bytes_written);
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index 774970bfcf85..6bc2c728adb7 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -267,7 +267,8 @@ static void find_next_position(struct mtdoops_context *cxt)
}
static void mtdoops_do_dump(struct kmsg_dumper *dumper,
- enum kmsg_dump_reason reason)
+ enum kmsg_dump_reason reason,
+ struct kmsg_dumper_iter *iter)
{
struct mtdoops_context *cxt = container_of(dumper,
struct mtdoops_context, dump);
@@ -276,7 +277,7 @@ static void mtdoops_do_dump(struct kmsg_dumper *dumper,
if (reason == KMSG_DUMP_OOPS && !dump_oops)
return;
- kmsg_dump_get_buffer(dumper, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE,
+ kmsg_dump_get_buffer(iter, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE,
record_size - MTDOOPS_HEADER_SIZE, NULL);
if (reason != KMSG_DUMP_OOPS) {
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 36714df37d5d..a939559b0c9a 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -383,7 +383,8 @@ void pstore_record_init(struct pstore_record *record,
* end of the buffer.
*/
static void pstore_dump(struct kmsg_dumper *dumper,
- enum kmsg_dump_reason reason)
+ enum kmsg_dump_reason reason,
+ struct kmsg_dumper_iter *iter)
{
unsigned long total = 0;
const char *why;
@@ -435,7 +436,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
dst_size -= header_size;
/* Write dump contents. */
- if (!kmsg_dump_get_buffer(dumper, true, dst + header_size,
+ if (!kmsg_dump_get_buffer(iter, true, dst + header_size,
dst_size, &dump_size))
break;
diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index 76cc4122d08e..ecc98f549d93 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -29,6 +29,18 @@ enum kmsg_dump_reason {
KMSG_DUMP_MAX
};
+/**
+ * struct kmsg_dumper_iter - iterator for kernel crash message dumper
+ * @active: Flag that specifies if this is currently dumping
+ * @cur_seq: The record to dump (private)
+ * @next_seq: The first record of the next block (private)
+ */
+struct kmsg_dumper_iter {
+ bool active;
+ u64 cur_seq;
+ u64 next_seq;
+};
+
/**
* struct kmsg_dumper - kernel crash message dumper structure
* @list: Entry in the dumper list (private)
@@ -36,37 +48,30 @@ enum kmsg_dump_reason {
* through the record iterator
* @max_reason: filter for highest reason number that should be dumped
* @registered: Flag that specifies if this is already registered
- * @active: Flag that specifies if this is currently dumping
- * @cur_seq: The record to dump (private)
- * @next_seq: The first record of the next block (private)
*/
struct kmsg_dumper {
struct list_head list;
- void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason);
+ void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason,
+ struct kmsg_dumper_iter *iter);
enum kmsg_dump_reason max_reason;
- bool active;
bool registered;
-
- /* private state of the kmsg iterator */
- u64 cur_seq;
- u64 next_seq;
};
#ifdef CONFIG_PRINTK
void kmsg_dump(enum kmsg_dump_reason reason);
-bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
+bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog,
char *line, size_t size, size_t *len);
-bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
+bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog,
char *line, size_t size, size_t *len);
-bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
+bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog,
char *buf, size_t size, size_t *len_out);
-void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper);
+void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter);
-void kmsg_dump_rewind(struct kmsg_dumper *dumper);
+void kmsg_dump_rewind(struct kmsg_dumper_iter *dumper_iter);
int kmsg_dump_register(struct kmsg_dumper *dumper);
@@ -78,30 +83,30 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason)
{
}
-static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper,
+static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter,
bool syslog, const char *line,
size_t size, size_t *len)
{
return false;
}
-static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
+static inline bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog,
const char *line, size_t size, size_t *len)
{
return false;
}
-static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
+static inline bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog,
char *buf, size_t size, size_t *len)
{
return false;
}
-static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper)
+static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter)
{
}
-static inline void kmsg_dump_rewind(struct kmsg_dumper *dumper)
+static inline void kmsg_dump_rewind(struct kmsg_dumper_iter *iter)
{
}
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 930ac1b25ec7..7ae9da245e4b 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2101,7 +2101,7 @@ static int kdb_dmesg(int argc, const char **argv)
int adjust = 0;
int n = 0;
int skip = 0;
- struct kmsg_dumper dumper = { .active = 1 };
+ struct kmsg_dumper_iter iter = { .active = 1 };
size_t len;
char buf[201];
@@ -2126,8 +2126,8 @@ static int kdb_dmesg(int argc, const char **argv)
kdb_set(2, setargs);
}
- kmsg_dump_rewind_nolock(&dumper);
- while (kmsg_dump_get_line_nolock(&dumper, 1, NULL, 0, NULL))
+ kmsg_dump_rewind_nolock(&iter);
+ while (kmsg_dump_get_line_nolock(&iter, 1, NULL, 0, NULL))
n++;
if (lines < 0) {
@@ -2159,8 +2159,8 @@ static int kdb_dmesg(int argc, const char **argv)
if (skip >= n || skip < 0)
return 0;
- kmsg_dump_rewind_nolock(&dumper);
- while (kmsg_dump_get_line_nolock(&dumper, 1, buf, sizeof(buf), &len)) {
+ kmsg_dump_rewind_nolock(&iter);
+ while (kmsg_dump_get_line_nolock(&iter, 1, buf, sizeof(buf), &len)) {
if (skip) {
skip--;
continue;
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 506d334c065e..d14a4afc5b72 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3357,6 +3357,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_reason_str);
*/
void kmsg_dump(enum kmsg_dump_reason reason)
{
+ struct kmsg_dumper_iter iter;
struct kmsg_dumper *dumper;
unsigned long flags;
@@ -3376,25 +3377,21 @@ void kmsg_dump(enum kmsg_dump_reason reason)
continue;
/* initialize iterator with data about the stored records */
- dumper->active = true;
-
+ iter.active = true;
logbuf_lock_irqsave(flags);
- dumper->cur_seq = latched_seq_read_nolock(&clear_seq);
- dumper->next_seq = prb_next_seq(prb);
+ iter.cur_seq = latched_seq_read_nolock(&clear_seq);
+ iter.next_seq = prb_next_seq(prb);
logbuf_unlock_irqrestore(flags);
/* invoke dumper which will iterate over records */
- dumper->dump(dumper, reason);
-
- /* reset iterator */
- dumper->active = false;
+ dumper->dump(dumper, reason, &iter);
}
rcu_read_unlock();
}
/**
* kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version)
- * @dumper: registered kmsg dumper
+ * @iter: kmsg dumper iterator
* @syslog: include the "<4>" prefixes
* @line: buffer to copy the line to
* @size: maximum size of the buffer
@@ -3411,7 +3408,7 @@ void kmsg_dump(enum kmsg_dump_reason reason)
*
* The function is similar to kmsg_dump_get_line(), but grabs no locks.
*/
-bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
+bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog,
char *line, size_t size, size_t *len)
{
struct printk_info info;
@@ -3422,16 +3419,16 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
prb_rec_init_rd(&r, &info, line, size);
- if (!dumper->active)
+ if (!iter->active)
goto out;
/* Read text or count text lines? */
if (line) {
- if (!prb_read_valid(prb, dumper->cur_seq, &r))
+ if (!prb_read_valid(prb, iter->cur_seq, &r))
goto out;
l = record_print_text(&r, syslog, printk_time);
} else {
- if (!prb_read_valid_info(prb, dumper->cur_seq,
+ if (!prb_read_valid_info(prb, iter->cur_seq,
&info, &line_count)) {
goto out;
}
@@ -3440,7 +3437,7 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
}
- dumper->cur_seq = r.info->seq + 1;
+ iter->cur_seq = r.info->seq + 1;
ret = true;
out:
if (len)
@@ -3450,7 +3447,7 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
/**
* kmsg_dump_get_line - retrieve one kmsg log line
- * @dumper: registered kmsg dumper
+ * @iter: kmsg dumper iterator
* @syslog: include the "<4>" prefixes
* @line: buffer to copy the line to
* @size: maximum size of the buffer
@@ -3465,14 +3462,14 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
* A return value of FALSE indicates that there are no more records to
* read.
*/
-bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
+bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog,
char *line, size_t size, size_t *len)
{
unsigned long flags;
bool ret;
logbuf_lock_irqsave(flags);
- ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len);
+ ret = kmsg_dump_get_line_nolock(iter, syslog, line, size, len);
logbuf_unlock_irqrestore(flags);
return ret;
@@ -3481,7 +3478,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
/**
* kmsg_dump_get_buffer - copy kmsg log lines
- * @dumper: registered kmsg dumper
+ * @iter: kmsg dumper iterator
* @syslog: include the "<4>" prefixes
* @buf: buffer to copy the line to
* @size: maximum size of the buffer
@@ -3498,7 +3495,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
* A return value of FALSE indicates that there are no more records to
* read.
*/
-bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
+bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog,
char *buf, size_t size, size_t *len_out)
{
struct printk_info info;
@@ -3510,17 +3507,17 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
bool ret = false;
bool time = printk_time;
- if (!dumper->active || !buf || !size)
+ if (!iter->active || !buf || !size)
goto out;
logbuf_lock_irqsave(flags);
- if (dumper->cur_seq < prb_first_valid_seq(prb)) {
+ if (iter->cur_seq < prb_first_valid_seq(prb)) {
/* messages are gone, move to first available one */
- dumper->cur_seq = prb_first_valid_seq(prb);
+ iter->cur_seq = prb_first_valid_seq(prb);
}
/* last entry */
- if (dumper->cur_seq >= dumper->next_seq) {
+ if (iter->cur_seq >= iter->next_seq) {
logbuf_unlock_irqrestore(flags);
goto out;
}
@@ -3531,7 +3528,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
* because this function (by way of record_print_text()) will
* not write more than size-1 bytes of text into @buf.
*/
- seq = find_first_fitting_seq(dumper->cur_seq, dumper->next_seq,
+ seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq,
size - 1, &info, syslog, time);
/* Keep track of the last message for the next iteration. */
@@ -3541,7 +3538,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
len = 0;
prb_for_each_record(seq, prb, seq, &r) {
- if (r.info->seq >= dumper->next_seq)
+ if (r.info->seq >= iter->next_seq)
break;
len += record_print_text(&r, syslog, time);
@@ -3550,7 +3547,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
prb_rec_init_rd(&r, &info, buf + len, size - len);
}
- dumper->next_seq = next_seq;
+ iter->next_seq = next_seq;
ret = true;
logbuf_unlock_irqrestore(flags);
out:
@@ -3562,7 +3559,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
/**
* kmsg_dump_rewind_nolock - reset the iterator (unlocked version)
- * @dumper: registered kmsg dumper
+ * @iter: kmsg dumper iterator
*
* Reset the dumper's iterator so that kmsg_dump_get_line() and
* kmsg_dump_get_buffer() can be called again and used multiple
@@ -3570,26 +3567,26 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
*
* The function is similar to kmsg_dump_rewind(), but grabs no locks.
*/
-void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper)
+void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter)
{
- dumper->cur_seq = latched_seq_read_nolock(&clear_seq);
- dumper->next_seq = prb_next_seq(prb);
+ iter->cur_seq = latched_seq_read_nolock(&clear_seq);
+ iter->next_seq = prb_next_seq(prb);
}
/**
* kmsg_dump_rewind - reset the iterator
- * @dumper: registered kmsg dumper
+ * @iter: kmsg dumper iterator
*
* Reset the dumper's iterator so that kmsg_dump_get_line() and
* kmsg_dump_get_buffer() can be called again and used multiple
* times within the same dumper.dump() callback.
*/
-void kmsg_dump_rewind(struct kmsg_dumper *dumper)
+void kmsg_dump_rewind(struct kmsg_dumper_iter *iter)
{
unsigned long flags;
logbuf_lock_irqsave(flags);
- kmsg_dump_rewind_nolock(dumper);
+ kmsg_dump_rewind_nolock(iter);
logbuf_unlock_irqrestore(flags);
}
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
--
2.20.1
Since the ringbuffer is lockless, there is no need for it to be
protected by @logbuf_lock. Remove @logbuf_lock.
This means that the NMI-direct printk mode and printk_safe_flush_on_panic()
no longer need to acquire any lock to run.
@console_seq, @exclusive_console_stop_seq, and @console_dropped are
now protected by @console_lock.
Signed-off-by: John Ogness <[email protected]>
---
kernel/printk/internal.h | 4 +-
kernel/printk/printk.c | 134 +++++++++++-------------------------
kernel/printk/printk_safe.c | 29 +++-----
3 files changed, 50 insertions(+), 117 deletions(-)
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 3a8fd491758c..e7acc2888c8e 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -12,8 +12,6 @@
#define PRINTK_NMI_CONTEXT_OFFSET 0x010000000
-extern raw_spinlock_t logbuf_lock;
-
__printf(4, 0)
int vprintk_store(int facility, int level,
const struct dev_printk_info *dev_info,
@@ -59,7 +57,7 @@ void defer_console_output(void);
__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; }
/*
- * In !PRINTK builds we still export logbuf_lock spin_lock, console_sem
+ * In !PRINTK builds we still export console_sem
* semaphore and some of console functions (console_unlock()/etc.), so
* printk-safe must preserve the existing local IRQ guarantees.
*/
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index d14a4afc5b72..b57dba7f077d 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -355,41 +355,6 @@ enum log_flags {
LOG_CONT = 8, /* text is a fragment of a continuation line */
};
-/*
- * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken
- * within the scheduler's rq lock. It must be released before calling
- * console_unlock() or anything else that might wake up a process.
- */
-DEFINE_RAW_SPINLOCK(logbuf_lock);
-
-/*
- * Helper macros to lock/unlock logbuf_lock and switch between
- * printk-safe/unsafe modes.
- */
-#define logbuf_lock_irq() \
- do { \
- printk_safe_enter_irq(); \
- raw_spin_lock(&logbuf_lock); \
- } while (0)
-
-#define logbuf_unlock_irq() \
- do { \
- raw_spin_unlock(&logbuf_lock); \
- printk_safe_exit_irq(); \
- } while (0)
-
-#define logbuf_lock_irqsave(flags) \
- do { \
- printk_safe_enter_irqsave(flags); \
- raw_spin_lock(&logbuf_lock); \
- } while (0)
-
-#define logbuf_unlock_irqrestore(flags) \
- do { \
- raw_spin_unlock(&logbuf_lock); \
- printk_safe_exit_irqrestore(flags); \
- } while (0)
-
/* syslog_lock protects syslog_* variables and write access to clear_seq. */
static DEFINE_RAW_SPINLOCK(syslog_lock);
@@ -401,6 +366,7 @@ static u64 syslog_seq;
static size_t syslog_partial;
static bool syslog_time;
+/* All 3 protected by @console_sem. */
/* the next printk record to write to the console */
static u64 console_seq;
static u64 exclusive_console_stop_seq;
@@ -762,27 +728,27 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
if (ret)
return ret;
- logbuf_lock_irq();
+ printk_safe_enter_irq();
if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) {
if (file->f_flags & O_NONBLOCK) {
ret = -EAGAIN;
- logbuf_unlock_irq();
+ printk_safe_exit_irq();
goto out;
}
- logbuf_unlock_irq();
+ printk_safe_exit_irq();
ret = wait_event_interruptible(log_wait,
prb_read_valid(prb, atomic64_read(&user->seq), r));
if (ret)
goto out;
- logbuf_lock_irq();
+ printk_safe_enter_irq();
}
if (atomic64_read(&user->seq) < prb_first_valid_seq(prb)) {
/* our last seen message is gone, return error and reset */
atomic64_set(&user->seq, prb_first_valid_seq(prb));
ret = -EPIPE;
- logbuf_unlock_irq();
+ printk_safe_exit_irq();
goto out;
}
@@ -792,7 +758,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
&r->info->dev_info);
atomic64_set(&user->seq, r->info->seq + 1);
- logbuf_unlock_irq();
+ printk_safe_exit_irq();
if (len > count) {
ret = -EINVAL;
@@ -827,7 +793,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
if (offset)
return -ESPIPE;
- logbuf_lock_irq();
+ printk_safe_enter_irq();
switch (whence) {
case SEEK_SET:
/* the first record */
@@ -848,7 +814,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
default:
ret = -EINVAL;
}
- logbuf_unlock_irq();
+ printk_safe_exit_irq();
return ret;
}
@@ -862,7 +828,7 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait)
poll_wait(file, &log_wait, wait);
- logbuf_lock_irq();
+ printk_safe_enter_irq();
if (prb_read_valid(prb, atomic64_read(&user->seq), NULL)) {
/* return error when data has vanished underneath us */
if (atomic64_read(&user->seq) < prb_first_valid_seq(prb))
@@ -870,7 +836,7 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait)
else
ret = EPOLLIN|EPOLLRDNORM;
}
- logbuf_unlock_irq();
+ printk_safe_exit_irq();
return ret;
}
@@ -903,9 +869,9 @@ static int devkmsg_open(struct inode *inode, struct file *file)
prb_rec_init_rd(&user->record, &user->info,
&user->text_buf[0], sizeof(user->text_buf));
- logbuf_lock_irq();
+ printk_safe_enter_irq();
atomic64_set(&user->seq, prb_first_valid_seq(prb));
- logbuf_unlock_irq();
+ printk_safe_exit_irq();
file->private_data = user;
return 0;
@@ -1518,11 +1484,11 @@ static int syslog_print(char __user *buf, int size)
size_t n;
size_t skip;
- logbuf_lock_irq();
+ printk_safe_enter_irq();
raw_spin_lock(&syslog_lock);
if (!prb_read_valid(prb, syslog_seq, &r)) {
raw_spin_unlock(&syslog_lock);
- logbuf_unlock_irq();
+ printk_safe_exit_irq();
break;
}
if (r.info->seq != syslog_seq) {
@@ -1552,7 +1518,7 @@ static int syslog_print(char __user *buf, int size)
} else
n = 0;
raw_spin_unlock(&syslog_lock);
- logbuf_unlock_irq();
+ printk_safe_exit_irq();
if (!n)
break;
@@ -1586,7 +1552,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
return -ENOMEM;
time = printk_time;
- logbuf_lock_irq();
+ printk_safe_enter_irq();
/*
* Find first record that fits, including all following records,
* into the user-provided buffer for this dump.
@@ -1607,12 +1573,12 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
break;
}
- logbuf_unlock_irq();
+ printk_safe_exit_irq();
if (copy_to_user(buf + len, text, textlen))
len = -EFAULT;
else
len += textlen;
- logbuf_lock_irq();
+ printk_safe_enter_irq();
if (len < 0)
break;
@@ -1623,7 +1589,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
latched_seq_write(&clear_seq, seq);
raw_spin_unlock(&syslog_lock);
}
- logbuf_unlock_irq();
+ printk_safe_exit_irq();
kfree(text);
return len;
@@ -1631,11 +1597,11 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
static void syslog_clear(void)
{
- logbuf_lock_irq();
+ printk_safe_enter_irq();
raw_spin_lock(&syslog_lock);
latched_seq_write(&clear_seq, prb_next_seq(prb));
raw_spin_unlock(&syslog_lock);
- logbuf_unlock_irq();
+ printk_safe_exit_irq();
}
int do_syslog(int type, char __user *buf, int len, int source)
@@ -1716,7 +1682,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
break;
/* Number of chars in the log buffer */
case SYSLOG_ACTION_SIZE_UNREAD:
- logbuf_lock_irq();
+ printk_safe_enter_irq();
raw_spin_lock(&syslog_lock);
if (syslog_seq < prb_first_valid_seq(prb)) {
/* messages are gone, move to first one */
@@ -1745,7 +1711,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
error -= syslog_partial;
}
raw_spin_unlock(&syslog_lock);
- logbuf_unlock_irq();
+ printk_safe_exit_irq();
break;
/* Size of the log buffer */
case SYSLOG_ACTION_SIZE_BUFFER:
@@ -2593,7 +2559,6 @@ void console_unlock(void)
size_t len;
printk_safe_enter_irqsave(flags);
- raw_spin_lock(&logbuf_lock);
skip:
if (!prb_read_valid(prb, console_seq, &r))
break;
@@ -2637,7 +2602,6 @@ void console_unlock(void)
console_msg_format & MSG_FORMAT_SYSLOG,
printk_time);
console_seq++;
- raw_spin_unlock(&logbuf_lock);
/*
* While actively printing out messages, if another printk()
@@ -2664,8 +2628,6 @@ void console_unlock(void)
console_locked = 0;
- raw_spin_unlock(&logbuf_lock);
-
up_console_sem();
/*
@@ -2674,9 +2636,7 @@ void console_unlock(void)
* there's a new owner and the console_unlock() from them will do the
* flush, no worries.
*/
- raw_spin_lock(&logbuf_lock);
retry = prb_read_valid(prb, console_seq, NULL);
- raw_spin_unlock(&logbuf_lock);
printk_safe_exit_irqrestore(flags);
if (retry && console_trylock())
@@ -2743,9 +2703,9 @@ void console_flush_on_panic(enum con_flush_mode mode)
if (mode == CONSOLE_REPLAY_ALL) {
unsigned long flags;
- logbuf_lock_irqsave(flags);
+ printk_safe_enter_irqsave(flags);
console_seq = prb_first_valid_seq(prb);
- logbuf_unlock_irqrestore(flags);
+ printk_safe_exit_irqrestore(flags);
}
console_unlock();
}
@@ -2973,9 +2933,7 @@ void register_console(struct console *newcon)
/*
* console_unlock(); will print out the buffered messages
* for us.
- */
- logbuf_lock_irqsave(flags);
- /*
+ *
* We're about to replay the log buffer. Only do this to the
* just-registered console to avoid excessive message spam to
* the already-registered consoles.
@@ -2988,11 +2946,9 @@ void register_console(struct console *newcon)
exclusive_console_stop_seq = console_seq;
/* Get a consistent copy of @syslog_seq. */
- raw_spin_lock(&syslog_lock);
+ raw_spin_lock_irqsave(&syslog_lock, flags);
console_seq = syslog_seq;
- raw_spin_unlock(&syslog_lock);
-
- logbuf_unlock_irqrestore(flags);
+ raw_spin_unlock_irqrestore(&syslog_lock, flags);
}
console_unlock();
console_sysfs_notify();
@@ -3359,7 +3315,6 @@ void kmsg_dump(enum kmsg_dump_reason reason)
{
struct kmsg_dumper_iter iter;
struct kmsg_dumper *dumper;
- unsigned long flags;
rcu_read_lock();
list_for_each_entry_rcu(dumper, &dump_list, list) {
@@ -3378,10 +3333,7 @@ void kmsg_dump(enum kmsg_dump_reason reason)
/* initialize iterator with data about the stored records */
iter.active = true;
- logbuf_lock_irqsave(flags);
- iter.cur_seq = latched_seq_read_nolock(&clear_seq);
- iter.next_seq = prb_next_seq(prb);
- logbuf_unlock_irqrestore(flags);
+ kmsg_dump_rewind_nolock(&iter);
/* invoke dumper which will iterate over records */
dumper->dump(dumper, reason, &iter);
@@ -3414,9 +3366,11 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog,
struct printk_info info;
unsigned int line_count;
struct printk_record r;
+ unsigned long flags;
size_t l = 0;
bool ret = false;
+ printk_safe_enter_irqsave(flags);
prb_rec_init_rd(&r, &info, line, size);
if (!iter->active)
@@ -3440,6 +3394,7 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog,
iter->cur_seq = r.info->seq + 1;
ret = true;
out:
+ printk_safe_exit_irqrestore(flags);
if (len)
*len = l;
return ret;
@@ -3465,14 +3420,7 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog,
bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog,
char *line, size_t size, size_t *len)
{
- unsigned long flags;
- bool ret;
-
- logbuf_lock_irqsave(flags);
- ret = kmsg_dump_get_line_nolock(iter, syslog, line, size, len);
- logbuf_unlock_irqrestore(flags);
-
- return ret;
+ return kmsg_dump_get_line_nolock(iter, syslog, line, size, len);
}
EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
@@ -3510,7 +3458,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog,
if (!iter->active || !buf || !size)
goto out;
- logbuf_lock_irqsave(flags);
+ printk_safe_enter_irqsave(flags);
if (iter->cur_seq < prb_first_valid_seq(prb)) {
/* messages are gone, move to first available one */
iter->cur_seq = prb_first_valid_seq(prb);
@@ -3518,7 +3466,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog,
/* last entry */
if (iter->cur_seq >= iter->next_seq) {
- logbuf_unlock_irqrestore(flags);
+ printk_safe_exit_irqrestore(flags);
goto out;
}
@@ -3549,7 +3497,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog,
iter->next_seq = next_seq;
ret = true;
- logbuf_unlock_irqrestore(flags);
+ printk_safe_exit_irqrestore(flags);
out:
if (len_out)
*len_out = len;
@@ -3569,8 +3517,12 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
*/
void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter)
{
+ unsigned long flags;
+
+ printk_safe_enter_irqsave(flags);
iter->cur_seq = latched_seq_read_nolock(&clear_seq);
iter->next_seq = prb_next_seq(prb);
+ printk_safe_exit_irqrestore(flags);
}
/**
@@ -3583,11 +3535,7 @@ void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter)
*/
void kmsg_dump_rewind(struct kmsg_dumper_iter *iter)
{
- unsigned long flags;
-
- logbuf_lock_irqsave(flags);
kmsg_dump_rewind_nolock(iter);
- logbuf_unlock_irqrestore(flags);
}
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index a0e6f746de6c..a9a3137bd972 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -16,7 +16,7 @@
#include "internal.h"
/*
- * printk() could not take logbuf_lock in NMI context. Instead,
+ * In NMI and safe mode, printk() avoids taking locks. Instead,
* it uses an alternative implementation that temporary stores
* the strings into a per-CPU buffer. The content of the buffer
* is later flushed into the main ring buffer via IRQ work.
@@ -266,18 +266,6 @@ void printk_safe_flush(void)
*/
void printk_safe_flush_on_panic(void)
{
- /*
- * Make sure that we could access the main ring buffer.
- * Do not risk a double release when more CPUs are up.
- */
- if (raw_spin_is_locked(&logbuf_lock)) {
- if (num_online_cpus() > 1)
- return;
-
- debug_locks_off();
- raw_spin_lock_init(&logbuf_lock);
- }
-
printk_safe_flush();
}
@@ -311,9 +299,7 @@ void noinstr printk_nmi_exit(void)
* reordering.
*
* It has effect only when called in NMI context. Then printk()
- * will try to store the messages into the main logbuf directly
- * and use the per-CPU buffers only as a fallback when the lock
- * is not available.
+ * will store the messages into the main logbuf directly.
*/
void printk_nmi_direct_enter(void)
{
@@ -368,20 +354,21 @@ __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
#endif
/*
- * Try to use the main logbuf even in NMI. But avoid calling console
+ * Use the main logbuf even in NMI. But avoid calling console
* drivers that might have their own locks.
*/
- if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) &&
- raw_spin_trylock(&logbuf_lock)) {
+ if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK)) {
+ unsigned long flags;
int len;
+ printk_safe_enter_irqsave(flags);
len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
- raw_spin_unlock(&logbuf_lock);
+ printk_safe_exit_irqrestore(flags);
defer_console_output();
return len;
}
- /* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */
+ /* Use extra buffer in NMI. */
if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
return vprintk_nmi(fmt, args);
--
2.20.1
kmsg_dump_rewind() and kmsg_dump_get_line() are now lockless, so there is
no need for separate _nolock() variants. Remove the _nolock() functions
and switch all of their callers to the variants without the _nolock()
suffix.
The functions without _nolock() were chosen because they are already
exported to kernel modules.
Signed-off-by: John Ogness <[email protected]>
---
arch/powerpc/xmon/xmon.c | 4 +--
include/linux/kmsg_dump.h | 18 +------------
kernel/debug/kdb/kdb_main.c | 8 +++---
kernel/printk/printk.c | 52 +++++--------------------------------
4 files changed, 13 insertions(+), 69 deletions(-)
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 43162b885259..4cac114ba32d 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -3015,9 +3015,9 @@ dump_log_buf(void)
catch_memory_errors = 1;
sync();
- kmsg_dump_rewind_nolock(&iter);
+ kmsg_dump_rewind(&iter);
xmon_start_pagination();
- while (kmsg_dump_get_line_nolock(&iter, false, buf, sizeof(buf), &len)) {
+ while (kmsg_dump_get_line(&iter, false, buf, sizeof(buf), &len)) {
buf[len] = '\0';
printf("%s", buf);
}
diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index ecc98f549d93..8a39d898a1a5 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -60,18 +60,13 @@ struct kmsg_dumper {
#ifdef CONFIG_PRINTK
void kmsg_dump(enum kmsg_dump_reason reason);
-bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog,
- char *line, size_t size, size_t *len);
-
bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog,
char *line, size_t size, size_t *len);
bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog,
char *buf, size_t size, size_t *len_out);
-void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter);
-
-void kmsg_dump_rewind(struct kmsg_dumper_iter *dumper_iter);
+void kmsg_dump_rewind(struct kmsg_dumper_iter *iter);
int kmsg_dump_register(struct kmsg_dumper *dumper);
@@ -83,13 +78,6 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason)
{
}
-static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter,
- bool syslog, const char *line,
- size_t size, size_t *len)
-{
- return false;
-}
-
static inline bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog,
const char *line, size_t size, size_t *len)
{
@@ -102,10 +90,6 @@ static inline bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool sysl
return false;
}
-static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter)
-{
-}
-
static inline void kmsg_dump_rewind(struct kmsg_dumper_iter *iter)
{
}
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 7ae9da245e4b..dbf1d126ac5e 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2126,8 +2126,8 @@ static int kdb_dmesg(int argc, const char **argv)
kdb_set(2, setargs);
}
- kmsg_dump_rewind_nolock(&iter);
- while (kmsg_dump_get_line_nolock(&iter, 1, NULL, 0, NULL))
+ kmsg_dump_rewind(&iter);
+ while (kmsg_dump_get_line(&iter, 1, NULL, 0, NULL))
n++;
if (lines < 0) {
@@ -2159,8 +2159,8 @@ static int kdb_dmesg(int argc, const char **argv)
if (skip >= n || skip < 0)
return 0;
- kmsg_dump_rewind_nolock(&iter);
- while (kmsg_dump_get_line_nolock(&iter, 1, buf, sizeof(buf), &len)) {
+ kmsg_dump_rewind(&iter);
+ while (kmsg_dump_get_line(&iter, 1, buf, sizeof(buf), &len)) {
if (skip) {
skip--;
continue;
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index b57dba7f077d..a2d4ba00bf77 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3333,7 +3333,7 @@ void kmsg_dump(enum kmsg_dump_reason reason)
/* initialize iterator with data about the stored records */
iter.active = true;
- kmsg_dump_rewind_nolock(&iter);
+ kmsg_dump_rewind(&iter);
/* invoke dumper which will iterate over records */
dumper->dump(dumper, reason, &iter);
@@ -3342,7 +3342,7 @@ void kmsg_dump(enum kmsg_dump_reason reason)
}
/**
- * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version)
+ * kmsg_dump_get_line - retrieve one kmsg log line
* @iter: kmsg dumper iterator
* @syslog: include the "<4>" prefixes
* @line: buffer to copy the line to
@@ -3357,11 +3357,9 @@ void kmsg_dump(enum kmsg_dump_reason reason)
*
* A return value of FALSE indicates that there are no more records to
* read.
- *
- * The function is similar to kmsg_dump_get_line(), but grabs no locks.
*/
-bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog,
- char *line, size_t size, size_t *len)
+bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog,
+ char *line, size_t size, size_t *len)
{
struct printk_info info;
unsigned int line_count;
@@ -3399,29 +3397,6 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog,
*len = l;
return ret;
}
-
-/**
- * kmsg_dump_get_line - retrieve one kmsg log line
- * @iter: kmsg dumper iterator
- * @syslog: include the "<4>" prefixes
- * @line: buffer to copy the line to
- * @size: maximum size of the buffer
- * @len: length of line placed into buffer
- *
- * Start at the beginning of the kmsg buffer, with the oldest kmsg
- * record, and copy one record into the provided buffer.
- *
- * Consecutive calls will return the next available record moving
- * towards the end of the buffer with the youngest messages.
- *
- * A return value of FALSE indicates that there are no more records to
- * read.
- */
-bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog,
- char *line, size_t size, size_t *len)
-{
- return kmsg_dump_get_line_nolock(iter, syslog, line, size, len);
-}
EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
/**
@@ -3506,16 +3481,14 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog,
EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
/**
- * kmsg_dump_rewind_nolock - reset the iterator (unlocked version)
+ * kmsg_dump_rewind - reset the iterator
* @iter: kmsg dumper iterator
*
* Reset the dumper's iterator so that kmsg_dump_get_line() and
* kmsg_dump_get_buffer() can be called again and used multiple
* times within the same dumper.dump() callback.
- *
- * The function is similar to kmsg_dump_rewind(), but grabs no locks.
*/
-void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter)
+void kmsg_dump_rewind(struct kmsg_dumper_iter *iter)
{
unsigned long flags;
@@ -3524,19 +3497,6 @@ void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter)
iter->next_seq = prb_next_seq(prb);
printk_safe_exit_irqrestore(flags);
}
-
-/**
- * kmsg_dump_rewind - reset the iterator
- * @iter: kmsg dumper iterator
- *
- * Reset the dumper's iterator so that kmsg_dump_get_line() and
- * kmsg_dump_get_buffer() can be called again and used multiple
- * times within the same dumper.dump() callback.
- */
-void kmsg_dump_rewind(struct kmsg_dumper_iter *iter)
-{
- kmsg_dump_rewind_nolock(iter);
-}
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
#endif
--
2.20.1
The logic for finding records to fit into a buffer is the same for
kmsg_dump_get_buffer() and syslog_print_all(). Introduce a helper
function find_first_fitting_seq() to handle this logic.
Signed-off-by: John Ogness <[email protected]>
---
kernel/printk/printk.c | 71 ++++++++++++++++++++++++------------------
1 file changed, 41 insertions(+), 30 deletions(-)
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 1929aa372e7f..ec2174882b8e 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1421,6 +1421,41 @@ static size_t get_record_print_text_size(struct printk_info *info,
return ((prefix_len * line_count) + info->text_len + 1);
}
+/*
+ * Beginning with @start_seq, find the first record where it and all following
+ * records up to (but not including) @max_seq fit into @size.
+ */
+static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size,
+ struct printk_info *info, bool syslog, bool time)
+{
+ unsigned int line_count;
+ size_t len = 0;
+ u64 seq;
+
+ /* Determine the size of the records up to @max_seq. */
+ prb_for_each_info(start_seq, prb, seq, info, &line_count) {
+ if (info->seq >= max_seq)
+ break;
+ len += get_record_print_text_size(info, line_count, syslog, time);
+ }
+
+ /*
+ * Move first record forward until length fits into the buffer. Ignore
+ * newest messages that were not counted in the above cycle. Messages
+ * might appear and get lost in the meantime. This is a best effort
+ * that prevents an infinite loop that could occur with a retry.
+ */
+ if (seq < max_seq)
+ max_seq = seq;
+ prb_for_each_info(start_seq, prb, seq, info, &line_count) {
+ if (len <= size || info->seq >= max_seq)
+ break;
+ len -= get_record_print_text_size(info, line_count, syslog, time);
+ }
+
+ return seq;
+}
+
static int syslog_print(char __user *buf, int size)
{
struct printk_info info;
@@ -1492,7 +1527,6 @@ static int syslog_print(char __user *buf, int size)
static int syslog_print_all(char __user *buf, int size, bool clear)
{
struct printk_info info;
- unsigned int line_count;
struct printk_record r;
char *text;
int len = 0;
@@ -1509,15 +1543,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
* Find first record that fits, including all following records,
* into the user-provided buffer for this dump.
*/
- prb_for_each_info(clear_seq, prb, seq, &info, &line_count)
- len += get_record_print_text_size(&info, line_count, true, time);
-
- /* move first record forward until length fits into the buffer */
- prb_for_each_info(clear_seq, prb, seq, &info, &line_count) {
- if (len <= size)
- break;
- len -= get_record_print_text_size(&info, line_count, true, time);
- }
+ seq = find_first_fitting_seq(clear_seq, -1, size, &info, true, time);
prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX);
@@ -3410,7 +3436,6 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
char *buf, size_t size, size_t *len_out)
{
struct printk_info info;
- unsigned int line_count;
struct printk_record r;
unsigned long flags;
u64 seq;
@@ -3436,26 +3461,12 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
/*
* Find first record that fits, including all following records,
- * into the user-provided buffer for this dump.
+ * into the user-provided buffer for this dump. Pass in size-1
+ * because this function (by way of record_print_text()) will
+ * not write more than size-1 bytes of text into @buf.
*/
-
- prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) {
- if (info.seq >= dumper->next_seq)
- break;
- len += get_record_print_text_size(&info, line_count, syslog, time);
- }
-
- /*
- * Move first record forward until length fits into the buffer. Ignore
- * newest messages that were not counted in the above cycle. Messages
- * might appear and get lost in the meantime. This is the best effort
- * that prevents an infinite loop.
- */
- prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) {
- if (len < size || info.seq >= dumper->next_seq)
- break;
- len -= get_record_print_text_size(&info, line_count, syslog, time);
- }
+ seq = find_first_fitting_seq(dumper->cur_seq, dumper->next_seq,
+ size - 1, &info, syslog, time);
/* Keep track of the last message for the next iteration. */
next_seq = seq;
--
2.20.1
In preparation for removing printk's @logbuf_lock, dumpers that have
assumed to be protected against parallel calls must provide their own
synchronization. Add a locally static spinlock to synchronize the
kmsg_dump call.
Signed-off-by: John Ogness <[email protected]>
---
drivers/hv/vmbus_drv.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index fbeddef90941..08db95e1a139 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1362,23 +1362,27 @@ static void hv_kmsg_dump(struct kmsg_dumper *dumper,
enum kmsg_dump_reason reason,
struct kmsg_dumper_iter *iter)
{
+ static DEFINE_SPINLOCK(lock);
size_t bytes_written;
phys_addr_t panic_pa;
+ unsigned long flags;
/* We are only interested in panics. */
if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg))
return;
+ if (!spin_trylock_irqsave(&lock, flags))
+ return;
+
panic_pa = virt_to_phys(hv_panic_page);
- /*
- * Write dump contents to the page. No need to synchronize; panic should
- * be single-threaded.
- */
+ /* Write dump contents to the page. */
kmsg_dump_get_buffer(iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
&bytes_written);
if (bytes_written)
hyperv_report_panic_msg(panic_pa, bytes_written);
+
+ spin_unlock_irqrestore(&lock, flags);
}
static struct kmsg_dumper hv_kmsg_dumper = {
--
2.20.1
In preparation for removing printk's @logbuf_lock, dumpers that have
assumed to be protected against parallel calls must provide their own
synchronization. Add a locally static spinlock to synchronize the
kmsg_dump call and temporary buffer usage.
Signed-off-by: John Ogness <[email protected]>
---
arch/um/kernel/kmsg_dump.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c
index f38349ad00ea..173999422ed8 100644
--- a/arch/um/kernel/kmsg_dump.c
+++ b/arch/um/kernel/kmsg_dump.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/kmsg_dump.h>
+#include <linux/spinlock.h>
#include <linux/console.h>
#include <shared/init.h>
#include <shared/kern.h>
@@ -9,8 +10,10 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
enum kmsg_dump_reason reason,
struct kmsg_dumper_iter *iter)
{
+ static DEFINE_SPINLOCK(lock);
static char line[1024];
struct console *con;
+ unsigned long flags;
size_t len = 0;
/* only dump kmsg when no console is available */
@@ -25,11 +28,16 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
if (con)
return;
+ if (!spin_trylock_irqsave(&lock, flags))
+ return;
+
printf("kmsg_dump:\n");
while (kmsg_dump_get_line(iter, true, line, sizeof(line), &len)) {
line[len] = '\0';
printf("%s", line);
}
+
+ spin_unlock_irqrestore(&lock, flags);
}
static struct kmsg_dumper kmsg_dumper = {
--
2.20.1
From: John Ogness <[email protected]> Sent: Tuesday, January 26, 2021 1:16 PM
>
> In preparation for removing printk's @logbuf_lock, dumpers that have
> assumed to be protected against parallel calls must provide their own
> synchronization. Add a locally static spinlock to synchronize the
> kmsg_dump call.
>
> Signed-off-by: John Ogness <[email protected]>
> ---
> drivers/hv/vmbus_drv.c | 12 ++++++++----
> 1 file changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> index fbeddef90941..08db95e1a139 100644
> --- a/drivers/hv/vmbus_drv.c
> +++ b/drivers/hv/vmbus_drv.c
> @@ -1362,23 +1362,27 @@ static void hv_kmsg_dump(struct kmsg_dumper *dumper,
> enum kmsg_dump_reason reason,
> struct kmsg_dumper_iter *iter)
> {
> + static DEFINE_SPINLOCK(lock);
> size_t bytes_written;
> phys_addr_t panic_pa;
> + unsigned long flags;
>
> /* We are only interested in panics. */
> if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg))
> return;
>
> + if (!spin_trylock_irqsave(&lock, flags))
> + return;
> +
> panic_pa = virt_to_phys(hv_panic_page);
>
> - /*
> - * Write dump contents to the page. No need to synchronize; panic should
> - * be single-threaded.
> - */
> + /* Write dump contents to the page. */
> kmsg_dump_get_buffer(iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
> &bytes_written);
> if (bytes_written)
> hyperv_report_panic_msg(panic_pa, bytes_written);
> +
> + spin_unlock_irqrestore(&lock, flags);
> }
>
> static struct kmsg_dumper hv_kmsg_dumper = {
> --
> 2.20.1
I don't think this patch is needed. Per some of the unmodified code
above, and the comment, we proceed to the call to kmsg_dump_get_buffer()
only in the panic path. Code in the panic path prior to invoking kmsg_dump()
ensures that we are single-threaded on a single CPU. So I think everything
is good here without the new spin lock. The subsequent call to
hyperv_report_panic_msg() also assumes that we are single-threaded.
Michael
On 2021-01-29, Petr Mladek <[email protected]> wrote:
>> The logic for finding records to fit into a buffer is the same for
>> kmsg_dump_get_buffer() and syslog_print_all(). Introduce a helper
>> function find_first_fitting_seq() to handle this logic.
>>
>> Signed-off-by: John Ogness <[email protected]>
>> ---
>> kernel/printk/printk.c | 71 ++++++++++++++++++++++++------------------
>> 1 file changed, 41 insertions(+), 30 deletions(-)
>>
>> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
>> index 1929aa372e7f..ec2174882b8e 100644
>> --- a/kernel/printk/printk.c
>> +++ b/kernel/printk/printk.c
>> @@ -1421,6 +1421,41 @@ static size_t get_record_print_text_size(struct printk_info *info,
>> return ((prefix_len * line_count) + info->text_len + 1);
>> }
>>
>> +/*
>> + * Beginning with @start_seq, find the first record where it and all following
>> + * records up to (but not including) @max_seq fit into @size.
>
> Please, mention that all existing messages are checked when @max_seq
> == -1 is used.
-1 is not special for that purpose, but I will add a comment that if
there is no required upper bound, the caller can use -1.
>> + */
>> +static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size,
>> + struct printk_info *info, bool syslog, bool time)
>> +{
>> + unsigned int line_count;
>> + size_t len = 0;
>> + u64 seq;
>
> Please, use local struct printk_info. It will be clear that it does
> not have any side-effects for the callers.
OK. The compiler should be able to optimize the stack usage.
>> +
>> + /* Determine the size of the records up to @max_seq. */
>> + prb_for_each_info(start_seq, prb, seq, info, &line_count) {
>> + if (info->seq >= max_seq)
>> + break;
>> + len += get_record_print_text_size(info, line_count, syslog, time);
>> + }
>> +
>> + /*
>> + * Move first record forward until length fits into the buffer. Ignore
>> + * newest messages that were not counted in the above cycle. Messages
>> + * might appear and get lost in the meantime. This is a best effort
>> + * that prevents an infinite loop that could occur with a retry.
>> + */
>> + if (seq < max_seq)
>> + max_seq = seq;
>
> This made my head twist around several times ;-)
>
> It should never be true in kmsg_dump_get_buffer().
Correct. It is there because of syslog_print_all().
> And there was nothing like this in the original syslog_print_all().
Yes, there was. But it was implemented subtly.
This upcoming second loop is subtracting string lengths from _messages
that were read in the first loop_. It would be a bug to subtract lengths
from messages that were not counted in the first loop. The comment even
mentions this:
"Ignore newest messages that were not counted in the above cycle."
With logbuf_lock, it is not possible that new messages arrive in between
these two loops. But without logbuf_lock, it _is_ possible and this
needs to be handled.
I can expand the commit message to mention this necessary change.
> But I still think that the original code had better results.
> Lost messages are typically replaced with similar amount
> of new messages. syslog_print_all() should return
> the newest messages. And the original code better
> fulfilled this requirement.
If you remove logbuf_lock, the original code is broken.
>> + prb_for_each_info(start_seq, prb, seq, info, &line_count) {
>> + if (len <= size || info->seq >= max_seq)
>> + break;
>> + len -= get_record_print_text_size(info, line_count, syslog, time);
>> + }
>> +
>> + return seq;
>> +}
>> +
>> static int syslog_print(char __user *buf, int size)
>> {
>> struct printk_info info;
>> @@ -1492,7 +1527,6 @@ static int syslog_print(char __user *buf, int size)
>> static int syslog_print_all(char __user *buf, int size, bool clear)
>> {
>> struct printk_info info;
>> - unsigned int line_count;
>> struct printk_record r;
>> char *text;
>> int len = 0;
>> @@ -1509,15 +1543,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
>> * Find first record that fits, including all following records,
>> * into the user-provided buffer for this dump.
>> */
>> - prb_for_each_info(clear_seq, prb, seq, &info, &line_count)
>> - len += get_record_print_text_size(&info, line_count, true, time);
>> -
>> - /* move first record forward until length fits into the buffer */
>> - prb_for_each_info(clear_seq, prb, seq, &info, &line_count) {
>> - if (len <= size)
>> - break;
>> - len -= get_record_print_text_size(&info, line_count, true, time);
>> - }
>> + seq = find_first_fitting_seq(clear_seq, -1, size, &info, true, time);
>>
>> prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX);
>>
>> @@ -3410,7 +3436,6 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
>> char *buf, size_t size, size_t *len_out)
>> {
>> struct printk_info info;
>> - unsigned int line_count;
>> struct printk_record r;
>> unsigned long flags;
>> u64 seq;
>> @@ -3436,26 +3461,12 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
>>
>> /*
>> * Find first record that fits, including all following records,
>> - * into the user-provided buffer for this dump.
>> + * into the user-provided buffer for this dump. Pass in size-1
>> + * because this function (by way of record_print_text()) will
>> + * not write more than size-1 bytes of text into @buf.
>
> We should do the same also in syslog_print_all(). It must have the
> same problem. The last message might get lost when there is not
> a space for the trailing '\0' that was not counted before.
No, it does not have the same problem. I also made the mistake [0] of
thinking that.
copy_to_user() is the function filling the buffer, not
record_print_text(). And it will (and always has) fill the full
buffer. Only kmsg_dump_get_buffer() has the bizarre semantics of not
using the full buffer.
Note that syslog_print_all() uses CONSOLE_LOG_MAX for
record_print_text() and @size for copy_to_user().
The bug here is that CONSOLE_LOG_MAX is too small. But we've had this
bug for a long time.
> We should fix syslog_print_all() in a separate preparation patch
> before we introduce the common function.
>
> And it might be better to actually change the condition in
> find_first_fitting_seq(). I mean to replace:
>
> if (len <= size || info.seq >= max_seq)
> with
> if (len < size || info.seq >= max_seq)
I would prefer not twisting syslog_print_all() to act like
kmsg_dump_get_buffer().
John Ogness
[0] https://lkml.kernel.org/r/[email protected]
On Tue 2021-01-26 22:21:48, John Ogness wrote:
> In preparation for removing printk's @logbuf_lock, dumpers that have
> assumed to be protected against parallel calls must provide their own
> synchronization. Add a locally static spinlock to synchronize the
> kmsg_dump call and temporary buffer usage.
>
> Signed-off-by: John Ogness <[email protected]>
Reviewed-by: Petr Mladek <[email protected]>
Best Regards,
Petr
On 2021-01-27, Michael Kelley <[email protected]> wrote:
>> In preparation for removing printk's @logbuf_lock, dumpers that have
>> assumed to be protected against parallel calls must provide their own
>> synchronization. Add a locally static spinlock to synchronize the
>> kmsg_dump call.
>>
>> Signed-off-by: John Ogness <[email protected]>
>> ---
>> drivers/hv/vmbus_drv.c | 12 ++++++++----
>> 1 file changed, 8 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
>> index fbeddef90941..08db95e1a139 100644
>> --- a/drivers/hv/vmbus_drv.c
>> +++ b/drivers/hv/vmbus_drv.c
>> @@ -1362,23 +1362,27 @@ static void hv_kmsg_dump(struct kmsg_dumper *dumper,
>> enum kmsg_dump_reason reason,
>> struct kmsg_dumper_iter *iter)
>> {
>> + static DEFINE_SPINLOCK(lock);
>> size_t bytes_written;
>> phys_addr_t panic_pa;
>> + unsigned long flags;
>>
>> /* We are only interested in panics. */
>> if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg))
>> return;
>>
>> + if (!spin_trylock_irqsave(&lock, flags))
>> + return;
>> +
>> panic_pa = virt_to_phys(hv_panic_page);
>>
>> - /*
>> - * Write dump contents to the page. No need to synchronize; panic should
>> - * be single-threaded.
>> - */
>> + /* Write dump contents to the page. */
>> kmsg_dump_get_buffer(iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
>> &bytes_written);
>> if (bytes_written)
>> hyperv_report_panic_msg(panic_pa, bytes_written);
>> +
>> + spin_unlock_irqrestore(&lock, flags);
>> }
>>
>> static struct kmsg_dumper hv_kmsg_dumper = {
>> --
>> 2.20.1
>
> I don't think this patch is needed. Per some of the unmodified code
> above, and the comment, we proceed to the call to kmsg_dump_get_buffer()
> only in the panic path. Code in the panic path prior to invoking kmsg_dump()
> ensures that we are single-threaded on a single CPU. So I think everything
> is good here without the new spin lock. The subsequent call to
> hyperv_report_panic_msg() also assumes that we are single-threaded.
You are correct. I will withdraw this patch from the series. Thanks for
the feedback.
John Ogness
On Tue 2021-01-26 22:21:46, John Ogness wrote:
> The global variables @syslog_seq, @syslog_partial, @syslog_time
> and write access to @clear_seq are protected by @logbuf_lock.
> Once @logbuf_lock is removed, these variables will need their
> own synchronization method. Introduce @syslog_lock for this
> purpose.
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -390,8 +390,12 @@ DEFINE_RAW_SPINLOCK(logbuf_lock);
> printk_safe_exit_irqrestore(flags); \
> } while (0)
>
> +/* syslog_lock protects syslog_* variables and write access to clear_seq. */
> +static DEFINE_RAW_SPINLOCK(syslog_lock);
I am not an expert on RT code but I think that it prefers the generic
spinlocks. syslog_lock seems to be used in a normal context.
IMHO, it does not need to be a raw spinlock.
Note that using normal spinlock would require switching the locking
order. logbuf_lock is a raw lock. Normal spinlock must not be taken
under a raw spinlock.
Or we could switch syslog_lock to the normal spinlock later
after logbuf_lock is removed.
> +
> #ifdef CONFIG_PRINTK
> DECLARE_WAIT_QUEUE_HEAD(log_wait);
> +/* All 3 protected by @syslog_lock. */
> /* the next printk record to read by syslog(READ) or /proc/kmsg */
> static u64 syslog_seq;
> static size_t syslog_partial;
> @@ -1631,6 +1643,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
> bool clear = false;
> static int saved_console_loglevel = LOGLEVEL_DEFAULT;
> int error;
> + u64 seq;
This allows to remove definition of the same temporary variable
for case SYSLOG_ACTION_SIZE_UNREAD.
>
> error = check_syslog_permissions(type, source);
> if (error)
> @@ -1648,8 +1661,14 @@ int do_syslog(int type, char __user *buf, int len, int source)
> return 0;
> if (!access_ok(buf, len))
> return -EFAULT;
> +
> + /* Get a consistent copy of @syslog_seq. */
> + raw_spin_lock_irq(&syslog_lock);
> + seq = syslog_seq;
> + raw_spin_unlock_irq(&syslog_lock);
> +
> error = wait_event_interruptible(log_wait,
> - prb_read_valid(prb, syslog_seq, NULL));
> + prb_read_valid(prb, seq, NULL));
Hmm, this will not detect when syslog_seq gets cleared in parallel.
I hope that nobody relies on this behavior. But who knows?
A solution might be to have also syslog_seq latched. But I am
not sure if it is worth it.
I am for taking the risk and use the patch as it is now. Let's keep
the code for now. We could always use the latched variable when
anyone complains. Just keep it in mind.
> if (error)
> return error;
> error = syslog_print(buf, len);
Otherwise, the patch looks good to me.
Best Regards,
Petr
On 2021-02-01, Petr Mladek <[email protected]> wrote:
>> The global variables @syslog_seq, @syslog_partial, @syslog_time
>> and write access to @clear_seq are protected by @logbuf_lock.
>> Once @logbuf_lock is removed, these variables will need their
>> own synchronization method. Introduce @syslog_lock for this
>> purpose.
>
>> --- a/kernel/printk/printk.c
>> +++ b/kernel/printk/printk.c
>> @@ -390,8 +390,12 @@ DEFINE_RAW_SPINLOCK(logbuf_lock);
>> printk_safe_exit_irqrestore(flags); \
>> } while (0)
>>
>> +/* syslog_lock protects syslog_* variables and write access to clear_seq. */
>> +static DEFINE_RAW_SPINLOCK(syslog_lock);
>
> I am not an expert on RT code but I think that it prefers the generic
> spinlocks. syslog_lock seems to be used in a normal context.
> IMHO, it does not need to be a raw spinlock.
>
> Note that using normal spinlock would require switching the locking
> order. logbuf_lock is a raw lock. Normal spinlock must not be taken
> under a raw spinlock.
>
> Or we could switch syslog_lock to the normal spinlock later
> after logbuf_lock is removed.
I was planning on this last option because I think it is the
simplest. There are places such as syslog_print_all() where the
printk_safe_enter() and logbuf_lock locking are not at the same place as
the syslog_lock locking (and syslog_lock is inside).
Once the safe buffers are removed, syslog_lock can transition to a
spinlock. (spinlocks must not be under local_irq_save().)
>> +
>> #ifdef CONFIG_PRINTK
>> DECLARE_WAIT_QUEUE_HEAD(log_wait);
>> +/* All 3 protected by @syslog_lock. */
>> /* the next printk record to read by syslog(READ) or /proc/kmsg */
>> static u64 syslog_seq;
>> static size_t syslog_partial;
>> @@ -1631,6 +1643,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
>> bool clear = false;
>> static int saved_console_loglevel = LOGLEVEL_DEFAULT;
>> int error;
>> + u64 seq;
>
> This allows to remove definition of the same temporary variable
> for case SYSLOG_ACTION_SIZE_UNREAD.
Right. I missed that.
>>
>> error = check_syslog_permissions(type, source);
>> if (error)
>> @@ -1648,8 +1661,14 @@ int do_syslog(int type, char __user *buf, int len, int source)
>> return 0;
>> if (!access_ok(buf, len))
>> return -EFAULT;
>> +
>> + /* Get a consistent copy of @syslog_seq. */
>> + raw_spin_lock_irq(&syslog_lock);
>> + seq = syslog_seq;
>> + raw_spin_unlock_irq(&syslog_lock);
>> +
>> error = wait_event_interruptible(log_wait,
>> - prb_read_valid(prb, syslog_seq, NULL));
>> + prb_read_valid(prb, seq, NULL));
>
> Hmm, this will not detect when syslog_seq gets cleared in parallel.
> I hope that nobody relies on this behavior. But who knows?
>
> A solution might be to have also syslog_seq latched. But I am
> not sure if it is worth it.
>
> I am for taking the risk and use the patch as it is now. Let's keep
> the code for now. We could always use the latched variable when
> anyone complains. Just keep it in mind.
We could add a simple helper:
/* Get a consistent copy of @syslog_seq. */
static u64 syslog_seq_read(void)
{
unsigned long flags;
u64 seq;
raw_spin_lock_irqsave(&syslog_lock, flags);
seq = syslog_seq;
raw_spin_unlock_irqrestore(&syslog_lock, flags);
return seq;
}
Then change the code to:
error = wait_event_interruptible(log_wait,
prb_read_valid(prb, syslog_seq_read(), NULL));
register_console() could also make use of the function. (That is why I
am suggesting the flags variant.)
John Ogness
On Tue 2021-01-26 22:21:47, John Ogness wrote:
> Rather than store the iterator information into the registered
> kmsg_dump structure, create a separate iterator structure. The
> kmsg_dump_iter structure can reside on the stack of the caller,
> thus allowing lockless use of the kmsg_dump functions.
>
> This is in preparation for removal of @logbuf_lock.
> diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
> index 76cc4122d08e..ecc98f549d93 100644
> --- a/include/linux/kmsg_dump.h
> +++ b/include/linux/kmsg_dump.h
> @@ -29,6 +29,18 @@ enum kmsg_dump_reason {
> KMSG_DUMP_MAX
> };
>
> +/**
> + * struct kmsg_dumper_iter - iterator for kernel crash message dumper
> + * @active: Flag that specifies if this is currently dumping
> + * @cur_seq: The record to dump (private)
> + * @next_seq: The first record of the next block (private)
Just to be sure. This description should get updated if you agree with
the alternative one in the 1st patch.
> + */
> +struct kmsg_dumper_iter {
> + bool active;
> + u64 cur_seq;
> + u64 next_seq;
> +};
> +
Otherwise, I like this change.
Best Regards,
Petr
On 2021-02-01, Petr Mladek <[email protected]> wrote:
>> Rather than store the iterator information into the registered
>> kmsg_dump structure, create a separate iterator structure. The
>> kmsg_dump_iter structure can reside on the stack of the caller,
>> thus allowing lockless use of the kmsg_dump functions.
>>
>> This is in preparation for removal of @logbuf_lock.
>
>> diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
>> index 76cc4122d08e..ecc98f549d93 100644
>> --- a/include/linux/kmsg_dump.h
>> +++ b/include/linux/kmsg_dump.h
>> @@ -29,6 +29,18 @@ enum kmsg_dump_reason {
>> KMSG_DUMP_MAX
>> };
>>
>> +/**
>> + * struct kmsg_dumper_iter - iterator for kernel crash message dumper
>> + * @active: Flag that specifies if this is currently dumping
>> + * @cur_seq: The record to dump (private)
>> + * @next_seq: The first record of the next block (private)
>
> Just to be sure. This description should get updated if you agree with
> the alternative one in the 1st patch.
Yes, I assumed so and adjusted my preparation-v2 series accordingly.
John
On Tue 2021-01-26 22:21:48, John Ogness wrote:
> In preparation for removing printk's @logbuf_lock, dumpers that have
> assumed to be protected against parallel calls must provide their own
> synchronization. Add a locally static spinlock to synchronize the
> kmsg_dump call and temporary buffer usage.
>
> Signed-off-by: John Ogness <[email protected]>
> ---
> arch/um/kernel/kmsg_dump.c | 8 ++++++++
> 1 file changed, 8 insertions(+)
>
> diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c
> index f38349ad00ea..173999422ed8 100644
> --- a/arch/um/kernel/kmsg_dump.c
> +++ b/arch/um/kernel/kmsg_dump.c
> @@ -1,5 +1,6 @@
> // SPDX-License-Identifier: GPL-2.0
> #include <linux/kmsg_dump.h>
> +#include <linux/spinlock.h>
> #include <linux/console.h>
> #include <shared/init.h>
> #include <shared/kern.h>
> @@ -9,8 +10,10 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
> enum kmsg_dump_reason reason,
> struct kmsg_dumper_iter *iter)
> {
> + static DEFINE_SPINLOCK(lock);
> static char line[1024];
> struct console *con;
> + unsigned long flags;
> size_t len = 0;
>
> /* only dump kmsg when no console is available */
> @@ -25,11 +28,16 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
> if (con)
> return;
>
> + if (!spin_trylock_irqsave(&lock, flags))
> + return;
> +
> printf("kmsg_dump:\n");
> while (kmsg_dump_get_line(iter, true, line, sizeof(line), &len)) {
> line[len] = '\0';
> printf("%s", line);
> }
> +
> + spin_unlock_irqrestore(&lock, flags);
What exactly is synchronized here, please?
Access to @line buffer or @iter or both?
It looks to me that the access to @line buffer was not synchronized
before. kmsg_dump_get_line() used a lock internally but
it was not synchronized with the later printf("%s", line);
The only remaining reason might be the access to @iter. But it is on stack
in kmsg_dump(). Nobody else could have access to it.
IMHO, this patch is not needed.
Do I miss something, please?
Best Regards,
Petr
(Added CC: Thomas Meyer, Richard Weinberger)
On 2021-02-01, Petr Mladek <[email protected]> wrote:
>> In preparation for removing printk's @logbuf_lock, dumpers that have
>> assumed to be protected against parallel calls must provide their own
>> synchronization. Add a locally static spinlock to synchronize the
>> kmsg_dump call and temporary buffer usage.
>>
>> Signed-off-by: John Ogness <[email protected]>
>> ---
>> arch/um/kernel/kmsg_dump.c | 8 ++++++++
>> 1 file changed, 8 insertions(+)
>>
>> diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c
>> index f38349ad00ea..173999422ed8 100644
>> --- a/arch/um/kernel/kmsg_dump.c
>> +++ b/arch/um/kernel/kmsg_dump.c
>> @@ -1,5 +1,6 @@
>> // SPDX-License-Identifier: GPL-2.0
>> #include <linux/kmsg_dump.h>
>> +#include <linux/spinlock.h>
>> #include <linux/console.h>
>> #include <shared/init.h>
>> #include <shared/kern.h>
>> @@ -9,8 +10,10 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
>> enum kmsg_dump_reason reason,
>> struct kmsg_dumper_iter *iter)
>> {
>> + static DEFINE_SPINLOCK(lock);
>> static char line[1024];
>> struct console *con;
>> + unsigned long flags;
>> size_t len = 0;
>>
>> /* only dump kmsg when no console is available */
>> @@ -25,11 +28,16 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
>> if (con)
>> return;
>>
>> + if (!spin_trylock_irqsave(&lock, flags))
>> + return;
>> +
>> printf("kmsg_dump:\n");
>> while (kmsg_dump_get_line(iter, true, line, sizeof(line), &len)) {
>> line[len] = '\0';
>> printf("%s", line);
>> }
>> +
>> + spin_unlock_irqrestore(&lock, flags);
>
> What exactly is synchronized here, please?
> Access to @line buffer or @iter or both?
@line is being synchronized. @iter does not require synchronization.
> It looks to me that the access to @line buffer was not synchronized
> before. kmsg_dump_get_line() used a lock internally but
> it was not synchronized with the later printf("%s", line);
The line was previously synchronized for the kmsg_dump_get_line()
call. But yes, it was not synchronized after the call, which is a bug if
the dump is triggered on multiple CPUs simultaneously. The commit
message should also mention that it is handling that bug.
> IMHO, this patch is not needed.
I am not familiar enough with ARCH=um to know if dumps can be triggered
on multiple CPUs simultaneously. Perhaps ThomasM or Richard can chime in
here.
John Ogness
John,
----- Ursprüngliche Mail -----
> Von: "John Ogness" <[email protected]>
> An: "Petr Mladek" <[email protected]>
> CC: "Sergey Senozhatsky" <[email protected]>, "Sergey Senozhatsky" <[email protected]>,
> "Steven Rostedt" <[email protected]>, "tglx" <[email protected]>, "linux-kernel" <[email protected]>,
> "Thomas Meyer" <[email protected]>, "richard" <[email protected]>
> Gesendet: Montag, 1. Februar 2021 17:51:14
> Betreff: Re: [PATCH printk-rework 09/12] um: synchronize kmsg_dumper
> (Added CC: Thomas Meyer, Richard Weinberger)
>
> On 2021-02-01, Petr Mladek <[email protected]> wrote:
>>> In preparation for removing printk's @logbuf_lock, dumpers that have
>>> assumed to be protected against parallel calls must provide their own
>>> synchronization. Add a locally static spinlock to synchronize the
>>> kmsg_dump call and temporary buffer usage.
>>>
>>> Signed-off-by: John Ogness <[email protected]>
>>> ---
>>> arch/um/kernel/kmsg_dump.c | 8 ++++++++
>>> 1 file changed, 8 insertions(+)
>>>
>>> diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c
>>> index f38349ad00ea..173999422ed8 100644
>>> --- a/arch/um/kernel/kmsg_dump.c
>>> +++ b/arch/um/kernel/kmsg_dump.c
>>> @@ -1,5 +1,6 @@
>>> // SPDX-License-Identifier: GPL-2.0
>>> #include <linux/kmsg_dump.h>
>>> +#include <linux/spinlock.h>
>>> #include <linux/console.h>
>>> #include <shared/init.h>
>>> #include <shared/kern.h>
>>> @@ -9,8 +10,10 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
>>> enum kmsg_dump_reason reason,
>>> struct kmsg_dumper_iter *iter)
>>> {
>>> + static DEFINE_SPINLOCK(lock);
>>> static char line[1024];
>>> struct console *con;
>>> + unsigned long flags;
>>> size_t len = 0;
>>>
>>> /* only dump kmsg when no console is available */
>>> @@ -25,11 +28,16 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
>>> if (con)
>>> return;
>>>
>>> + if (!spin_trylock_irqsave(&lock, flags))
>>> + return;
>>> +
>>> printf("kmsg_dump:\n");
>>> while (kmsg_dump_get_line(iter, true, line, sizeof(line), &len)) {
>>> line[len] = '\0';
>>> printf("%s", line);
>>> }
>>> +
>>> + spin_unlock_irqrestore(&lock, flags);
>>
>> What exactly is synchronized here, please?
>> Access to @line buffer or @iter or both?
>
> @line is being synchronized. @iter does not require synchronization.
>
>> It looks to me that the access to @line buffer was not synchronized
>> before. kmsg_dump_get_line() used a lock internally but
>> it was not synchronized with the later printf("%s", line);
>
> The line was previously synchronized for the kmsg_dump_get_line()
> call. But yes, it was not synchronized after the call, which is a bug if
> the dump is triggered on multiple CPUs simultaneously. The commit
> message should also mention that it is handling that bug.
>
>> IMHO, this patch is not needed.
>
> I am not familiar enough with ARCH=um to know if dumps can be triggered
> on multiple CPUs simultaneously. Perhaps ThomasM or Richard can chime in
> here.
Well, uml has no SMP support, so no parallel dumps. :-)
Thanks,
//richard
Hi Richard,
On 2021-02-01, Richard Weinberger <[email protected]> wrote:
>>>> In preparation for removing printk's @logbuf_lock, dumpers that have
>>>> assumed to be protected against parallel calls must provide their own
>>>> synchronization. Add a locally static spinlock to synchronize the
>>>> kmsg_dump call and temporary buffer usage.
>>>>
>>>> Signed-off-by: John Ogness <[email protected]>
>>>> ---
>>>> arch/um/kernel/kmsg_dump.c | 8 ++++++++
>>>> 1 file changed, 8 insertions(+)
>>>>
>>>> diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c
>>>> index f38349ad00ea..173999422ed8 100644
>>>> --- a/arch/um/kernel/kmsg_dump.c
>>>> +++ b/arch/um/kernel/kmsg_dump.c
>>>> @@ -1,5 +1,6 @@
>>>> // SPDX-License-Identifier: GPL-2.0
>>>> #include <linux/kmsg_dump.h>
>>>> +#include <linux/spinlock.h>
>>>> #include <linux/console.h>
>>>> #include <shared/init.h>
>>>> #include <shared/kern.h>
>>>> @@ -9,8 +10,10 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
>>>> enum kmsg_dump_reason reason,
>>>> struct kmsg_dumper_iter *iter)
>>>> {
>>>> + static DEFINE_SPINLOCK(lock);
>>>> static char line[1024];
>>>> struct console *con;
>>>> + unsigned long flags;
>>>> size_t len = 0;
>>>>
>>>> /* only dump kmsg when no console is available */
>>>> @@ -25,11 +28,16 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
>>>> if (con)
>>>> return;
>>>>
>>>> + if (!spin_trylock_irqsave(&lock, flags))
>>>> + return;
>>>> +
>>>> printf("kmsg_dump:\n");
>>>> while (kmsg_dump_get_line(iter, true, line, sizeof(line), &len)) {
>>>> line[len] = '\0';
>>>> printf("%s", line);
>>>> }
>>>> +
>>>> + spin_unlock_irqrestore(&lock, flags);
>>>
>>> What exactly is synchronized here, please?
>>> Access to @line buffer or @iter or both?
>>
>> @line is being synchronized. @iter does not require synchronization.
>>
>>> It looks to me that the access to @line buffer was not synchronized
>>> before. kmsg_dump_get_line() used a lock internally but
>>> it was not synchronized with the later printf("%s", line);
>>
>> The line was previously synchronized for the kmsg_dump_get_line()
>> call. But yes, it was not synchronized after the call, which is a bug if
>> the dump is triggered on multiple CPUs simultaneously. The commit
>> message should also mention that it is handling that bug.
>>
>>> IMHO, this patch is not needed.
>>
>> I am not familiar enough with ARCH=um to know if dumps can be triggered
>> on multiple CPUs simultaneously. Perhaps ThomasM or Richard can chime in
>> here.
>
> Well, uml has no SMP support, so no parallel dumps. :-)
When I grep through arch/um, I see many uses of spinlocks. This would
imply that uml at least has some sort of preemption model where they are
needed. Dumps can trigger from any context and from multiple paths.
If you are sure that this is no concern, then I will drop this patch
from my series.
John Ogness
John,
----- Ursprüngliche Mail -----
>>> The line was previously synchronized for the kmsg_dump_get_line()
>>> call. But yes, it was not synchronized after the call, which is a bug if
>>> the dump is triggered on multiple CPUs simultaneously. The commit
>>> message should also mention that it is handling that bug.
>>>
>>>> IMHO, this patch is not needed.
>>>
>>> I am not familiar enough with ARCH=um to know if dumps can be triggered
>>> on multiple CPUs simultaneously. Perhaps ThomasM or Richard can chime in
>>> here.
>>
>> Well, uml has no SMP support, so no parallel dumps. :-)
>
> When I grep through arch/um, I see many uses of spinlocks. This would
> imply that uml at least has some sort of preemption model where they are
> needed. Dumps can trigger from any context and from multiple paths.
>
> If you are sure that this is no concern, then I will drop this patch
> from my series.
Currently uml selects ARCH_NO_PREEMPT, so no preemption too.
We have spinlocks at obvious places in arch/um/ just to be ready if uml supports
SMP at some point.
Does your patch have drawbacks right now for uml? If not, I'd suggest to keep it.
Thanks,
//richard
On Tue 2021-01-26 22:21:50, John Ogness wrote:
> Since the ringbuffer is lockless, there is no need for it to be
> protected by @logbuf_lock. Remove @logbuf_lock.
>
> This means that printk_nmi_direct and printk_safe_flush_on_panic()
> no longer need to acquire any lock to run.
>
> @console_seq, @exclusive_console_stop_seq, @console_dropped are
> protected by @console_lock.
>
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index d14a4afc5b72..b57dba7f077d 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -401,6 +366,7 @@ static u64 syslog_seq;
> static size_t syslog_partial;
> static bool syslog_time;
>
> +/* All 3 protected by @console_sem. */
> /* the next printk record to write to the console */
> static u64 console_seq;
> static u64 exclusive_console_stop_seq;
> @@ -762,27 +728,27 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
> if (ret)
> return ret;
>
> - logbuf_lock_irq();
> + printk_safe_enter_irq();
What is the exact reason to keep this, please?
1. The primary function of the printk_safe context is to avoid deadlock
caused by logbuf_lock. It might have happened with recursive or nested
printk(). But logbuf_lock is gone now.
2. There are still some hidden locks that were guarded by this as
well. For example, console_owner_lock, or spinlock inside
console_sem, or scheduler locks taken when console_sem()
wakes another waiting process. It might still make sense
to somehow guard these.
3. It kind of prevented infinite printk() recursion by using another
code path. The other path was limited by the size of the per-cpu
buffer. Well, recursion inside printk_safe code would likely
hit end of the stack first.
IMHO, we do not need printk safe context here in devkmsg_read().
It does not belong into any category that is described above.
logbuf_lock() is gone. devkmsg_read() is never called directly
from printk().
The same is true for almost entire patch. There are only two
or so exceptions, see below.
> if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) {
> if (file->f_flags & O_NONBLOCK) {
> ret = -EAGAIN;
> - logbuf_unlock_irq();
> + printk_safe_exit_irq();
> goto out;
> }
>
> - logbuf_unlock_irq();
> + printk_safe_exit_irq();
> ret = wait_event_interruptible(log_wait,
> prb_read_valid(prb, atomic64_read(&user->seq), r));
> if (ret)
> goto out;
> - logbuf_lock_irq();
> + printk_safe_enter_irq();
> }
>
> if (atomic64_read(&user->seq) < prb_first_valid_seq(prb)) {
> /* our last seen message is gone, return error and reset */
> atomic64_set(&user->seq, prb_first_valid_seq(prb));
> ret = -EPIPE;
> - logbuf_unlock_irq();
> + printk_safe_exit_irq();
> goto out;
> }
>
> @@ -2593,7 +2559,6 @@ void console_unlock(void)
> size_t len;
>
> printk_safe_enter_irqsave(flags);
> - raw_spin_lock(&logbuf_lock);
> skip:
> if (!prb_read_valid(prb, console_seq, &r))
> break;
I agree that printk_safe context makes sense in console_unlock().
It prevents eventual deadlocks caused, for example by
console_lock_owner.
It also somehow prevents an infinite loop when a console driver would
add a new message that would need to get flushed out as well which
would trigger another messages ...
> @@ -2973,9 +2933,7 @@ void register_console(struct console *newcon)
> /*
> * console_unlock(); will print out the buffered messages
> * for us.
> - */
> - logbuf_lock_irqsave(flags);
I am just curious what was the motivation to remove printk_safe
context here? It is a bit inconsistent with the other locations
where you kept it.
IMHO, it can be removed. It does not fit into any category
where it would help as described above.
Anyway, we have to be consistent and explain it in the commit message.
> - /*
> + *
> * We're about to replay the log buffer. Only do this to the
> * just-registered console to avoid excessive message spam to
> * the already-registered consoles.
> @@ -2988,11 +2946,9 @@ void register_console(struct console *newcon)
> exclusive_console_stop_seq = console_seq;
>
> /* Get a consistent copy of @syslog_seq. */
> - raw_spin_lock(&syslog_lock);
> + raw_spin_lock_irqsave(&syslog_lock, flags);
I guess that you have added the _irqsave() variant here to preserve the
original behavior.
I just wonder if it makes sense. We take the sleeping console_lock()
in this function. This should not be allowed in a context
with disabled interrupts.
IMHO, we do not need the irqsafe variant here. But it probably should
be removed in a separate patch. We should not hide it in this huge patch.
> console_seq = syslog_seq;
> - raw_spin_unlock(&syslog_lock);
> -
> - logbuf_unlock_irqrestore(flags);
> + raw_spin_unlock_irqrestore(&syslog_lock, flags);
> }
> console_unlock();
> console_sysfs_notify();
> @@ -3414,9 +3366,11 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog,
> struct printk_info info;
> unsigned int line_count;
> struct printk_record r;
> + unsigned long flags;
> size_t l = 0;
> bool ret = false;
>
> + printk_safe_enter_irqsave(flags);
This change is neither obvious nor documented.
I guess that this is a preparation step for unifying
kmsg_dump_get_line_nolock() and kmsg_dump_get_line().
Please, do it in the next patch and keep this one straightforward.
That said, IMHO, the printk_safe() context is not needed here at all.
This code is not called from printk() directly. The recursion is
prevented by iter->next_seq or the buffer size.
> prb_rec_init_rd(&r, &info, line, size);
>
> if (!iter->active)
> @@ -3569,8 +3517,12 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
> */
> void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter)
> {
> + unsigned long flags;
> +
> + printk_safe_enter_irqsave(flags);
This is the same as in kmsg_dump_get_line_nolock().
Please, keep this huge patch straightforward. Either replace
logbuf_lock*() with the corresponding printk_safe*() calls.
Or do not add printk_safe*() where it is not needed.
> iter->cur_seq = latched_seq_read_nolock(&clear_seq);
> iter->next_seq = prb_next_seq(prb);
> + printk_safe_exit_irqrestore(flags);
> }
>
> /**
> diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
> index a0e6f746de6c..a9a3137bd972 100644
> --- a/kernel/printk/printk_safe.c
> +++ b/kernel/printk/printk_safe.c
> @@ -368,20 +354,21 @@ __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
> #endif
>
> /*
> - * Try to use the main logbuf even in NMI. But avoid calling console
> + * Use the main logbuf even in NMI. But avoid calling console
> * drivers that might have their own locks.
> */
> - if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) &&
> - raw_spin_trylock(&logbuf_lock)) {
> + if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK)) {
> + unsigned long flags;
> int len;
>
> + printk_safe_enter_irqsave(flags);
The printk_safe context does not make much sense here. The per-context
redirection is done in vprintk_func(). It will always use this path
because PRINTK_NMI_DIRECT_CONTEXT_MASK is handled before
PRINTK_SAFE_CONTEXT_MASK.
> len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
> - raw_spin_unlock(&logbuf_lock);
> + printk_safe_exit_irqrestore(flags);
> defer_console_output();
> return len;
> }
>
> - /* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */
> + /* Use extra buffer in NMI. */
> if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
> return vprintk_nmi(fmt, args);
I agree that it makes sense to keep vprintk_nmi() for now. It prevents
an infinite recursion. I still hope that we will agree on a better
solution later.
Summary:
I do not have a strong opinion whether to remove printk_safe
enter/exit calls in this patch or in a separate one. I would
be fine with both solutions.
Anyway, please keep the patch as straightforward as possible.
Please, do not move printk_safe context into another locations.
Also provide more reasoning in the commit message. Why printk_safe
enter/exit calls are preserved or why they can be removed.
Feel free to mention that printk_safe context still makes sense on
some locations, explain why, and say that it will be removed later.
Best Regards,
Petr
On Tue 2021-01-26 22:21:51, John Ogness wrote:
> kmsg_dump_rewind() and kmsg_dump_get_line() are lockless, so there is
> no need for _nolock() variants. Remove these functions and switch all
> callers of the _nolock() variants.
>
> The functions without _nolock() were chosen because they are already
> exported to kernel modules.
>
> Signed-off-by: John Ogness <[email protected]>
Makes perfect sense. Just please move the printk_safe enter/exit
calls from the previous patch into this one. Of course, only if
you still want to keep them for now.
Reviewed-by: Petr Mladek <[email protected]>
Best Regards,
Petr
On 2021-02-02, Petr Mladek <[email protected]> wrote:
> On Tue 2021-01-26 22:21:50, John Ogness wrote:
>> Since the ringbuffer is lockless, there is no need for it to be
>> protected by @logbuf_lock. Remove @logbuf_lock.
>>
>> This means that printk_nmi_direct and printk_safe_flush_on_panic()
>> no longer need to acquire any lock to run.
>>
>> @console_seq, @exclusive_console_stop_seq, @console_dropped are
>> protected by @console_lock.
>>
>> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
>> index d14a4afc5b72..b57dba7f077d 100644
>> --- a/kernel/printk/printk.c
>> +++ b/kernel/printk/printk.c
>> @@ -401,6 +366,7 @@ static u64 syslog_seq;
>> static size_t syslog_partial;
>> static bool syslog_time;
>>
>> +/* All 3 protected by @console_sem. */
>> /* the next printk record to write to the console */
>> static u64 console_seq;
>> static u64 exclusive_console_stop_seq;
>> @@ -762,27 +728,27 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
>> if (ret)
>> return ret;
>>
>> - logbuf_lock_irq();
>> + printk_safe_enter_irq();
>
> What is the exact reason to keep this, please?
As Sergey pointed out [0], logbuf_lock_irq() does 2 things: logbuf_lock
and safe buffers. This series is not trying to remove the safe buffers
(a later series will). The series is only removing logbuf_lock. So all
logbuf_lock_*() calls will turn into printk_safe_*() calls. There are a
few exceptions, which you noticed and I will respond to.
[0] https://lkml.kernel.org/r/[email protected]
> 1. The primary function of the printk_safe context is to avoid deadlock
> caused by logbuf_lock. It might have happened with recursive or nested
> printk(). But logbuf_lock is gone now.
Agreed. Deadlock is not a concern anymore.
> 2. There are still some hidden locks that were guarded by this as
> well. For example, console_owner_lock, or spinlock inside
> console_sem, or scheduler locks taken when console_sem()
> wakes another waiting process. It might still make sense
> to somehow guard these.
This was not my motivation and I do not think it is an issue. I am not
aware of any technical need for the safe buffers to protect such
synchronization.
> 3. It kind of prevented infinite printk() recursion by using another
> code path. The other path was limited by the size of the per-cpu
> buffer. Well, recursion inside printk_safe code would likely
> hit end of the stack first.
Yes, this was my main motivation. The safe buffers carry this
responsibility in mainline. So until a replacement for recursion
protection is in place, the safe buffers should remain.
And even if we decide we do not need/want recursion protection, I still
do not think this series should be the one to remove it. I only wanted
to remove logbuf_lock for now.
If we later have regressions, it will be helpful to bisect if the safe
buffers (with their local_irq_disable()) or the logbuf_lock were
involved.
> IMHO, we do not need printk safe context here in devkmsg_read().
> It does not belong into any category that is described above.
> logbuf_lock() is gone. devkmsg_read() is never called directly
> from printk().
No. But it is calling printk_ringbuffer functions that can trigger
WARN_ONs that can trigger printk's.
> The same is true for almost entire patch. There are only two or so
> exceptions, see below.
>
>
>> if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) {
>> if (file->f_flags & O_NONBLOCK) {
>> ret = -EAGAIN;
>> - logbuf_unlock_irq();
>> + printk_safe_exit_irq();
>> goto out;
>> }
>>
>> - logbuf_unlock_irq();
>> + printk_safe_exit_irq();
>> ret = wait_event_interruptible(log_wait,
>> prb_read_valid(prb, atomic64_read(&user->seq), r));
>> if (ret)
>> goto out;
>> - logbuf_lock_irq();
>> + printk_safe_enter_irq();
>> }
>>
>> if (atomic64_read(&user->seq) < prb_first_valid_seq(prb)) {
>> /* our last seen message is gone, return error and reset */
>> atomic64_set(&user->seq, prb_first_valid_seq(prb));
>> ret = -EPIPE;
>> - logbuf_unlock_irq();
>> + printk_safe_exit_irq();
>> goto out;
>> }
>>
>
>
>> @@ -2593,7 +2559,6 @@ void console_unlock(void)
>> size_t len;
>>
>> printk_safe_enter_irqsave(flags);
>> - raw_spin_lock(&logbuf_lock);
>> skip:
>> if (!prb_read_valid(prb, console_seq, &r))
>> break;
>
> I agree that printk_safe context makes sense in console_unlock().
> It prevents eventual deadlocks caused, for example by
> console_lock_owner.
>
> It also somehow prevents an infinite loop when a console driver would
> add a new message that would need to get flushed out as well which
> would trigger another messages ...
>
>
>> @@ -2973,9 +2933,7 @@ void register_console(struct console *newcon)
>> /*
>> * console_unlock(); will print out the buffered messages
>> * for us.
>> - */
>> - logbuf_lock_irqsave(flags);
>
> I am just curious what was the motivation to remove printk_safe
> context here? It is a bit inconsistent with the other locations
> where you kept it.
This never should have been logbuf_lock_irqsave(flags) in the first
place. It would have been enough to just grab @logbuf_lock. The safe
buffers only make sense if printk or printk_ringbuffer functions are
called. Here we are just copying some variables (which are already
protected by console_sem and syslog_lock).
> IMHO, it can be removed. It does not fit into any category
> where it would help as described above.
>
> Anyway, we have to be consistent and explain it in the commit message.
I could add an earlier patch that changes logbuf_lock_irqsave() here to
spin_lock_irqsave() and explain. Then for this patch it would be clear
that it is just dropped.
>> - /*
>> + *
>> * We're about to replay the log buffer. Only do this to the
>> * just-registered console to avoid excessive message spam to
>> * the already-registered consoles.
>> @@ -2988,11 +2946,9 @@ void register_console(struct console *newcon)
>> exclusive_console_stop_seq = console_seq;
>>
>> /* Get a consistent copy of @syslog_seq. */
>> - raw_spin_lock(&syslog_lock);
>> + raw_spin_lock_irqsave(&syslog_lock, flags);
>
> I guess that you have added the _irqsave() variant here to preserve the
> original behavior.
Yes.
> I just wonder if it makes sense. We take the sleeping console_lock()
> in this function. This should not be allowed in a context
> with disabled interrupts.
>
> IMHO, we do not need the irqsafe variant here. But it probably should
> be removed in a separate patch. We should not hide it in this huge patch.
Yes. Let's put that in another patch. It is not related to logbuf_lock
removal.
>> console_seq = syslog_seq;
>> - raw_spin_unlock(&syslog_lock);
>> -
>> - logbuf_unlock_irqrestore(flags);
>> + raw_spin_unlock_irqrestore(&syslog_lock, flags);
>> }
>> console_unlock();
>> console_sysfs_notify();
>> @@ -3414,9 +3366,11 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog,
>> struct printk_info info;
>> unsigned int line_count;
>> struct printk_record r;
>> + unsigned long flags;
>> size_t l = 0;
>> bool ret = false;
>>
>> + printk_safe_enter_irqsave(flags);
>
> This change is neither obvious nor documented.
I noticed that this function was calling ringbuffer functions without
marking the region for safe buffers. I should have included a separate
patch before this one adding the safe buffers so that it would be
clear. Sorry.
> I guess that this is a preparation step for unifying
> kmsg_dump_get_line_nolock() and kmsg_dump_get_line().
>
> Please, do it in the next patch and keep this one straightforward.
Or I will just do it in the following unification patch.
> That said, IMHO, the printk_safe() context is not needed here at all.
> This code is not called from printk() directly. The recursion is
> prevented by iter->next_seq or the buffer size.
My logic was: "If it is calling prb_*(), it should be protected by safe
buffers."
>> prb_rec_init_rd(&r, &info, line, size);
>>
>> if (!iter->active)
>
>
>> @@ -3569,8 +3517,12 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
>> */
>> void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter)
>> {
>> + unsigned long flags;
>> +
>> + printk_safe_enter_irqsave(flags);
>
> This is the same as in kmsg_dump_get_line_nolock().
>
> Please, keep this huge patch straightforward. Either replace
> logbuf_lock*() with the corresponding printk_safe*() calls.
> Or do not add printk_safe*() where it is not needed.
Agreed. Sorry.
>> iter->cur_seq = latched_seq_read_nolock(&clear_seq);
>> iter->next_seq = prb_next_seq(prb);
>> + printk_safe_exit_irqrestore(flags);
>> }
>>
>> /**
>
>> diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
>> index a0e6f746de6c..a9a3137bd972 100644
>> --- a/kernel/printk/printk_safe.c
>> +++ b/kernel/printk/printk_safe.c
>> @@ -368,20 +354,21 @@ __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
>> #endif
>>
>> /*
>> - * Try to use the main logbuf even in NMI. But avoid calling console
>> + * Use the main logbuf even in NMI. But avoid calling console
>> * drivers that might have their own locks.
>> */
>> - if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) &&
>> - raw_spin_trylock(&logbuf_lock)) {
>> + if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK)) {
>> + unsigned long flags;
>> int len;
>>
>> + printk_safe_enter_irqsave(flags);
>
> The printk_safe context does not make much sense here. The per-context
> redirection is done in vprintk_func(). It will always use this path
> because PRINTK_NMI_DIRECT_CONTEXT_MASK is handled before
> PRINTK_SAFE_CONTEXT_MASK.
If the following vprintk_store() calls printk(), those printk's should
land in safe buffers. If @printk_context is not incremented, they land
here again.
vprintk_store() relies on its caller to update @printk_context.
>> len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
>> - raw_spin_unlock(&logbuf_lock);
>> + printk_safe_exit_irqrestore(flags);
>> defer_console_output();
>> return len;
>> }
>>
>> - /* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */
>> + /* Use extra buffer in NMI. */
>> if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
>> return vprintk_nmi(fmt, args);
>
> I agree that it makes sense to keep vprintk_nmi() for now. It prevents
> an infinite recursion. I still hope that we will agree on a better
> solution later.
>
>
> Summary:
>
> I do not have a strong opinion whether to remove printk_safe
> enter/exit calls in this patch or in a separate one. I would
> be fine with both solutions.
>
> Anyway, please keep the patch as straightforward as possible.
> Please, do not move printk_safe context into another locations.
>
> Also provide more reasoning in the commit message. Why printk_safe
> enter/exit calls are preserved or why they can be removed.
>
> Feel free to mention that printk_safe context still makes sense on
> some locations, explain why, and say that it will be removed later.
OK. Thank you for the excellent response. I will go over this again.
John Ogness
On Mon 2021-02-01 10:55:22, John Ogness wrote:
> On 2021-01-29, Petr Mladek <[email protected]> wrote:
> >> The logic for finding records to fit into a buffer is the same for
> >> kmsg_dump_get_buffer() and syslog_print_all(). Introduce a helper
> >> function find_first_fitting_seq() to handle this logic.
> >>
> >> Signed-off-by: John Ogness <[email protected]>
> >> ---
> >> kernel/printk/printk.c | 71 ++++++++++++++++++++++++------------------
> >> 1 file changed, 41 insertions(+), 30 deletions(-)
> >>
> >> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> >> index 1929aa372e7f..ec2174882b8e 100644
> >> --- a/kernel/printk/printk.c
> >> +++ b/kernel/printk/printk.c
> >> @@ -1421,6 +1421,41 @@ static size_t get_record_print_text_size(struct printk_info *info,
> >> return ((prefix_len * line_count) + info->text_len + 1);
> >> }
> >>
> >> +/*
> >> + * Beginning with @start_seq, find the first record where it and all following
> >> + * records up to (but not including) @max_seq fit into @size.
> >
> > Please, mention that all existing messages are checked when @max_seq
> > == -1 is used.
>
> -1 is not special for that purpose, but I will add a comment that if
> there is no required upper bound, the caller can use -1.
Thanks. I think that it is good to mention that the function is able
to handle this situation. For example, it will not wait until
a message with @max_seq is available ;-)
> >> + */
> >> +static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size,
> >> + struct printk_info *info, bool syslog, bool time)
> >> +{
> >> + /*
> >> + * Move first record forward until length fits into the buffer. Ignore
> >> + * newest messages that were not counted in the above cycle. Messages
> >> + * might appear and get lost in the meantime. This is a best effort
> >> + * that prevents an infinite loop that could occur with a retry.
> >> + */
> >> + if (seq < max_seq)
> >> + max_seq = seq;
> >
> > This made my head twist around several times ;-)
> >
> > It should never be true in kmsg_dump_get_buffer().
>
> Correct. It is there because of syslog_print_all().
>
> > And there was nothing like this in the original syslog_print_all().
>
> With logbuf_lock, it is not possible that new messages arrive in between
> these two loops. But without logbuf_lock, it _is_ possible and this
> needs to be handled.
I see.
> I can expand the commit message to mention this necessary change.
Yes, please. I am fine with the code now.
> >> @@ -3436,26 +3461,12 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
> >>
> >> /*
> >> * Find first record that fits, including all following records,
> >> - * into the user-provided buffer for this dump.
> >> + * into the user-provided buffer for this dump. Pass in size-1
> >> + * because this function (by way of record_print_text()) will
> >> + * not write more than size-1 bytes of text into @buf.
> >
> > We should do the same also in syslog_print_all(). It must have the
> > same problem. The last message might get lost when there is not
> > a space for the trailing '\0' that was not counted before.
>
> No, it does not have the same problem. I also made the mistake [0] of
> thinking that.
>
> copy_to_user() is the function filling the buffer, not
> record_print_text(). And it will (and always has) fill the full
> buffer. Only kmsg_dump_get_buffer() has the bizarre semantics of not
> using the full buffer.
Right. I got it the wrong way.
> > And it might be better to actually change the condition in
> > find_first_fitting_seq(). I mean to replace:
> >
> > if (len <= size || info.seq >= max_seq)
> > with
> > if (len < size || info.seq >= max_seq)
>
> I would prefer not twisting syslog_print_all() to act like
> kmsg_dump_get_buffer().
I agree. It is not a common problem after all.
Best Regards,
Petr
On Mon 2021-02-01 14:17:55, John Ogness wrote:
> On 2021-02-01, Petr Mladek <[email protected]> wrote:
> >> The global variables @syslog_seq, @syslog_partial, @syslog_time
> >> and write access to @clear_seq are protected by @logbuf_lock.
> >> Once @logbuf_lock is removed, these variables will need their
> >> own synchronization method. Introduce @syslog_lock for this
> >> purpose.
> >
> >> --- a/kernel/printk/printk.c
> >> +++ b/kernel/printk/printk.c
> >> @@ -390,8 +390,12 @@ DEFINE_RAW_SPINLOCK(logbuf_lock);
> >> printk_safe_exit_irqrestore(flags); \
> >> } while (0)
> >>
> >> +/* syslog_lock protects syslog_* variables and write access to clear_seq. */
> >> +static DEFINE_RAW_SPINLOCK(syslog_lock);
> >
> > I am not expert on RT code but I think that it prefers the generic
> > spinlocks. syslog_lock seems to be used in a normal context.
> > IMHO, it does not need to be a raw spinlock.
> >
> > Note that using normal spinlock would require switching the locking
> > order. logbuf_lock is a raw lock. Normal spinlock must not be taken
> > under a raw spinlock.
> >
> > Or we could switch syslog_lock to the normal spinlock later
> > after logbuf_lock is removed.
>
> I was planning on this last option because I think it is the
> simplest. There are places such as syslog_print_all() where the
> printk_safe_enter() and logbuf_lock locking are not at the same place as
> the syslog_lock locking (and syslog_lock is inside).
>
> Once the safe buffers are removed, syslog_lock can transition to a
> spinlock. (spinlocks must not be under local_irq_save().)
Fair enough. Please, mention in the commit message that it
will get switched to normal spinlock later. And the raw
spinlock is used to make the transition more straightforward
or something like this.
> >> +
> >> #ifdef CONFIG_PRINTK
> >> DECLARE_WAIT_QUEUE_HEAD(log_wait);
> >> +/* All 3 protected by @syslog_lock. */
> >> /* the next printk record to read by syslog(READ) or /proc/kmsg */
> >> static u64 syslog_seq;
> >> static size_t syslog_partial;
> >> @@ -1648,8 +1661,14 @@ int do_syslog(int type, char __user *buf, int len, int source)
> >> return 0;
> >> if (!access_ok(buf, len))
> >> return -EFAULT;
> >> +
> >> + /* Get a consistent copy of @syslog_seq. */
> >> + raw_spin_lock_irq(&syslog_lock);
> >> + seq = syslog_seq;
> >> + raw_spin_unlock_irq(&syslog_lock);
> >> +
> >> error = wait_event_interruptible(log_wait,
> >> - prb_read_valid(prb, syslog_seq, NULL));
> >> + prb_read_valid(prb, seq, NULL));
> >
> > Hmm, this will not detect when syslog_seq gets cleared in parallel.
> > I hope that nobody rely on this behavior. But who knows?
> >
> > A solution might be to have also syslog_seq latched. But I am
> > not sure if it is worth it.
> >
> > I am for taking the risk and use the patch as it is now. Let's keep
> > the code for now. We could always use the latched variable when
> > anyone complains. Just keep it in mind.
>
> We could add a simple helper:
>
> /* Get a consistent copy of @syslog_seq. */
> static u64 syslog_seq_read(void)
> {
> unsigned long flags;
> u64 seq;
>
> raw_spin_lock_irqsave(&syslog_lock, flags);
> seq = syslog_seq;
> raw_spin_unlock_irqrestore(&syslog_lock, flags);
> return seq;
> }
>
> Then change the code to:
>
> error = wait_event_interruptible(log_wait,
> prb_read_valid(prb, syslog_seq_read(), NULL));
>
Great idea! Please, use it but without the flags. IMHO, using flags
might be confusing when reading the code. It might create false
expectations...
> register_console() could also make use of the function. (That is why I
> am suggesting the flags variant.)
I think that flags are actually not needed in register_console() as
mentioned in the other mail. Anyway, we could keep register_console()
as is (opencoded) for now.
Best Regards,
Petr
On Mon 2021-02-01 17:57:14, John Ogness wrote:
> (Added CC: Thomas Meyer, Richard Weinberger)
>
> On 2021-02-01, Petr Mladek <[email protected]> wrote:
> >> In preparation for removing printk's @logbuf_lock, dumpers that have
> >> assumed to be protected against parallel calls must provide their own
> >> synchronization. Add a locally static spinlock to synchronize the
> >> kmsg_dump call and temporary buffer usage.
> >>
> >> Signed-off-by: John Ogness <[email protected]>
> >> ---
> >> arch/um/kernel/kmsg_dump.c | 8 ++++++++
> >> 1 file changed, 8 insertions(+)
> >>
> >> diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c
> >> index f38349ad00ea..173999422ed8 100644
> >> --- a/arch/um/kernel/kmsg_dump.c
> >> +++ b/arch/um/kernel/kmsg_dump.c
> >> @@ -1,5 +1,6 @@
> >> // SPDX-License-Identifier: GPL-2.0
> >> #include <linux/kmsg_dump.h>
> >> +#include <linux/spinlock.h>
> >> #include <linux/console.h>
> >> #include <shared/init.h>
> >> #include <shared/kern.h>
> >> @@ -9,8 +10,10 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
> >> enum kmsg_dump_reason reason,
> >> struct kmsg_dumper_iter *iter)
> >> {
> >> + static DEFINE_SPINLOCK(lock);
> >> static char line[1024];
> >> struct console *con;
> >> + unsigned long flags;
> >> size_t len = 0;
> >>
> >> /* only dump kmsg when no console is available */
> >> @@ -25,11 +28,16 @@ static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
> >> if (con)
> >> return;
> >>
> >> + if (!spin_trylock_irqsave(&lock, flags))
> >> + return;
> >> +
> >> printf("kmsg_dump:\n");
> >> while (kmsg_dump_get_line(iter, true, line, sizeof(line), &len)) {
> >> line[len] = '\0';
> >> printf("%s", line);
> >> }
> >> +
> >> + spin_unlock_irqrestore(&lock, flags);
> >
> > What exactly is synchronized here, please?
> > Access to @line buffer or @iter or both?
>
> @line is being synchronized. @iter does not require synchronization.
>
> > It looks to me that the access to @line buffer was not synchronized
> > before. kmsg_dump_get_line() used a lock internally but
> > it was not synchronized with the later printf("%s", line);
>
> The line was previously synchronized for the kmsg_dump_get_line()
> call. But yes, it was not synchronized after the call, which is a bug if
> the dump is triggered on multiple CPUs simultaneously. The commit
> message should also mention that it is handling that bug.
Yes, please update the commit message.
In fact, I think that mentioning logbuf_lock might be a bit
misleading. This patch fixes an old bug. It was found when
auditing code that is using the kmsg_dump_get_line() and
might be affected by the logbuf_lock removal.
Feel free to use:
Reviewed-by: Petr Mladek <[email protected]>
Finally thanks Richard for the input about SMP on UM arch.
Best Regards,
Petr
On Tue 2021-02-02 12:47:20, John Ogness wrote:
> On 2021-02-02, Petr Mladek <[email protected]> wrote:
> > On Tue 2021-01-26 22:21:50, John Ogness wrote:
> >> Since the ringbuffer is lockless, there is no need for it to be
> >> protected by @logbuf_lock. Remove @logbuf_lock.
> >>
> >> This means that printk_nmi_direct and printk_safe_flush_on_panic()
> >> no longer need to acquire any lock to run.
> >>
> >> @console_seq, @exclusive_console_stop_seq, @console_dropped are
> >> protected by @console_lock.
> >>
> >> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> >> index d14a4afc5b72..b57dba7f077d 100644
> >> --- a/kernel/printk/printk.c
> >> +++ b/kernel/printk/printk.c
> >> @@ -401,6 +366,7 @@ static u64 syslog_seq;
> >> static size_t syslog_partial;
> >> static bool syslog_time;
> >>
> >> +/* All 3 protected by @console_sem. */
> >> /* the next printk record to write to the console */
> >> static u64 console_seq;
> >> static u64 exclusive_console_stop_seq;
> >> @@ -762,27 +728,27 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
> >> if (ret)
> >> return ret;
> >>
> >> - logbuf_lock_irq();
> >> + printk_safe_enter_irq();
> >
> > What is the exact reason to keep this, please?
>
> As Sergey pointed out [0], logbuf_lock_irq() does 2 things: logbuf_lock
> and safe buffers. This series is not trying to remove the safe buffers
> (a later series will). The series is only removing logbuf_lock. So all
> logbuf_lock_*() calls will turn into printk_safe_*() calls. There are a
> few exceptions, which you noticed and I will respond to.
>
> [0] https://lkml.kernel.org/r/[email protected]
>
> > 1. The primary function of the printk_safe context is to avoid deadlock
> > caused by logbuf_lock. It might have happened with recursive or nested
> > printk(). But logbuf_lock is gone now.
>
> Agreed. Deadlock is not a concern anymore.
>
> > 2. There are still some hidden locks that were guarded by this as
> > well. For example, console_owner_lock, or spinlock inside
> > console_sem, or scheduler locks taken when console_sem()
> > wakes another waiting process. It might still make sense
> > to somehow guard these.
>
> This was not my motivation and I do not think it is an issue. I am not
> aware of any technical need for the safe buffers to protect such
> synchronization.
Just to make it clear. These locks are explicitly protected, see
printk_safe_enter_irqsave()/printk_safe_exit_irqrestore() in
+ console_lock_spinning_enable()
+ console_lock_spinning_disable_and_check()
+ console_trylock_spinning()
+ __up_console_sem()
+ __down_trylock_console_sem()
Re-entering the code guarded by console_sem.lock or console_owner_lock
would cause a deadlock.
For example, spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_)
in console_trylock_spinning() might cause recursive printk().
These code paths are not touched by this patch because they
do not use logbuf_lock. I mentioned this primarily for completeness.
We could not remove printk_safe code before we prevent these
deadlocks another way. Well, it is possible that all this
code will get removed after offloading consoles into kthreads.
Alternatively, we might simply prevent calling console in any recursive
printk.
Anyway, printk_safe is too big a hammer. We no longer have problems
writing recursive messages into the lockless ringbuffer. We only
need to somehow limit the recursion.
Anyway, this is for another patchset.
>
> > 3. It kind of prevented infinite printk() recursion by using another
> > code path. The other path was limited by the size of the per-cpu
> > buffer. Well, recursion inside printk_safe code would likely
> > hit end of the stack first.
>
> Yes, this was my main motivation. The safe buffers carry this
> responsibility in mainline. So until a replacement for recursion
> protection is in place, the safe buffers should remain.
>
> And even if we decide we do not need/want recursion protection, I still
> do not think this series should be the one to remove it. I only wanted
> to remove logbuf_lock for now.
Fair enough.
> If we later have regressions, it will be helpful to bisect if the safe
> buffers (with their local_irq_disable()) or the logbuf_lock were
> involved.
Makes perfect sense.
> > IMHO, we do not need printk safe context here in devkmsg_read().
> > It does not belong into any category that is described above.
> > logbuf_lock is gone. devkmsg_read() is never called directly
> > from printk().
>
> No. But it is calling printk_ringbuffer functions that can trigger
> WARN_ONs that can trigger printk's.
But printk() does not call devkmsg_read(). At least not directly.
So this does not cause any infinite recursion.
Anyway, I agree that we should just remove logbuf_lock in this patch
and keep printk_safe context where it was.
> >> @@ -2973,9 +2933,7 @@ void register_console(struct console *newcon)
> >> /*
> >> * console_unlock(); will print out the buffered messages
> >> * for us.
> >> - */
> >> - logbuf_lock_irqsave(flags);
> >
> > I am just curious what was the motivation to remove printk_safe
> > context here? It is a bit inconsistent with the other locations
> > where you kept it.
>
> This never should have been logbuf_lock_irqsave(flags) in the first
> place. It would have been enough to just grab @logbuf_lock. The safe
> buffers only make sense if printk or printk_ringbuffer functions are
> called. Here we are just copying some variables (which are already
> protected by console_sem and syslog_lock).
I think that we replaced all spin_lock_irqsave() with
logbuf_lock_irqsave(flags) at some point to be on the safe side.
> > IMHO, it can be removed. It does not fit into any category
> > where it would help as described above.
> >
> > Anyway, we have to be consistent and explain it in the commit message.
>
> I could add an earlier patch that changes logbuf_lock_irqsave() here to
> spin_lock_irqsave() and explain. Then for this patch it would be clear
> that it is just dropped.
My main concern was that you _moved_ the _location_ where the lock/printk
safe context was taken. Such a change should not be hidden in this
patch.
I suggest to simply replace logbuf_lock_irqsave(flags) with
printk_safe_enter_irqsave(flags) and keep the location. I mean to do
exactly the same thing that you did on all other locations in this
patch.
As you said. Let's keep this patch(set) for removing logbuf_lock.
And let's remove printk_safe context in another patchset later.
>
> >> - /*
> >> + *
> >> * We're about to replay the log buffer. Only do this to the
> >> * just-registered console to avoid excessive message spam to
> >> * the already-registered consoles.
> >> @@ -3414,9 +3366,11 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog,
> >> struct printk_info info;
> >> unsigned int line_count;
> >> struct printk_record r;
> >> + unsigned long flags;
> >> size_t l = 0;
> >> bool ret = false;
> >>
> >> + printk_safe_enter_irqsave(flags);
> >
> > This change is neither obvious nor documented.
>
> I noticed that this function was calling ringbuffer functions without
> marking the region for safe buffers. I should have included a separate
> patch before this one adding the safe buffers so that it would be
> clear. Sorry.
>
> > I guess that this is a preparation step for unifying
> > kmsg_dump_get_line_nolock() and kmsg_dump_get_line().
> >
> > Please, do it in the next patch and keep this one straightforward.
>
> Or I will just do it in the following unification patch.
>
> > That said, IMHO, the printk_safe() context is not needed here at all.
> > This code is not called from printk() directly. The recursion is
> > prevented by iter->next_seq or the buffer size.
>
> My logic was: "If it is calling prb_*(), it should be protected by safe
> buffers."
IMHO, it is not necessary if there is no risk of recursion.
Let's just remove logbuf_lock in this patch. Let's keep printk_safe
enter/exit calls on the same locations where they were before.
We should neither add, remove, or move them.
> >> prb_rec_init_rd(&r, &info, line, size);
> >>
> >> if (!iter->active)
> >
> >
> >> diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
> >> index a0e6f746de6c..a9a3137bd972 100644
> >> --- a/kernel/printk/printk_safe.c
> >> +++ b/kernel/printk/printk_safe.c
> >> @@ -368,20 +354,21 @@ __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
> >> #endif
> >>
> >> /*
> >> - * Try to use the main logbuf even in NMI. But avoid calling console
> >> + * Use the main logbuf even in NMI. But avoid calling console
> >> * drivers that might have their own locks.
> >> */
> >> - if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) &&
> >> - raw_spin_trylock(&logbuf_lock)) {
> >> + if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK)) {
> >> + unsigned long flags;
> >> int len;
> >>
> >> + printk_safe_enter_irqsave(flags);
> >
> > The printk_safe context does not make much sense here. The per-context
> > redirection is done in vprintk_func(). It will always use this path
> > because PRINTK_NMI_DIRECT_CONTEXT_MASK is handled before
> > PRINTK_SAFE_CONTEXT_MASK.
>
> If the following vprintk_store() calls printk(), those printk's should
> land in safe buffers. If @printk_context is not incremented, they land
> here again.
>
> vprintk_store() relies on its caller to update @printk_context.
Never mind. Use printk_safe_enter_irqsave(flags) here as you did. It is
a straightforward replacement of the above raw_spin_trylock(&logbuf_lock).
> >> len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
> >> - raw_spin_unlock(&logbuf_lock);
> >> + printk_safe_exit_irqrestore(flags);
> >> defer_console_output();
> >> return len;
> >> }
> >>
> >> - /* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */
> >> + /* Use extra buffer in NMI. */
> >> if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
> >> return vprintk_nmi(fmt, args);
> >
> OK. Thank you for the excellent response. I will go over this again.
Thanks a lot for your patience. I know that reshuffling non-trivial
patchsets is a pain. But I think that we are getting close.
I really like that we are doing it in small steps.
Best Regards,
Petr