Implement a new buffering mechanism for pr_cont messages.
Old mechanism syntax:
printk(KERN_INFO "text");
printk(KERN_CONT " continued");
printk(KERN_CONT "\n");
New mechanism syntax:
pr_cont_t c;
pr_cont_begin(&c, KERN_INFO "text");
pr_cont_append(&c, " continued");
pr_cont_end(&c);
Signed-off-by: John Ogness <[email protected]>
---
include/linux/printk.h | 19 ++++++
kernel/printk/printk.c | 137 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 156 insertions(+)
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 34c1a7be3e01..4d9ce18e4afa 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -380,6 +380,25 @@ extern int kptr_restrict;
#define pr_cont(fmt, ...) \
printk(KERN_CONT fmt, ##__VA_ARGS__)
+/* opaque handle for continuous printk messages */
+typedef struct {
+ u8 index;
+ u8 loglevel;
+ u16 text_len;
+} pr_cont_t;
+
+/* initialize handle, provide loglevel and initial message text */
+int pr_cont_begin(pr_cont_t *c, const char *fmt, ...);
+
+/* append message text */
+int pr_cont_append(pr_cont_t *c, const char *fmt, ...);
+
+/* flush message to kernel buffer */
+void pr_cont_flush(pr_cont_t *c);
+
+/* flush message to kernel buffer, cleanup handle */
+void pr_cont_end(pr_cont_t *c);
+
/**
* pr_devel - Print a debug-level message conditionally
* @fmt: format string
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index ad8d1dfe5fbe..10113e7ea350 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3495,3 +3495,140 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper)
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
#endif
+
+#define CONT_LINE_MAX LOG_LINE_MAX
+#define CONT_BUF_COUNT 10
+static char cont_buf[CONT_BUF_COUNT][CONT_LINE_MAX];
+static DECLARE_BITMAP(cont_buf_map, CONT_BUF_COUNT);
+
+static int get_cont_buf(void)
+{
+ int bit;
+
+ do {
+ bit = find_first_zero_bit(cont_buf_map, CONT_BUF_COUNT);
+ if (bit == CONT_BUF_COUNT)
+ break;
+ } while (test_and_set_bit(bit, cont_buf_map));
+
+ return bit;
+}
+
+static void put_cont_buf(int index)
+{
+ if (WARN_ON(index >= CONT_BUF_COUNT))
+ return;
+ if (WARN_ON(!test_bit(index, cont_buf_map)))
+ return;
+ clear_bit(index, cont_buf_map);
+}
+
+/* alloc buffer, get loglevel, setup initial text */
+int pr_cont_begin(pr_cont_t *c, const char *fmt, ...)
+{
+ int kern_level;
+ va_list args;
+ char *text;
+
+ c->index = get_cont_buf();
+ if (c->index == CONT_BUF_COUNT) {
+ /* Fallback to printk parts individually. */
+ int text_len;
+
+ va_start(args, fmt);
+ text_len = vprintk(fmt, args);
+ va_end(args);
+ return text_len;
+ }
+
+ text = &cont_buf[c->index][0];
+
+ va_start(args, fmt);
+ c->text_len = vscnprintf(text, CONT_LINE_MAX, fmt, args);
+ va_end(args);
+
+ c->loglevel = default_message_loglevel;
+ while (c->text_len > 1 &&
+ (kern_level = printk_get_level(text)) != 0) {
+ switch (kern_level) {
+ case '0' ... '7':
+ c->loglevel = kern_level - '0';
+ break;
+ }
+
+ c->text_len -= 2;
+ memmove(text, text + 2, c->text_len);
+ }
+
+ return c->text_len;
+}
+
+/* printk existing buffer, reset buffer */
+void pr_cont_flush(pr_cont_t *c)
+{
+ char *text;
+
+ if (c->index == CONT_BUF_COUNT || !c->text_len)
+ return;
+
+ text = &cont_buf[c->index][0];
+
+ printk("%c%c%s\n", KERN_SOH_ASCII, c->loglevel + '0', text);
+
+ c->text_len = 0;
+}
+
+/* printk existing buffer, free buffer */
+void pr_cont_end(pr_cont_t *c)
+{
+ if (c->index == CONT_BUF_COUNT)
+ return;
+
+ pr_cont_flush(c);
+ put_cont_buf(c->index);
+ c->index = CONT_BUF_COUNT;
+}
+
+/* append to buffer */
+int pr_cont_append(pr_cont_t *c, const char *fmt, ...)
+{
+ va_list args_copy;
+ va_list args;
+ int text_len;
+ char *text;
+
+ if (c->index == CONT_BUF_COUNT) {
+ /* Fallback to printk parts individually. */
+ va_start(args, fmt);
+ text_len = vprintk(fmt, args);
+ va_end(args);
+ return text_len;
+ }
+
+ text = &cont_buf[c->index][0];
+
+ /* Try to append directly. */
+ va_start(args, fmt);
+ va_copy(args_copy, args);
+ text_len = vsnprintf(text + c->text_len, CONT_LINE_MAX - c->text_len, fmt, args);
+ va_end(args);
+
+ if (text_len >= CONT_LINE_MAX - c->text_len) {
+ /*
+ * Not enough space remaining. Set the overwritten terminator,
+ * Flush any existing parts and start with a clean buffer.
+ * The loglevel is preserved.
+ */
+
+ text[c->text_len] = 0;
+ pr_cont_flush(c);
+
+ va_start(args_copy, fmt);
+ c->text_len = vscnprintf(text, CONT_LINE_MAX, fmt, args_copy);
+ va_end(args_copy);
+ return c->text_len;
+ }
+
+ c->text_len += text_len;
+ return text_len;
+}
--
2.20.1
On Thu, 2020-08-20 at 01:32 +0206, John Ogness wrote:
> Implement a new buffering mechanism for pr_cont messages.
>
> Old mechanism syntax:
>
> printk(KERN_INFO "text");
> printk(KERN_CONT " continued");
> printk(KERN_CONT "\n");
>
> New mechanism syntax:
>
> pr_cont_t c;
>
> pr_cont_begin(&c, KERN_INFO "text");
bikeshed:
I suggest:
printk_begin(&printk_context, fmt, ...)
printk_continue(&printk_context, fmt, ...) (maybe printk_next())
printk_end(&printk_context, fmt, ...)
and macros using pr_<level>_begin
ie:
#define pr_info_begin(context, fmt, ...) \
printk_begin(context, KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
and each continued bit would use printk_continue or printk_end
as appropriate.
KERN_<LEVEL> could be a separate argument, but it's simple
enough to use
printk_get_level on the format.
From: Joe Perches
> Sent: 20 August 2020 01:34
>
> On Thu, 2020-08-20 at 01:32 +0206, John Ogness wrote:
> > Implement a new buffering mechanism for pr_cont messages.
> >
> > Old mechanism syntax:
> >
> > printk(KERN_INFO "text");
> > printk(KERN_CONT " continued");
> > printk(KERN_CONT "\n");
> >
> > New mechanism syntax:
> >
> > pr_cont_t c;
> >
> > pr_cont_begin(&c, KERN_INFO "text");
>
> bikeshed:
>
> I suggest:
>
> printk_begin(&printk_context, fmt, ...)
> printk_continue(&printk_context, fmt, ...) (maybe printk_next())
> printk_end(&printk_context, fmt, ...)
I see some very long source lines looming ...
Blue bikeshed:
You'd probably want printk_end(&ctx, NULL) to work.
Although the example doesn't show where the '\n' comes from.
Although I guess it is now inferred and actually deleted
from 'normal' printk() call.
I've no idea how you'd 'size' the number of buffers.
You could use kmalloc(), perhaps falling back on a local buffer.
While might lead to:
pr_init(&ctx, level, GFP_KERNEL);
pr_cont(&ctx, fmt, ...);
pr_flush(&ctx);
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
On Thu, 2020-08-20 at 07:44 +0000, David Laight wrote:
> I've no idea how you'd 'size' the number of buffers.
I believe they are static and assume no more than 10
simultaneous uses of
printk_begin
+#define CONT_LINE_MAX LOG_LINE_MAX
+#define CONT_BUF_COUNT 10
+static char cont_buf[CONT_BUF_COUNT][CONT_LINE_MAX];
+static DECLARE_BITMAP(cont_buf_map, CONT_BUF_COUNT);
Though I don't see much value in a separate define
for CONT_LINE_MAx
From: Joe Perches
> Sent: 20 August 2020 09:00
>
> On Thu, 2020-08-20 at 07:44 +0000, David Laight wrote:
> > I've no idea how you'd 'size' the number of buffers.
>
> I believe they are static and assume no more than 10
> simultaneous uses of printk_begin
What I meant was how you'd work out whether 10 was
in any way appropriate.
ISTM it is either 'too many' or 'nowhere near enough'
depending on exactly what the system is doing.
And if code 'leaks' them you are in deep doo-doos.
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
On 2020-08-20, David Laight <[email protected]> wrote:
>> On Thu, 2020-08-20 at 07:44 +0000, David Laight wrote:
>>> I've no idea how you'd 'size' the number of buffers.
>>
>> I believe they are static and assume no more than 10
>> simultaneous uses of printk_begin
>
> What I meant was how you'd work out whether 10 was in any way
> appropriate. ISTM it is either 'too many' or 'nowhere near enough'
> depending on exactly what the system is doing.
Right now mainline has 1, which breaks pr_cont just booting your system.
I expect we will be increasing the number of buffers, regardless if we
adapt a new API or continue with what we have now.
> And if code 'leaks' them you are in deep doo-doos.
Not really. It falls back to printing individual parts. Also, the printk
subsystem has access to the open buffers and could even track the users
lockdep style.
But this discussion has little to do with the API. These are
implementation details that may end up under the hood of the current
mainline API.
John Ogness
On Thu 2020-08-20 01:32:28, John Ogness wrote:
> Implement a new buffering mechanism for pr_cont messages.
>
> Old mechanism syntax:
>
> printk(KERN_INFO "text");
> printk(KERN_CONT " continued");
> printk(KERN_CONT "\n");
>
> New mechanism syntax:
>
> pr_cont_t c;
>
> pr_cont_begin(&c, KERN_INFO "text");
> pr_cont_append(&c, " continued");
> pr_cont_end(&c);
>
> Signed-off-by: John Ogness <[email protected]>
> ---
> include/linux/printk.h | 19 ++++++
> kernel/printk/printk.c | 137 +++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 156 insertions(+)
>
> diff --git a/include/linux/printk.h b/include/linux/printk.h
> index 34c1a7be3e01..4d9ce18e4afa 100644
> --- a/include/linux/printk.h
> +++ b/include/linux/printk.h
> @@ -380,6 +380,25 @@ extern int kptr_restrict;
> #define pr_cont(fmt, ...) \
> printk(KERN_CONT fmt, ##__VA_ARGS__)
>
> +/* opaque handle for continuous printk messages */
> +typedef struct {
> + u8 index;
> + u8 loglevel;
> + u16 text_len;
> +} pr_cont_t;
> +
> +/* initialize handle, provide loglevel and initial message text */
> +int pr_cont_begin(pr_cont_t *c, const char *fmt, ...);
> +
> +/* append message text */
> +int pr_cont_append(pr_cont_t *c, const char *fmt, ...);
> +
> +/* flush message to kernel buffer */
> +void pr_cont_flush(pr_cont_t *c);
> +
> +/* flush message to kernel buffer, cleanup handle */
> +void pr_cont_end(pr_cont_t *c);
> +
> /**
> * pr_devel - Print a debug-level message conditionally
> * @fmt: format string
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index ad8d1dfe5fbe..10113e7ea350 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -3495,3 +3495,140 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper)
> EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
>
> #endif
> +
> +#define CONT_LINE_MAX LOG_LINE_MAX
> +#define CONT_BUF_COUNT 10
> +static char cont_buf[CONT_BUF_COUNT][CONT_LINE_MAX];
> +static DECLARE_BITMAP(cont_buf_map, CONT_BUF_COUNT);
> +
> +static int get_cont_buf(void)
> +{
> + int bit;
> +
> + do {
> + bit = find_first_zero_bit(cont_buf_map, CONT_BUF_COUNT);
> + if (bit == CONT_BUF_COUNT)
> + break;
> + } while (test_and_set_bit(bit, cont_buf_map));
> +
> + return bit;
> +}
My big problem with this solution is a forgotten pr_cont_end(). It
will cause:
+ the message will never get printed
+ several calls into such a broken code will exhaust the pool of
cont buffers and it will stop working for the entire system
The above might be solved with some fallback mechanism (timer,
watchdog) but then it needs some synchronization.
Why is it so bad?
+ it will happen like other bugs
+ it is hard to notice; it is easier to notice hard-lockup than
a missing message
Now that I think about it. This is the biggest problem with any temporary buffer
for pr_cont() lines. I am more and more convinced that we should just
_keep the current behavior_. It is not ideal. But sometimes mixed
messages are always better than lost ones.
That said, some printk() API using local buffer would be still
valuable for pieces of code where people really want to avoid
mixed lines. But it should be optional and people should be
aware of the risks.
Best Regards,
Petr
From: Petr Mladek
> Sent: 20 August 2020 11:16
...
> Now that I think about it. This is the biggest problem with any temporary buffer
> for pr_cont() lines. I am more and more convinced that we should just
> _keep the current behavior_. It is not ideal. But sometimes mixed
> messages are always better than lost ones.
Maybe a marker to say 'more expected' might be useful.
OTOH lack of a trailing '\n' probably signifies that a
pr_cont() is likely to be next.
Unexpected pr_cont() could be output with a leading "... "
to help indicate the message is a continuation.
> That said, some printk() API using local buffer would be still
> valuable for pieces of code where people really want to avoid
> mixed lines. But it should be optional and people should be
> aware of the risks.
That could be very useful if it supported multi-line output.
Thing like the stack backtrace code could use it avoid
the mess that happens when multiple processes generate
tracebacks at the same time.
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
On Thu, 2020-08-20 at 07:44 +0000, David Laight wrote:
> From: Joe Perches
> > Sent: 20 August 2020 01:34
> >
> > On Thu, 2020-08-20 at 01:32 +0206, John Ogness wrote:
> > > Implement a new buffering mechanism for pr_cont messages.
> > >
> > > Old mechanism syntax:
> > >
> > > printk(KERN_INFO "text");
> > > printk(KERN_CONT " continued");
> > > printk(KERN_CONT "\n");
> > >
> > > New mechanism syntax:
> > >
> > > pr_cont_t c;
> > >
> > > pr_cont_begin(&c, KERN_INFO "text");
> >
> > bikeshed:
> >
> > I suggest:
> >
> > printk_begin(&printk_context, fmt, ...)
> > printk_continue(&printk_context, fmt, ...) (maybe printk_next())
> > printk_end(&printk_context, fmt, ...)
>
> I see some very long source lines looming ...
Macros make that pretty short.
pr_<level>_start
pr_next
pr_end
Doesn't seem too bad.
On (20/08/20 12:16), Petr Mladek wrote:
> Now that I think about it. This is the biggest problem with any temporary buffer
> for pr_cont() lines. I am more and more convinced that we should just
> _keep the current behavior_. It is not ideal. But sometimes mixed
> messages are always better than lost ones.
Agreed. John, let's move this into a backlog for the time being? We can
continue discussions, but first let's fix the empty lines for cont line
print outs in the current dmesg output.
-ss
On (20/08/20 01:32), John Ogness wrote:
> +#define CONT_LINE_MAX LOG_LINE_MAX
> +#define CONT_BUF_COUNT 10
> +static char cont_buf[CONT_BUF_COUNT][CONT_LINE_MAX];
> +static DECLARE_BITMAP(cont_buf_map, CONT_BUF_COUNT);
> +
> +static int get_cont_buf(void)
> +{
> + int bit;
> +
> + do {
> + bit = find_first_zero_bit(cont_buf_map, CONT_BUF_COUNT);
> + if (bit == CONT_BUF_COUNT)
> + break;
> + } while (test_and_set_bit(bit, cont_buf_map));
> +
> + return bit;
> +}
> +static void put_cont_buf(int index)
> +{
> + if (WARN_ON(index >= CONT_BUF_COUNT))
> + return;
> + if (WARN_ON(!test_bit(index, cont_buf_map)))
> + return;
Doesn't WARN_ON() do pr_cont() print outs as well? I'm a bit worried by
the path that re-enters pr_cont() from "broken" pr_cont() path.
Just ideas, to keep the discussion alive:
Overall, I wonder if pr_cont buffers should hold more info, e.g. owner
context. If the same context does "normal" printk() while still owning
the unfinished cont buffer then we should flush cont buffer. I may be
in minority here, I don't see a very strong reason to keep the global
order of the messages - e.g. if pid 234 does printk on CPU1 then we
probably don't need to flush pid's 8889 cont buffer (which runs on CPU42,
for instance), but keeping the order of the messages within the context
is somehow much more important. That is, if pid 8889 starts pr_cont buffer
and then triggers a pr_warn() or pr_alert() or any other printk() then I
think we need to flush the cont buffer. Just 5 cents.
-ss
On Thu 2020-08-20 12:33:23, David Laight wrote:
> From: Petr Mladek
> > Sent: 20 August 2020 11:16
> ...
> > Now that I think about it. This is the biggest problem with any temporary buffer
> > for pr_cont() lines. I am more and more convinced that we should just
> > _keep the current behavior_. It is not ideal. But sometimes mixed
> > messages are always better than lost ones.
>
> Maybe a marker to say 'more expected' might be useful.
> OTOH lack of a trailing '\n' probably signifies that a
> pr_cont() is likely to be next.
The problem is the "probably". Lack of trailing '\n' might also mean
that the author did not care. Note that newline is not strictly
required at the moment. The next message is concatenated only when
pr_cont() is used from the same process.
I would personally hate printk when I debugged some hard-to-reproduce
bug, finally succeeded, and some message was missing just because
of a missing newline.
> Unexpected pr_cont() could be output with a leading "... "
> to help indicate the message is a continuation.
Interesting idea. It might help to catch broken code. Well, I am still
not sure that people would appreciate this printk() behavior.
> > That said, some printk() API using local buffer would be still
> > valuable for pieces of code where people really want to avoid
> > mixed lines. But it should be optional and people should be
> > aware of the risks.
>
> That could be very useful if it supported multi-line output.
> Thing like the stack backtrace code could use it avoid
> the mess that happens when multiple processes generate
> tracebacks at the same time.
Honestly, I am going to push against it. I agree that would be useful
but it is an evil path.
There has to be some limits. Backtraces are often printed in situation
where the buffer could not get allocated dynamically. There always
be a situation when a buffer will not be big enough.
If we add this feature, people will want to use it also for other
purposes that might need even bigger buffers and the size might
be even harder to predict.
And the temporary buffer is only part of the problem. The message also
has to be stored into the lockless ringbuffer, show on consoles, passed
to userspace, stored during panic by pstore.
The printk() design already is very complicated. And multiple lines
support would stretch it in yet another dimension.
And it is going to be less important. We are going to always store the
information about the printk caller. So that it will be much easier
to grep related lines.
Best Regards,
Petr
From: Petr Mladek
> Sent: 25 August 2020 14:11
>
> On Thu 2020-08-20 12:33:23, David Laight wrote:
> > From: Petr Mladek
> > > Sent: 20 August 2020 11:16
> > ...
> > > Now that I think about it. This is the biggest problem with any temporary buffer
> > > for pr_cont() lines. I am more and more convinced that we should just
> > > _keep the current behavior_. It is not ideal. But sometimes mixed
> > > messages are always better than lost ones.
> >
> > Maybe a marker to say 'more expected' might be useful.
> > OTOH lack of a trailing '\n' probably signifies that a
> > pr_cont() is likely to be next.
>
> The problem is the "probably". Lack of trailing '\n' might also mean
> that the author did not care. Note that newline is not strictly
> required at the moment. The next message is concatenated only when
> pr_cont() is used from the same process.
Thinks.... (smoke comes out of ears...):
If the 'trace entry' contained the pid and whether it was a pr_cont
then the trace reader could merge continuation lines even if
there was a small number of interleaved other traces.
So anything reading continuously might break a continuation
(as might happen if there is a trace from an ISR).
But the output from dmesg and /var/log/messages will
almost always be correct.
This moves all the complexity away from the trace writing code.
> I would personally hate printk when I debugged some hard-to-reproduce
> bug, finally succeeded, and some message was missing just because
> of a missing newline.
My annoyance is that I want printk() to come out on the console
(like it used to 30 years ago).
I don't want the console output to be from a syslogd process.
On SYSV killing syslogd (mostly) did the trick.
x86 is annoying because you need to have setup a serial console
(or have a high speed camera pointing at the vga monitor).
> > Unexpected pr_cont() could be output with a leading "... "
> > to help indicate the message is a continuation.
>
> Interesting idea. It might help to catch broken code. Well, I am still
> not sure that people would appreciate this printk() behavior.
I was thinking of just catching the case where pr_cont()
data can't be appended to the earlier line.
For direct tty output this would happen if an intervening
printk() had to add an extra '\n' to terminate the partial line.
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
On Tue 2020-08-25 13:38:57, David Laight wrote:
> From: Petr Mladek
> > Sent: 25 August 2020 14:11
> >
> > On Thu 2020-08-20 12:33:23, David Laight wrote:
> > > From: Petr Mladek
> > > > Sent: 20 August 2020 11:16
> > > ...
> > > > Now that I think about it. This is the biggest problem with any temporary buffer
> > > > for pr_cont() lines. I am more and more convinced that we should just
> > > > _keep the current behavior_. It is not ideal. But sometimes mixed
> > > > messages are always better than lost ones.
> > >
> > > Maybe a marker to say 'more expected' might be useful.
> > > OTOH lack of a trailing '\n' probably signifies that a
> > > pr_cont() is likely to be next.
> >
> > The problem is the "probably". Lack of trailing '\n' might also mean
> > that the author did not care. Note that newline is not strictly
> > required at the moment. The next message is concatenated only when
> > pr_cont() is used from the same process.
>
> Thinks.... (smoke comes out of ears...):
> If the 'trace entry' contained the pid and whether it was a pr_cont
> then the trace reader could merge continuation lines even if
> there was a small number of interleaved other traces.
>
> So anything reading continuously might break a continuation
> (as might happen if there is a trace from an ISR).
> But the output from dmesg and /var/log/messages will
> almost always be correct.
>
> This moves all the complexity away from the trace writing code.
Yeah, this was the original plan. Unfortunately, it would require
changes on the reader side and it would break existing readers (userspace),
see
https://lore.kernel.org/lkml/20200811160551.GC12903@alley/
And it is not acceptable, see
https://lore.kernel.org/lkml/CAHk-=wivdy6-i=iqJ1ZG9YrRzaS0_LHHEPwb9KJg-S8i-Wm30w@mail.gmail.com/
Best Regards,
Petr