This is the combined repost of the following two patches with only
minor updates - warning message and comments added as suggested by
Andrew. I think it'd be best to route the changes through -mm.
[1] [PATCHSET] printk: implement extended console support
[2] [PATCHSET v3] netconsole: implement extended console support
The netconsole part has been reviewed by David[3].
This patchset updates netconsole so that it can emit messages with the
same header as used in /dev/kmsg which gives neconsole receiver full
log information which enables things like structured logging and
detection of lost messages.
Please see the head messages of the original postings for more
detailed info.
0001-printk-guard-the-amount-written-per-line-by-devkmsg_.patch
0002-printk-factor-out-message-formatting-from-devkmsg_re.patch
0003-printk-implement-support-for-extended-console-driver.patch
0004-netconsole-remove-unnecessary-netconsole_target_get-.patch
0005-netconsole-make-netconsole_target-enabled-a-bool.patch
0006-netconsole-make-all-dynamic-netconsoles-share-a-mute.patch
0007-netconsole-implement-extended-console-support.patch
0001-0003 implement extended console support in printk.
0004-0007 update netconsole to support extended console.
The patchset is also available in the following git branch.
git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc.git review-netconsole-ext-console
diffstat follows. Thanks.
Documentation/ABI/testing/dev-kmsg | 9 +
Documentation/networking/netconsole.txt | 35 +++++
drivers/net/netconsole.c | 169 ++++++++++++++++++++++---
include/linux/console.h | 1
include/linux/printk.h | 2
kernel/printk/printk.c | 214 +++++++++++++++++++++-----------
6 files changed, 341 insertions(+), 89 deletions(-)
--
tejun
[1] http://lkml.kernel.org/g/[email protected]
[2] http://lkml.kernel.org/g/[email protected]
[3] http://lkml.kernel.org/g/[email protected]
devkmsg_read() uses 8k buffer and assumes that the formatted output
message won't overrun which seems safe given LOG_LINE_MAX, the current
use of dict and the escaping method being used; however, we're
planning to use devkmsg formatting wider and accounting for the buffer
size properly isn't that complicated.
This patch defines CONSOLE_EXT_LOG_MAX as 8192 and updates
devkmsg_read() so that it limits output accordingly.
Signed-off-by: Tejun Heo <[email protected]>
Cc: Kay Sievers <[email protected]>
Cc: Petr Mladek <[email protected]>
---
include/linux/printk.h | 2 ++
kernel/printk/printk.c | 34 ++++++++++++++++++++++------------
2 files changed, 24 insertions(+), 12 deletions(-)
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 9b30871..58b1fec 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -30,6 +30,8 @@ static inline const char *printk_skip_level(const char *buffer)
return buffer;
}
+#define CONSOLE_EXT_LOG_MAX 8192
+
/* printk's without a loglevel use this.. */
#define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index c099b08..a115490 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -505,6 +505,11 @@ int check_syslog_permissions(int type, bool from_file)
return security_syslog(type);
}
+static void append_char(char **pp, char *e, char c)
+{
+ if (*pp < e)
+ *(*pp)++ = c;
+}
/* /dev/kmsg - userspace message inject/listen interface */
struct devkmsg_user {
@@ -512,7 +517,7 @@ struct devkmsg_user {
u32 idx;
enum log_flags prev;
struct mutex lock;
- char buf[8192];
+ char buf[CONSOLE_EXT_LOG_MAX];
};
static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
@@ -570,6 +575,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
{
struct devkmsg_user *user = file->private_data;
struct printk_log *msg;
+ char *p, *e;
u64 ts_usec;
size_t i;
char cont = '-';
@@ -579,6 +585,9 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
if (!user)
return -EBADF;
+ p = user->buf;
+ e = user->buf + sizeof(user->buf);
+
ret = mutex_lock_interruptible(&user->lock);
if (ret)
return ret;
@@ -625,9 +634,9 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
((user->prev & LOG_CONT) && !(msg->flags & LOG_PREFIX)))
cont = '+';
- len = sprintf(user->buf, "%u,%llu,%llu,%c;",
- (msg->facility << 3) | msg->level,
- user->seq, ts_usec, cont);
+ p += scnprintf(p, e - p, "%u,%llu,%llu,%c;",
+ (msg->facility << 3) | msg->level,
+ user->seq, ts_usec, cont);
user->prev = msg->flags;
/* escape non-printable characters */
@@ -635,11 +644,11 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
unsigned char c = log_text(msg)[i];
if (c < ' ' || c >= 127 || c == '\\')
- len += sprintf(user->buf + len, "\\x%02x", c);
+ p += scnprintf(p, e - p, "\\x%02x", c);
else
- user->buf[len++] = c;
+ append_char(&p, e, c);
}
- user->buf[len++] = '\n';
+ append_char(&p, e, '\n');
if (msg->dict_len) {
bool line = true;
@@ -648,30 +657,31 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
unsigned char c = log_dict(msg)[i];
if (line) {
- user->buf[len++] = ' ';
+ append_char(&p, e, ' ');
line = false;
}
if (c == '\0') {
- user->buf[len++] = '\n';
+ append_char(&p, e, '\n');
line = true;
continue;
}
if (c < ' ' || c >= 127 || c == '\\') {
- len += sprintf(user->buf + len, "\\x%02x", c);
+ p += scnprintf(p, e - p, "\\x%02x", c);
continue;
}
- user->buf[len++] = c;
+ append_char(&p, e, c);
}
- user->buf[len++] = '\n';
+ append_char(&p, e, '\n');
}
user->idx = log_next(user->idx);
user->seq++;
raw_spin_unlock_irq(&logbuf_lock);
+ len = p - user->buf;
if (len > count) {
ret = -EINVAL;
goto out;
--
2.1.0
The extended message formatting used for /dev/kmsg will be used
implement extended consoles. Factor out msg_print_ext_header() and
msg_print_ext_body() from devkmsg_read().
This is pure restructuring.
Signed-off-by: Tejun Heo <[email protected]>
Cc: Kay Sievers <[email protected]>
Cc: Petr Mladek <[email protected]>
---
kernel/printk/printk.c | 146 +++++++++++++++++++++++++++----------------------
1 file changed, 80 insertions(+), 66 deletions(-)
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index a115490..51ce4f1 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -511,6 +511,81 @@ static void append_char(char **pp, char *e, char c)
*(*pp)++ = c;
}
+static ssize_t msg_print_ext_header(char *buf, size_t size,
+ struct printk_log *msg, u64 seq,
+ enum log_flags prev_flags)
+{
+ u64 ts_usec = msg->ts_nsec;
+ char cont = '-';
+
+ do_div(ts_usec, 1000);
+
+ /*
+ * If we couldn't merge continuation line fragments during the print,
+ * export the stored flags to allow an optional external merge of the
+ * records. Merging the records isn't always neccessarily correct, like
+ * when we hit a race during printing. In most cases though, it produces
+ * better readable output. 'c' in the record flags mark the first
+ * fragment of a line, '+' the following.
+ */
+ if (msg->flags & LOG_CONT && !(prev_flags & LOG_CONT))
+ cont = 'c';
+ else if ((msg->flags & LOG_CONT) ||
+ ((prev_flags & LOG_CONT) && !(msg->flags & LOG_PREFIX)))
+ cont = '+';
+
+ return scnprintf(buf, size, "%u,%llu,%llu,%c;",
+ (msg->facility << 3) | msg->level, seq, ts_usec, cont);
+}
+
+static ssize_t msg_print_ext_body(char *buf, size_t size,
+ char *dict, size_t dict_len,
+ char *text, size_t text_len)
+{
+ char *p = buf, *e = buf + size;
+ size_t i;
+
+ /* escape non-printable characters */
+ for (i = 0; i < text_len; i++) {
+ unsigned char c = text[i];
+
+ if (c < ' ' || c >= 127 || c == '\\')
+ p += scnprintf(p, e - p, "\\x%02x", c);
+ else
+ append_char(&p, e, c);
+ }
+ append_char(&p, e, '\n');
+
+ if (dict_len) {
+ bool line = true;
+
+ for (i = 0; i < dict_len; i++) {
+ unsigned char c = dict[i];
+
+ if (line) {
+ append_char(&p, e, ' ');
+ line = false;
+ }
+
+ if (c == '\0') {
+ append_char(&p, e, '\n');
+ line = true;
+ continue;
+ }
+
+ if (c < ' ' || c >= 127 || c == '\\') {
+ p += scnprintf(p, e - p, "\\x%02x", c);
+ continue;
+ }
+
+ append_char(&p, e, c);
+ }
+ append_char(&p, e, '\n');
+ }
+
+ return p - buf;
+}
+
/* /dev/kmsg - userspace message inject/listen interface */
struct devkmsg_user {
u64 seq;
@@ -575,19 +650,12 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
{
struct devkmsg_user *user = file->private_data;
struct printk_log *msg;
- char *p, *e;
- u64 ts_usec;
- size_t i;
- char cont = '-';
size_t len;
ssize_t ret;
if (!user)
return -EBADF;
- p = user->buf;
- e = user->buf + sizeof(user->buf);
-
ret = mutex_lock_interruptible(&user->lock);
if (ret)
return ret;
@@ -617,71 +685,17 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
}
msg = log_from_idx(user->idx);
- ts_usec = msg->ts_nsec;
- do_div(ts_usec, 1000);
+ len = msg_print_ext_header(user->buf, sizeof(user->buf),
+ msg, user->seq, user->prev);
+ len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len,
+ log_dict(msg), msg->dict_len,
+ log_text(msg), msg->text_len);
- /*
- * If we couldn't merge continuation line fragments during the print,
- * export the stored flags to allow an optional external merge of the
- * records. Merging the records isn't always neccessarily correct, like
- * when we hit a race during printing. In most cases though, it produces
- * better readable output. 'c' in the record flags mark the first
- * fragment of a line, '+' the following.
- */
- if (msg->flags & LOG_CONT && !(user->prev & LOG_CONT))
- cont = 'c';
- else if ((msg->flags & LOG_CONT) ||
- ((user->prev & LOG_CONT) && !(msg->flags & LOG_PREFIX)))
- cont = '+';
-
- p += scnprintf(p, e - p, "%u,%llu,%llu,%c;",
- (msg->facility << 3) | msg->level,
- user->seq, ts_usec, cont);
user->prev = msg->flags;
-
- /* escape non-printable characters */
- for (i = 0; i < msg->text_len; i++) {
- unsigned char c = log_text(msg)[i];
-
- if (c < ' ' || c >= 127 || c == '\\')
- p += scnprintf(p, e - p, "\\x%02x", c);
- else
- append_char(&p, e, c);
- }
- append_char(&p, e, '\n');
-
- if (msg->dict_len) {
- bool line = true;
-
- for (i = 0; i < msg->dict_len; i++) {
- unsigned char c = log_dict(msg)[i];
-
- if (line) {
- append_char(&p, e, ' ');
- line = false;
- }
-
- if (c == '\0') {
- append_char(&p, e, '\n');
- line = true;
- continue;
- }
-
- if (c < ' ' || c >= 127 || c == '\\') {
- p += scnprintf(p, e - p, "\\x%02x", c);
- continue;
- }
-
- append_char(&p, e, c);
- }
- append_char(&p, e, '\n');
- }
-
user->idx = log_next(user->idx);
user->seq++;
raw_spin_unlock_irq(&logbuf_lock);
- len = p - user->buf;
if (len > count) {
ret = -EINVAL;
goto out;
--
2.1.0
printk log_buf keeps various metadata for each message including its
sequence number and timestamp. The metadata is currently available
only through /dev/kmsg and stripped out before passed onto console
drivers. We want this metadata to be available to console drivers too
so that console consumers can get full information including the
metadata and dictionary, which among other things can be used to
detect whether messages got lost in transit.
This patch implements support for extended console drivers. Consoles
can indicate that they want extended messages by setting the new
CON_EXTENDED flag and they'll be fed messages formatted the same way
as /dev/kmsg.
"<level>,<sequnum>,<timestamp>,<contflag>;<message text>\n"
If extended consoles exist, in-kernel fragment assembly is disabled.
This ensures that all messages emitted to consoles have full metadata
including sequence number. The contflag carries enough information to
reassemble the fragments from the reader side trivially. Note that
this only affects /dev/kmsg. Regular console and /proc/kmsg outputs
are not affected by this change.
* Extended message formatting for console drivers is enabled iff there
are registered extended consoles.
* Comment describing /dev/kmsg message format updated to add missing
contflag field and help distinguishing variable from verbatim terms.
v3: Added pr_info() to inform the user to expect more fragments when
ext console is enabled. Also, Documentation/ABI/testing/dev-kmsg
is updated accordingly as suggested by Andrew.
v2: Disable in-kernel cont buffer if extended consoles are present
which simplifies things quite a bit. Hinted by Petr.
Signed-off-by: Tejun Heo <[email protected]>
Cc: Kay Sievers <[email protected]>
Cc: Petr Mladek <[email protected]>
Cc: Andrew Morton <[email protected]>
---
Documentation/ABI/testing/dev-kmsg | 9 ++++++
include/linux/console.h | 1 +
kernel/printk/printk.c | 66 +++++++++++++++++++++++++++++++++-----
3 files changed, 68 insertions(+), 8 deletions(-)
diff --git a/Documentation/ABI/testing/dev-kmsg b/Documentation/ABI/testing/dev-kmsg
index bb820be..fff817e 100644
--- a/Documentation/ABI/testing/dev-kmsg
+++ b/Documentation/ABI/testing/dev-kmsg
@@ -98,4 +98,13 @@ Description: The /dev/kmsg character device node provides userspace access
logic is used internally when messages are printed to the
console, /proc/kmsg or the syslog() syscall.
+ By default, kernel tries to avoid fragments by concatenating
+ when it can and fragments are rare; however, when extended
+ console support is enabled, the in-kernel concatenation is
+ disabled and /dev/kmsg output will contain more fragments. If
+ the log consumer performs concatenation, the end result
+ should be the same. In the future, the in-kernel concatenation
+ may be removed entirely and /dev/kmsg users are recommended to
+ implement fragment handling.
+
Users: dmesg(1), userspace kernel log consumers
diff --git a/include/linux/console.h b/include/linux/console.h
index 9f50fb4..bd19434 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -115,6 +115,7 @@ static inline int con_debug_leave(void)
#define CON_BOOT (8)
#define CON_ANYTIME (16) /* Safe to call when cpu is offline */
#define CON_BRL (32) /* Used for a braille device */
+#define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */
struct console {
char name[16];
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 51ce4f1..ae980dc 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -85,6 +85,18 @@ static struct lockdep_map console_lock_dep_map = {
#endif
/*
+ * Number of registered extended console drivers.
+ *
+ * If extended consoles are present, in-kernel cont reassembly is disabled
+ * and each fragment is stored as a separate log entry with proper
+ * continuation flag so that every emitted message has full metadata. This
+ * doesn't change the result for regular consoles or /proc/kmsg. For
+ * /dev/kmsg, as long as the reader concatenates messages according to
+ * consecutive continuation flags, the end result should be the same too.
+ */
+static int nr_ext_console_drivers;
+
+/*
* Helper macros to handle lockdep when locking/unlocking console_sem. We use
* macros instead of functions so that _RET_IP_ contains useful information.
*/
@@ -195,7 +207,7 @@ static int console_may_schedule;
* need to be changed in the future, when the requirements change.
*
* /dev/kmsg exports the structured data in the following line format:
- * "level,sequnum,timestamp;<message text>\n"
+ * "<level>,<sequnum>,<timestamp>,<contflag>;<message text>\n"
*
* The optional key/value pairs are attached as continuation lines starting
* with a space character and terminated by a newline. All possible
@@ -1417,7 +1429,9 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
* log_buf[start] to log_buf[end - 1].
* The console_lock must be held.
*/
-static void call_console_drivers(int level, const char *text, size_t len)
+static void call_console_drivers(int level,
+ const char *ext_text, size_t ext_len,
+ const char *text, size_t len)
{
struct console *con;
@@ -1438,7 +1452,10 @@ static void call_console_drivers(int level, const char *text, size_t len)
if (!cpu_online(smp_processor_id()) &&
!(con->flags & CON_ANYTIME))
continue;
- con->write(con, text, len);
+ if (con->flags & CON_EXTENDED)
+ con->write(con, ext_text, ext_len);
+ else
+ con->write(con, text, len);
}
}
@@ -1581,8 +1598,12 @@ static bool cont_add(int facility, int level, const char *text, size_t len)
if (cont.len && cont.flushed)
return false;
- if (cont.len + len > sizeof(cont.buf)) {
- /* the line gets too long, split it up in separate records */
+ /*
+ * If ext consoles are present, flush and skip in-kernel
+ * continuation. See nr_ext_console_drivers definition. Also, if
+ * the line gets too long, split it up in separate records.
+ */
+ if (nr_ext_console_drivers || cont.len + len > sizeof(cont.buf)) {
cont_flush(LOG_CONT);
return false;
}
@@ -1917,9 +1938,19 @@ static struct cont {
u8 level;
bool flushed:1;
} cont;
+static char *log_text(const struct printk_log *msg) { return NULL; }
+static char *log_dict(const struct printk_log *msg) { return NULL; }
static struct printk_log *log_from_idx(u32 idx) { return NULL; }
static u32 log_next(u32 idx) { return 0; }
-static void call_console_drivers(int level, const char *text, size_t len) {}
+static ssize_t msg_print_ext_header(char *buf, size_t size,
+ struct printk_log *msg, u64 seq,
+ enum log_flags prev_flags) { return 0; }
+static ssize_t msg_print_ext_body(char *buf, size_t size,
+ char *dict, size_t dict_len,
+ char *text, size_t text_len) { return 0; }
+static void call_console_drivers(int level,
+ const char *ext_text, size_t ext_len,
+ const char *text, size_t len) {}
static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev,
bool syslog, char *buf, size_t size) { return 0; }
static size_t cont_print_text(char *text, size_t size) { return 0; }
@@ -2172,7 +2203,7 @@ static void console_cont_flush(char *text, size_t size)
len = cont_print_text(text, size);
raw_spin_unlock(&logbuf_lock);
stop_critical_timings();
- call_console_drivers(cont.level, text, len);
+ call_console_drivers(cont.level, NULL, 0, text, len);
start_critical_timings();
local_irq_restore(flags);
return;
@@ -2196,6 +2227,7 @@ out:
*/
void console_unlock(void)
{
+ static char ext_text[CONSOLE_EXT_LOG_MAX];
static char text[LOG_LINE_MAX + PREFIX_MAX];
static u64 seen_seq;
unsigned long flags;
@@ -2214,6 +2246,7 @@ void console_unlock(void)
again:
for (;;) {
struct printk_log *msg;
+ size_t ext_len = 0;
size_t len;
int level;
@@ -2259,13 +2292,22 @@ skip:
level = msg->level;
len += msg_print_text(msg, console_prev, false,
text + len, sizeof(text) - len);
+ if (nr_ext_console_drivers) {
+ ext_len = msg_print_ext_header(ext_text,
+ sizeof(ext_text),
+ msg, console_seq, console_prev);
+ ext_len += msg_print_ext_body(ext_text + ext_len,
+ sizeof(ext_text) - ext_len,
+ log_dict(msg), msg->dict_len,
+ log_text(msg), msg->text_len);
+ }
console_idx = log_next(console_idx);
console_seq++;
console_prev = msg->flags;
raw_spin_unlock(&logbuf_lock);
stop_critical_timings(); /* don't trace print latency */
- call_console_drivers(level, text, len);
+ call_console_drivers(level, ext_text, ext_len, text, len);
start_critical_timings();
local_irq_restore(flags);
}
@@ -2521,6 +2563,11 @@ void register_console(struct console *newcon)
newcon->next = console_drivers->next;
console_drivers->next = newcon;
}
+
+ if (newcon->flags & CON_EXTENDED)
+ if (!nr_ext_console_drivers++)
+ pr_info("printk: continuation disabled due to ext consoles, expect more fragments in /dev/kmsg\n");
+
if (newcon->flags & CON_PRINTBUFFER) {
/*
* console_unlock(); will print out the buffered messages
@@ -2593,6 +2640,9 @@ int unregister_console(struct console *console)
}
}
+ if (!res && (console->flags & CON_EXTENDED))
+ nr_ext_console_drivers--;
+
/*
* If this isn't the last console and it has CON_CONSDEV set, we
* need to set it on the next preferred console.
--
2.1.0
write_msg() grabs target_list_lock and walks target_list invoking
netpool_send_udp() on each target. Curiously, it protects each
iteration with netconsole_target_get/put() even though it never
releases target_list_lock which protects all the members.
While this doesn't harm anything, it doesn't serve any purpose either.
The items on the list can't go away while target_list_lock is held.
Remove the unnecessary get/put pair.
Signed-off-by: Tejun Heo <[email protected]>
Cc: David Miller <[email protected]>
---
drivers/net/netconsole.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index 15731d1..30c0524 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -744,7 +744,6 @@ static void write_msg(struct console *con, const char *msg, unsigned int len)
spin_lock_irqsave(&target_list_lock, flags);
list_for_each_entry(nt, &target_list, list) {
- netconsole_target_get(nt);
if (nt->enabled && netif_running(nt->np.dev)) {
/*
* We nest this inside the for-each-target loop above
@@ -760,7 +759,6 @@ static void write_msg(struct console *con, const char *msg, unsigned int len)
left -= frag;
}
}
- netconsole_target_put(nt);
}
spin_unlock_irqrestore(&target_list_lock, flags);
}
--
2.1.0
netconsole uses both bool and int for boolean values. Let's convert
nt->enabled to bool for consistency.
Signed-off-by: Tejun Heo <[email protected]>
Cc: David Miller <[email protected]>
---
drivers/net/netconsole.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index 30c0524..8dd1e55 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -104,7 +104,7 @@ struct netconsole_target {
#ifdef CONFIG_NETCONSOLE_DYNAMIC
struct config_item item;
#endif
- int enabled;
+ bool enabled;
struct mutex mutex;
struct netpoll np;
};
@@ -197,7 +197,7 @@ static struct netconsole_target *alloc_param_target(char *target_config)
if (err)
goto fail;
- nt->enabled = 1;
+ nt->enabled = true;
return nt;
@@ -322,13 +322,13 @@ static ssize_t store_enabled(struct netconsole_target *nt,
return err;
if (enabled < 0 || enabled > 1)
return -EINVAL;
- if (enabled == nt->enabled) {
+ if ((bool)enabled == nt->enabled) {
pr_info("network logging has already %s\n",
nt->enabled ? "started" : "stopped");
return -EINVAL;
}
- if (enabled) { /* 1 */
+ if (enabled) { /* true */
/*
* Skip netpoll_parse_options() -- all the attributes are
* already configured via configfs. Just print them out.
@@ -340,13 +340,13 @@ static ssize_t store_enabled(struct netconsole_target *nt,
return err;
pr_info("netconsole: network logging started\n");
- } else { /* 0 */
+ } else { /* false */
/* We need to disable the netconsole before cleaning it up
* otherwise we might end up in write_msg() with
- * nt->np.dev == NULL and nt->enabled == 1
+ * nt->np.dev == NULL and nt->enabled == true
*/
spin_lock_irqsave(&target_list_lock, flags);
- nt->enabled = 0;
+ nt->enabled = false;
spin_unlock_irqrestore(&target_list_lock, flags);
netpoll_cleanup(&nt->np);
}
@@ -594,7 +594,7 @@ static struct config_item *make_netconsole_target(struct config_group *group,
/*
* Allocate and initialize with defaults.
- * Target is disabled at creation (enabled == 0).
+ * Target is disabled at creation (!enabled).
*/
nt = kzalloc(sizeof(*nt), GFP_KERNEL);
if (!nt)
@@ -695,7 +695,7 @@ restart:
spin_lock_irqsave(&target_list_lock, flags);
dev_put(nt->np.dev);
nt->np.dev = NULL;
- nt->enabled = 0;
+ nt->enabled = false;
stopped = true;
netconsole_target_put(nt);
goto restart;
--
2.1.0
Currently, each dynamic netconsole_target uses its own separate mutex
to synchronize the configuration operations. This patch replaces the
per-netconsole_target mutexes with a single mutex -
dynamic_netconsole_mutex. The reduced granularity doesn't hurt
anything, the code is minutely simpler and this'd allow adding
operations which should be synchronized across all dynamic
netconsoles.
Signed-off-by: Tejun Heo <[email protected]>
Cc: David Miller <[email protected]>
---
drivers/net/netconsole.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index 8dd1e55..9b0c81e 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -105,13 +105,13 @@ struct netconsole_target {
struct config_item item;
#endif
bool enabled;
- struct mutex mutex;
struct netpoll np;
};
#ifdef CONFIG_NETCONSOLE_DYNAMIC
static struct configfs_subsystem netconsole_subsys;
+static DEFINE_MUTEX(dynamic_netconsole_mutex);
static int __init dynamic_netconsole_init(void)
{
@@ -185,7 +185,6 @@ static struct netconsole_target *alloc_param_target(char *target_config)
strlcpy(nt->np.dev_name, "eth0", IFNAMSIZ);
nt->np.local_port = 6665;
nt->np.remote_port = 6666;
- mutex_init(&nt->mutex);
eth_broadcast_addr(nt->np.remote_mac);
/* Parse parameters and setup netpoll */
@@ -562,10 +561,10 @@ static ssize_t netconsole_target_attr_store(struct config_item *item,
struct netconsole_target_attr *na =
container_of(attr, struct netconsole_target_attr, attr);
- mutex_lock(&nt->mutex);
+ mutex_lock(&dynamic_netconsole_mutex);
if (na->store)
ret = na->store(nt, buf, count);
- mutex_unlock(&nt->mutex);
+ mutex_unlock(&dynamic_netconsole_mutex);
return ret;
}
@@ -604,7 +603,6 @@ static struct config_item *make_netconsole_target(struct config_group *group,
strlcpy(nt->np.dev_name, "eth0", IFNAMSIZ);
nt->np.local_port = 6665;
nt->np.remote_port = 6666;
- mutex_init(&nt->mutex);
eth_broadcast_addr(nt->np.remote_mac);
/* Initialize the config_item member */
--
2.1.0
printk logbuf keeps various metadata and optional key=value dictionary
for structured messages, both of which are stripped when messages are
handed to regular console drivers.
It can be useful to have this metadata and dictionary available to
netconsole consumers. This obviously makes logging via netconsole
more complete and the sequence number in particular is useful in
environments where messages may be lost or reordered in transit -
e.g. when netconsole is used to collect messages in a large cluster
where packets may have to travel congested hops to reach the
aggregator. The lost and reordered messages can easily be identified
and handled accordingly using the sequence numbers.
printk recently added extended console support which can be selected
by setting CON_EXTENDED flag. From console driver side, not much
changes. The only difference is that the text passed to the write
callback is formatted the same way as /dev/kmsg.
This patch implements extended console support for netconsole which
can be enabled by either prepending "+" to a netconsole boot param
entry or echoing 1 to "extended" file in configfs. When enabled,
netconsole transmits extended log messages with headers identical to
/dev/kmsg output.
There's one complication due to message fragments. netconsole limits
the maximum message size to 1k and messages longer than that are split
into multiple fragments. As all extended console messages should
carry matching headers and be uniquely identifiable, each extended
message fragment carries full copy of the metadata and an extra header
field to identify the specific fragment. The optional header is of
the form "ncfrag=OFF/LEN" where OFF is the byte offset into the
message body and LEN is the total length.
To avoid unnecessarily making printk format extended messages,
Extended netconsole is registered with printk when the first extended
netconsole is configured.
v5: Comment on static buf[] exclusion added in send_ext_msg_udp() as
suggested by Andrew Morton.
v4: send_ext_msg_udp() restructured.
v3: Tweaked documentation to make clarify that the example assumes a
lot smaller chunk size. Updated to apply on top of spurious
target get/put removal in write_msg().
v2: Dropped dynamic unregistration of extended console driver, which
added complexity while not being too beneficial given that most
netconsole configurations are static. ncfrag updated to use just
byte offset and message length.
Signed-off-by: Tejun Heo <[email protected]>
Cc: Tetsuo Handa <[email protected]>
Cc: David Miller <[email protected]>
Cc: Andrew Morton <[email protected]>
---
Documentation/networking/netconsole.txt | 35 +++++++-
drivers/net/netconsole.c | 141 +++++++++++++++++++++++++++++++-
2 files changed, 173 insertions(+), 3 deletions(-)
diff --git a/Documentation/networking/netconsole.txt b/Documentation/networking/netconsole.txt
index a5d574a..30409a3 100644
--- a/Documentation/networking/netconsole.txt
+++ b/Documentation/networking/netconsole.txt
@@ -2,6 +2,7 @@
started by Ingo Molnar <[email protected]>, 2001.09.17
2.6 port and netpoll api by Matt Mackall <[email protected]>, Sep 9 2003
IPv6 support by Cong Wang <[email protected]>, Jan 1 2013
+Extended console support by Tejun Heo <[email protected]>, May 1 2015
Please send bug reports to Matt Mackall <[email protected]>
Satyam Sharma <[email protected]>, and Cong Wang <[email protected]>
@@ -24,9 +25,10 @@ Sender and receiver configuration:
It takes a string configuration parameter "netconsole" in the
following format:
- netconsole=[src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr]
+ netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr]
where
+ + if present, enable extended console support
src-port source for UDP packets (defaults to 6665)
src-ip source IP to use (interface address)
dev network interface (eth0)
@@ -107,6 +109,7 @@ To remove a target:
The interface exposes these parameters of a netconsole target to userspace:
enabled Is this target currently enabled? (read-write)
+ extended Extended mode enabled (read-write)
dev_name Local network interface name (read-write)
local_port Source UDP port to use (read-write)
remote_port Remote agent's UDP port (read-write)
@@ -132,6 +135,36 @@ You can also update the local interface dynamically. This is especially
useful if you want to use interfaces that have newly come up (and may not
have existed when netconsole was loaded / initialized).
+Extended console:
+=================
+
+If '+' is prefixed to the configuration line or "extended" config file
+is set to 1, extended console support is enabled. An example boot
+param follows.
+
+ linux [email protected]/eth1,[email protected]/12:34:56:78:9a:bc
+
+Log messages are transmitted with extended metadata header in the
+following format which is the same as /dev/kmsg.
+
+ <level>,<sequnum>,<timestamp>,<contflag>;<message text>
+
+Non printable characters in <message text> are escaped using "\xff"
+notation. If the message contains optional dictionary, verbatim
+newline is used as the delimeter.
+
+If a message doesn't fit in certain number of bytes (currently 1000),
+the message is split into multiple fragments by netconsole. These
+fragments are transmitted with "ncfrag" header field added.
+
+ ncfrag=<byte-offset>/<total-bytes>
+
+For example, assuming a lot smaller chunk size, a message "the first
+chunk, the 2nd chunk." may be split as follows.
+
+ 6,416,1758426,-,ncfrag=0/31;the first chunk,
+ 6,416,1758426,-,ncfrag=16/31; the 2nd chunk.
+
Miscellaneous notes:
====================
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index 9b0c81e..97f3acd 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -79,6 +79,12 @@ static LIST_HEAD(target_list);
/* This needs to be a spinlock because write_msg() cannot sleep */
static DEFINE_SPINLOCK(target_list_lock);
+/*
+ * Console driver for extended netconsoles. Registered on the first use to
+ * avoid unnecessarily enabling ext message formatting.
+ */
+static struct console netconsole_ext;
+
/**
* struct netconsole_target - Represents a configured netconsole target.
* @list: Links this target into the target_list.
@@ -105,6 +111,7 @@ struct netconsole_target {
struct config_item item;
#endif
bool enabled;
+ bool extended;
struct netpoll np;
};
@@ -187,6 +194,11 @@ static struct netconsole_target *alloc_param_target(char *target_config)
nt->np.remote_port = 6666;
eth_broadcast_addr(nt->np.remote_mac);
+ if (*target_config == '+') {
+ nt->extended = true;
+ target_config++;
+ }
+
/* Parse parameters and setup netpoll */
err = netpoll_parse_options(&nt->np, target_config);
if (err)
@@ -257,6 +269,11 @@ static ssize_t show_enabled(struct netconsole_target *nt, char *buf)
return snprintf(buf, PAGE_SIZE, "%d\n", nt->enabled);
}
+static ssize_t show_extended(struct netconsole_target *nt, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", nt->extended);
+}
+
static ssize_t show_dev_name(struct netconsole_target *nt, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%s\n", nt->np.dev_name);
@@ -328,6 +345,11 @@ static ssize_t store_enabled(struct netconsole_target *nt,
}
if (enabled) { /* true */
+ if (nt->extended && !(netconsole_ext.flags & CON_ENABLED)) {
+ netconsole_ext.flags |= CON_ENABLED;
+ register_console(&netconsole_ext);
+ }
+
/*
* Skip netpoll_parse_options() -- all the attributes are
* already configured via configfs. Just print them out.
@@ -355,6 +377,30 @@ static ssize_t store_enabled(struct netconsole_target *nt,
return strnlen(buf, count);
}
+static ssize_t store_extended(struct netconsole_target *nt,
+ const char *buf,
+ size_t count)
+{
+ int extended;
+ int err;
+
+ if (nt->enabled) {
+ pr_err("target (%s) is enabled, disable to update parameters\n",
+ config_item_name(&nt->item));
+ return -EINVAL;
+ }
+
+ err = kstrtoint(buf, 10, &extended);
+ if (err < 0)
+ return err;
+ if (extended < 0 || extended > 1)
+ return -EINVAL;
+
+ nt->extended = extended;
+
+ return strnlen(buf, count);
+}
+
static ssize_t store_dev_name(struct netconsole_target *nt,
const char *buf,
size_t count)
@@ -507,6 +553,7 @@ static struct netconsole_target_attr netconsole_target_##_name = \
__CONFIGFS_ATTR(_name, S_IRUGO | S_IWUSR, show_##_name, store_##_name)
NETCONSOLE_TARGET_ATTR_RW(enabled);
+NETCONSOLE_TARGET_ATTR_RW(extended);
NETCONSOLE_TARGET_ATTR_RW(dev_name);
NETCONSOLE_TARGET_ATTR_RW(local_port);
NETCONSOLE_TARGET_ATTR_RW(remote_port);
@@ -517,6 +564,7 @@ NETCONSOLE_TARGET_ATTR_RW(remote_mac);
static struct configfs_attribute *netconsole_target_attrs[] = {
&netconsole_target_enabled.attr,
+ &netconsole_target_extended.attr,
&netconsole_target_dev_name.attr,
&netconsole_target_local_port.attr,
&netconsole_target_remote_port.attr,
@@ -727,6 +775,82 @@ static struct notifier_block netconsole_netdev_notifier = {
.notifier_call = netconsole_netdev_event,
};
+/**
+ * send_ext_msg_udp - send extended log message to target
+ * @nt: target to send message to
+ * @msg: extended log message to send
+ * @msg_len: length of message
+ *
+ * Transfer extended log @msg to @nt. If @msg is longer than
+ * MAX_PRINT_CHUNK, it'll be split and transmitted in multiple chunks with
+ * ncfrag header field added to identify them.
+ */
+static void send_ext_msg_udp(struct netconsole_target *nt, const char *msg,
+ int msg_len)
+{
+ static char buf[MAX_PRINT_CHUNK]; /* protected by target_list_lock */
+ const char *header, *body;
+ int offset = 0;
+ int header_len, body_len;
+
+ if (msg_len <= MAX_PRINT_CHUNK) {
+ netpoll_send_udp(&nt->np, msg, msg_len);
+ return;
+ }
+
+ /* need to insert extra header fields, detect header and body */
+ header = msg;
+ body = memchr(msg, ';', msg_len);
+ if (WARN_ON_ONCE(!body))
+ return;
+
+ header_len = body - header;
+ body_len = msg_len - header_len - 1;
+ body++;
+
+ /*
+ * Transfer multiple chunks with the following extra header.
+ * "ncfrag=<byte-offset>/<total-bytes>"
+ */
+ memcpy(buf, header, header_len);
+
+ while (offset < body_len) {
+ int this_header = header_len;
+ int this_chunk;
+
+ this_header += scnprintf(buf + this_header,
+ sizeof(buf) - this_header,
+ ",ncfrag=%d/%d;", offset, body_len);
+
+ this_chunk = min(body_len - offset,
+ MAX_PRINT_CHUNK - this_header);
+ if (WARN_ON_ONCE(this_chunk <= 0))
+ return;
+
+ memcpy(buf + this_header, body + offset, this_chunk);
+
+ netpoll_send_udp(&nt->np, buf, this_header + this_chunk);
+
+ offset += this_chunk;
+ }
+}
+
+static void write_ext_msg(struct console *con, const char *msg,
+ unsigned int len)
+{
+ struct netconsole_target *nt;
+ unsigned long flags;
+
+ if ((oops_only && !oops_in_progress) || list_empty(&target_list))
+ return;
+
+ spin_lock_irqsave(&target_list_lock, flags);
+ list_for_each_entry(nt, &target_list, list)
+ if (nt->extended && nt->enabled && netif_running(nt->np.dev))
+ send_ext_msg_udp(nt, msg, len);
+ spin_unlock_irqrestore(&target_list_lock, flags);
+}
+
static void write_msg(struct console *con, const char *msg, unsigned int len)
{
int frag, left;
@@ -742,7 +866,7 @@ static void write_msg(struct console *con, const char *msg, unsigned int len)
spin_lock_irqsave(&target_list_lock, flags);
list_for_each_entry(nt, &target_list, list) {
- if (nt->enabled && netif_running(nt->np.dev)) {
+ if (!nt->extended && nt->enabled && netif_running(nt->np.dev)) {
/*
* We nest this inside the for-each-target loop above
* so that we're able to get as much logging out to
@@ -761,6 +885,12 @@ static void write_msg(struct console *con, const char *msg, unsigned int len)
spin_unlock_irqrestore(&target_list_lock, flags);
}
+static struct console netconsole_ext = {
+ .name = "netcon_ext",
+ .flags = CON_EXTENDED, /* starts disabled, registered on first use */
+ .write = write_ext_msg,
+};
+
static struct console netconsole = {
.name = "netcon",
.flags = CON_ENABLED,
@@ -783,7 +913,11 @@ static int __init init_netconsole(void)
goto fail;
}
/* Dump existing printks when we register */
- netconsole.flags |= CON_PRINTBUFFER;
+ if (nt->extended)
+ netconsole_ext.flags |= CON_PRINTBUFFER |
+ CON_ENABLED;
+ else
+ netconsole.flags |= CON_PRINTBUFFER;
spin_lock_irqsave(&target_list_lock, flags);
list_add(&nt->list, &target_list);
@@ -799,6 +933,8 @@ static int __init init_netconsole(void)
if (err)
goto undonotifier;
+ if (netconsole_ext.flags & CON_ENABLED)
+ register_console(&netconsole_ext);
register_console(&netconsole);
pr_info("network logging started\n");
@@ -827,6 +963,7 @@ static void __exit cleanup_netconsole(void)
{
struct netconsole_target *nt, *tmp;
+ unregister_console(&netconsole_ext);
unregister_console(&netconsole);
dynamic_netconsole_exit();
unregister_netdevice_notifier(&netconsole_netdev_notifier);
--
2.1.0
On Thu 2015-05-14 12:06:20, Tejun Heo wrote:
> devkmsg_read() uses 8k buffer and assumes that the formatted output
> message won't overrun which seems safe given LOG_LINE_MAX, the current
> use of dict and the escaping method being used; however, we're
> planning to use devkmsg formatting wider and accounting for the buffer
> size properly isn't that complicated.
>
> This patch defines CONSOLE_EXT_LOG_MAX as 8192 and updates
> devkmsg_read() so that it limits output accordingly.
>
> Signed-off-by: Tejun Heo <[email protected]>
Reviewed-by: Petr Mladek <[email protected]>
It is more secure now.
Best Regards,
Petr
> Cc: Kay Sievers <[email protected]>
> Cc: Petr Mladek <[email protected]>
> ---
> include/linux/printk.h | 2 ++
> kernel/printk/printk.c | 34 ++++++++++++++++++++++------------
> 2 files changed, 24 insertions(+), 12 deletions(-)
>
> diff --git a/include/linux/printk.h b/include/linux/printk.h
> index 9b30871..58b1fec 100644
> --- a/include/linux/printk.h
> +++ b/include/linux/printk.h
> @@ -30,6 +30,8 @@ static inline const char *printk_skip_level(const char *buffer)
> return buffer;
> }
>
> +#define CONSOLE_EXT_LOG_MAX 8192
> +
> /* printk's without a loglevel use this.. */
> #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
>
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index c099b08..a115490 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -505,6 +505,11 @@ int check_syslog_permissions(int type, bool from_file)
> return security_syslog(type);
> }
>
> +static void append_char(char **pp, char *e, char c)
> +{
> + if (*pp < e)
> + *(*pp)++ = c;
> +}
>
> /* /dev/kmsg - userspace message inject/listen interface */
> struct devkmsg_user {
> @@ -512,7 +517,7 @@ struct devkmsg_user {
> u32 idx;
> enum log_flags prev;
> struct mutex lock;
> - char buf[8192];
> + char buf[CONSOLE_EXT_LOG_MAX];
> };
>
> static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
> @@ -570,6 +575,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
> {
> struct devkmsg_user *user = file->private_data;
> struct printk_log *msg;
> + char *p, *e;
> u64 ts_usec;
> size_t i;
> char cont = '-';
> @@ -579,6 +585,9 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
> if (!user)
> return -EBADF;
>
> + p = user->buf;
> + e = user->buf + sizeof(user->buf);
> +
> ret = mutex_lock_interruptible(&user->lock);
> if (ret)
> return ret;
> @@ -625,9 +634,9 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
> ((user->prev & LOG_CONT) && !(msg->flags & LOG_PREFIX)))
> cont = '+';
>
> - len = sprintf(user->buf, "%u,%llu,%llu,%c;",
> - (msg->facility << 3) | msg->level,
> - user->seq, ts_usec, cont);
> + p += scnprintf(p, e - p, "%u,%llu,%llu,%c;",
> + (msg->facility << 3) | msg->level,
> + user->seq, ts_usec, cont);
> user->prev = msg->flags;
>
> /* escape non-printable characters */
> @@ -635,11 +644,11 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
> unsigned char c = log_text(msg)[i];
>
> if (c < ' ' || c >= 127 || c == '\\')
> - len += sprintf(user->buf + len, "\\x%02x", c);
> + p += scnprintf(p, e - p, "\\x%02x", c);
> else
> - user->buf[len++] = c;
> + append_char(&p, e, c);
> }
> - user->buf[len++] = '\n';
> + append_char(&p, e, '\n');
>
> if (msg->dict_len) {
> bool line = true;
> @@ -648,30 +657,31 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
> unsigned char c = log_dict(msg)[i];
>
> if (line) {
> - user->buf[len++] = ' ';
> + append_char(&p, e, ' ');
> line = false;
> }
>
> if (c == '\0') {
> - user->buf[len++] = '\n';
> + append_char(&p, e, '\n');
> line = true;
> continue;
> }
>
> if (c < ' ' || c >= 127 || c == '\\') {
> - len += sprintf(user->buf + len, "\\x%02x", c);
> + p += scnprintf(p, e - p, "\\x%02x", c);
> continue;
> }
>
> - user->buf[len++] = c;
> + append_char(&p, e, c);
> }
> - user->buf[len++] = '\n';
> + append_char(&p, e, '\n');
> }
>
> user->idx = log_next(user->idx);
> user->seq++;
> raw_spin_unlock_irq(&logbuf_lock);
>
> + len = p - user->buf;
> if (len > count) {
> ret = -EINVAL;
> goto out;
> --
> 2.1.0
>
On Thu 2015-05-14 12:06:21, Tejun Heo wrote:
> The extended message formatting used for /dev/kmsg will be used
> implement extended consoles. Factor out msg_print_ext_header() and
> msg_print_ext_body() from devkmsg_read().
>
> This is pure restructuring.
>
> Signed-off-by: Tejun Heo <[email protected]>
Reviewed-by: Petr Mladek <[email protected]>
I like the split of the long function.
Best Regards,
Petr
> Cc: Kay Sievers <[email protected]>
> Cc: Petr Mladek <[email protected]>
> ---
> kernel/printk/printk.c | 146 +++++++++++++++++++++++++++----------------------
> 1 file changed, 80 insertions(+), 66 deletions(-)
>
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index a115490..51ce4f1 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -511,6 +511,81 @@ static void append_char(char **pp, char *e, char c)
> *(*pp)++ = c;
> }
>
> +static ssize_t msg_print_ext_header(char *buf, size_t size,
> + struct printk_log *msg, u64 seq,
> + enum log_flags prev_flags)
> +{
> + u64 ts_usec = msg->ts_nsec;
> + char cont = '-';
> +
> + do_div(ts_usec, 1000);
> +
> + /*
> + * If we couldn't merge continuation line fragments during the print,
> + * export the stored flags to allow an optional external merge of the
> + * records. Merging the records isn't always neccessarily correct, like
> + * when we hit a race during printing. In most cases though, it produces
> + * better readable output. 'c' in the record flags mark the first
> + * fragment of a line, '+' the following.
> + */
> + if (msg->flags & LOG_CONT && !(prev_flags & LOG_CONT))
> + cont = 'c';
> + else if ((msg->flags & LOG_CONT) ||
> + ((prev_flags & LOG_CONT) && !(msg->flags & LOG_PREFIX)))
> + cont = '+';
> +
> + return scnprintf(buf, size, "%u,%llu,%llu,%c;",
> + (msg->facility << 3) | msg->level, seq, ts_usec, cont);
> +}
> +
> +static ssize_t msg_print_ext_body(char *buf, size_t size,
> + char *dict, size_t dict_len,
> + char *text, size_t text_len)
> +{
> + char *p = buf, *e = buf + size;
> + size_t i;
> +
> + /* escape non-printable characters */
> + for (i = 0; i < text_len; i++) {
> + unsigned char c = text[i];
> +
> + if (c < ' ' || c >= 127 || c == '\\')
> + p += scnprintf(p, e - p, "\\x%02x", c);
> + else
> + append_char(&p, e, c);
> + }
> + append_char(&p, e, '\n');
> +
> + if (dict_len) {
> + bool line = true;
> +
> + for (i = 0; i < dict_len; i++) {
> + unsigned char c = dict[i];
> +
> + if (line) {
> + append_char(&p, e, ' ');
> + line = false;
> + }
> +
> + if (c == '\0') {
> + append_char(&p, e, '\n');
> + line = true;
> + continue;
> + }
> +
> + if (c < ' ' || c >= 127 || c == '\\') {
> + p += scnprintf(p, e - p, "\\x%02x", c);
> + continue;
> + }
> +
> + append_char(&p, e, c);
> + }
> + append_char(&p, e, '\n');
> + }
> +
> + return p - buf;
> +}
> +
> /* /dev/kmsg - userspace message inject/listen interface */
> struct devkmsg_user {
> u64 seq;
> @@ -575,19 +650,12 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
> {
> struct devkmsg_user *user = file->private_data;
> struct printk_log *msg;
> - char *p, *e;
> - u64 ts_usec;
> - size_t i;
> - char cont = '-';
> size_t len;
> ssize_t ret;
>
> if (!user)
> return -EBADF;
>
> - p = user->buf;
> - e = user->buf + sizeof(user->buf);
> -
> ret = mutex_lock_interruptible(&user->lock);
> if (ret)
> return ret;
> @@ -617,71 +685,17 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
> }
>
> msg = log_from_idx(user->idx);
> - ts_usec = msg->ts_nsec;
> - do_div(ts_usec, 1000);
> + len = msg_print_ext_header(user->buf, sizeof(user->buf),
> + msg, user->seq, user->prev);
> + len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len,
> + log_dict(msg), msg->dict_len,
> + log_text(msg), msg->text_len);
>
> - /*
> - * If we couldn't merge continuation line fragments during the print,
> - * export the stored flags to allow an optional external merge of the
> - * records. Merging the records isn't always neccessarily correct, like
> - * when we hit a race during printing. In most cases though, it produces
> - * better readable output. 'c' in the record flags mark the first
> - * fragment of a line, '+' the following.
> - */
> - if (msg->flags & LOG_CONT && !(user->prev & LOG_CONT))
> - cont = 'c';
> - else if ((msg->flags & LOG_CONT) ||
> - ((user->prev & LOG_CONT) && !(msg->flags & LOG_PREFIX)))
> - cont = '+';
> -
> - p += scnprintf(p, e - p, "%u,%llu,%llu,%c;",
> - (msg->facility << 3) | msg->level,
> - user->seq, ts_usec, cont);
> user->prev = msg->flags;
> -
> - /* escape non-printable characters */
> - for (i = 0; i < msg->text_len; i++) {
> - unsigned char c = log_text(msg)[i];
> -
> - if (c < ' ' || c >= 127 || c == '\\')
> - p += scnprintf(p, e - p, "\\x%02x", c);
> - else
> - append_char(&p, e, c);
> - }
> - append_char(&p, e, '\n');
> -
> - if (msg->dict_len) {
> - bool line = true;
> -
> - for (i = 0; i < msg->dict_len; i++) {
> - unsigned char c = log_dict(msg)[i];
> -
> - if (line) {
> - append_char(&p, e, ' ');
> - line = false;
> - }
> -
> - if (c == '\0') {
> - append_char(&p, e, '\n');
> - line = true;
> - continue;
> - }
> -
> - if (c < ' ' || c >= 127 || c == '\\') {
> - p += scnprintf(p, e - p, "\\x%02x", c);
> - continue;
> - }
> -
> - append_char(&p, e, c);
> - }
> - append_char(&p, e, '\n');
> - }
> -
> user->idx = log_next(user->idx);
> user->seq++;
> raw_spin_unlock_irq(&logbuf_lock);
>
> - len = p - user->buf;
> if (len > count) {
> ret = -EINVAL;
> goto out;
> --
> 2.1.0
>
On Thu 2015-05-14 12:06:22, Tejun Heo wrote:
> printk log_buf keeps various metadata for each message including its
> sequence number and timestamp. The metadata is currently available
> only through /dev/kmsg and stripped out before passed onto console
> drivers. We want this metadata to be available to console drivers too
> so that console consumers can get full information including the
> metadata and dictionary, which among other things can be used to
> detect whether messages got lost in transit.
>
> This patch implements support for extended console drivers. Consoles
> can indicate that they want extended messages by setting the new
> CON_EXTENDED flag and they'll be fed messages formatted the same way
> as /dev/kmsg.
>
> "<level>,<sequnum>,<timestamp>,<contflag>;<message text>\n"
>
> If extended consoles exist, in-kernel fragment assembly is disabled.
> This ensures that all messages emitted to consoles have full metadata
> including sequence number. The contflag carries enough information to
> reassemble the fragments from the reader side trivially. Note that
> this only affects /dev/kmsg. Regular console and /proc/kmsg outputs
> are not affected by this change.
>
> * Extended message formatting for console drivers is enabled iff there
> are registered extended consoles.
>
> * Comment describing /dev/kmsg message format updated to add missing
> contflag field and help distinguishing variable from verbatim terms.
>
> v3: Added pr_info() to inform the user to expect more fragments when
> ext console is enabled. Also, Documentation/ABI/testing/dev-kmsg
> is updated accordingly as suggested by Andrew.
>
> v2: Disable in-kernel cont buffer if extended consoles are present
> which simplifies things quite a bit. Hinted by Petr.
>
> Signed-off-by: Tejun Heo <[email protected]>
Reviewed-by: Petr Mladek <[email protected]>
It is much easier than the previous approach and pretty
straightforward. If there are any bugs in the LOG_CONT flag usage,
it will make them more visible but they should get fixed anyway.
Best Regards,
Petr
> Cc: Kay Sievers <[email protected]>
> Cc: Petr Mladek <[email protected]>
> Cc: Andrew Morton <[email protected]>
> ---
> Documentation/ABI/testing/dev-kmsg | 9 ++++++
> include/linux/console.h | 1 +
> kernel/printk/printk.c | 66 +++++++++++++++++++++++++++++++++-----
> 3 files changed, 68 insertions(+), 8 deletions(-)
>
> diff --git a/Documentation/ABI/testing/dev-kmsg b/Documentation/ABI/testing/dev-kmsg
> index bb820be..fff817e 100644
> --- a/Documentation/ABI/testing/dev-kmsg
> +++ b/Documentation/ABI/testing/dev-kmsg
> @@ -98,4 +98,13 @@ Description: The /dev/kmsg character device node provides userspace access
> logic is used internally when messages are printed to the
> console, /proc/kmsg or the syslog() syscall.
>
> + By default, kernel tries to avoid fragments by concatenating
> + when it can and fragments are rare; however, when extended
> + console support is enabled, the in-kernel concatenation is
> + disabled and /dev/kmsg output will contain more fragments. If
> + the log consumer performs concatenation, the end result
> + should be the same. In the future, the in-kernel concatenation
> + may be removed entirely and /dev/kmsg users are recommended to
> + implement fragment handling.
> +
> Users: dmesg(1), userspace kernel log consumers
> diff --git a/include/linux/console.h b/include/linux/console.h
> index 9f50fb4..bd19434 100644
> --- a/include/linux/console.h
> +++ b/include/linux/console.h
> @@ -115,6 +115,7 @@ static inline int con_debug_leave(void)
> #define CON_BOOT (8)
> #define CON_ANYTIME (16) /* Safe to call when cpu is offline */
> #define CON_BRL (32) /* Used for a braille device */
> +#define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */
>
> struct console {
> char name[16];
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index 51ce4f1..ae980dc 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -85,6 +85,18 @@ static struct lockdep_map console_lock_dep_map = {
> #endif
>
> /*
> + * Number of registered extended console drivers.
> + *
> + * If extended consoles are present, in-kernel cont reassembly is disabled
> + * and each fragment is stored as a separate log entry with proper
> + * continuation flag so that every emitted message has full metadata. This
> + * doesn't change the result for regular consoles or /proc/kmsg. For
> + * /dev/kmsg, as long as the reader concatenates messages according to
> + * consecutive continuation flags, the end result should be the same too.
> + */
> +static int nr_ext_console_drivers;
> +
> +/*
> * Helper macros to handle lockdep when locking/unlocking console_sem. We use
> * macros instead of functions so that _RET_IP_ contains useful information.
> */
> @@ -195,7 +207,7 @@ static int console_may_schedule;
> * need to be changed in the future, when the requirements change.
> *
> * /dev/kmsg exports the structured data in the following line format:
> - * "level,sequnum,timestamp;<message text>\n"
> + * "<level>,<sequnum>,<timestamp>,<contflag>;<message text>\n"
> *
> * The optional key/value pairs are attached as continuation lines starting
> * with a space character and terminated by a newline. All possible
> @@ -1417,7 +1429,9 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
> * log_buf[start] to log_buf[end - 1].
> * The console_lock must be held.
> */
> -static void call_console_drivers(int level, const char *text, size_t len)
> +static void call_console_drivers(int level,
> + const char *ext_text, size_t ext_len,
> + const char *text, size_t len)
> {
> struct console *con;
>
> @@ -1438,7 +1452,10 @@ static void call_console_drivers(int level, const char *text, size_t len)
> if (!cpu_online(smp_processor_id()) &&
> !(con->flags & CON_ANYTIME))
> continue;
> - con->write(con, text, len);
> + if (con->flags & CON_EXTENDED)
> + con->write(con, ext_text, ext_len);
> + else
> + con->write(con, text, len);
> }
> }
>
> @@ -1581,8 +1598,12 @@ static bool cont_add(int facility, int level, const char *text, size_t len)
> if (cont.len && cont.flushed)
> return false;
>
> - if (cont.len + len > sizeof(cont.buf)) {
> - /* the line gets too long, split it up in separate records */
> + /*
> + * If ext consoles are present, flush and skip in-kernel
> + * continuation. See nr_ext_console_drivers definition. Also, if
> + * the line gets too long, split it up in separate records.
> + */
> + if (nr_ext_console_drivers || cont.len + len > sizeof(cont.buf)) {
> cont_flush(LOG_CONT);
> return false;
> }
> @@ -1917,9 +1938,19 @@ static struct cont {
> u8 level;
> bool flushed:1;
> } cont;
> +static char *log_text(const struct printk_log *msg) { return NULL; }
> +static char *log_dict(const struct printk_log *msg) { return NULL; }
> static struct printk_log *log_from_idx(u32 idx) { return NULL; }
> static u32 log_next(u32 idx) { return 0; }
> -static void call_console_drivers(int level, const char *text, size_t len) {}
> +static ssize_t msg_print_ext_header(char *buf, size_t size,
> + struct printk_log *msg, u64 seq,
> + enum log_flags prev_flags) { return 0; }
> +static ssize_t msg_print_ext_body(char *buf, size_t size,
> + char *dict, size_t dict_len,
> + char *text, size_t text_len) { return 0; }
> +static void call_console_drivers(int level,
> + const char *ext_text, size_t ext_len,
> + const char *text, size_t len) {}
> static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev,
> bool syslog, char *buf, size_t size) { return 0; }
> static size_t cont_print_text(char *text, size_t size) { return 0; }
> @@ -2172,7 +2203,7 @@ static void console_cont_flush(char *text, size_t size)
> len = cont_print_text(text, size);
> raw_spin_unlock(&logbuf_lock);
> stop_critical_timings();
> - call_console_drivers(cont.level, text, len);
> + call_console_drivers(cont.level, NULL, 0, text, len);
> start_critical_timings();
> local_irq_restore(flags);
> return;
> @@ -2196,6 +2227,7 @@ out:
> */
> void console_unlock(void)
> {
> + static char ext_text[CONSOLE_EXT_LOG_MAX];
> static char text[LOG_LINE_MAX + PREFIX_MAX];
> static u64 seen_seq;
> unsigned long flags;
> @@ -2214,6 +2246,7 @@ void console_unlock(void)
> again:
> for (;;) {
> struct printk_log *msg;
> + size_t ext_len = 0;
> size_t len;
> int level;
>
> @@ -2259,13 +2292,22 @@ skip:
> level = msg->level;
> len += msg_print_text(msg, console_prev, false,
> text + len, sizeof(text) - len);
> + if (nr_ext_console_drivers) {
> + ext_len = msg_print_ext_header(ext_text,
> + sizeof(ext_text),
> + msg, console_seq, console_prev);
> + ext_len += msg_print_ext_body(ext_text + ext_len,
> + sizeof(ext_text) - ext_len,
> + log_dict(msg), msg->dict_len,
> + log_text(msg), msg->text_len);
> + }
> console_idx = log_next(console_idx);
> console_seq++;
> console_prev = msg->flags;
> raw_spin_unlock(&logbuf_lock);
>
> stop_critical_timings(); /* don't trace print latency */
> - call_console_drivers(level, text, len);
> + call_console_drivers(level, ext_text, ext_len, text, len);
> start_critical_timings();
> local_irq_restore(flags);
> }
> @@ -2521,6 +2563,11 @@ void register_console(struct console *newcon)
> newcon->next = console_drivers->next;
> console_drivers->next = newcon;
> }
> +
> + if (newcon->flags & CON_EXTENDED)
> + if (!nr_ext_console_drivers++)
> + pr_info("printk: continuation disabled due to ext consoles, expect more fragments in /dev/kmsg\n");
> +
> if (newcon->flags & CON_PRINTBUFFER) {
> /*
> * console_unlock(); will print out the buffered messages
> @@ -2593,6 +2640,9 @@ int unregister_console(struct console *console)
> }
> }
>
> + if (!res && (console->flags & CON_EXTENDED))
> + nr_ext_console_drivers--;
> +
> /*
> * If this isn't the last console and it has CON_CONSDEV set, we
> * need to set it on the next preferred console.
> --
> 2.1.0
>