Not all consoles are created equal: depending on the actual hardware,
the latency of a printk() call can vary dramatically. The worst examples
are serial consoles, where it can spin for tens of milliseconds banging
the UART to emit a message, which can cause application-level problems
when the kernel spews onto the console.
At Facebook we use netconsole to monitor our fleet, but we still have
serial consoles attached on each host for live debugging, and the latter
has caused problems. An obvious solution is to disable the kernel
console output to ttyS0, but this makes live debugging frustrating,
since crashes become silent and opaque to the ttyS0 user. Enabling it on
the fly when needed isn't feasible, since boxes you need to debug via
serial are likely to be borked in ways that make this impossible.
That puts us between a rock and a hard place: we'd love to set
kernel.printk to KERN_INFO and get all the logs. But while netconsole is
fast enough to permit that without perturbing userspace, ttyS0 is not,
and we're forced to limit console logging to KERN_WARNING and higher.
This patch introduces a new per-console loglevel setting, and changes
console_unlock() to use max(global_level, per_console_level) when
deciding whether or not to emit a given log message.
This lets us have our cake and eat it too: instead of being forced to
limit all consoles verbosity based on the speed of the slowest one, we
can "promote" the faster console while still using a conservative system
loglevel setting to avoid disturbing applications.
Cc: Petr Mladek <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Sergey Senozhatsky <[email protected]>
Signed-off-by: Calvin Owens <[email protected]>
---
(V1: https://lkml.org/lkml/2017/4/4/783)
Changes in V2:
* Honor the ignore_loglevel setting in all cases
* Change semantics to use max(global, console) as the loglevel
for a console, instead of the previous patch where we treated
the per-console one as a filter downstream of the global one.
include/linux/console.h | 1 +
kernel/printk/printk.c | 38 +++++++++++++++++++-------------------
2 files changed, 20 insertions(+), 19 deletions(-)
diff --git a/include/linux/console.h b/include/linux/console.h
index b8920a0..a5b5d79 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -147,6 +147,7 @@ struct console {
int cflag;
void *data;
struct console *next;
+ int level;
};
/*
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 512f7c2..3f1675e 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1141,9 +1141,14 @@ module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(ignore_loglevel,
"ignore loglevel setting (prints all kernel messages to the console)");
-static bool suppress_message_printing(int level)
+static int effective_loglevel(struct console *con)
{
- return (level >= console_loglevel && !ignore_loglevel);
+ return max(console_loglevel, con ? con->level : LOGLEVEL_EMERG);
+}
+
+static bool suppress_message_printing(int level, struct console *con)
+{
+ return (level >= effective_loglevel(con) && !ignore_loglevel);
}
#ifdef CONFIG_BOOT_PRINTK_DELAY
@@ -1175,7 +1180,7 @@ static void boot_delay_msec(int level)
unsigned long timeout;
if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING)
- || suppress_message_printing(level)) {
+ || suppress_message_printing(level, NULL)) {
return;
}
@@ -1549,7 +1554,7 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
* The console_lock must be held.
*/
static void call_console_drivers(const char *ext_text, size_t ext_len,
- const char *text, size_t len)
+ const char *text, size_t len, int level)
{
struct console *con;
@@ -1568,6 +1573,8 @@ static void call_console_drivers(const char *ext_text, size_t ext_len,
if (!cpu_online(smp_processor_id()) &&
!(con->flags & CON_ANYTIME))
continue;
+ if (suppress_message_printing(level, con))
+ continue;
if (con->flags & CON_EXTENDED)
con->write(con, ext_text, ext_len);
else
@@ -1856,10 +1863,9 @@ static ssize_t msg_print_ext_body(char *buf, size_t size,
char *dict, size_t dict_len,
char *text, size_t text_len) { return 0; }
static void call_console_drivers(const char *ext_text, size_t ext_len,
- const char *text, size_t len) {}
+ const char *text, size_t len, int level) {}
static size_t msg_print_text(const struct printk_log *msg,
bool syslog, char *buf, size_t size) { return 0; }
-static bool suppress_message_printing(int level) { return false; }
#endif /* CONFIG_PRINTK */
@@ -2199,22 +2205,11 @@ void console_unlock(void)
} else {
len = 0;
}
-skip:
+
if (console_seq == log_next_seq)
break;
msg = log_from_idx(console_idx);
- if (suppress_message_printing(msg->level)) {
- /*
- * Skip record we have buffered and already printed
- * directly to the console when we received it, and
- * record that has level above the console loglevel.
- */
- console_idx = log_next(console_idx);
- console_seq++;
- goto skip;
- }
-
len += msg_print_text(msg, false, text + len, sizeof(text) - len);
if (nr_ext_console_drivers) {
ext_len = msg_print_ext_header(ext_text,
@@ -2230,7 +2225,7 @@ void console_unlock(void)
raw_spin_unlock(&logbuf_lock);
stop_critical_timings(); /* don't trace print latency */
- call_console_drivers(ext_text, ext_len, text, len);
+ call_console_drivers(ext_text, ext_len, text, len, msg->level);
start_critical_timings();
printk_safe_exit_irqrestore(flags);
@@ -2497,6 +2492,11 @@ void register_console(struct console *newcon)
newcon->flags &= ~CON_PRINTBUFFER;
/*
+ * By default, the per-console minimum forces no messages through.
+ */
+ newcon->level = LOGLEVEL_EMERG;
+
+ /*
* Put this console in the list - keep the
* preferred driver at the head of the list.
*/
--
2.9.5
This adds a new sysfs interface that contains a directory for each
console registered on the system. Each directory contains a single
"loglevel" file for reading and setting the per-console loglevel.
We can let kobject destruction race with console removal: if it does,
loglevel_{show,store}() will safely fail with -ENODEV. This is a little
weird, but avoids embedding the kobject and therefore needing to totally
refactor the way we handle console struct lifetime.
Cc: Petr Mladek <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Sergey Senozhatsky <[email protected]>
Signed-off-by: Calvin Owens <[email protected]>
---
(V1: https://lkml.org/lkml/2017/4/4/784)
Changes in V2:
* Honor minimum_console_loglevel when setting loglevels
* Added entry in Documentation/ABI/testing
Documentation/ABI/testing/sysfs-consoles | 13 +++++
include/linux/console.h | 1 +
kernel/printk/printk.c | 88 ++++++++++++++++++++++++++++++++
3 files changed, 102 insertions(+)
create mode 100644 Documentation/ABI/testing/sysfs-consoles
diff --git a/Documentation/ABI/testing/sysfs-consoles b/Documentation/ABI/testing/sysfs-consoles
new file mode 100644
index 0000000..6a1593e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-consoles
@@ -0,0 +1,13 @@
+What: /sys/consoles/
+Date: September 2017
+KernelVersion: 4.15
+Contact: Calvin Owens <[email protected]>
+Description: The /sys/consoles tree contains a directory for each console
+ configured on the system. These directories contain the
+ following attributes:
+
+ * "loglevel" Set the per-console loglevel: the kernel uses
+ max(system_loglevel, perconsole_loglevel) when
+ deciding whether to emit a given message. The
+ default is 0, which means max() always yields
+ the system setting in the kernel.printk sysctl.
diff --git a/include/linux/console.h b/include/linux/console.h
index a5b5d79..76840be 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -148,6 +148,7 @@ struct console {
void *data;
struct console *next;
int level;
+ struct kobject *kobj;
};
/*
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 3f1675e..488bda3 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -105,6 +105,8 @@ enum devkmsg_log_masks {
static unsigned int __read_mostly devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT;
+static struct kobject *consoles_dir_kobj;
+
static int __control_devkmsg(char *str)
{
if (!str)
@@ -2371,6 +2373,82 @@ static int __init keep_bootcon_setup(char *str)
early_param("keep_bootcon", keep_bootcon_setup);
+static ssize_t loglevel_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct console *con;
+ ssize_t ret = -ENODEV;
+
+ console_lock();
+ for_each_console(con) {
+ if (con->kobj == kobj) {
+ ret = sprintf(buf, "%d\n", con->level);
+ break;
+ }
+ }
+ console_unlock();
+
+ return ret;
+}
+
+static ssize_t loglevel_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct console *con;
+ ssize_t ret;
+ int tmp;
+
+ ret = kstrtoint(buf, 10, &tmp);
+ if (ret < 0)
+ return ret;
+
+ if (tmp < LOGLEVEL_EMERG)
+ return -ERANGE;
+
+ /*
+ * Mimic the behavior of /dev/kmsg with respect to minimum_loglevel
+ */
+ if (tmp < minimum_console_loglevel)
+ tmp = minimum_console_loglevel;
+
+ ret = -ENODEV;
+ console_lock();
+ for_each_console(con) {
+ if (con->kobj == kobj) {
+ con->level = tmp;
+ ret = count;
+ break;
+ }
+ }
+ console_unlock();
+
+ return ret;
+}
+
+static const struct kobj_attribute console_loglevel_attr =
+ __ATTR(loglevel, 0644, loglevel_show, loglevel_store);
+
+static void console_register_sysfs(struct console *newcon)
+{
+ /*
+ * We might be called very early from register_console(): in that case,
+ * printk_late_init() will take care of this later.
+ */
+ if (!consoles_dir_kobj)
+ return;
+
+ newcon->kobj = kobject_create_and_add(newcon->name, consoles_dir_kobj);
+ if (WARN_ON(!newcon->kobj))
+ return;
+
+ WARN_ON(sysfs_create_file(newcon->kobj, &console_loglevel_attr.attr));
+}
+
+static void console_unregister_sysfs(struct console *oldcon)
+{
+ kobject_put(oldcon->kobj);
+}
+
/*
* The console driver calls this routine during kernel initialization
* to register the console printing procedure with printk() and to
@@ -2495,6 +2573,7 @@ void register_console(struct console *newcon)
* By default, the per-console minimum forces no messages through.
*/
newcon->level = LOGLEVEL_EMERG;
+ newcon->kobj = NULL;
/*
* Put this console in the list - keep the
@@ -2531,6 +2610,7 @@ void register_console(struct console *newcon)
*/
exclusive_console = newcon;
}
+ console_register_sysfs(newcon);
console_unlock();
console_sysfs_notify();
@@ -2597,6 +2677,7 @@ int unregister_console(struct console *console)
console_drivers->flags |= CON_CONSDEV;
console->flags &= ~CON_ENABLED;
+ console_unregister_sysfs(console);
console_unlock();
console_sysfs_notify();
return res;
@@ -2672,6 +2753,13 @@ static int __init printk_late_init(void)
ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online",
console_cpu_notify, NULL);
WARN_ON(ret < 0);
+
+ consoles_dir_kobj = kobject_create_and_add("consoles", NULL);
+ WARN_ON(!consoles_dir_kobj);
+
+ for_each_console(con)
+ console_register_sysfs(con);
+
return 0;
}
late_initcall(printk_late_init);
--
2.9.5
This extends the "console=" interface to allow setting the per-console
loglevel by adding "/N" to the string, where N is the desired loglevel
expressed as a base 10 integer. Invalid values are silently ignored.
Cc: Petr Mladek <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Sergey Senozhatsky <[email protected]>
Signed-off-by: Calvin Owens <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 6 ++---
kernel/printk/console_cmdline.h | 1 +
kernel/printk/printk.c | 30 ++++++++++++++++++++-----
3 files changed, 28 insertions(+), 9 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0549662..f22b992 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -607,10 +607,10 @@
ttyS<n>[,options]
ttyUSB0[,options]
Use the specified serial port. The options are of
- the form "bbbbpnf", where "bbbb" is the baud rate,
+ the form "bbbbpnf/l", where "bbbb" is the baud rate,
"p" is parity ("n", "o", or "e"), "n" is number of
- bits, and "f" is flow control ("r" for RTS or
- omit it). Default is "9600n8".
+ bits, "f" is flow control ("r" for RTS or omit it),
+ and "l" is the loglevel on [0,7]. Default is "9600n8".
See Documentation/admin-guide/serial-console.rst for more
information. See
diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h
index 2ca4a8b..269e666 100644
--- a/kernel/printk/console_cmdline.h
+++ b/kernel/printk/console_cmdline.h
@@ -5,6 +5,7 @@ struct console_cmdline
{
char name[16]; /* Name of the driver */
int index; /* Minor dev. to use */
+ int loglevel; /* Loglevel to use */
char *options; /* Options for the driver */
#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
char *brl_options; /* Options for braille driver */
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 488bda3..4c14cf2 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1892,7 +1892,7 @@ asmlinkage __visible void early_printk(const char *fmt, ...)
#endif
static int __add_preferred_console(char *name, int idx, char *options,
- char *brl_options)
+ int loglevel, char *brl_options)
{
struct console_cmdline *c;
int i;
@@ -1918,6 +1918,7 @@ static int __add_preferred_console(char *name, int idx, char *options,
c->options = options;
braille_set_options(c, brl_options);
+ c->loglevel = loglevel;
c->index = idx;
return 0;
}
@@ -1928,8 +1929,8 @@ static int __add_preferred_console(char *name, int idx, char *options,
static int __init console_setup(char *str)
{
char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */
- char *s, *options, *brl_options = NULL;
- int idx;
+ char *s, *options, *llevel, *brl_options = NULL;
+ int idx, loglevel = LOGLEVEL_EMERG;
if (_braille_console_setup(&str, &brl_options))
return 1;
@@ -1947,6 +1948,14 @@ static int __init console_setup(char *str)
options = strchr(str, ',');
if (options)
*(options++) = 0;
+
+ llevel = strchr(str, '/');
+ if (llevel) {
+ *(llevel++) = 0;
+ if (kstrtoint(llevel, 10, &loglevel))
+ loglevel = LOGLEVEL_EMERG;
+ }
+
#ifdef __sparc__
if (!strcmp(str, "ttya"))
strcpy(buf, "ttyS0");
@@ -1959,7 +1968,7 @@ static int __init console_setup(char *str)
idx = simple_strtoul(s, NULL, 10);
*s = 0;
- __add_preferred_console(buf, idx, options, brl_options);
+ __add_preferred_console(buf, idx, options, loglevel, brl_options);
console_set_on_cmdline = 1;
return 1;
}
@@ -1980,7 +1989,8 @@ __setup("console=", console_setup);
*/
int add_preferred_console(char *name, int idx, char *options)
{
- return __add_preferred_console(name, idx, options, NULL);
+ return __add_preferred_console(name, idx, options, LOGLEVEL_EMERG,
+ NULL);
}
bool console_suspend_enabled = true;
@@ -2475,6 +2485,7 @@ void register_console(struct console *newcon)
struct console *bcon = NULL;
struct console_cmdline *c;
static bool has_preferred;
+ bool extant = false;
if (console_drivers)
for_each_console(bcon)
@@ -2541,6 +2552,12 @@ void register_console(struct console *newcon)
if (newcon->index < 0)
newcon->index = c->index;
+ /*
+ * Carry over the loglevel from the cmdline
+ */
+ newcon->level = c->loglevel;
+ extant = true;
+
if (_braille_register_console(newcon, c))
return;
@@ -2572,8 +2589,9 @@ void register_console(struct console *newcon)
/*
* By default, the per-console minimum forces no messages through.
*/
- newcon->level = LOGLEVEL_EMERG;
newcon->kobj = NULL;
+ if (!extant)
+ newcon->level = LOGLEVEL_EMERG;
/*
* Put this console in the list - keep the
--
2.9.5
On (09/28/17 17:43), Calvin Owens wrote:
> Not all consoles are created equal: depending on the actual hardware,
> the latency of a printk() call can vary dramatically. The worst examples
> are serial consoles, where it can spin for tens of milliseconds banging
> the UART to emit a message, which can cause application-level problems
> when the kernel spews onto the console.
>
> At Facebook we use netconsole to monitor our fleet, but we still have
> serial consoles attached on each host for live debugging, and the latter
> has caused problems. An obvious solution is to disable the kernel
> console output to ttyS0, but this makes live debugging frustrating,
> since crashes become silent and opaque to the ttyS0 user. Enabling it on
> the fly when needed isn't feasible, since boxes you need to debug via
> serial are likely to be borked in ways that make this impossible.
>
> That puts us between a rock and a hard place: we'd love to set
> kernel.printk to KERN_INFO and get all the logs. But while netconsole is
> fast enough to permit that without perturbing userspace, ttyS0 is not,
> and we're forced to limit console logging to KERN_WARNING and higher.
>
> This patch introduces a new per-console loglevel setting, and changes
> console_unlock() to use max(global_level, per_console_level) when
> deciding whether or not to emit a given log message.
>
> This lets us have our cake and eat it too: instead of being forced to
> limit all consoles verbosity based on the speed of the slowest one, we
> can "promote" the faster console while still using a conservative system
> loglevel setting to avoid disturbing applications.
Hi Calvin,
Do you have time to address the review feedback and re-spin v2?
-ss
On Friday 10/19 at 09:04 +0900, Sergey Senozhatsky wrote:
> On (09/28/17 17:43), Calvin Owens wrote:
> > Not all consoles are created equal: depending on the actual hardware,
> > the latency of a printk() call can vary dramatically. The worst examples
> > are serial consoles, where it can spin for tens of milliseconds banging
> > the UART to emit a message, which can cause application-level problems
> > when the kernel spews onto the console.
> >
> > At Facebook we use netconsole to monitor our fleet, but we still have
> > serial consoles attached on each host for live debugging, and the latter
> > has caused problems. An obvious solution is to disable the kernel
> > console output to ttyS0, but this makes live debugging frustrating,
> > since crashes become silent and opaque to the ttyS0 user. Enabling it on
> > the fly when needed isn't feasible, since boxes you need to debug via
> > serial are likely to be borked in ways that make this impossible.
> >
> > That puts us between a rock and a hard place: we'd love to set
> > kernel.printk to KERN_INFO and get all the logs. But while netconsole is
> > fast enough to permit that without perturbing userspace, ttyS0 is not,
> > and we're forced to limit console logging to KERN_WARNING and higher.
> >
> > This patch introduces a new per-console loglevel setting, and changes
> > console_unlock() to use max(global_level, per_console_level) when
> > deciding whether or not to emit a given log message.
> >
> > This lets us have our cake and eat it too: instead of being forced to
> > limit all consoles verbosity based on the speed of the slowest one, we
> > can "promote" the faster console while still using a conservative system
> > loglevel setting to avoid disturbing applications.
>
> Hi Calvin,
>
> Do you have time to address the review feedback and re-spin v2?
Hi Sergey,
It's in-progress, I'm sorry it hasn't happened sooner.
By embedding the kobject in the console struct, we end up needing to refactor
the console drivers to use the kobject refcount instead of simply calling
kfree(), which is what I'm working on. It ends up being tedious but not
particularly complicated, my goal is to have this up soon :)
Thanks,
Calvin
On (10/19/18 22:03), Calvin Owens wrote:
> Hi Sergey,
>
> It's in-progress, I'm sorry it hasn't happened sooner.
Great!
> By embedding the kobject in the console struct, we end up needing to refactor
> the console drivers to use the kobject refcount instead of simply calling
> kfree(), which is what I'm working on. It ends up being tedious but not
> particularly complicated, my goal is to have this up soon :)
OK :)
-ss
On Wed, Nov 08, 2017 at 01:32:53PM -0800, Calvin Owens wrote:
> On 11/03/2017 07:32 AM, Kroah-Hartman wrote:
> > On Fri, Nov 03, 2017 at 03:21:14PM +0100, Petr Mladek wrote:
> > > On Thu 2017-09-28 17:43:56, Calvin Owens wrote:
> > > > This adds a new sysfs interface that contains a directory for each
> > > > console registered on the system. Each directory contains a single
> > > > "loglevel" file for reading and setting the per-console loglevel.
> > > >
> > > > We can let kobject destruction race with console removal: if it does,
> > > > loglevel_{show,store}() will safely fail with -ENODEV. This is a little
> > > > weird, but avoids embedding the kobject and therefore needing to totally
> > > > refactor the way we handle console struct lifetime.
> > >
> > > It looks like a sane approach. It might be worth a comment in the code.
> > >
> > >
> > > > Documentation/ABI/testing/sysfs-consoles | 13 +++++
> > > > include/linux/console.h | 1 +
> > > > kernel/printk/printk.c | 88 ++++++++++++++++++++++++++++++++
> > > > 3 files changed, 102 insertions(+)
> > > > create mode 100644 Documentation/ABI/testing/sysfs-consoles
> > > >
> > > > diff --git a/Documentation/ABI/testing/sysfs-consoles b/Documentation/ABI/testing/sysfs-consoles
> > > > new file mode 100644
> > > > index 0000000..6a1593e
> > > > --- /dev/null
> > > > +++ b/Documentation/ABI/testing/sysfs-consoles
> > > > @@ -0,0 +1,13 @@
> > > > +What: /sys/consoles/
> >
> > Eeek, what!
> >
> > > I rather add Greg in CC. I am not 100% sure that the top level
> > > directory is the right thing to do.
> >
> > Neither do I.
>
> Sure. This is a placeholder I choose arbitrarily pending some real input on
> the location, sorry I didn't make that clear.
>
> > > Alternative might be to hide this under /sys/kernel/consoles/.
> >
> > No no no.
> >
> > > > +Date: September 2017
> > > > +KernelVersion: 4.15
> > > > +Contact: Calvin Owens <[email protected]>
> > > > +Description: The /sys/consoles tree contains a directory for each console
> > > > + configured on the system. These directories contain the
> > > > + following attributes:
> > > > +
> > > > + * "loglevel" Set the per-console loglevel: the kernel uses
> > > > + max(system_loglevel, perconsole_loglevel) when
> > > > + deciding whether to emit a given message. The
> > > > + default is 0, which means max() always yields
> > > > + the system setting in the kernel.printk sysctl.
> > >
> > > I would call the attribute "min_loglevel". The name "loglevel" should
> > > be reserved for the really used loglevel that depends also on the
> > > global loglevel value.
> > >
> > >
> > > > diff --git a/include/linux/console.h b/include/linux/console.h
> > > > index a5b5d79..76840be 100644
> > > > --- a/include/linux/console.h
> > > > +++ b/include/linux/console.h
> > > > @@ -148,6 +148,7 @@ struct console {
> > > > void *data;
> > > > struct console *next;
> > > > int level;
> > > > + struct kobject *kobj;
> >
> > Why are you using "raw" kobjects and not a "real" struct device? This
> > is a device, use that interface instead please.
> >
> > If you need a console 'bus' to place them on, fine, but the virtual bus
> > is probably best and simpler to use.
>
> The problem is that the console corresponds to no actual device (this is what
> Petr was getting at in the other mail). A console *may* be associated with a real
> TTY device, but this isn't universally true (for example, see netconsole_ext).
>
> Embedding a device struct in the console structure is problematic for the same
> reason embedding a raw kobject is: we'd need to rewrite all the code to deal with
> the new refcount/release semantics.
That's ok, that is what you _should_ do :)
> While that's certainly possible, it ends up being a much bigger thorny change. If
> we deal with the "get()/deregister()" race in a safe way, it becomes very simple.
>
> (If it were as trivial as replacing kfrees with puts and adding release callbacks,
> that'd be the obvious way to go, but of course it doesn't end up being that nice...)
I agree it's not trivial, but it's the correct change here, don't try to
abuse the driver core / kobjects, they will come back to bite you :)
> > That is if you _really_ feel you need sysfs interaction with the console
> > layer (hint, I am not yet convinced...)
>
> How would you expose this setting if not via sysfs? All I care about is having the
> setting, how exactly userspace pokes it is not at all important :)
A per-console log-level? I don't know, how do per-console settings work
today, ioctls?
"Fixing" consoles properly would be great work to undertake...
thanks,
greg k-h
From 1583535216802341354@xxx Wed Nov 08 21:34:25 +0000 2017
X-GM-THRID: 1579832740601012796
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread
On 11/03/2017 07:32 AM, Kroah-Hartman wrote:
> On Fri, Nov 03, 2017 at 03:21:14PM +0100, Petr Mladek wrote:
>> On Thu 2017-09-28 17:43:56, Calvin Owens wrote:
>>> This adds a new sysfs interface that contains a directory for each
>>> console registered on the system. Each directory contains a single
>>> "loglevel" file for reading and setting the per-console loglevel.
>>>
>>> We can let kobject destruction race with console removal: if it does,
>>> loglevel_{show,store}() will safely fail with -ENODEV. This is a little
>>> weird, but avoids embedding the kobject and therefore needing to totally
>>> refactor the way we handle console struct lifetime.
>>
>> It looks like a sane approach. It might be worth a comment in the code.
>>
>>
>>> Documentation/ABI/testing/sysfs-consoles | 13 +++++
>>> include/linux/console.h | 1 +
>>> kernel/printk/printk.c | 88 ++++++++++++++++++++++++++++++++
>>> 3 files changed, 102 insertions(+)
>>> create mode 100644 Documentation/ABI/testing/sysfs-consoles
>>>
>>> diff --git a/Documentation/ABI/testing/sysfs-consoles b/Documentation/ABI/testing/sysfs-consoles
>>> new file mode 100644
>>> index 0000000..6a1593e
>>> --- /dev/null
>>> +++ b/Documentation/ABI/testing/sysfs-consoles
>>> @@ -0,0 +1,13 @@
>>> +What: /sys/consoles/
>
> Eeek, what!
>
>> I rather add Greg in CC. I am not 100% sure that the top level
>> directory is the right thing to do.
>
> Neither do I.
Sure. This is a placeholder I choose arbitrarily pending some real input on
the location, sorry I didn't make that clear.
>> Alternative might be to hide this under /sys/kernel/consoles/.
>
> No no no.
>
>>> +Date: September 2017
>>> +KernelVersion: 4.15
>>> +Contact: Calvin Owens <[email protected]>
>>> +Description: The /sys/consoles tree contains a directory for each console
>>> + configured on the system. These directories contain the
>>> + following attributes:
>>> +
>>> + * "loglevel" Set the per-console loglevel: the kernel uses
>>> + max(system_loglevel, perconsole_loglevel) when
>>> + deciding whether to emit a given message. The
>>> + default is 0, which means max() always yields
>>> + the system setting in the kernel.printk sysctl.
>>
>> I would call the attribute "min_loglevel". The name "loglevel" should
>> be reserved for the really used loglevel that depends also on the
>> global loglevel value.
>>
>>
>>> diff --git a/include/linux/console.h b/include/linux/console.h
>>> index a5b5d79..76840be 100644
>>> --- a/include/linux/console.h
>>> +++ b/include/linux/console.h
>>> @@ -148,6 +148,7 @@ struct console {
>>> void *data;
>>> struct console *next;
>>> int level;
>>> + struct kobject *kobj;
>
> Why are you using "raw" kobjects and not a "real" struct device? This
> is a device, use that interface instead please.
>
> If you need a console 'bus' to place them on, fine, but the virtual bus
> is probably best and simpler to use.
The problem is that the console corresponds to no actual device (this is what
Petr was getting at in the other mail). A console *may* be associated with a real
TTY device, but this isn't universally true (for example, see netconsole_ext).
Embedding a device struct in the console structure is problematic for the same
reason embedding a raw kobject is: we'd need to rewrite all the code to deal with
the new refcount/release semantics.
While that's certainly possible, it ends up being a much bigger thorny change. If
we deal with the "get()/deregister()" race in a safe way, it becomes very simple.
(If it were as trivial as replacing kfrees with puts and adding release callbacks,
that'd be the obvious way to go, but of course it doesn't end up being that nice...)
> That is if you _really_ feel you need sysfs interaction with the console
> layer (hint, I am not yet convinced...)
How would you expose this setting if not via sysfs? All I care about is having the
setting, how exactly userspace pokes it is not at all important :)
>>> };
>>>
>>> /*
>>> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
>>> index 3f1675e..488bda3 100644
>>> --- a/kernel/printk/printk.c
>>> +++ b/kernel/printk/printk.c
>>> @@ -105,6 +105,8 @@ enum devkmsg_log_masks {
>>>
>>> static unsigned int __read_mostly devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT;
>>>
>>> +static struct kobject *consoles_dir_kobj;
>>>
>>> static int __control_devkmsg(char *str)
>>> {
>>> if (!str)
>>> @@ -2371,6 +2373,82 @@ static int __init keep_bootcon_setup(char *str)
>>>
>>> early_param("keep_bootcon", keep_bootcon_setup);
>>>
>>> +static ssize_t loglevel_show(struct kobject *kobj, struct kobj_attribute *attr,
>>> + char *buf)
>>> +{
>>> + struct console *con;
>>> + ssize_t ret = -ENODEV;
>>> +
>>
>> This might deserve a comment. Something like:
>>
>> /*
>> * Find the related struct console a safe way. The kobject
>> * desctruction is asynchronous.
>> */
>>> + console_lock();
>>> + for_each_console(con) {
>>> + if (con->kobj == kobj) {
>
> You are doing something wrong, go from kobj to your console directly,
> the fact that you can not do that here is a _huge_ hint that your
> structure is not correct.
>
> Hint, it's not correct at all :)
I understand this looks like a common antipattern, but it is very intentionally
done this way to avoid the issues mentioned above.
> Please cc: me on stuff like this if you want a review, and as you are
> adding a random new sysfs root directory, please always cc: me on that
> so I can talk you out of it...
Apologies you were missed, I'll make sure you're on the CC list for future
revisions.
Thanks,
Calvin
> thanks,
>
> greg k-h
>
From 1583061109803518806@xxx Fri Nov 03 15:58:41 +0000 2017
X-GM-THRID: 1579832740601012796
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread
On Fri, Nov 03, 2017 at 04:46:51PM +0100, Petr Mladek wrote:
> On Fri 2017-11-03 15:32:34, Kroah-Hartman wrote:
> > > > diff --git a/Documentation/ABI/testing/sysfs-consoles b/Documentation/ABI/testing/sysfs-consoles
> > > > new file mode 100644
> > > > index 0000000..6a1593e
> > > > --- /dev/null
> > > > +++ b/Documentation/ABI/testing/sysfs-consoles
> > > > @@ -0,0 +1,13 @@
> > > > +What: /sys/consoles/
> >
> > Eeek, what!
> >
> > > I rather add Greg in CC. I am not 100% sure that the top level
> > > directory is the right thing to do.
> >
> > Neither do I.
> >
> > > Alternative might be to hide this under /sys/kernel/consoles/.
> >
> > No no no.
> >
> > > > diff --git a/include/linux/console.h b/include/linux/console.h
> > > > index a5b5d79..76840be 100644
> > > > --- a/include/linux/console.h
> > > > +++ b/include/linux/console.h
> > > > @@ -148,6 +148,7 @@ struct console {
> > > > void *data;
> > > > struct console *next;
> > > > int level;
> > > > + struct kobject *kobj;
> >
> > Why are you using "raw" kobjects and not a "real" struct device? This
> > is a device, use that interface instead please.
>
> Hmm, struct console has a member
>
> struct tty_driver *(*device)(struct console *, int *);
>
> but it is set only when the console has tty binding.
I don't understand what you are referring to here, sorry.
> > If you need a console 'bus' to place them on, fine, but the virtual bus
> > is probably best and simpler to use.
> >
> > That is if you _really_ feel you need sysfs interaction with the console
> > layer (hint, I am not yet convinced...)
>
> The purpose of this patch is to make a userfriendly interface
> for setting console-specific loglevel (message filtering).
>
> It curretnly uses kobject for creating a simple directory
> structure under /sys/. It is inspired by /sys/power/, /sys/kernel/mm/,
> /sys/kernel/debug/tracing/ stuff.
>
> There are ideas to add more files that would allow to modify even the
> global setting. This is currectly possible by the four numbers in
> /proc/sys/kernel/printk. Nobody knows what the four numbers mean.
> IMHO, the following interface would be easier to work with:
>
> /sys/console/loglevel
> /sys/console/min_loglevel
> /sys/condole/default_loglevel
No, please do not polute the /sys/* namespace with stuff like this. If
this is associated with a specific device, hang the sysfs files off of
it. Your console is in the /sys/device/ tree, right?
> > > /*
> > > * Find the related struct console a safe way. The kobject
> > > * desctruction is asynchronous.
> > > */
> > > > + console_lock();
> > > > + for_each_console(con) {
> > > > + if (con->kobj == kobj) {
> >
> > You are doing something wrong, go from kobj to your console directly,
> > the fact that you can not do that here is a _huge_ hint that your
> > structure is not correct.
> >
> > Hint, it's not correct at all :)
>
> I know that we are not following the original purpose of sysfs.
> But there are more (mis)users.
Sure, and I will gladly work to fix those. But do not add known-broken
code to the tree :)
thanks,
greg k-h
From 1583060464460036826@xxx Fri Nov 03 15:48:26 +0000 2017
X-GM-THRID: 1579832740601012796
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread
On Fri 2017-11-03 15:32:34, Kroah-Hartman wrote:
> > > diff --git a/Documentation/ABI/testing/sysfs-consoles b/Documentation/ABI/testing/sysfs-consoles
> > > new file mode 100644
> > > index 0000000..6a1593e
> > > --- /dev/null
> > > +++ b/Documentation/ABI/testing/sysfs-consoles
> > > @@ -0,0 +1,13 @@
> > > +What: /sys/consoles/
>
> Eeek, what!
>
> > I rather add Greg in CC. I am not 100% sure that the top level
> > directory is the right thing to do.
>
> Neither do I.
>
> > Alternative might be to hide this under /sys/kernel/consoles/.
>
> No no no.
>
> > > diff --git a/include/linux/console.h b/include/linux/console.h
> > > index a5b5d79..76840be 100644
> > > --- a/include/linux/console.h
> > > +++ b/include/linux/console.h
> > > @@ -148,6 +148,7 @@ struct console {
> > > void *data;
> > > struct console *next;
> > > int level;
> > > + struct kobject *kobj;
>
> Why are you using "raw" kobjects and not a "real" struct device? This
> is a device, use that interface instead please.
Hmm, struct console has a member
struct tty_driver *(*device)(struct console *, int *);
but it is set only when the console has tty binding.
> If you need a console 'bus' to place them on, fine, but the virtual bus
> is probably best and simpler to use.
>
> That is if you _really_ feel you need sysfs interaction with the console
> layer (hint, I am not yet convinced...)
The purpose of this patch is to make a userfriendly interface
for setting console-specific loglevel (message filtering).
It curretnly uses kobject for creating a simple directory
structure under /sys/. It is inspired by /sys/power/, /sys/kernel/mm/,
/sys/kernel/debug/tracing/ stuff.
There are ideas to add more files that would allow to modify even the
global setting. This is currectly possible by the four numbers in
/proc/sys/kernel/printk. Nobody knows what the four numbers mean.
IMHO, the following interface would be easier to work with:
/sys/console/loglevel
/sys/console/min_loglevel
/sys/condole/default_loglevel
> > /*
> > * Find the related struct console a safe way. The kobject
> > * desctruction is asynchronous.
> > */
> > > + console_lock();
> > > + for_each_console(con) {
> > > + if (con->kobj == kobj) {
>
> You are doing something wrong, go from kobj to your console directly,
> the fact that you can not do that here is a _huge_ hint that your
> structure is not correct.
>
> Hint, it's not correct at all :)
I know that we are not following the original purpose of sysfs.
But there are more (mis)users.
> Please cc: me on stuff like this if you want a review, and as you are
> adding a random new sysfs root directory, please always cc: me on that
> so I can talk you out of it...
Good to know.
Best Regards,
Petr
From 1583055809901777594@xxx Fri Nov 03 14:34:27 +0000 2017
X-GM-THRID: 1579832740601012796
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread
On Fri, Nov 03, 2017 at 03:21:14PM +0100, Petr Mladek wrote:
> On Thu 2017-09-28 17:43:56, Calvin Owens wrote:
> > This adds a new sysfs interface that contains a directory for each
> > console registered on the system. Each directory contains a single
> > "loglevel" file for reading and setting the per-console loglevel.
> >
> > We can let kobject destruction race with console removal: if it does,
> > loglevel_{show,store}() will safely fail with -ENODEV. This is a little
> > weird, but avoids embedding the kobject and therefore needing to totally
> > refactor the way we handle console struct lifetime.
>
> It looks like a sane approach. It might be worth a comment in the code.
>
>
> > Documentation/ABI/testing/sysfs-consoles | 13 +++++
> > include/linux/console.h | 1 +
> > kernel/printk/printk.c | 88 ++++++++++++++++++++++++++++++++
> > 3 files changed, 102 insertions(+)
> > create mode 100644 Documentation/ABI/testing/sysfs-consoles
> >
> > diff --git a/Documentation/ABI/testing/sysfs-consoles b/Documentation/ABI/testing/sysfs-consoles
> > new file mode 100644
> > index 0000000..6a1593e
> > --- /dev/null
> > +++ b/Documentation/ABI/testing/sysfs-consoles
> > @@ -0,0 +1,13 @@
> > +What: /sys/consoles/
Eeek, what!
> I rather add Greg in CC. I am not 100% sure that the top level
> directory is the right thing to do.
Neither do I.
> Alternative might be to hide this under /sys/kernel/consoles/.
No no no.
> > +Date: September 2017
> > +KernelVersion: 4.15
> > +Contact: Calvin Owens <[email protected]>
> > +Description: The /sys/consoles tree contains a directory for each console
> > + configured on the system. These directories contain the
> > + following attributes:
> > +
> > + * "loglevel" Set the per-console loglevel: the kernel uses
> > + max(system_loglevel, perconsole_loglevel) when
> > + deciding whether to emit a given message. The
> > + default is 0, which means max() always yields
> > + the system setting in the kernel.printk sysctl.
>
> I would call the attribute "min_loglevel". The name "loglevel" should
> be reserved for the really used loglevel that depends also on the
> global loglevel value.
>
>
> > diff --git a/include/linux/console.h b/include/linux/console.h
> > index a5b5d79..76840be 100644
> > --- a/include/linux/console.h
> > +++ b/include/linux/console.h
> > @@ -148,6 +148,7 @@ struct console {
> > void *data;
> > struct console *next;
> > int level;
> > + struct kobject *kobj;
Why are you using "raw" kobjects and not a "real" struct device? This
is a device, use that interface instead please.
If you need a console 'bus' to place them on, fine, but the virtual bus
is probably best and simpler to use.
That is if you _really_ feel you need sysfs interaction with the console
layer (hint, I am not yet convinced...)
> > };
> >
> > /*
> > diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> > index 3f1675e..488bda3 100644
> > --- a/kernel/printk/printk.c
> > +++ b/kernel/printk/printk.c
> > @@ -105,6 +105,8 @@ enum devkmsg_log_masks {
> >
> > static unsigned int __read_mostly devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT;
> >
> > +static struct kobject *consoles_dir_kobj;
> >
> > static int __control_devkmsg(char *str)
> > {
> > if (!str)
> > @@ -2371,6 +2373,82 @@ static int __init keep_bootcon_setup(char *str)
> >
> > early_param("keep_bootcon", keep_bootcon_setup);
> >
> > +static ssize_t loglevel_show(struct kobject *kobj, struct kobj_attribute *attr,
> > + char *buf)
> > +{
> > + struct console *con;
> > + ssize_t ret = -ENODEV;
> > +
>
> This might deserve a comment. Something like:
>
> /*
> * Find the related struct console a safe way. The kobject
> * desctruction is asynchronous.
> */
> > + console_lock();
> > + for_each_console(con) {
> > + if (con->kobj == kobj) {
You are doing something wrong, go from kobj to your console directly,
the fact that you can not do that here is a _huge_ hint that your
structure is not correct.
Hint, it's not correct at all :)
Please cc: me on stuff like this if you want a review, and as you are
adding a random new sysfs root directory, please always cc: me on that
so I can talk you out of it...
thanks,
greg k-h
From 1583055091010432897@xxx Fri Nov 03 14:23:01 +0000 2017
X-GM-THRID: 1579832740601012796
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread
On Thu 2017-09-28 17:43:56, Calvin Owens wrote:
> This adds a new sysfs interface that contains a directory for each
> console registered on the system. Each directory contains a single
> "loglevel" file for reading and setting the per-console loglevel.
>
> We can let kobject destruction race with console removal: if it does,
> loglevel_{show,store}() will safely fail with -ENODEV. This is a little
> weird, but avoids embedding the kobject and therefore needing to totally
> refactor the way we handle console struct lifetime.
It looks like a sane approach. It might be worth a comment in the code.
> Documentation/ABI/testing/sysfs-consoles | 13 +++++
> include/linux/console.h | 1 +
> kernel/printk/printk.c | 88 ++++++++++++++++++++++++++++++++
> 3 files changed, 102 insertions(+)
> create mode 100644 Documentation/ABI/testing/sysfs-consoles
>
> diff --git a/Documentation/ABI/testing/sysfs-consoles b/Documentation/ABI/testing/sysfs-consoles
> new file mode 100644
> index 0000000..6a1593e
> --- /dev/null
> +++ b/Documentation/ABI/testing/sysfs-consoles
> @@ -0,0 +1,13 @@
> +What: /sys/consoles/
I rather add Greg in CC. I am not 100% sure that the top level
directory is the right thing to do.
Alternative might be to hide this under /sys/kernel/consoles/.
> +Date: September 2017
> +KernelVersion: 4.15
> +Contact: Calvin Owens <[email protected]>
> +Description: The /sys/consoles tree contains a directory for each console
> + configured on the system. These directories contain the
> + following attributes:
> +
> + * "loglevel" Set the per-console loglevel: the kernel uses
> + max(system_loglevel, perconsole_loglevel) when
> + deciding whether to emit a given message. The
> + default is 0, which means max() always yields
> + the system setting in the kernel.printk sysctl.
I would call the attribute "min_loglevel". The name "loglevel" should
be reserved for the really used loglevel that depends also on the
global loglevel value.
> diff --git a/include/linux/console.h b/include/linux/console.h
> index a5b5d79..76840be 100644
> --- a/include/linux/console.h
> +++ b/include/linux/console.h
> @@ -148,6 +148,7 @@ struct console {
> void *data;
> struct console *next;
> int level;
> + struct kobject *kobj;
> };
>
> /*
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index 3f1675e..488bda3 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -105,6 +105,8 @@ enum devkmsg_log_masks {
>
> static unsigned int __read_mostly devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT;
>
> +static struct kobject *consoles_dir_kobj;
>
> static int __control_devkmsg(char *str)
> {
> if (!str)
> @@ -2371,6 +2373,82 @@ static int __init keep_bootcon_setup(char *str)
>
> early_param("keep_bootcon", keep_bootcon_setup);
>
> +static ssize_t loglevel_show(struct kobject *kobj, struct kobj_attribute *attr,
> + char *buf)
> +{
> + struct console *con;
> + ssize_t ret = -ENODEV;
> +
This might deserve a comment. Something like:
/*
* Find the related struct console a safe way. The kobject
* desctruction is asynchronous.
*/
> + console_lock();
> + for_each_console(con) {
> + if (con->kobj == kobj) {
> + ret = sprintf(buf, "%d\n", con->level);
> + break;
> + }
> + }
> + console_unlock();
> +
> + return ret;
> +}
> +
> +static ssize_t loglevel_store(struct kobject *kobj, struct kobj_attribute *attr,
> + const char *buf, size_t count)
> +{
> + struct console *con;
> + ssize_t ret;
> + int tmp;
I would use some meaningful name, e.g. new_level ;-)
> + ret = kstrtoint(buf, 10, &tmp);
> + if (ret < 0)
> + return ret;
> +
> + if (tmp < LOGLEVEL_EMERG)
> + return -ERANGE;
> +
> + /*
> + * Mimic the behavior of /dev/kmsg with respect to minimum_loglevel
> + */
> + if (tmp < minimum_console_loglevel)
> + tmp = minimum_console_loglevel;
Hmm, I would remove this "mimic" stuff. minimum_console_loglevel is currently
used to limit operations by the syslog system call. But root is still
able modify the minimum_console_loglevel by writing into
/proc/sys/kernel/printk.
My plan is that /sys/console interface would eventually replace the
crazy /proc/sys/kernel/printk one.
In each case, the default con->level value is zero. It would be
weird if people were not able to set this value.
> +
> + ret = -ENODEV;
I would repeat the same comment here:
/*
* Find the related struct console a safe way. The kobject
* desctruction is asynchronous.
*/
> + console_lock();
> + for_each_console(con) {
> + if (con->kobj == kobj) {
> + con->level = tmp;
> + ret = count;
> + break;
> + }
> + }
> + console_unlock();
> +
> + return ret;
> +}
> +
> +static const struct kobj_attribute console_loglevel_attr =
> + __ATTR(loglevel, 0644, loglevel_show, loglevel_store);
> +
> +static void console_register_sysfs(struct console *newcon)
> +{
Please, add a sanity check to make sure that this API is used the
right way.
if (WARN_ON(newcon->kobj))
return;
> + /*
> + * We might be called very early from register_console(): in that case,
> + * printk_late_init() will take care of this later.
> + */
> + if (!consoles_dir_kobj)
> + return;
> +
> + newcon->kobj = kobject_create_and_add(newcon->name, consoles_dir_kobj);
> + if (WARN_ON(!newcon->kobj))
I would just return in case of error. The error messages from
kobject_create_and_add() should be enough and actually more useful.
> + return;
> +
> + WARN_ON(sysfs_create_file(newcon->kobj, &console_loglevel_attr.attr));
Similar here. Well, I would destroy the kobject if the sysfs file
cannot be created:
if (sysfs_create_file(newcon->kobj, &console_loglevel_attr.attr))
console_unregister_sysfs(newcon);
> +}
> +
> +static void console_unregister_sysfs(struct console *oldcon)
> +{
> + kobject_put(oldcon->kobj);
We need to make that the carefull access in the show()/store() methods work.
oldcon->kobj = NULL;
> +}
> +
> /*
> * The console driver calls this routine during kernel initialization
> * to register the console printing procedure with printk() and to
> @@ -2495,6 +2573,7 @@ void register_console(struct console *newcon)
> * By default, the per-console minimum forces no messages through.
> */
> newcon->level = LOGLEVEL_EMERG;
> + newcon->kobj = NULL;
IMHO, we do not need this. The pointer should already be NULL.
> /*
> * Put this console in the list - keep the
> @@ -2531,6 +2610,7 @@ void register_console(struct console *newcon)
> */
> exclusive_console = newcon;
> }
> + console_register_sysfs(newcon);
> console_unlock();
> console_sysfs_notify();
>
> @@ -2597,6 +2677,7 @@ int unregister_console(struct console *console)
> console_drivers->flags |= CON_CONSDEV;
>
> console->flags &= ~CON_ENABLED;
> + console_unregister_sysfs(console);
> console_unlock();
> console_sysfs_notify();
> return res;
> @@ -2672,6 +2753,13 @@ static int __init printk_late_init(void)
> ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online",
> console_cpu_notify, NULL);
> WARN_ON(ret < 0);
> +
> + consoles_dir_kobj = kobject_create_and_add("consoles", NULL);
> + WARN_ON(!consoles_dir_kobj);
Again, I would just return in case of error.
> + for_each_console(con)
> + console_register_sysfs(con);
> +
> return 0;
Best Regards,
Petr
From 1579832740601012796@xxx Fri Sep 29 00:45:08 +0000 2017
X-GM-THRID: 1579832740601012796
X-Gmail-Labels: Inbox,Category Forums
On Thu 2017-09-28 17:43:57, Calvin Owens wrote:
> This extends the "console=" interface to allow setting the per-console
> loglevel by adding "/N" to the string, where N is the desired loglevel
> expressed as a base 10 integer. Invalid values are silently ignored.
>
> Cc: Petr Mladek <[email protected]>
> Cc: Steven Rostedt <[email protected]>
> Cc: Sergey Senozhatsky <[email protected]>
> Signed-off-by: Calvin Owens <[email protected]>
> ---
> Documentation/admin-guide/kernel-parameters.txt | 6 ++---
> kernel/printk/console_cmdline.h | 1 +
> kernel/printk/printk.c | 30 ++++++++++++++++++++-----
> 3 files changed, 28 insertions(+), 9 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 0549662..f22b992 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -607,10 +607,10 @@
> ttyS<n>[,options]
> ttyUSB0[,options]
> Use the specified serial port. The options are of
> - the form "bbbbpnf", where "bbbb" is the baud rate,
> + the form "bbbbpnf/l", where "bbbb" is the baud rate,
> "p" is parity ("n", "o", or "e"), "n" is number of
> - bits, and "f" is flow control ("r" for RTS or
> - omit it). Default is "9600n8".
> + bits, "f" is flow control ("r" for RTS or omit it),
> + and "l" is the loglevel on [0,7]. Default is "9600n8".
>
If I get this correctly, the patch allows to define the loglevel for any
console. I think that we need to describe it in a generic
way. Something like:
console= [KNL] Output console device and options.
Format: name[,options][/min_loglevel]
Where "name" is the console name, "options"
are console-specific options, and "min_loglevel"
allows to increase the loglevel for a particular
console over the global one.
tty<n> Use the virtual console device <n>.
I would also add a cross reference into the loglevel= section
about that the global loglevel might be overridden by a higher
console-specific min_loglevel value.
> See Documentation/admin-guide/serial-console.rst for more
> information. See
> diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h
> index 2ca4a8b..269e666 100644
> --- a/kernel/printk/console_cmdline.h
> +++ b/kernel/printk/console_cmdline.h
> @@ -5,6 +5,7 @@ struct console_cmdline
> {
> char name[16]; /* Name of the driver */
> int index; /* Minor dev. to use */
> + int loglevel; /* Loglevel to use */
Again, I would use "min_loglevel".
> char *options; /* Options for the driver */
> #ifdef CONFIG_A11Y_BRAILLE_CONSOLE
> char *brl_options; /* Options for braille driver */
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index 488bda3..4c14cf2 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -2541,6 +2552,12 @@ void register_console(struct console *newcon)
> if (newcon->index < 0)
> newcon->index = c->index;
>
> + /*
> + * Carry over the loglevel from the cmdline
> + */
> + newcon->level = c->loglevel;
> + extant = true;
I would personally do the following:
if (!newcon->min_loglevel)
newcon->min_loglevel = c->min_loglevel;
It is similar to newcon->index handling above. It will use the
command line setting only when the console is registered for the first
time.
All this is based on my assumption that all non-initialized struct
console members are zero. At least I do not see any location where
some other members would be explicitly zeroed. And there might
be candidates, e.g. data, match(), next.
In each case, I do not know what the shortcut "extant" stands for
and I feel confused ;-)
> if (_braille_register_console(newcon, c))
> return;
>
> @@ -2572,8 +2589,9 @@ void register_console(struct console *newcon)
> /*
> * By default, the per-console minimum forces no messages through.
> */
> - newcon->level = LOGLEVEL_EMERG;
> newcon->kobj = NULL;
> + if (!extant)
> + newcon->level = LOGLEVEL_EMERG;
I believe that this is not necessary.
Otherwise, I really like this approach. I am surprised that all
consoles might be supported so easily.
Well, I am not 100% sure that '/' delimiter is the best choice.
I hope that it will not cause any confusion. But I cannot think
of anything better.
Best Regards,
Petr
From 1579832739058560438@xxx Fri Sep 29 00:45:07 +0000 2017
X-GM-THRID: 1579832739058560438
X-Gmail-Labels: Inbox,Category Forums
On Fri, 3 Nov 2017 13:00:05 +0100
Petr Mladek <[email protected]> wrote:
> On Thu 2017-09-28 17:43:55, Calvin Owens wrote:
> > This patch introduces a new per-console loglevel setting, and changes
> > console_unlock() to use max(global_level, per_console_level) when
> > deciding whether or not to emit a given log message.
>
> > diff --git a/include/linux/console.h b/include/linux/console.h
> > index b8920a0..a5b5d79 100644
> > --- a/include/linux/console.h
> > +++ b/include/linux/console.h
> > @@ -147,6 +147,7 @@ struct console {
> > int cflag;
> > void *data;
> > struct console *next;
> > + int level;
>
> I would make the meaning more clear and call this min_loglevel.
Or just loglevel, as those of us dealing with printk should understand.
This is just a per console loglevel, and if we call it min_loglevel, it
may be confusing because there is no "min_loglevel" that is currently
used.
[ Just read what you did below, maybe min is OK ]
>
> > };
> >
> > /*
> > diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> > index 512f7c2..3f1675e 100644
> > --- a/kernel/printk/printk.c
> > +++ b/kernel/printk/printk.c
> > @@ -1141,9 +1141,14 @@ module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
> > MODULE_PARM_DESC(ignore_loglevel,
> > "ignore loglevel setting (prints all kernel messages to the console)");
> >
> > -static bool suppress_message_printing(int level)
> > +static int effective_loglevel(struct console *con)
> > {
> > - return (level >= console_loglevel && !ignore_loglevel);
> > + return max(console_loglevel, con ? con->level : LOGLEVEL_EMERG);
> > +}
> > +
> > +static bool suppress_message_printing(int level, struct console *con)
> > +{
> > + return (level >= effective_loglevel(con) && !ignore_loglevel);
> > }
>
> We need to be more careful here:
>
> First, there is one ugly level called CONSOLE_LOGLEVEL_SILENT. Fortunately,
> it is used only by vkdb_printf(). I guess that the purpose is to store
> messages into the log buffer and do not show them on consoles.
Unfortunately, it is also used by
arch/mn10300/kernel/mn10300-watchdog.c.
Which calls console_silent().
>
> It is hack and it is racy. It would hide the messages only when the
> console_lock() is not already taken. Similar hack is used on more
> locations, e.g. in __handle_sysrq() and these are racy as well.
> We need to come up with something better in the future but this
> is a task for another patchset.
Agreed.
>
>
> Second, this functions are called with NULL when we need to take
> all usable consoles into account. You simplified it by ignoring
> the per-console setting. But it is not correct. For example,
> you might need to delay the printing in boot_delay_msec()
> also on the fast console. Also this was the reason to remove
> one optimization in console_unlock().
>
> I thought about a reasonable solution and came up with something like:
>
> static bool suppress_message_printing(int level, struct console *con)
> {
> int callable_loglevel;
>
> if (ignore_loglevel || console_loglevel == CONSOLE_LOGLEVEL_MOTORMOUTH)
> return false;
>
> /* Make silent even fast consoles. */
> if (console_loglevel == CONSOLE_LOGLEVEL_SILENT)
> return true;
>
> if (con)
> callable_loglevel = con->min_loglevel;
> else
> callable_loglevel = max_custom_console_loglevel;
>
> /* Global setting might make all consoles more verbose. */
> if (callable_loglevel < console_loglevel)
> callable_loglevel = console_loglevel;
>
> return level >= callable_loglevel();
> }
>
> Yes, it is complicated. But the logic is complicated. IMHO, this has
> the advantage that we do most of the decisions on a single place
> and it might be easier to get the picture.
It's not that complex, and it also has the benefit to document exactly
what the console_loglevel is doing.
>
> Anyway, max_custom_console_loglevel would be a global variable
> defined as:
>
> /*
> * Minimum loglevel of the most talkative registered console.
> * It is a maximum of all registered con->min_logvevel values.
> */
> static int max_custom_console_loglevel = LOGLEVEL_EMERG;
Ah, that is why you added min_loglevel.
-- Steve
>
> The value should get updated when any console is registered
> and when a registered console is manipulated. It means in
> register_console(), unregister_console(), and the sysfs
> write callbacks.
>
From 1583046156854331276@xxx Fri Nov 03 12:01:01 +0000 2017
X-GM-THRID: 1579832872468894514
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread
On Thu 2017-09-28 17:43:55, Calvin Owens wrote:
> This patch introduces a new per-console loglevel setting, and changes
> console_unlock() to use max(global_level, per_console_level) when
> deciding whether or not to emit a given log message.
> diff --git a/include/linux/console.h b/include/linux/console.h
> index b8920a0..a5b5d79 100644
> --- a/include/linux/console.h
> +++ b/include/linux/console.h
> @@ -147,6 +147,7 @@ struct console {
> int cflag;
> void *data;
> struct console *next;
> + int level;
I would make the meaning more clear and call this min_loglevel.
> };
>
> /*
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index 512f7c2..3f1675e 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -1141,9 +1141,14 @@ module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
> MODULE_PARM_DESC(ignore_loglevel,
> "ignore loglevel setting (prints all kernel messages to the console)");
>
> -static bool suppress_message_printing(int level)
> +static int effective_loglevel(struct console *con)
> {
> - return (level >= console_loglevel && !ignore_loglevel);
> + return max(console_loglevel, con ? con->level : LOGLEVEL_EMERG);
> +}
> +
> +static bool suppress_message_printing(int level, struct console *con)
> +{
> + return (level >= effective_loglevel(con) && !ignore_loglevel);
> }
We need to be more careful here:
First, there is one ugly level called CONSOLE_LOGLEVEL_SILENT. Fortunately,
it is used only by vkdb_printf(). I guess that the purpose is to store
messages into the log buffer and do not show them on consoles.
It is hack and it is racy. It would hide the messages only when the
console_lock() is not already taken. Similar hack is used on more
locations, e.g. in __handle_sysrq() and these are racy as well.
We need to come up with something better in the future but this
is a task for another patchset.
Second, this functions are called with NULL when we need to take
all usable consoles into account. You simplified it by ignoring
the per-console setting. But it is not correct. For example,
you might need to delay the printing in boot_delay_msec()
also on the fast console. Also this was the reason to remove
one optimization in console_unlock().
I thought about a reasonable solution and came up with something like:
static bool suppress_message_printing(int level, struct console *con)
{
int callable_loglevel;
if (ignore_loglevel || console_loglevel == CONSOLE_LOGLEVEL_MOTORMOUTH)
return false;
/* Make silent even fast consoles. */
if (console_loglevel == CONSOLE_LOGLEVEL_SILENT)
return true;
if (con)
callable_loglevel = con->min_loglevel;
else
callable_loglevel = max_custom_console_loglevel;
/* Global setting might make all consoles more verbose. */
if (callable_loglevel < console_loglevel)
callable_loglevel = console_loglevel;
return level >= callable_loglevel();
}
Yes, it is complicated. But the logic is complicated. IMHO, this has
the advantage that we do most of the decisions on a single place
and it might be easier to get the picture.
Anyway, max_custom_console_loglevel would be a global variable
defined as:
/*
* Minimum loglevel of the most talkative registered console.
* It is a maximum of all registered con->min_logvevel values.
*/
static int max_custom_console_loglevel = LOGLEVEL_EMERG;
The value should get updated when any console is registered
and when a registered console is manipulated. It means in
register_console(), unregister_console(), and the sysfs
write callbacks.
> #ifdef CONFIG_BOOT_PRINTK_DELAY
> @@ -2199,22 +2205,11 @@ void console_unlock(void)
> } else {
> len = 0;
> }
> -skip:
> +
> if (console_seq == log_next_seq)
> break;
>
> msg = log_from_idx(console_idx);
> - if (suppress_message_printing(msg->level)) {
> - /*
> - * Skip record we have buffered and already printed
> - * directly to the console when we received it, and
> - * record that has level above the console loglevel.
> - */
> - console_idx = log_next(console_idx);
> - console_seq++;
> - goto skip;
> - }
I would like to keep this code. It does not make sense to prepare the
text buffer if it won't be used at all. It would work with the change
that I proposed above.
> len += msg_print_text(msg, false, text + len, sizeof(text) - len);
> if (nr_ext_console_drivers) {
> ext_len = msg_print_ext_header(ext_text,
> @@ -2230,7 +2225,7 @@ void console_unlock(void)
> raw_spin_unlock(&logbuf_lock);
>
> stop_critical_timings(); /* don't trace print latency */
> - call_console_drivers(ext_text, ext_len, text, len);
> + call_console_drivers(ext_text, ext_len, text, len, msg->level);
> start_critical_timings();
> printk_safe_exit_irqrestore(flags);
>
> @@ -2497,6 +2492,11 @@ void register_console(struct console *newcon)
> newcon->flags &= ~CON_PRINTBUFFER;
>
> /*
> + * By default, the per-console minimum forces no messages through.
> + */
> + newcon->level = LOGLEVEL_EMERG;
I wonder if we need this. I am not aware of any console where the
structure would not be defined globally. It means that all
non-initialized members should be zero.
Finally, I think that this feature makes sense. Thanks a lot for
working on it. I am sorry for the late reply. I need to get more
efficient in the patch review skill.
Best Regards,
Petr
From 1581798862896430407@xxx Fri Oct 20 17:35:49 +0000 2017
X-GM-THRID: 1579832872468894514
X-Gmail-Labels: Inbox,Category Forums
On 10/20/2017 01:05 AM, Petr Mladek wrote:
> On Thu 2017-10-19 16:40:45, Calvin Owens wrote:
>> On 09/28/2017 05:43 PM, Calvin Owens wrote:
>>> Not all consoles are created equal: depending on the actual hardware,
>>> the latency of a printk() call can vary dramatically. The worst examples
>>> are serial consoles, where it can spin for tens of milliseconds banging
>>> the UART to emit a message, which can cause application-level problems
>>> when the kernel spews onto the console.
>>
>> Any thoughts on this series? Happy to resend again, but if there are no
>> objections I'd love to see it merged sooner rather than later :)
>>
>> Happy to resend too, just let me know.
>
> There is no need to resend the patch. It is on my radar and I am
> going to look at it.
>
> Please, be patient, you hit conference, illness, after vacation
> season. We do not want to unnecessarily delay it but it is
> not a trivial change that might be accepted within minutes.
No worries, just wanted to make sure it hadn't been missed :)
Thanks,
Calvin
> Best Regards,
> Petr
From 1581763015278867521@xxx Fri Oct 20 08:06:02 +0000 2017
X-GM-THRID: 1579832872468894514
X-Gmail-Labels: Inbox,Category Forums
On Thu 2017-10-19 16:40:45, Calvin Owens wrote:
> On 09/28/2017 05:43 PM, Calvin Owens wrote:
> >Not all consoles are created equal: depending on the actual hardware,
> >the latency of a printk() call can vary dramatically. The worst examples
> >are serial consoles, where it can spin for tens of milliseconds banging
> >the UART to emit a message, which can cause application-level problems
> >when the kernel spews onto the console.
>
> Any thoughts on this series? Happy to resend again, but if there are no
> objections I'd love to see it merged sooner rather than later :)
>
> Happy to resend too, just let me know.
There is no need to resend the patch. It is on my radar and I am
going to look at it.
Please, be patient, you hit conference, illness, after vacation
season. We do not want to unnecessarily delay it but it is
not a trivial change that might be accepted within minutes.
Best Regards,
Petr
From 1581731347721247387@xxx Thu Oct 19 23:42:41 +0000 2017
X-GM-THRID: 1579832872468894514
X-Gmail-Labels: Inbox,Category Forums
On 09/28/2017 05:43 PM, Calvin Owens wrote:
> Not all consoles are created equal: depending on the actual hardware,
> the latency of a printk() call can vary dramatically. The worst examples
> are serial consoles, where it can spin for tens of milliseconds banging
> the UART to emit a message, which can cause application-level problems
> when the kernel spews onto the console.
Any thoughts on this series? Happy to resend again, but if there are no
objections I'd love to see it merged sooner rather than later :)
Happy to resend too, just let me know.
Thanks,
Calvin
> At Facebook we use netconsole to monitor our fleet, but we still have
> serial consoles attached on each host for live debugging, and the latter
> has caused problems. An obvious solution is to disable the kernel
> console output to ttyS0, but this makes live debugging frustrating,
> since crashes become silent and opaque to the ttyS0 user. Enabling it on
> the fly when needed isn't feasible, since boxes you need to debug via
> serial are likely to be borked in ways that make this impossible.
>
> That puts us between a rock and a hard place: we'd love to set
> kernel.printk to KERN_INFO and get all the logs. But while netconsole is
> fast enough to permit that without perturbing userspace, ttyS0 is not,
> and we're forced to limit console logging to KERN_WARNING and higher.
>
> This patch introduces a new per-console loglevel setting, and changes
> console_unlock() to use max(global_level, per_console_level) when
> deciding whether or not to emit a given log message.
>
> This lets us have our cake and eat it too: instead of being forced to
> limit all consoles verbosity based on the speed of the slowest one, we
> can "promote" the faster console while still using a conservative system
> loglevel setting to avoid disturbing applications.
>
> Cc: Petr Mladek <[email protected]>
> Cc: Steven Rostedt <[email protected]>
> Cc: Sergey Senozhatsky <[email protected]>
> Signed-off-by: Calvin Owens <[email protected]>
> ---
> (V1: https://lkml.org/lkml/2017/4/4/783)
>
> Changes in V2:
> * Honor the ignore_loglevel setting in all cases
> * Change semantics to use max(global, console) as the loglevel
> for a console, instead of the previous patch where we treated
> the per-console one as a filter downstream of the global one.
>
> include/linux/console.h | 1 +
> kernel/printk/printk.c | 38 +++++++++++++++++++-------------------
> 2 files changed, 20 insertions(+), 19 deletions(-)
>
> diff --git a/include/linux/console.h b/include/linux/console.h
> index b8920a0..a5b5d79 100644
> --- a/include/linux/console.h
> +++ b/include/linux/console.h
> @@ -147,6 +147,7 @@ struct console {
> int cflag;
> void *data;
> struct console *next;
> + int level;
> };
>
> /*
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index 512f7c2..3f1675e 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -1141,9 +1141,14 @@ module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
> MODULE_PARM_DESC(ignore_loglevel,
> "ignore loglevel setting (prints all kernel messages to the console)");
>
> -static bool suppress_message_printing(int level)
> +static int effective_loglevel(struct console *con)
> {
> - return (level >= console_loglevel && !ignore_loglevel);
> + return max(console_loglevel, con ? con->level : LOGLEVEL_EMERG);
> +}
> +
> +static bool suppress_message_printing(int level, struct console *con)
> +{
> + return (level >= effective_loglevel(con) && !ignore_loglevel);
> }
>
> #ifdef CONFIG_BOOT_PRINTK_DELAY
> @@ -1175,7 +1180,7 @@ static void boot_delay_msec(int level)
> unsigned long timeout;
>
> if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING)
> - || suppress_message_printing(level)) {
> + || suppress_message_printing(level, NULL)) {
> return;
> }
>
> @@ -1549,7 +1554,7 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
> * The console_lock must be held.
> */
> static void call_console_drivers(const char *ext_text, size_t ext_len,
> - const char *text, size_t len)
> + const char *text, size_t len, int level)
> {
> struct console *con;
>
> @@ -1568,6 +1573,8 @@ static void call_console_drivers(const char *ext_text, size_t ext_len,
> if (!cpu_online(smp_processor_id()) &&
> !(con->flags & CON_ANYTIME))
> continue;
> + if (suppress_message_printing(level, con))
> + continue;
> if (con->flags & CON_EXTENDED)
> con->write(con, ext_text, ext_len);
> else
> @@ -1856,10 +1863,9 @@ static ssize_t msg_print_ext_body(char *buf, size_t size,
> char *dict, size_t dict_len,
> char *text, size_t text_len) { return 0; }
> static void call_console_drivers(const char *ext_text, size_t ext_len,
> - const char *text, size_t len) {}
> + const char *text, size_t len, int level) {}
> static size_t msg_print_text(const struct printk_log *msg,
> bool syslog, char *buf, size_t size) { return 0; }
> -static bool suppress_message_printing(int level) { return false; }
>
> #endif /* CONFIG_PRINTK */
>
> @@ -2199,22 +2205,11 @@ void console_unlock(void)
> } else {
> len = 0;
> }
> -skip:
> +
> if (console_seq == log_next_seq)
> break;
>
> msg = log_from_idx(console_idx);
> - if (suppress_message_printing(msg->level)) {
> - /*
> - * Skip record we have buffered and already printed
> - * directly to the console when we received it, and
> - * record that has level above the console loglevel.
> - */
> - console_idx = log_next(console_idx);
> - console_seq++;
> - goto skip;
> - }
> -
> len += msg_print_text(msg, false, text + len, sizeof(text) - len);
> if (nr_ext_console_drivers) {
> ext_len = msg_print_ext_header(ext_text,
> @@ -2230,7 +2225,7 @@ void console_unlock(void)
> raw_spin_unlock(&logbuf_lock);
>
> stop_critical_timings(); /* don't trace print latency */
> - call_console_drivers(ext_text, ext_len, text, len);
> + call_console_drivers(ext_text, ext_len, text, len, msg->level);
> start_critical_timings();
> printk_safe_exit_irqrestore(flags);
>
> @@ -2497,6 +2492,11 @@ void register_console(struct console *newcon)
> newcon->flags &= ~CON_PRINTBUFFER;
>
> /*
> + * By default, the per-console minimum forces no messages through.
> + */
> + newcon->level = LOGLEVEL_EMERG;
> +
> + /*
> * Put this console in the list - keep the
> * preferred driver at the head of the list.
> */
>
From 1579832872468894514@xxx Fri Sep 29 00:47:14 +0000 2017
X-GM-THRID: 1579832872468894514
X-Gmail-Labels: Inbox,Category Forums