Add support for selection of dynamic debug logs destination.
When enabled it allows to configure destination of each callsite
individually. The option adds a framework (described by struct
ddebug_dst_ops) which provides flexible means to add a new destination
for debug logs. On top of the framework support for trace instance as
destination is added. By default destination of debug logs is syslog.
Syslog can be set explicitly by using dst keyword or is selected by
default when dst keyword is omitted, for example:
localhost ~ # echo -n "module usbcore dst syslog =p" >
<debugfs>/dynamic_debug/control
or
localhost ~ # echo -n "module usbcore =p" >
<debugfs>/dynamic_debug/control
To enable a trace instance as a destination of debug logs a user
can for example issue the command:
localhost ~ # echo -n "module usbcore dst trace:usb =p" >
<debugfs>/dynamic_debug/control
The command will write debug logs from usbcore subsystem to the
trace instance named "usb" and the captured logs can be read
later with the command:
localhost ~ # cat <debugfs>/tracing/instances/usb/trace
When trace instance name is omitted the debug logs will be written
to "ddebug_default" trace instance:
localhost ~ # cat <debugfs>/tracing/instances/ddebug_default/trace
The DYNAMIC_DEBUG_DST option adds destination information
in the dynamic debug control:
localhost ~ # cat <debugfs>/dynamic_debug/control
drivers/usb/core/usb-acpi.c:143 [usbcore]usb_acpi_set_power_state =p "acpi: power failed to be set\n" trace:usb
drivers/usb/core/usb-acpi.c:141 [usbcore]usb_acpi_set_power_state =p "acpi: power was set to %d\n" trace:usb
There are two modes of operation for dynamic debug logs destination
which can be selected during configuration stage of a kernel:
* Dynamic - callsites configuration and destination of debug logs
can be dynamically changed during runtime. This has slight impact
on performance because it requires SRCU to be used for synchronization.
* Static - callsites configuration and destination of debug logs
can be configured only from a kernel boot command line. This option
hides <debugfs>/dynamic_debug/control interface. It does not require
SRCU synchronization because of the static configuration. It also
provides convenient means to capture debug logs on production systems
for issues which are difficult to reproduce but on the other hand
do not expose <debugfs>/dynamic_debug/control interface for security
reasons.
For example to capture thunderbolt subsystem debug logs on production
system to trace instance named "thunderbolt":
* enable kernel configuration option CONFIG_DYNAMIC_DEBUG_CORE,
* enable kernel configuration option DYNAMIC_DEBUG_DST and set its
operation mode to Static.
* add the option to the thunderbolt's drivers/thunderbolt/Makefile
in order to compile in debug logs:
CFLAGS_nhi.o := -DDYNAMIC_DEBUG_MODULE
* compile the kernel and update it on a target device,
* finally append the entry to the kernel boot command line:
thunderbolt.dyndbg="dst trace:thunderbolt =p"
The DYNAMIC_DEBUG_DST option increases each callsite size by
the pointer size.
Signed-off-by: Łukasz Bartosik <[email protected]>
---
Changelog v1:
- initial creation
---
include/linux/dynamic_debug.h | 30 ++
lib/Kconfig.debug | 103 ++++++
lib/dynamic_debug.c | 659 +++++++++++++++++++++++++++++++++-
3 files changed, 786 insertions(+), 6 deletions(-)
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 41682278d2e8..8dee10f66e72 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -38,8 +38,18 @@ struct _ddebug {
#define _DPRINTK_FLAGS_INCL_LINENO (1<<3)
#define _DPRINTK_FLAGS_INCL_TID (1<<4)
#define _DPRINTK_FLAGS_INCL_SOURCENAME (1<<5)
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+ /*
+ * The 6th and 7th bits of the flags are used to determine
+ * destination of debug logs, currently supported destinations
+ * are defined in ddebug_dst_type enumeration
+ */
+#define _DPRINTK_FLAGS_DST_SHIFT 6
+#define _DPRINTK_FLAGS_DST_MASK (3<<_DPRINTK_FLAGS_DST_SHIFT)
+#endif
+
#define _DPRINTK_FLAGS_INCL_ANY \
(_DPRINTK_FLAGS_INCL_MODNAME | _DPRINTK_FLAGS_INCL_FUNCNAME |\
_DPRINTK_FLAGS_INCL_LINENO | _DPRINTK_FLAGS_INCL_TID |\
_DPRINTK_FLAGS_INCL_SOURCENAME)
@@ -54,6 +64,10 @@ struct _ddebug {
struct static_key_false dd_key_false;
} key;
#endif
+
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+ struct ddebug_dst *dst;
+#endif
} __attribute__((aligned(8)));
enum class_map_type {
@@ -280,10 +294,26 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
_dynamic_func_call(fmt, __dynamic_ibdev_dbg, \
dev, fmt, ##__VA_ARGS__)
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+
+void __print_hex_dump_dst(const struct _ddebug *descriptor, const char *level,
+ const char *prefix_str, int prefix_type, int rowsize,
+ int groupsize, const void *buf, size_t len,
+ bool ascii);
+
+#define dynamic_hex_dump(prefix_str, prefix_type, rowsize, \
+ groupsize, buf, len, ascii) \
+ _dynamic_func_call(__builtin_constant_p(prefix_str) ? prefix_str : "hexdump", \
+ __print_hex_dump_dst, \
+ KERN_DEBUG, prefix_str, prefix_type, \
+ rowsize, groupsize, buf, len, ascii)
+#else
+
#define dynamic_hex_dump(prefix_str, prefix_type, rowsize, \
groupsize, buf, len, ascii) \
_dynamic_func_call_no_desc(__builtin_constant_p(prefix_str) ? prefix_str : "hexdump", \
print_hex_dump, \
KERN_DEBUG, prefix_str, prefix_type, \
rowsize, groupsize, buf, len, ascii)
+#endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index f0c30ad83f22..4abfa126bde5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -188,6 +188,109 @@ config DYNAMIC_DEBUG_CORE
the case of embedded system where the kernel image size is
sensitive for people.
+menuconfig DYNAMIC_DEBUG_DST
+ bool "Enable selection of dynamic debug logs destination"
+ depends on DYNAMIC_DEBUG_CORE
+ select TRACING
+ help
+ Add support for selecting destination of dynamic debug logs.
+ When enabled it allows to configure destination of each callsite
+ individually. The option adds a framework (described by struct
+ ddebug_dst_ops) which provides flexible means to add a new destination
+ for debug logs. On top of the framework support for trace instance as
+ destination is added. By default destination of debug logs is syslog.
+ Syslog can be set explicitly by using dst keyword or is selected by
+ default when dst keyword is omitted, for example:
+
+ localhost ~ # echo -n "module usbcore dst syslog =p" >
+ <debugfs>/dynamic_debug/control
+ or
+
+ localhost ~ # echo -n "module usbcore =p" >
+ <debugfs>/dynamic_debug/control
+
+ To enable a trace instance as a destination of debug logs a user
+ can for example issue the command:
+
+ localhost ~ # echo -n "module usbcore dst trace:usb =p" >
+ <debugfs>/dynamic_debug/control
+
+ The command will write debug logs from usbcore subsystem to the
+ trace instance named "usb" and the captured logs can be read
+ later with the command:
+
+ localhost ~ # cat <debugfs>/tracing/instances/usb/trace
+
+ When trace instance name is omitted the debug logs will be written
+ to "ddebug_default" trace instance:
+
+ localhost ~ # cat <debugfs>/tracing/instances/ddebug_default/trace
+
+ The DYNAMIC_DEBUG_DST option adds destination information
+ in the dynamic debug control:
+
+ localhost ~ # cat <debugfs>/dynamic_debug/control
+ # filename:lineno [module]function flags format destination
+ drivers/usb/core/usb-acpi.c:143 [usbcore]usb_acpi_set_power_state =p "acpi: power failed to be set\n" trace:usb
+ drivers/usb/core/usb-acpi.c:141 [usbcore]usb_acpi_set_power_state =p "acpi: power was set to %d\n" trace:usb
+
+ The DYNAMIC_DEBUG_DST option increases each callsite size by
+ the pointer size.
+
+if DYNAMIC_DEBUG_DST
+
+choice
+ prompt "Destination mode"
+ default DYNAMIC_DEBUG_DST_DYNAMIC
+ help
+ There are two modes of operation for dynamic debug logs destination
+ which can be selected during configuration stage of a kernel:
+
+ * Dynamic - callsites configuration and destination of debug logs
+ can be dynamically changed during runtime. This has slight impact
+ on performance because it requires SRCU to be used for synchronization.
+
+ * Static - callsites configuration and destination of debug logs
+ can be configured only from a kernel boot command line. This option
+ hides <debugfs>/dynamic_debug/control interface. It does not require
+ SRCU synchronization because of the static configuration. It also
+ provides convenient means to capture debug logs on production systems
+ for issues which are difficult to reproduce and at the same time
+ do not expose <debugfs>/dynamic_debug/control interface for security
+ reasons.
+
+config DYNAMIC_DEBUG_DST_DYNAMIC
+ bool "Dynamic"
+ select SRCU
+ help
+ Callsites configuration and destination of debug logs can be
+ dynamically changed during runtime. This has slight impact on
+ performance because it requires SRCU to be used for synchronization.
+
+ The interface which allows to dynamically change configuration is:
+ <debugfs>/dynamic_debug/control
+
+config DYNAMIC_DEBUG_DST_STATIC
+ bool "Static"
+ help
+ Callsites configuration and destination of debug logs can be
+ configured only from a kernel boot command line. This option
+ does not expose <debugfs>/dynamic_debug/control interface and
+ therefore does not require SRCU synchronization. It also provides
+ convenient means to capture debug logs on production systems
+ for issues which are difficult to reproduce and at the same time
+ do not expose <debugfs>/dynamic_debug/control interface
+ for security reasons.
+
+ For example to configure usbcore module to output its debug logs
+ to trace instance named "usb" the following line would have to be
+ appended to a kernel boot command line:
+
+ usbcore.dyndbg="dst trace:usb =p"
+
+endchoice
+endif
+
config SYMBOLIC_ERRNAME
bool "Support symbolic error names in printf"
default y if PRINTK
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 009f2ead09c1..0cb69b553554 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -44,6 +44,87 @@ extern struct _ddebug __stop___dyndbg[];
extern struct ddebug_class_map __start___dyndbg_classes[];
extern struct ddebug_class_map __stop___dyndbg_classes[];
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+#include <linux/trace.h>
+
+enum ddebug_dst_type {
+ DDEBUG_DST_SYSLOG, /* destination syslog */
+ DDEBUG_DST_TRACE, /* destination trace instance */
+ /* DDEBUG_DST_NOT_USED_1 */
+ /* DDEBUG_DST_NOT_USED_2 */
+ DDEBUG_DST_MAX,
+};
+
+/*
+ * The structure holds information related to debug logs output
+ * destination other than syslog. Currently only trace instance
+ * is supported.
+ */
+struct ddebug_dst {
+ unsigned int use_count;
+ enum ddebug_dst_type type;
+ struct ddebug_dst_ops *ops;
+ union {
+ struct {
+ const char *instance;
+ struct trace_array *arr;
+ } trace;
+ };
+};
+
+#ifndef CONFIG_DYNAMIC_DEBUG_DST_STATIC
+
+/*
+ * Instead of slicing code with #ifdefs create
+ * wrappers around SRCU functions we use
+ */
+DEFINE_STATIC_SRCU(dst_srcu);
+
+#define dd_srcu_dereference(p, ssp) srcu_dereference(p, ssp)
+#define dd_rcu_assign_pointer(p, v) rcu_assign_pointer(p, v)
+
+static inline int dd_srcu_read_lock(struct srcu_struct *ssp)
+{
+ return srcu_read_lock(ssp);
+}
+
+static inline void dd_srcu_read_unlock(struct srcu_struct *ssp, int idx)
+{
+ return srcu_read_unlock(ssp, idx);
+}
+
+static void dd_synchronize_srcu(struct srcu_struct *ssp)
+{
+ synchronize_srcu(ssp);
+}
+
+#else
+
+struct dd_srcu_struct {
+};
+
+static struct dd_srcu_struct dst_srcu;
+
+#define dd_srcu_dereference(p, ssp) (p)
+#define dd_rcu_assign_pointer(p, v) ((p) = (v))
+
+static inline int dd_srcu_read_lock(struct dd_srcu_struct *ssp)
+{
+ return 0;
+}
+
+static inline void dd_srcu_read_unlock(struct dd_srcu_struct *ssp, int idx)
+{
+}
+
+void dd_synchronize_srcu(struct dd_srcu_struct *ssp)
+{
+}
+
+#endif /* CONFIG_DYNAMIC_DEBUG_DST_STATIC */
+
+#endif /* CONFIG_DYNAMIC_DEBUG_DST */
+
struct ddebug_table {
struct list_head link, maps;
const char *mod_name;
@@ -58,6 +139,7 @@ struct ddebug_query {
const char *format;
const char *class_string;
unsigned int first_lineno, last_lineno;
+ struct ddebug_dst *dst;
};
struct ddebug_iter {
@@ -126,10 +208,402 @@ do { \
#define v3pr_info(fmt, ...) vnpr_info(3, fmt, ##__VA_ARGS__)
#define v4pr_info(fmt, ...) vnpr_info(4, fmt, ##__VA_ARGS__)
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+
+static inline enum ddebug_dst_type get_dst_type(const struct _ddebug *dp)
+{
+ return (dp->flags & _DPRINTK_FLAGS_DST_MASK) >>
+ _DPRINTK_FLAGS_DST_SHIFT;
+}
+
+static void set_dst_type(unsigned int *newflags,
+ enum ddebug_dst_type type)
+{
+ *newflags &= ~_DPRINTK_FLAGS_DST_MASK;
+ *newflags |= (type << _DPRINTK_FLAGS_DST_SHIFT);
+}
+
+#define DST_SYSLOG_STR "syslog"
+#define DST_TRACE_STR "trace"
+#define DST_INVALID_STR "invalid"
+#define DST_DST_STR "destination"
+
+struct dstbuf {
+#define DST_DESC_MAX_SIZE 32
+ char buf[DST_DESC_MAX_SIZE];
+};
+
+static char *describe_dst_type(const enum ddebug_dst_type type)
+{
+ switch (type) {
+ case DDEBUG_DST_SYSLOG:
+ return DST_SYSLOG_STR;
+ case DDEBUG_DST_TRACE:
+ return DST_TRACE_STR;
+ default:
+ return DST_INVALID_STR;
+ }
+}
+
+static char *ddebug_describe_dst(const struct ddebug_dst *dst,
+ struct dstbuf *dstb)
+{
+ if (!dst)
+ return DST_SYSLOG_STR;
+
+ switch (dst->type) {
+ case DDEBUG_DST_SYSLOG:
+ return DST_SYSLOG_STR;
+ case DDEBUG_DST_TRACE:
+ snprintf(dstb->buf, DST_DESC_MAX_SIZE, "%s:%s",
+ DST_TRACE_STR, dst->trace.instance);
+ break;
+ default:
+ return DST_INVALID_STR;
+ }
+
+ return dstb->buf;
+}
+
+static char *ddebug_describe_dst_dp(struct _ddebug *dp, struct dstbuf *dstb)
+{
+ if (!get_dst_type(dp))
+ return DST_SYSLOG_STR;
+
+ return ddebug_describe_dst(dp->dst, dstb);
+}
+
+/*
+ * struct ddebug_dst_ops - dynamic debug logs destination operations
+ *
+ * @parse_and_init - parses and initializes destination
+ * @has_changed - checks whether destination has changed for a callsite
+ * @print - prints message for a callsite
+ * @dev_print - prints device message for a callsite
+ * @free - frees memory associated with a destination
+ */
+struct ddebug_dst_ops {
+ int (*parse_and_init)(struct ddebug_query *query, const char *arg);
+ bool (*has_changed)(const struct ddebug_query *query,
+ const struct _ddebug *dp);
+ void (*print)(const struct ddebug_dst *dst, const char *fmt, ...);
+ void (*dev_print)(const struct ddebug_dst *dst,
+ const struct device *dev, const char *fmt, ...);
+ void (*free)(struct ddebug_dst *dst, int arg);
+};
+
+static int tr_parse_and_init(struct ddebug_query *query,
+ const char *arg)
+{
+ char *instance = strchr(arg, ':');
+ struct trace_array *arr;
+ int ret = -EINVAL;
+
+ if (instance)
+ instance++;
+ else
+ instance = "ddebug_default";
+
+ arr = trace_array_get_by_name(instance);
+ if (!arr)
+ goto err;
+
+ ret = trace_array_init_printk(arr);
+ if (ret)
+ goto err_trace;
+
+ query->dst->trace.instance = kstrdup(instance, GFP_KERNEL);
+ if (!query->dst->trace.instance)
+ goto err_trace;
+
+ query->dst->trace.arr = arr;
+
+ v3pr_info("parsed trace dst: instance name=%s, arrray=%p\n",
+ query->dst->trace.instance, arr);
+ return 0;
+err_trace:
+ trace_array_put(arr);
+ trace_array_destroy(arr);
+err:
+ return ret;
+}
+
+bool tr_has_changed(const struct ddebug_query *query, const struct _ddebug *dp)
+{
+ if (!dp->dst)
+ return true;
+ else if (dp->dst->type != DDEBUG_DST_TRACE)
+ return true;
+
+ return query->dst->trace.arr != dp->dst->trace.arr;
+}
+
+__printf(2, 3)
+static void tr_print(const struct ddebug_dst *dst, const char *fmt, ...)
+{
+ va_list args;
+ struct va_format vaf;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ trace_array_printk(dst->trace.arr, _THIS_IP_, "%pV", &vaf);
+
+ va_end(args);
+}
+
+__printf(3, 4)
+static void tr_dev_print(const struct ddebug_dst *dst,
+ const struct device *dev, const char *fmt, ...)
+{
+ va_list args;
+ struct va_format vaf;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ trace_array_printk(dst->trace.arr, _THIS_IP_, "%pV", &vaf);
+ va_end(args);
+}
+
+static void tr_free(struct ddebug_dst *dst, int tr_arr_destroy)
+{
+ WARN_ON(!dst);
+
+ v2pr_info("freed dst: type=trace, instance=%s\n", dst->trace.instance);
+
+ kfree(dst->trace.instance);
+ dst->trace.instance = NULL;
+
+ trace_array_put(dst->trace.arr);
+ /*
+ * destroy trace instance in case when query was not applied to any
+ * of the callsites, otherwise don't try to destroy it because it will
+ * be removed from sysfs together with captured debug logs, let a user
+ * to delete it manually later at convenient time
+ */
+ if (tr_arr_destroy)
+ trace_array_destroy(dst->trace.arr);
+ dst->trace.arr = NULL;
+}
+
+bool syslog_has_changed(const struct ddebug_query *query,
+ const struct _ddebug *dp)
+{
+ if (!dp->dst)
+ return false;
+
+ return dp->dst->type != DDEBUG_DST_SYSLOG;
+}
+
+static void syslog_free(struct ddebug_dst *dst, int arg)
+{
+ v2pr_info("freed dst: type=syslog\n");
+}
+
+static struct ddebug_dst_ops dst_ops[] = {
+ // syslog destination ops
+ {
+ .parse_and_init = NULL,
+ .has_changed = syslog_has_changed,
+ .print = NULL,
+ .dev_print = NULL,
+ .free = syslog_free,
+ },
+ // trace destination ops
+ {
+ .parse_and_init = tr_parse_and_init,
+ .has_changed = tr_has_changed,
+ .print = tr_print,
+ .dev_print = tr_dev_print,
+ .free = tr_free,
+ },
+};
+
+static int dst_parse_and_init(struct ddebug_query *query, const char *arg)
+{
+ int ret = -EINVAL;
+
+ if (!arg)
+ goto err;
+
+ if (query->dst)
+ goto err;
+
+ query->dst = kzalloc(sizeof(struct ddebug_dst), GFP_KERNEL);
+ if (!query->dst) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ if (!strcmp(arg, "syslog")) {
+ query->dst->type = DDEBUG_DST_SYSLOG;
+ query->dst->ops = &dst_ops[DDEBUG_DST_SYSLOG];
+ } else if (!strcmp(arg, "trace") ||
+ !strncmp(arg, "trace:", 6)) {
+ query->dst->type = DDEBUG_DST_TRACE;
+ query->dst->ops = &dst_ops[DDEBUG_DST_TRACE];
+ } else
+ goto err_mem;
+
+ if (query->dst->ops->parse_and_init) {
+ ret = query->dst->ops->parse_and_init(query, arg);
+ if (ret)
+ goto err_mem;
+ }
+
+ v3pr_info("parsed dst: type=%s\n", describe_dst_type(query->dst->type));
+ return 0;
+err_mem:
+ kfree(query->dst);
+err:
+ return ret;
+}
+
+static void dst_apply_change(const struct ddebug_query *query,
+ struct _ddebug *dp)
+{
+ /* store old destination pointer */
+ struct ddebug_dst *old_dst = dp->dst;
+ struct ddebug_dst *new_dst;
+
+ /* increase use count */
+ query->dst->use_count++;
+
+ /*
+ * syslog destination does not require additional description
+ * however we use it to simplify control path processing
+ */
+ new_dst = (query->dst->type == DDEBUG_DST_SYSLOG) ? NULL : query->dst;
+
+ /* update destination pointer */
+ dd_rcu_assign_pointer(dp->dst, new_dst);
+
+ if (!old_dst)
+ return;
+
+ old_dst->use_count--;
+ /* check if old destination pointer is still being used */
+ if (!old_dst->use_count) {
+ /*
+ * wait for all read rcu critical sections
+ * in progress to finish
+ */
+ dd_synchronize_srcu(&dst_srcu);
+ /* call destination specific free function */
+ old_dst->ops->free(old_dst, 0);
+ /* free old destination pointer */
+ kfree(old_dst);
+ }
+}
+
+static void dst_try_free(struct ddebug_query *query)
+{
+ if (!query)
+ return;
+
+ /* free destination if it wasn't applied to any callsite */
+ if (!query->dst->use_count) {
+ if (query->dst->ops->free)
+ query->dst->ops->free(query->dst, 1);
+ /*
+ * for syslog we always free its destination because
+ * it is used only to simplify control path processing
+ */
+ if (query->dst->type)
+ v2pr_info("freed dst: didn't apply to any of the callsites\n");
+ kfree(query->dst);
+ query->dst = NULL;
+ }
+}
+
+void __print_hex_dump_dst(const struct _ddebug *descriptor, const char *level,
+ const char *prefix_str, int prefix_type,
+ int rowsize, int groupsize, const void *buf,
+ size_t len, bool ascii)
+{
+ if (get_dst_type(descriptor)) {
+ const u8 *ptr = buf;
+ struct ddebug_dst *dst;
+ int i, linelen, remaining = len, idx;
+ unsigned char linebuf[32 * 3 + 2 + 32 + 1];
+
+ if (rowsize != 16 && rowsize != 32)
+ rowsize = 16;
+
+ idx = dd_srcu_read_lock(&dst_srcu);
+ dst = dd_srcu_dereference(descriptor->dst, &dst_srcu);
+ WARN_ON(!dst);
+ for (i = 0; i < len; i += rowsize) {
+ linelen = min(remaining, rowsize);
+ remaining -= rowsize;
+
+ hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
+ linebuf, sizeof(linebuf), ascii);
+
+ switch (prefix_type) {
+ case DUMP_PREFIX_ADDRESS:
+ dst->ops->print(descriptor->dst, "%s%s%p: %s\n",
+ level, prefix_str, ptr + i, linebuf);
+ break;
+ case DUMP_PREFIX_OFFSET:
+ dst->ops->print(descriptor->dst, "%s%s%.8x: %s\n",
+ level, prefix_str, i, linebuf);
+ break;
+ default:
+ dst->ops->print(descriptor->dst, "%s%s%s\n",
+ level, prefix_str, linebuf);
+ break;
+ }
+ }
+
+ dd_srcu_read_unlock(&dst_srcu, idx);
+ } else
+ print_hex_dump(level, prefix_str, prefix_type, rowsize,
+ groupsize, buf, len, ascii);
+}
+EXPORT_SYMBOL(__print_hex_dump_dst);
+
+#else
+
+struct ddebug_dst {
+};
+
+struct dstbuf {
+};
+
+static inline unsigned int get_dst_type(const struct _ddebug *dp)
+{
+ return 0;
+}
+
+static char *ddebug_describe_dst(const struct ddebug_dst *dst,
+ struct dstbuf *dstb)
+{
+ return "";
+}
+
+static char *ddebug_describe_dst_dp(struct _ddebug *dp,
+ struct dstbuf *dstb)
+{
+ return "";
+}
+
+static void dst_try_free(struct ddebug_query *query)
+{
+}
+
+#define DST_DST_STR ""
+
+#endif /* CONFIG_DYNAMIC_DEBUG_DST */
+
static void vpr_info_dq(const struct ddebug_query *query, const char *msg)
{
/* trim any trailing newlines */
int fmtlen = 0;
+ struct dstbuf dstb;
if (query->format) {
fmtlen = strlen(query->format);
@@ -137,13 +611,14 @@ static void vpr_info_dq(const struct ddebug_query *query, const char *msg)
fmtlen--;
}
- v3pr_info("%s: func=\"%s\" file=\"%s\" module=\"%s\" format=\"%.*s\" lineno=%u-%u class=%s\n",
+ v3pr_info("%s: func=\"%s\" file=\"%s\" module=\"%s\" format=\"%.*s\" lineno=%u-%u class=%s %s=%s\n",
msg,
query->function ?: "",
query->filename ?: "",
query->module ?: "",
fmtlen, query->format ?: "",
- query->first_lineno, query->last_lineno, query->class_string);
+ query->first_lineno, query->last_lineno, query->class_string,
+ DST_DST_STR, ddebug_describe_dst(query->dst, &dstb));
}
static struct ddebug_class_map *ddebug_find_valid_class(struct ddebug_table const *dt,
@@ -178,8 +653,12 @@ static int ddebug_change(const struct ddebug_query *query,
unsigned int newflags;
unsigned int nfound = 0;
struct flagsbuf fbuf, nbuf;
+ struct dstbuf dstbc, dstbn;
struct ddebug_class_map *map = NULL;
int __outvar valid_class;
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+ struct ddebug_dst_ops *dst_ops = query->dst->ops;
+#endif
/* search for matching ddebugs */
mutex_lock(&ddebug_lock);
@@ -243,8 +722,19 @@ static int ddebug_change(const struct ddebug_query *query,
nfound++;
newflags = (dp->flags & modifiers->mask) | modifiers->flags;
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+ /* set destination type in newflags */
+ set_dst_type(&newflags, query->dst->type);
+ if (newflags == dp->flags &&
+ (!dst_ops->has_changed ||
+ !dst_ops->has_changed(query, dp)))
+ /* nothing changed */
+ continue;
+#else
if (newflags == dp->flags)
continue;
+#endif
+
#ifdef CONFIG_JUMP_LABEL
if (dp->flags & _DPRINTK_FLAGS_PRINT) {
if (!(newflags & _DPRINTK_FLAGS_PRINT))
@@ -253,14 +743,24 @@ static int ddebug_change(const struct ddebug_query *query,
static_branch_enable(&dp->key.dd_key_true);
}
#endif
- v4pr_info("changed %s:%d [%s]%s %s => %s\n",
+ v4pr_info("changed %s:%d [%s]%s %s %s => %s %s\n",
trim_prefix(dp->filename), dp->lineno,
dt->mod_name, dp->function,
ddebug_describe_flags(dp->flags, &fbuf),
- ddebug_describe_flags(newflags, &nbuf));
+ ddebug_describe_dst_dp(dp, &dstbc),
+ ddebug_describe_flags(newflags, &nbuf),
+ ddebug_describe_dst(query->dst, &dstbn));
dp->flags = newflags;
+
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+ if (dst_ops->has_changed &&
+ dst_ops->has_changed(query, dp))
+ /* destination changed, apply it */
+ dst_apply_change(query, dp);
+#endif
}
}
+
mutex_unlock(&ddebug_lock);
if (!nfound && verbose)
@@ -456,6 +956,12 @@ static int ddebug_parse_query(char *words[], int nwords,
return -EINVAL;
} else if (!strcmp(keyword, "class")) {
rc = check_set(&query->class_string, arg, "class");
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+ } else if (!strcmp(keyword, "dst")) {
+ rc = dst_parse_and_init(query, arg);
+ if (rc)
+ return rc;
+#endif
} else {
pr_err("unknown keyword \"%s\"\n", keyword);
return -EINVAL;
@@ -463,6 +969,15 @@ static int ddebug_parse_query(char *words[], int nwords,
if (rc)
return rc;
}
+
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+ /* set destination to syslog by default if it was not explicitly set */
+ if (!query->dst) {
+ rc = dst_parse_and_init(query, "syslog");
+ if (rc)
+ return rc;
+ }
+#endif
if (!query->module && modname)
/*
* support $modname.dyndbg=<multiple queries>, when
@@ -549,11 +1064,14 @@ static int ddebug_exec_query(char *query_string, const char *modname)
}
if (ddebug_parse_query(words, nwords-1, &query, modname)) {
pr_err("query parse failed\n");
+ dst_try_free(&query);
return -EINVAL;
}
+
/* actually go and implement the change */
nfound = ddebug_change(&query, &modifiers);
vpr_info_dq(&query, nfound ? "applied" : "no-match");
+ dst_try_free(&query);
return nfound;
}
@@ -856,6 +1374,10 @@ static inline char *dynamic_emit_prefix(struct _ddebug *desc, char *buf)
void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...)
{
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+ struct ddebug_dst *dst;
+ int idx;
+#endif
va_list args;
struct va_format vaf;
char buf[PREFIX_SIZE] = "";
@@ -868,6 +1390,21 @@ void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...)
vaf.fmt = fmt;
vaf.va = &args;
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+ if (!get_dst_type(descriptor))
+ goto syslog;
+
+ idx = dd_srcu_read_lock(&dst_srcu);
+ dst = dd_srcu_dereference(descriptor->dst, &dst_srcu);
+ WARN_ON(!dst);
+ dst->ops->print(dst, "%s%pV", dynamic_emit_prefix(descriptor, buf),
+ &vaf);
+
+ dd_srcu_read_unlock(&dst_srcu, idx);
+ va_end(args);
+ return;
+syslog:
+#endif
printk(KERN_DEBUG "%s%pV", dynamic_emit_prefix(descriptor, buf), &vaf);
va_end(args);
@@ -877,6 +1414,10 @@ EXPORT_SYMBOL(__dynamic_pr_debug);
void __dynamic_dev_dbg(struct _ddebug *descriptor,
const struct device *dev, const char *fmt, ...)
{
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+ struct ddebug_dst *dst;
+ int idx;
+#endif
struct va_format vaf;
va_list args;
@@ -888,6 +1429,29 @@ void __dynamic_dev_dbg(struct _ddebug *descriptor,
vaf.fmt = fmt;
vaf.va = &args;
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+ if (!get_dst_type(descriptor))
+ goto syslog;
+
+ idx = dd_srcu_read_lock(&dst_srcu);
+ dst = dd_srcu_dereference(descriptor->dst, &dst_srcu);
+ WARN_ON(!dst);
+ if (!dev) {
+ dst->ops->print(dst, "(NULL device *): %pV", &vaf);
+ } else {
+ char buf[PREFIX_SIZE] = "";
+
+ dst->ops->dev_print(dst, dev, "%s%s %s: %pV",
+ dynamic_emit_prefix(descriptor, buf),
+ dev_driver_string(dev), dev_name(dev),
+ &vaf);
+ }
+
+ dd_srcu_read_unlock(&dst_srcu, idx);
+ va_end(args);
+ return;
+syslog:
+#endif
if (!dev) {
printk(KERN_DEBUG "(NULL device *): %pV", &vaf);
} else {
@@ -908,6 +1472,10 @@ EXPORT_SYMBOL(__dynamic_dev_dbg);
void __dynamic_netdev_dbg(struct _ddebug *descriptor,
const struct net_device *dev, const char *fmt, ...)
{
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+ struct ddebug_dst *dst;
+ int idx;
+#endif
struct va_format vaf;
va_list args;
@@ -919,6 +1487,35 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor,
vaf.fmt = fmt;
vaf.va = &args;
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+ if (!get_dst_type(descriptor))
+ goto syslog;
+
+ idx = dd_srcu_read_lock(&dst_srcu);
+ dst = dd_srcu_dereference(descriptor->dst, &dst_srcu);
+ WARN_ON(!dst);
+ if (dev && dev->dev.parent) {
+ char buf[PREFIX_SIZE] = "";
+
+ dst->ops->dev_print(dst, dev->dev.parent,
+ "%s%s %s %s%s: %pV",
+ dynamic_emit_prefix(descriptor, buf),
+ dev_driver_string(dev->dev.parent),
+ dev_name(dev->dev.parent),
+ netdev_name(dev), netdev_reg_state(dev),
+ &vaf);
+ } else if (dev) {
+ dst->ops->print(dst, "%s%s: %pV", netdev_name(dev),
+ netdev_reg_state(dev), &vaf);
+ } else {
+ dst->ops->print(dst, "(NULL net_device *): %pV", &vaf);
+ }
+
+ dd_srcu_read_unlock(&dst_srcu, idx);
+ va_end(args);
+ return;
+syslog:
+#endif
if (dev && dev->dev.parent) {
char buf[PREFIX_SIZE] = "";
@@ -947,6 +1544,10 @@ EXPORT_SYMBOL(__dynamic_netdev_dbg);
void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
const struct ib_device *ibdev, const char *fmt, ...)
{
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+ struct ddebug_dst *dst;
+ int idx;
+#endif
struct va_format vaf;
va_list args;
@@ -955,6 +1556,34 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
vaf.fmt = fmt;
vaf.va = &args;
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+ if (!get_dst_type(descriptor))
+ goto syslog;
+
+ idx = dd_srcu_read_lock(&dst_srcu);
+ dst = dd_srcu_dereference(descriptor->dst, &dst_srcu);
+ WARN_ON(!dst);
+ if (ibdev && ibdev->dev.parent) {
+ char buf[PREFIX_SIZE] = "";
+
+ dst->ops->dev_print(dst, ibdev->dev.parent,
+ "%s%s %s %s: %pV",
+ dynamic_emit_prefix(descriptor, buf),
+ dev_driver_string(ibdev->dev.parent),
+ dev_name(ibdev->dev.parent),
+ dev_name(&ibdev->dev),
+ &vaf);
+ } else if (ibdev) {
+ dst->ops->print(dst, "%s: %pV", dev_name(&ibdev->dev), &vaf);
+ } else {
+ dst->ops->print(dst, "(NULL ib_device): %pV", &vaf);
+ }
+
+ dd_srcu_read_unlock(&dst_srcu, idx);
+ va_end(args);
+ return;
+syslog:
+#endif
if (ibdev && ibdev->dev.parent) {
char buf[PREFIX_SIZE] = "";
@@ -1127,11 +1756,16 @@ static int ddebug_proc_show(struct seq_file *m, void *p)
struct ddebug_iter *iter = m->private;
struct _ddebug *dp = p;
struct flagsbuf flags;
+ struct dstbuf dstb;
char const *class;
if (p == SEQ_START_TOKEN) {
- seq_puts(m,
- "# filename:lineno [module]function flags format\n");
+ seq_puts(m, "# filename:lineno [module]function flags format");
+#if defined CONFIG_DYNAMIC_DEBUG_DST
+ seq_puts(m, " destination\n");
+#else
+ seq_puts(m, "\n");
+#endif
return 0;
}
@@ -1149,6 +1783,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p)
else
seq_printf(m, " class unknown, _id:%d", dp->class_id);
}
+ seq_printf(m, " %s", ddebug_describe_dst_dp(dp, &dstb));
seq_puts(m, "\n");
return 0;
@@ -1351,6 +1986,9 @@ static void ddebug_remove_all_tables(void)
static __initdata int ddebug_init_success;
+
+#ifndef CONFIG_DYNAMIC_DEBUG_DST_STATIC
+
static int __init dynamic_debug_init_control(void)
{
struct proc_dir_entry *procfs_dir;
@@ -1374,6 +2012,15 @@ static int __init dynamic_debug_init_control(void)
return 0;
}
+#else
+
+static int __init dynamic_debug_init_control(void)
+{
+ return 0;
+}
+
+#endif
+
static int __init dynamic_debug_init(void)
{
struct _ddebug *iter, *iter_mod_start;
--
2.42.0.459.ge4e396fd5e-goog
On Fri, Sep 15, 2023 at 9:49 AM Łukasz Bartosik <[email protected]> wrote:
>
> Add support for selection of dynamic debug logs destination.
> When enabled it allows to configure destination of each callsite
> individually. The option adds a framework (described by struct
> ddebug_dst_ops) which provides flexible means to add a new destination
> for debug logs. On top of the framework support for trace instance as
> destination is added. By default destination of debug logs is syslog.
> Syslog can be set explicitly by using dst keyword or is selected by
> default when dst keyword is omitted, for example:
A while back,
Cc: Vincent Whitchurch <[email protected]>
proposed these patches:
https://lore.kernel.org/lkml/[email protected]/
his approach adds a single flag, +x or (my pref) +T
which sends the pr_debug to tracefs, independent of +p.
Steve Rostedt offered feedback on one of the versions,
and one could read that as provisional acceptance.
Ive worked his patchset since, it is here:
https://github.com/jimc/linux/tree/dd-trc-11
(it has some unrelated bits too)
but it sits atop another patchset:
https://lore.kernel.org/lkml/[email protected]/
or for git remote add
https://github.com/jimc/linux/tree/dd-fix-5i
syslog and/or tracefs seems sufficient, do you have a 3rd destination ?
thanks
pt., 15 wrz 2023 o 20:02 <[email protected]> napisał(a):
>
> On Fri, Sep 15, 2023 at 9:49 AM Łukasz Bartosik <[email protected]> wrote:
> >
> > Add support for selection of dynamic debug logs destination.
> > When enabled it allows to configure destination of each callsite
> > individually. The option adds a framework (described by struct
> > ddebug_dst_ops) which provides flexible means to add a new destination
> > for debug logs. On top of the framework support for trace instance as
> > destination is added. By default destination of debug logs is syslog.
> > Syslog can be set explicitly by using dst keyword or is selected by
> > default when dst keyword is omitted, for example:
>
> A while back,
> Cc: Vincent Whitchurch <[email protected]>
> proposed these patches:
> https://lore.kernel.org/lkml/[email protected]/
>
> his approach adds a single flag, +x or (my pref) +T
> which sends the pr_debug to tracefs, independent of +p.
> Steve Rostedt offered feedback on one of the versions,
> and one could read that as provisional acceptance.
>
> Ive worked his patchset since, it is here:
> https://github.com/jimc/linux/tree/dd-trc-11
> (it has some unrelated bits too)
>
> but it sits atop another patchset:
> https://lore.kernel.org/lkml/[email protected]/
> or for git remote add
> https://github.com/jimc/linux/tree/dd-fix-5i
>
I looked through the patches you pointed me to. The solution with
+T/+x is elegant and tailored to specific needs.
Our rationale to write pr_debug/dev_dbg logs to tracefs is similar to
Vincent's to aid us in debugging.
Unfortunately the solution with +T/+x does not cover our use case.
Our use case is different. We experience issues reported by Chromebook
users which are difficult to reproduce
that's why we would like to enable debug logs to be written to tracefs
on productions systems.
When users experiences an issues and sends us a feedback report the
debug logs written to tracefs would be
attached to the feedback report. We would like to write debug logs to
a separate trace instances based on
the subsystem/driver. Our initial areas of interest/issues are usbcore
and thunderbolt drivers/subsystems.
More may come in the future.
With my proposal this could be achieved for the thunderbolt subsystem
with the following steps:
* enable kernel configuration option CONFIG_DYNAMIC_DEBUG_CORE,
* enable kernel configuration option DYNAMIC_DEBUG_DST and set its
operation mode to Static.
* add the option to the thunderbolt's drivers/thunderbolt/Makefile
in order to compile in debug logs:
CFLAGS_nhi.o := -DDYNAMIC_DEBUG_MODULE
* compile the kernel and update it on a target device,
* finally append the entry to the kernel boot command line:
thunderbolt.dyndbg="dst trace:thunderbolt =p"
Both the solutions (I mean +x/+T and mine proposal) have pros and cons.
Maybe both could coexist together, +x/+T by default and this patch as
configurable option
which adds more flexibility ?
>
> syslog and/or tracefs seems sufficient, do you have a 3rd destination ?
>
I don't have a use case for the third destination, but for example
adding socket destination to my proposal would be relatively fast and
easy (that would be for limited use only after a network is up and
running).
Thanks,
Lukasz
> thanks
pon., 18 wrz 2023 o 09:20 Łukasz Bartosik <[email protected]> napisał(a):
>
> pt., 15 wrz 2023 o 20:02 <[email protected]> napisał(a):
> >
> > On Fri, Sep 15, 2023 at 9:49 AM Łukasz Bartosik <[email protected]> wrote:
> > >
> > > Add support for selection of dynamic debug logs destination.
> > > When enabled it allows to configure destination of each callsite
> > > individually. The option adds a framework (described by struct
> > > ddebug_dst_ops) which provides flexible means to add a new destination
> > > for debug logs. On top of the framework support for trace instance as
> > > destination is added. By default destination of debug logs is syslog.
> > > Syslog can be set explicitly by using dst keyword or is selected by
> > > default when dst keyword is omitted, for example:
> >
> > A while back,
> > Cc: Vincent Whitchurch <[email protected]>
> > proposed these patches:
> > https://lore.kernel.org/lkml/[email protected]/
> >
> > his approach adds a single flag, +x or (my pref) +T
> > which sends the pr_debug to tracefs, independent of +p.
> > Steve Rostedt offered feedback on one of the versions,
> > and one could read that as provisional acceptance.
> >
> > Ive worked his patchset since, it is here:
> > https://github.com/jimc/linux/tree/dd-trc-11
> > (it has some unrelated bits too)
> >
> > but it sits atop another patchset:
> > https://lore.kernel.org/lkml/[email protected]/
> > or for git remote add
> > https://github.com/jimc/linux/tree/dd-fix-5i
> >
>
> I looked through the patches you pointed me to. The solution with
> +T/+x is elegant and tailored to specific needs.
> Our rationale to write pr_debug/dev_dbg logs to tracefs is similar to
> Vincent's to aid us in debugging.
> Unfortunately the solution with +T/+x does not cover our use case.
>
> Our use case is different. We experience issues reported by Chromebook
> users which are difficult to reproduce
> that's why we would like to enable debug logs to be written to tracefs
> on productions systems.
> When users experiences an issues and sends us a feedback report the
> debug logs written to tracefs would be
> attached to the feedback report. We would like to write debug logs to
> a separate trace instances based on
> the subsystem/driver. Our initial areas of interest/issues are usbcore
> and thunderbolt drivers/subsystems.
> More may come in the future.
>
> With my proposal this could be achieved for the thunderbolt subsystem
> with the following steps:
> * enable kernel configuration option CONFIG_DYNAMIC_DEBUG_CORE,
> * enable kernel configuration option DYNAMIC_DEBUG_DST and set its
> operation mode to Static.
> * add the option to the thunderbolt's drivers/thunderbolt/Makefile
> in order to compile in debug logs:
> CFLAGS_nhi.o := -DDYNAMIC_DEBUG_MODULE
> * compile the kernel and update it on a target device,
> * finally append the entry to the kernel boot command line:
> thunderbolt.dyndbg="dst trace:thunderbolt =p"
>
> Both the solutions (I mean +x/+T and mine proposal) have pros and cons.
> Maybe both could coexist together, +x/+T by default and this patch as
> configurable option
> which adds more flexibility ?
>
Jim,
Do you have thoughts/comments about/to my proposal ?
Thanks,
Lukasz
> >
> > syslog and/or tracefs seems sufficient, do you have a 3rd destination ?
> >
>
> I don't have a use case for the third destination, but for example
> adding socket destination to my proposal would be relatively fast and
> easy (that would be for limited use only after a network is up and
> running).
>
> Thanks,
> Lukasz
>
> > thanks
On Mon, Sep 18, 2023 at 1:20 AM Łukasz Bartosik <[email protected]> wrote:
>
> pt., 15 wrz 2023 o 20:02 <[email protected]> napisał(a):
> >
> > On Fri, Sep 15, 2023 at 9:49 AM Łukasz Bartosik <[email protected]> wrote:
> > >
hi Lukasz,
sorry my kernel-time has been in my own trees.
What I dont understand is why +T is insufficient.
IIUC, tracefs is intended for production use.
thats why each event can be enabled / disabled
- to select and minimize whats traced, and not impact the system
and +T can forward all pr_debugs to trace,
(by 1-few trace events defined similarly to others)
or very few, giving yet another selection mechanism
to choose or eliminate specific pr-debugs and reduce traffic to
interesting stuff.
Once your debug is in the trace-buf,
shouldnt user-space be deciding what to do with it ?
a smart daemon could leverage tracefs to good effect.
IMO the main value of +T is that it allows feeding existing pr_debugs
into the place where other trace-data is already integrated and managed.
At this point, I dont see any extra destination handling as prudent.
Jim
> > > Add support for selection of dynamic debug logs destination.
> > > When enabled it allows to configure destination of each callsite
> > > individually. The option adds a framework (described by struct
> > > ddebug_dst_ops) which provides flexible means to add a new destination
> > > for debug logs. On top of the framework support for trace instance as
> > > destination is added. By default destination of debug logs is syslog.
> > > Syslog can be set explicitly by using dst keyword or is selected by
> > > default when dst keyword is omitted, for example:
> >
> > A while back,
> > Cc: Vincent Whitchurch <[email protected]>
> > proposed these patches:
> > https://lore.kernel.org/lkml/[email protected]/
> >
> > his approach adds a single flag, +x or (my pref) +T
> > which sends the pr_debug to tracefs, independent of +p.
> > Steve Rostedt offered feedback on one of the versions,
> > and one could read that as provisional acceptance.
> >
> > Ive worked his patchset since, it is here:
> > https://github.com/jimc/linux/tree/dd-trc-11
> > (it has some unrelated bits too)
> >
> > but it sits atop another patchset:
> > https://lore.kernel.org/lkml/[email protected]/
> > or for git remote add
> > https://github.com/jimc/linux/tree/dd-fix-5i
> >
>
> I looked through the patches you pointed me to. The solution with
> +T/+x is elegant and tailored to specific needs.
> Our rationale to write pr_debug/dev_dbg logs to tracefs is similar to
> Vincent's to aid us in debugging.
> Unfortunately the solution with +T/+x does not cover our use case.
>
> Our use case is different. We experience issues reported by Chromebook
> users which are difficult to reproduce
> that's why we would like to enable debug logs to be written to tracefs
> on productions systems.
> When users experiences an issues and sends us a feedback report the
> debug logs written to tracefs would be
> attached to the feedback report. We would like to write debug logs to
> a separate trace instances based on
> the subsystem/driver. Our initial areas of interest/issues are usbcore
> and thunderbolt drivers/subsystems.
> More may come in the future.
>
> With my proposal this could be achieved for the thunderbolt subsystem
> with the following steps:
> * enable kernel configuration option CONFIG_DYNAMIC_DEBUG_CORE,
> * enable kernel configuration option DYNAMIC_DEBUG_DST and set its
> operation mode to Static.
> * add the option to the thunderbolt's drivers/thunderbolt/Makefile
> in order to compile in debug logs:
> CFLAGS_nhi.o := -DDYNAMIC_DEBUG_MODULE
> * compile the kernel and update it on a target device,
> * finally append the entry to the kernel boot command line:
> thunderbolt.dyndbg="dst trace:thunderbolt =p"
>
> Both the solutions (I mean +x/+T and mine proposal) have pros and cons.
> Maybe both could coexist together, +x/+T by default and this patch as
> configurable option
> which adds more flexibility ?
>
> >
> > syslog and/or tracefs seems sufficient, do you have a 3rd destination ?
> >
>
> I don't have a use case for the third destination, but for example
> adding socket destination to my proposal would be relatively fast and
> easy (that would be for limited use only after a network is up and
> running).
>
> Thanks,
> Lukasz
>
> > thanks
On Mon, 2 Oct 2023 14:49:20 -0600
[email protected] wrote:
> hi Lukasz,
>
> sorry my kernel-time has been in my own trees.
>
> What I dont understand is why +T is insufficient.
>
> IIUC, tracefs is intended for production use.
> thats why each event can be enabled / disabled
> - to select and minimize whats traced, and not impact the system
>
> and +T can forward all pr_debugs to trace,
> (by 1-few trace events defined similarly to others)
> or very few, giving yet another selection mechanism
> to choose or eliminate specific pr-debugs and reduce traffic to
> interesting stuff.
>
> Once your debug is in the trace-buf,
> shouldnt user-space be deciding what to do with it ?
> a smart daemon could leverage tracefs to good effect.
>
> IMO the main value of +T is that it allows feeding existing pr_debugs
> into the place where other trace-data is already integrated and managed.
>
> At this point, I dont see any extra destination handling as prudent.
>
I'm fine with either approach. I kind of like the creation of the instance,
as that allows the user to keep this debug separate from other tracing
going on. We are starting to have multiple applications using the tracing
buffer (although most are using instances, which is why I'm trying to make
them lighter weight with the eventfs code).
-- Steve
On Tue, Oct 3, 2023 at 1:57 PM Steven Rostedt <[email protected]> wrote:
>
> On Mon, 2 Oct 2023 14:49:20 -0600
> [email protected] wrote:
>
> > hi Lukasz,
> >
> > sorry my kernel-time has been in my own trees.
> >
> > What I dont understand is why +T is insufficient.
> >
> > IIUC, tracefs is intended for production use.
> > thats why each event can be enabled / disabled
> > - to select and minimize whats traced, and not impact the system
> >
> > and +T can forward all pr_debugs to trace,
> > (by 1-few trace events defined similarly to others)
> > or very few, giving yet another selection mechanism
> > to choose or eliminate specific pr-debugs and reduce traffic to
> > interesting stuff.
> >
> > Once your debug is in the trace-buf,
> > shouldnt user-space be deciding what to do with it ?
> > a smart daemon could leverage tracefs to good effect.
> >
> > IMO the main value of +T is that it allows feeding existing pr_debugs
> > into the place where other trace-data is already integrated and managed.
> >
> > At this point, I dont see any extra destination handling as prudent.
> >
>
>
> I'm fine with either approach. I kind of like the creation of the instance,
> as that allows the user to keep this debug separate from other tracing
> going on. We are starting to have multiple applications using the tracing
> buffer (although most are using instances, which is why I'm trying to make
> them lighter weight with the eventfs code).
>
> -- Steve
>
Ok Im starting to grasp that multiple instances are good
(and wondering how I didnt notice)
What doesnt thrill me is the new _ddebug field, it enlarges the footprint.
can you make it go away ?
I have some thoughts ..
wt., 3 paź 2023 o 22:54 <[email protected]> napisał(a):
>
> On Tue, Oct 3, 2023 at 1:57 PM Steven Rostedt <[email protected]> wrote:
> >
> > On Mon, 2 Oct 2023 14:49:20 -0600
> > [email protected] wrote:
> >
> > > hi Lukasz,
> > >
> > > sorry my kernel-time has been in my own trees.
> > >
> > > What I dont understand is why +T is insufficient.
> > >
We would like to be able to separate debug logs from different
subsystem (e.g. thunderbolt and usbcore).
With +T it is not possible because all debug logs will land in the same bucket.
> > > IIUC, tracefs is intended for production use.
> > > thats why each event can be enabled / disabled
> > > - to select and minimize whats traced, and not impact the system
> > >
> > > and +T can forward all pr_debugs to trace,
> > > (by 1-few trace events defined similarly to others)
> > > or very few, giving yet another selection mechanism
> > > to choose or eliminate specific pr-debugs and reduce traffic to
> > > interesting stuff.
> > >
> > > Once your debug is in the trace-buf,
> > > shouldnt user-space be deciding what to do with it ?
> > > a smart daemon could leverage tracefs to good effect.
> > >
Yes, a daemon could separate the debug logs but IMHO it is much
easier to separate logs by sending them directly from a given subsystem
to a separate trace instance. My proposal allows to configure different
trace instance as destination for each callsite.
> > > IMO the main value of +T is that it allows feeding existing pr_debugs
> > > into the place where other trace-data is already integrated and managed.
> > >
> > > At this point, I dont see any extra destination handling as prudent.
> > >
> >
> >
> > I'm fine with either approach. I kind of like the creation of the instance,
> > as that allows the user to keep this debug separate from other tracing
> > going on. We are starting to have multiple applications using the tracing
> > buffer (although most are using instances, which is why I'm trying to make
> > them lighter weight with the eventfs code).
> >
> > -- Steve
> >
Steve, thanks for commenting from the trace perspective.
>
>
> Ok Im starting to grasp that multiple instances are good
> (and wondering how I didnt notice)
>
> What doesnt thrill me is the new _ddebug field, it enlarges the footprint.
>
Yes it increases _ddebug structure by a pointer size.
> can you make it go away ?
I implemented my proposal with flexibility in mind so that if someone
would like to add
another destination in the future it should be easy to do. I
understand that adding a pointer
to the _ddebug structure increases footprint size that's why I also
added CONFIG_DYNAMIC_DEBUG_DST
kernel configuration option in order to enable/disable this functionality.
> I have some thoughts ..
Please share your thoughts. I'm sure we can come to an agreement how
to incorporate both +T and my proposal.
Thanks,
Lukasz
Jim,
Just one more thought. If you review my patch then we could move into
discussing the details.
Thanks,
Lukasz
śr., 4 paź 2023 o 12:55 Łukasz Bartosik <[email protected]> napisał(a):
>
> wt., 3 paź 2023 o 22:54 <[email protected]> napisał(a):
> >
> > On Tue, Oct 3, 2023 at 1:57 PM Steven Rostedt <[email protected]> wrote:
> > >
> > > On Mon, 2 Oct 2023 14:49:20 -0600
> > > [email protected] wrote:
> > >
> > > > hi Lukasz,
> > > >
> > > > sorry my kernel-time has been in my own trees.
> > > >
> > > > What I dont understand is why +T is insufficient.
> > > >
>
> We would like to be able to separate debug logs from different
> subsystem (e.g. thunderbolt and usbcore).
> With +T it is not possible because all debug logs will land in the same bucket.
>
> > > > IIUC, tracefs is intended for production use.
> > > > thats why each event can be enabled / disabled
> > > > - to select and minimize whats traced, and not impact the system
> > > >
> > > > and +T can forward all pr_debugs to trace,
> > > > (by 1-few trace events defined similarly to others)
> > > > or very few, giving yet another selection mechanism
> > > > to choose or eliminate specific pr-debugs and reduce traffic to
> > > > interesting stuff.
> > > >
> > > > Once your debug is in the trace-buf,
> > > > shouldnt user-space be deciding what to do with it ?
> > > > a smart daemon could leverage tracefs to good effect.
> > > >
>
> Yes, a daemon could separate the debug logs but IMHO it is much
> easier to separate logs by sending them directly from a given subsystem
> to a separate trace instance. My proposal allows to configure different
> trace instance as destination for each callsite.
>
> > > > IMO the main value of +T is that it allows feeding existing pr_debugs
> > > > into the place where other trace-data is already integrated and managed.
> > > >
> > > > At this point, I dont see any extra destination handling as prudent.
> > > >
> > >
> > >
> > > I'm fine with either approach. I kind of like the creation of the instance,
> > > as that allows the user to keep this debug separate from other tracing
> > > going on. We are starting to have multiple applications using the tracing
> > > buffer (although most are using instances, which is why I'm trying to make
> > > them lighter weight with the eventfs code).
> > >
> > > -- Steve
> > >
>
> Steve, thanks for commenting from the trace perspective.
>
> >
> >
> > Ok Im starting to grasp that multiple instances are good
> > (and wondering how I didnt notice)
> >
> > What doesnt thrill me is the new _ddebug field, it enlarges the footprint.
> >
>
> Yes it increases _ddebug structure by a pointer size.
>
> > can you make it go away ?
>
> I implemented my proposal with flexibility in mind so that if someone
> would like to add
> another destination in the future it should be easy to do. I
> understand that adding a pointer
> to the _ddebug structure increases footprint size that's why I also
> added CONFIG_DYNAMIC_DEBUG_DST
> kernel configuration option in order to enable/disable this functionality.
>
> > I have some thoughts ..
>
> Please share your thoughts. I'm sure we can come to an agreement how
> to incorporate both +T and my proposal.
>
> Thanks,
> Lukasz
On Wed, Oct 4, 2023 at 4:55 AM Łukasz Bartosik <[email protected]> wrote:
>
> wt., 3 paź 2023 o 22:54 <[email protected]> napisał(a):
> >
> > On Tue, Oct 3, 2023 at 1:57 PM Steven Rostedt <[email protected]> wrote:
> > >
> > > On Mon, 2 Oct 2023 14:49:20 -0600
> > > [email protected] wrote:
> > >
> > > > hi Lukasz,
> > > >
> > > > sorry my kernel-time has been in my own trees.
> > > >
> > > > What I dont understand is why +T is insufficient.
> > > >
>
> We would like to be able to separate debug logs from different
> subsystem (e.g. thunderbolt and usbcore).
> With +T it is not possible because all debug logs will land in the same bucket.
>
> > > > IIUC, tracefs is intended for production use.
> > > > thats why each event can be enabled / disabled
> > > > - to select and minimize whats traced, and not impact the system
> > > >
> > > > and +T can forward all pr_debugs to trace,
> > > > (by 1-few trace events defined similarly to others)
> > > > or very few, giving yet another selection mechanism
> > > > to choose or eliminate specific pr-debugs and reduce traffic to
> > > > interesting stuff.
> > > >
> > > > Once your debug is in the trace-buf,
> > > > shouldnt user-space be deciding what to do with it ?
> > > > a smart daemon could leverage tracefs to good effect.
> > > >
>
> Yes, a daemon could separate the debug logs but IMHO it is much
> easier to separate logs by sending them directly from a given subsystem
> to a separate trace instance. My proposal allows to configure different
> trace instance as destination for each callsite.
>
> > > > IMO the main value of +T is that it allows feeding existing pr_debugs
> > > > into the place where other trace-data is already integrated and managed.
> > > >
> > > > At this point, I dont see any extra destination handling as prudent.
> > > >
> > >
> > >
> > > I'm fine with either approach. I kind of like the creation of the instance,
> > > as that allows the user to keep this debug separate from other tracing
> > > going on. We are starting to have multiple applications using the tracing
> > > buffer (although most are using instances, which is why I'm trying to make
> > > them lighter weight with the eventfs code).
> > >
> > > -- Steve
> > >
>
> Steve, thanks for commenting from the trace perspective.
>
> >
> >
> > Ok Im starting to grasp that multiple instances are good
> > (and wondering how I didnt notice)
> >
> > What doesnt thrill me is the new _ddebug field, it enlarges the footprint.
> >
>
> Yes it increases _ddebug structure by a pointer size.
>
> > can you make it go away ?
>
> I implemented my proposal with flexibility in mind so that if someone
> would like to add
> another destination in the future it should be easy to do. I
> understand that adding a pointer
> to the _ddebug structure increases footprint size that's why I also
> added CONFIG_DYNAMIC_DEBUG_DST
> kernel configuration option in order to enable/disable this functionality.
>
> > I have some thoughts ..
>
> Please share your thoughts. I'm sure we can come to an agreement how
> to incorporate both +T and my proposal.
So heres what Im thinking:
shrink lineno, get 2-3 bits back.
last I checked largest C file is <32kloc
largest header is ~120kloc, but its a data only,
no pr_debugs will suddenly appear there.
define a dst_id field with 3 bits
0 is for main tracebuf
1-7 is for other instances
then the alt-dest lookup is avoided except when the dst_id field is >0
It might work to put the alt-dst-pointer into the classmaps,
so the destination is used for the entire group of debugs
forex DRM_UT_CORE etc.
But its no better than the dst_id field, which is per-callsite,
and entirely independent of classes.
> Thanks,
> Lukasz
pt., 6 paź 2023 o 22:49 <[email protected]> napisał(a):
>
> On Wed, Oct 4, 2023 at 4:55 AM Łukasz Bartosik <[email protected]> wrote:
> >
> > wt., 3 paź 2023 o 22:54 <[email protected]> napisał(a):
> > >
> > > On Tue, Oct 3, 2023 at 1:57 PM Steven Rostedt <[email protected]> wrote:
> > > >
> > > > On Mon, 2 Oct 2023 14:49:20 -0600
> > > > [email protected] wrote:
> > > >
> > > > > hi Lukasz,
> > > > >
> > > > > sorry my kernel-time has been in my own trees.
> > > > >
> > > > > What I dont understand is why +T is insufficient.
> > > > >
> >
> > We would like to be able to separate debug logs from different
> > subsystem (e.g. thunderbolt and usbcore).
> > With +T it is not possible because all debug logs will land in the same bucket.
> >
> > > > > IIUC, tracefs is intended for production use.
> > > > > thats why each event can be enabled / disabled
> > > > > - to select and minimize whats traced, and not impact the system
> > > > >
> > > > > and +T can forward all pr_debugs to trace,
> > > > > (by 1-few trace events defined similarly to others)
> > > > > or very few, giving yet another selection mechanism
> > > > > to choose or eliminate specific pr-debugs and reduce traffic to
> > > > > interesting stuff.
> > > > >
> > > > > Once your debug is in the trace-buf,
> > > > > shouldnt user-space be deciding what to do with it ?
> > > > > a smart daemon could leverage tracefs to good effect.
> > > > >
> >
> > Yes, a daemon could separate the debug logs but IMHO it is much
> > easier to separate logs by sending them directly from a given subsystem
> > to a separate trace instance. My proposal allows to configure different
> > trace instance as destination for each callsite.
> >
> > > > > IMO the main value of +T is that it allows feeding existing pr_debugs
> > > > > into the place where other trace-data is already integrated and managed.
> > > > >
> > > > > At this point, I dont see any extra destination handling as prudent.
> > > > >
> > > >
> > > >
> > > > I'm fine with either approach. I kind of like the creation of the instance,
> > > > as that allows the user to keep this debug separate from other tracing
> > > > going on. We are starting to have multiple applications using the tracing
> > > > buffer (although most are using instances, which is why I'm trying to make
> > > > them lighter weight with the eventfs code).
> > > >
> > > > -- Steve
> > > >
> >
> > Steve, thanks for commenting from the trace perspective.
> >
> > >
> > >
> > > Ok Im starting to grasp that multiple instances are good
> > > (and wondering how I didnt notice)
> > >
> > > What doesnt thrill me is the new _ddebug field, it enlarges the footprint.
> > >
> >
> > Yes it increases _ddebug structure by a pointer size.
> >
> > > can you make it go away ?
> >
> > I implemented my proposal with flexibility in mind so that if someone
> > would like to add
> > another destination in the future it should be easy to do. I
> > understand that adding a pointer
> > to the _ddebug structure increases footprint size that's why I also
> > added CONFIG_DYNAMIC_DEBUG_DST
> > kernel configuration option in order to enable/disable this functionality.
> >
> > > I have some thoughts ..
> >
> > Please share your thoughts. I'm sure we can come to an agreement how
> > to incorporate both +T and my proposal.
>
>
> So heres what Im thinking:
>
> shrink lineno, get 2-3 bits back.
> last I checked largest C file is <32kloc
> largest header is ~120kloc, but its a data only,
> no pr_debugs will suddenly appear there.
>
> define a dst_id field with 3 bits
The dst_id field would be taken into account only when a callsite has
T flag set, is that your assumption ?
Can one bit be taken from class_id to increase dst_id to 4 bits ?
Decreasing class_id length to 5 bits would still leave its range at
[0..31]
> 0 is for main tracebuf
> 1-7 is for other instances
>
Do you want to leave trace events as originally implemented with +T flag ?
If yes then I would like to propose:
dst_id = 0 - for trace events for pr_debug and dbg_dev logs (default)
dst_id = 1 - for default trace instance
dst_id > 1 - other trace instances
> then the alt-dest lookup is avoided except when the dst_id field is >0
>
> It might work to put the alt-dst-pointer into the classmaps,
> so the destination is used for the entire group of debugs
> forex DRM_UT_CORE etc.
>
If we store dst pointers for dst_id > 1 in classmaps then is there a fast way
to get from callsite (_ddebug) to its corresponding classmap
(ddebug_class_map) in order to
lookup trace instance destination ? I ask because I don't see it being possible
without adding a new field to the _ddebug structure.
> But its no better than the dst_id field, which is per-callsite,
> and entirely independent of classes.
>
I don't have a real life use case to configure different trace
instance for each callsite.
I just tried to be as much flexible as possible.
Thanks,
Lukasz
>
> > Thanks,
> > Lukasz
On Mon, Oct 9, 2023 at 4:47 PM Łukasz Bartosik <[email protected]> wrote:
>
> pt., 6 paź 2023 o 22:49 <[email protected]> napisał(a):
> >
> > On Wed, Oct 4, 2023 at 4:55 AM Łukasz Bartosik <[email protected]> wrote:
> > >
> > > wt., 3 paź 2023 o 22:54 <[email protected]> napisał(a):
> > > >
> > > > On Tue, Oct 3, 2023 at 1:57 PM Steven Rostedt <[email protected]> wrote:
> > > > >
> > > > > On Mon, 2 Oct 2023 14:49:20 -0600
> > > > > [email protected] wrote:
> > > > >
> > > > > > hi Lukasz,
> > > > > >
> > > > > > sorry my kernel-time has been in my own trees.
> > > > > >
> > > > > > What I dont understand is why +T is insufficient.
> > > > > >
> > >
> > > We would like to be able to separate debug logs from different
> > > subsystem (e.g. thunderbolt and usbcore).
> > > With +T it is not possible because all debug logs will land in the same bucket.
> > >
> > > > > > IIUC, tracefs is intended for production use.
> > > > > > thats why each event can be enabled / disabled
> > > > > > - to select and minimize whats traced, and not impact the system
> > > > > >
> > > > > > and +T can forward all pr_debugs to trace,
> > > > > > (by 1-few trace events defined similarly to others)
> > > > > > or very few, giving yet another selection mechanism
> > > > > > to choose or eliminate specific pr-debugs and reduce traffic to
> > > > > > interesting stuff.
> > > > > >
> > > > > > Once your debug is in the trace-buf,
> > > > > > shouldnt user-space be deciding what to do with it ?
> > > > > > a smart daemon could leverage tracefs to good effect.
> > > > > >
> > >
> > > Yes, a daemon could separate the debug logs but IMHO it is much
> > > easier to separate logs by sending them directly from a given subsystem
> > > to a separate trace instance. My proposal allows to configure different
> > > trace instance as destination for each callsite.
> > >
> > > > > > IMO the main value of +T is that it allows feeding existing pr_debugs
> > > > > > into the place where other trace-data is already integrated and managed.
> > > > > >
> > > > > > At this point, I dont see any extra destination handling as prudent.
> > > > > >
> > > > >
> > > > >
> > > > > I'm fine with either approach. I kind of like the creation of the instance,
> > > > > as that allows the user to keep this debug separate from other tracing
> > > > > going on. We are starting to have multiple applications using the tracing
> > > > > buffer (although most are using instances, which is why I'm trying to make
> > > > > them lighter weight with the eventfs code).
> > > > >
> > > > > -- Steve
> > > > >
> > >
> > > Steve, thanks for commenting from the trace perspective.
> > >
> > > >
> > > >
> > > > Ok Im starting to grasp that multiple instances are good
> > > > (and wondering how I didnt notice)
> > > >
> > > > What doesnt thrill me is the new _ddebug field, it enlarges the footprint.
> > > >
> > >
> > > Yes it increases _ddebug structure by a pointer size.
> > >
> > > > can you make it go away ?
> > >
> > > I implemented my proposal with flexibility in mind so that if someone
> > > would like to add
> > > another destination in the future it should be easy to do. I
> > > understand that adding a pointer
> > > to the _ddebug structure increases footprint size that's why I also
> > > added CONFIG_DYNAMIC_DEBUG_DST
> > > kernel configuration option in order to enable/disable this functionality.
> > >
> > > > I have some thoughts ..
> > >
> > > Please share your thoughts. I'm sure we can come to an agreement how
> > > to incorporate both +T and my proposal.
> >
> >
> > So heres what Im thinking:
> >
> > shrink lineno, get 2-3 bits back.
> > last I checked largest C file is <32kloc
> > largest header is ~120kloc, but its a data only,
> > no pr_debugs will suddenly appear there.
> >
> > define a dst_id field with 3 bits
>
> The dst_id field would be taken into account only when a callsite has
> T flag set, is that your assumption ?
Im ambivalent about the +T bit itself,
it could as easily be another "special" value in the 0-2^N range of dst_id
its a use-case tradeoff:
+T goes to main tracebuf.
dst_id>0 goes to separate, pre-registered "flight-recorder" tracebufs
I'm not sure whether doing both independently is better than having
2^(n-1)-1 extra bufs.
Actually, demoting +T to just another dest makes sense -
theres a large population of pr-debugs, and events like vblank-*
Nobody will send vblank to syslog on purpose either.
either way, dyndbg will need both:
new trace-events, so that prdbgs can get enabled as a single/few event-types
trace_array_buf writes to flight recorders
> Can one bit be taken from class_id to increase dst_id to 4 bits ?
> Decreasing class_id length to 5 bits would still leave its range at
> [0..31]
31 classes / categories should be enough.
64 was convenient, BIT* supported.
> > 0 is for main tracebuf
> > 1-7 is for other instances
> >
>
> Do you want to leave trace events as originally implemented with +T flag ?
> If yes then I would like to propose:
> dst_id = 0 - for trace events for pr_debug and dbg_dev logs (default)
> dst_id = 1 - for default trace instance
> dst_id > 1 - other trace instances
>
> > then the alt-dest lookup is avoided except when the dst_id field is >0
> >
> > It might work to put the alt-dst-pointer into the classmaps,
> > so the destination is used for the entire group of debugs
> > forex DRM_UT_CORE etc.
> >
>
> If we store dst pointers for dst_id > 1 in classmaps then is there a fast way
> to get from callsite (_ddebug) to its corresponding classmap
> (ddebug_class_map) in order to
> lookup trace instance destination ? I ask because I don't see it being possible
> without adding a new field to the _ddebug structure.
I agree, theres no good enough way, compared to a small dst-id.
The "muse" was on the grouping aspect it might bring.
> > But its no better than the dst_id field, which is per-callsite,
> > and entirely independent of classes.
> >
this was me agreeing with your point above :-)
>
> I don't have a real life use case to configure different trace
> instance for each callsite.
> I just tried to be as much flexible as possible.
>
Ive come around to agree - I looked back at some old threads
(that I was a part of, and barely remembered :-}
At least Sean Paul, Lyude, Simon Ser, Pekka Paalanen
have expressed a desire for a "flight-recorder"
it'd be hard to say now that 2-3 such buffers would always be enough,
esp as theres a performance reason for having your own.
> Thanks,
> Lukasz
>
> >
> > > Thanks,
> > > Lukasz
since I name-dropped you all,
On Tue, Oct 10, 2023 at 10:01 AM <[email protected]> wrote:
>
> On Mon, Oct 9, 2023 at 4:47 PM Łukasz Bartosik <[email protected]> wrote:
> >
> > pt., 6 paź 2023 o 22:49 <[email protected]> napisał(a):
> > >
> > > On Wed, Oct 4, 2023 at 4:55 AM Łukasz Bartosik <[email protected]> wrote:
> > > >
> > > > wt., 3 paź 2023 o 22:54 <[email protected]> napisał(a):
> > > > >
> > > > > On Tue, Oct 3, 2023 at 1:57 PM Steven Rostedt <[email protected]> wrote:
> > > > > >
> > > > > > On Mon, 2 Oct 2023 14:49:20 -0600
> > > > > > [email protected] wrote:
> > > > > >
> > > > > > > hi Lukasz,
> > > > > > >
> > > > > > > sorry my kernel-time has been in my own trees.
> > > > > > >
> > > > > > > What I dont understand is why +T is insufficient.
> > > > > > >
> > > >
> > > > We would like to be able to separate debug logs from different
> > > > subsystem (e.g. thunderbolt and usbcore).
> > > > With +T it is not possible because all debug logs will land in the same bucket.
> > > >
> > > > > > > IIUC, tracefs is intended for production use.
> > > > > > > thats why each event can be enabled / disabled
> > > > > > > - to select and minimize whats traced, and not impact the system
> > > > > > >
> > > > > > > and +T can forward all pr_debugs to trace,
> > > > > > > (by 1-few trace events defined similarly to others)
> > > > > > > or very few, giving yet another selection mechanism
> > > > > > > to choose or eliminate specific pr-debugs and reduce traffic to
> > > > > > > interesting stuff.
> > > > > > >
> > > > > > > Once your debug is in the trace-buf,
> > > > > > > shouldnt user-space be deciding what to do with it ?
> > > > > > > a smart daemon could leverage tracefs to good effect.
> > > > > > >
> > > >
> > > > Yes, a daemon could separate the debug logs but IMHO it is much
> > > > easier to separate logs by sending them directly from a given subsystem
> > > > to a separate trace instance. My proposal allows to configure different
> > > > trace instance as destination for each callsite.
> > > >
> > > > > > > IMO the main value of +T is that it allows feeding existing pr_debugs
> > > > > > > into the place where other trace-data is already integrated and managed.
> > > > > > >
> > > > > > > At this point, I dont see any extra destination handling as prudent.
> > > > > > >
> > > > > >
> > > > > >
> > > > > > I'm fine with either approach. I kind of like the creation of the instance,
> > > > > > as that allows the user to keep this debug separate from other tracing
> > > > > > going on. We are starting to have multiple applications using the tracing
> > > > > > buffer (although most are using instances, which is why I'm trying to make
> > > > > > them lighter weight with the eventfs code).
> > > > > >
> > > > > > -- Steve
> > > > > >
> > > >
> > > > Steve, thanks for commenting from the trace perspective.
> > > >
> > > > >
> > > > >
> > > > > Ok Im starting to grasp that multiple instances are good
> > > > > (and wondering how I didnt notice)
> > > > >
> > > > > What doesnt thrill me is the new _ddebug field, it enlarges the footprint.
> > > > >
> > > >
> > > > Yes it increases _ddebug structure by a pointer size.
> > > >
> > > > > can you make it go away ?
> > > >
> > > > I implemented my proposal with flexibility in mind so that if someone
> > > > would like to add
> > > > another destination in the future it should be easy to do. I
> > > > understand that adding a pointer
> > > > to the _ddebug structure increases footprint size that's why I also
> > > > added CONFIG_DYNAMIC_DEBUG_DST
> > > > kernel configuration option in order to enable/disable this functionality.
> > > >
> > > > > I have some thoughts ..
> > > >
> > > > Please share your thoughts. I'm sure we can come to an agreement how
> > > > to incorporate both +T and my proposal.
> > >
> > >
> > > So heres what Im thinking:
> > >
> > > shrink lineno, get 2-3 bits back.
> > > last I checked largest C file is <32kloc
> > > largest header is ~120kloc, but its a data only,
> > > no pr_debugs will suddenly appear there.
> > >
> > > define a dst_id field with 3 bits
> >
> > The dst_id field would be taken into account only when a callsite has
> > T flag set, is that your assumption ?
>
> Im ambivalent about the +T bit itself,
> it could as easily be another "special" value in the 0-2^N range of dst_id
>
> its a use-case tradeoff:
> +T goes to main tracebuf.
> dst_id>0 goes to separate, pre-registered "flight-recorder" tracebufs
>
> I'm not sure whether doing both independently is better than having
> 2^(n-1)-1 extra bufs.
>
> Actually, demoting +T to just another dest makes sense -
> theres a large population of pr-debugs, and events like vblank-*
> Nobody will send vblank to syslog on purpose either.
>
>
> either way, dyndbg will need both:
> new trace-events, so that prdbgs can get enabled as a single/few event-types
> trace_array_buf writes to flight recorders
>
>
>
>
>
>
>
> > Can one bit be taken from class_id to increase dst_id to 4 bits ?
> > Decreasing class_id length to 5 bits would still leave its range at
> > [0..31]
>
> 31 classes / categories should be enough.
> 64 was convenient, BIT* supported.
>
> > > 0 is for main tracebuf
> > > 1-7 is for other instances
> > >
> >
> > Do you want to leave trace events as originally implemented with +T flag ?
> > If yes then I would like to propose:
> > dst_id = 0 - for trace events for pr_debug and dbg_dev logs (default)
> > dst_id = 1 - for default trace instance
> > dst_id > 1 - other trace instances
> >
> > > then the alt-dest lookup is avoided except when the dst_id field is >0
> > >
> > > It might work to put the alt-dst-pointer into the classmaps,
> > > so the destination is used for the entire group of debugs
> > > forex DRM_UT_CORE etc.
> > >
> >
> > If we store dst pointers for dst_id > 1 in classmaps then is there a fast way
> > to get from callsite (_ddebug) to its corresponding classmap
> > (ddebug_class_map) in order to
> > lookup trace instance destination ? I ask because I don't see it being possible
> > without adding a new field to the _ddebug structure.
>
> I agree, theres no good enough way, compared to a small dst-id.
> The "muse" was on the grouping aspect it might bring.
>
> > > But its no better than the dst_id field, which is per-callsite,
> > > and entirely independent of classes.
> > >
> this was me agreeing with your point above :-)
>
>
> >
> > I don't have a real life use case to configure different trace
> > instance for each callsite.
> > I just tried to be as much flexible as possible.
> >
>
> Ive come around to agree - I looked back at some old threads
> (that I was a part of, and barely remembered :-}
>
> At least Sean Paul, Lyude, Simon Ser, Pekka Paalanen
> have expressed a desire for a "flight-recorder"
> it'd be hard to say now that 2-3 such buffers would always be enough,
> esp as theres a performance reason for having your own.
>
>
> > Thanks,
> > Lukasz
> >
> > >
> > > > Thanks,
> > > > Lukasz
On Tue, 10 Oct 2023 10:06:02 -0600
[email protected] wrote:
> since I name-dropped you all,
Hi everyone,
I'm really happy to see this topic being developed! I've practically
forgot about it myself, but the need for it has not diminished at all.
I didn't understand much of the conversation, so I'll just reiterate
what I would use it for, as a Wayland compositor developer.
I added a few more cc's to get better coverage of DRM and Wayland
compositor developers.
> On Tue, Oct 10, 2023 at 10:01 AM <[email protected]> wrote:
> >
> > On Mon, Oct 9, 2023 at 4:47 PM Łukasz Bartosik <[email protected]> wrote:
...
> > > I don't have a real life use case to configure different trace
> > > instance for each callsite.
> > > I just tried to be as much flexible as possible.
> > >
> >
> > Ive come around to agree - I looked back at some old threads
> > (that I was a part of, and barely remembered :-}
> >
> > At least Sean Paul, Lyude, Simon Ser, Pekka Paalanen
> > have expressed a desire for a "flight-recorder"
> > it'd be hard to say now that 2-3 such buffers would always be enough,
> > esp as theres a performance reason for having your own.
A Wayland compositor has roughly three important things where the kernel
debugs might come in handy:
- input
- DRM KMS
- DRM GPU rendering
DRM KMS is the one I've been thinking of in the flight recorder context
the most, because KMS hardware varies a lot, and there is plenty of
room for both KMS drivers and KMS userspace to go wrong. The usual
result is your display doesn't work, so the system is practically
unusable to the end user. In the wild, the simplest or maybe the only
way out of that may be a reboot, maybe an automated one (e.g. digital
signage). In order to debug such problems, we would need both
compositor logs and the relevant kernel debug messages.
For example, Weston already has a flight recorder framework of its own,
so we have the compositor debug logs. It would be useful to get the
selected kernel debug logs in the same place. It could be used for
automated or semi-manual bug reporting, for example, making the
administrator or end user life much easier reporting issues.
Since this is usually a production environment, and the Wayland
compositor runs without root privileges, we need something that works
with that. We would likely want the kernel debug messages in the
compositor to combine and order them properly with the compositor debug
messages.
It's quite likely that developers would like to pick and choose which
kernel debug messages might be interesting enough to record, to avoid
excessive log flooding. The flight recorder in Weston is fixed size to
avoid running out of memory or disk space. I can also see that Weston
could have debugging options that affect which kernel debug messages it
subscribes to. We can have a reasonable default setup that allows us to
pinpoint the problem area and figure out most problems, and if needed,
we could ask the administrator pass another debug option to Weston. It
helps if there is just one place to configure everything about the
compositor.
This implies that it would be really nice to have userspace subscriber
specific debug message streams from the kernel, or a good way to filter
the messages we want. A Wayland compositor would not be interested in
file system or wireless debugs for example, but another system
component might be. There is also a security aspect of which component is
allowed to see which messages in case they could contain anything
sensitive (input debug could contain typed passwords).
Configuring the kernel debug message selection for our debug message
stream can and probably should require elevated privileges, and we can
likely solve that in userspace with a daemon or such, to allow the
Wayland compositor to run as a regular user.
Thinking of desktop systems, and especially physically multi-seat systems:
- there can be multiple different Wayland compositors running simultaneously
- each of them may want debug messages only from a specific DRM KMS
device instance, and not from others
- each of them may have a different idea of which debug messages are important
- because DRM KMS leasing is a thing, different compositor instances
could be using the same DRM KMS device instance simultaneously; since
this is specific to DRM KMS, and it should be harmless to get a
little too much DRM KMS debug (that is, from the whole device instead
of just the leased parts), it may not be worth to consider splitting
debug message streams this far.
If userspace is offered some standardised fields in kernel debug
structures, then userspace could do some filtering on its own too, but I
guess it would be better to filter at the source and not need that.
There is also an anti-goal. The kernel debug message contents are
specifically not machine-parsable. I very much do not want to impose
debug strings as ABI, they are for human (and AI?) readers only.
As a summary, here are the most important requirements first:
- usable in production as a normal thing to enable always by default
- final delivery to unprivileged userspace process
- per debug-print selection of messages (finer or coarser, categories
within a kernel sub-system could be enough)
- per originating device (driver instance) selection of messages
- all selections tailored separately for each userspace subscriber
(- per open device file description selection of messages)
That's my idea of it. It is interesting to see how far the requirements
can be reasonably realised.
Thanks,
pq
On Wed, Oct 11, 2023 at 11:48:16AM +0300, Pekka Paalanen wrote:
> On Tue, 10 Oct 2023 10:06:02 -0600
> [email protected] wrote:
>
> > since I name-dropped you all,
>
> Hi everyone,
>
> I'm really happy to see this topic being developed! I've practically
> forgot about it myself, but the need for it has not diminished at all.
>
> I didn't understand much of the conversation, so I'll just reiterate
> what I would use it for, as a Wayland compositor developer.
>
> I added a few more cc's to get better coverage of DRM and Wayland
> compositor developers.
>
> > On Tue, Oct 10, 2023 at 10:01 AM <[email protected]> wrote:
> > >
> > > On Mon, Oct 9, 2023 at 4:47 PM Łukasz Bartosik <[email protected]> wrote:
>
> ...
>
> > > > I don't have a real life use case to configure different trace
> > > > instance for each callsite.
> > > > I just tried to be as much flexible as possible.
> > > >
> > >
> > > Ive come around to agree - I looked back at some old threads
> > > (that I was a part of, and barely remembered :-}
> > >
> > > At least Sean Paul, Lyude, Simon Ser, Pekka Paalanen
> > > have expressed a desire for a "flight-recorder"
> > > it'd be hard to say now that 2-3 such buffers would always be enough,
> > > esp as theres a performance reason for having your own.
>
> A Wayland compositor has roughly three important things where the kernel
> debugs might come in handy:
> - input
> - DRM KMS
> - DRM GPU rendering
>
> DRM KMS is the one I've been thinking of in the flight recorder context
> the most, because KMS hardware varies a lot, and there is plenty of
> room for both KMS drivers and KMS userspace to go wrong. The usual
> result is your display doesn't work, so the system is practically
> unusable to the end user. In the wild, the simplest or maybe the only
> way out of that may be a reboot, maybe an automated one (e.g. digital
> signage). In order to debug such problems, we would need both
> compositor logs and the relevant kernel debug messages.
>
> For example, Weston already has a flight recorder framework of its own,
> so we have the compositor debug logs. It would be useful to get the
> selected kernel debug logs in the same place. It could be used for
> automated or semi-manual bug reporting, for example, making the
> administrator or end user life much easier reporting issues.
>
> Since this is usually a production environment, and the Wayland
> compositor runs without root privileges, we need something that works
> with that. We would likely want the kernel debug messages in the
> compositor to combine and order them properly with the compositor debug
> messages.
>
> It's quite likely that developers would like to pick and choose which
> kernel debug messages might be interesting enough to record, to avoid
> excessive log flooding. The flight recorder in Weston is fixed size to
> avoid running out of memory or disk space. I can also see that Weston
> could have debugging options that affect which kernel debug messages it
> subscribes to. We can have a reasonable default setup that allows us to
> pinpoint the problem area and figure out most problems, and if needed,
> we could ask the administrator pass another debug option to Weston. It
> helps if there is just one place to configure everything about the
> compositor.
>
> This implies that it would be really nice to have userspace subscriber
> specific debug message streams from the kernel, or a good way to filter
> the messages we want. A Wayland compositor would not be interested in
> file system or wireless debugs for example, but another system
> component might be. There is also a security aspect of which component is
> allowed to see which messages in case they could contain anything
> sensitive (input debug could contain typed passwords).
>
> Configuring the kernel debug message selection for our debug message
> stream can and probably should require elevated privileges, and we can
> likely solve that in userspace with a daemon or such, to allow the
> Wayland compositor to run as a regular user.
>
> Thinking of desktop systems, and especially physically multi-seat systems:
> - there can be multiple different Wayland compositors running simultaneously
> - each of them may want debug messages only from a specific DRM KMS
> device instance, and not from others
> - each of them may have a different idea of which debug messages are important
> - because DRM KMS leasing is a thing, different compositor instances
> could be using the same DRM KMS device instance simultaneously; since
> this is specific to DRM KMS, and it should be harmless to get a
> little too much DRM KMS debug (that is, from the whole device instead
> of just the leased parts), it may not be worth to consider splitting
> debug message streams this far.
>
> If userspace is offered some standardised fields in kernel debug
> structures, then userspace could do some filtering on its own too, but I
> guess it would be better to filter at the source and not need that.
>
> There is also an anti-goal. The kernel debug message contents are
> specifically not machine-parsable. I very much do not want to impose
> debug strings as ABI, they are for human (and AI?) readers only.
>
>
> As a summary, here are the most important requirements first:
> - usable in production as a normal thing to enable always by default
> - final delivery to unprivileged userspace process
I think this is the one that's trickiest, and I don't fully understand why
you need it. The issues I'm seeing:
- logs tend to leak a lot of kernel internal state that's useful for
attacks. There's measures for the worst (like obfuscating kernel
pointers by hashing them), so there's always going to be a difference
here between what full sysadmin can get and what unpriviledged userspace
can get. And there's always a risk we miss something that we should
obfuscate but didn't.
- handing this to userspace increases the risks it becomes uapi. Who's
going to stop compositors from sussing out the reason an atomic commit
failed from the logs if they can get them easily, and these logs contain
very interesting information about why something failed?
Much better if journald or a crash handler assemebles all the different
flight recorder logs and packages it into a bug report so that the
compositor cannot ever get at these directly. Yeah this needs some OS
support with a dbus request or similar so that the compositor can ask
for a crash dump with everything relevant to its session.
- the idea of an in-kernel flight recorder is that it's really fast. The
entire tracing infra is built such that recording an event is really
quick, but printing it is not - the entire string formatting is delayed
to when userspace reads the buffers. If you constantly push the log
messages to userspace we toss the advantage of the low-overhead
in-kernel flight recorder. If you push logs to dmesg there's a
substantial measureable overhead which you don't really want in
production, and your requirement would impose the same.
- I'm not sure how this is supposed to mesh with userspace log aggregators
like journald when every compositor has it's own flight recorder on top.
Feels a bit like a solution that ignores the entire os stack and assumes
that the only pieces we can touch are the kernel and the compositor to
get to such a flight recorder.
You might object that events will get out-of-order if you merge multiple
logs after the fact, but that's the case anyway if we use the tracing
framework since that's always a ringbuffer within the kernel and not
synchronous. The only thing we could do is allow userspace to push
markers into that ringbuffer, which is easily done by adding more debug
output lines (heck we could even add a logging cookie to certain ioctl
when userspace really cares about knowing exact ordering of it's
requests with the stuff the kernel does).
- If you really want direct deliver to userspace I guess we could do
something where sessiond opens the flight recorder fd for you, sets it
all up and passes it to the compositor. But I'm really not a big fan of
sending the full kms dbg spam to compositors to freely digest in real
time.
> - per debug-print selection of messages (finer or coarser, categories
> within a kernel sub-system could be enough)
> - per originating device (driver instance) selection of messages
The dyndbg stuff can do all that already, which is why I'm so much in
favour of relying on that framework.
> - all selections tailored separately for each userspace subscriber
> (- per open device file description selection of messages)
Again this feels like a userspace problem. Sessions could register what
kind of info they need for their session, and something like journald can
figure out how to record it all.
If you want the kernel to keep separate flight recorders I guess we could
add that, but I don't think it currently exists for the dyndbg stuff at
least. Maybe a flight recorder v2 feature, once the basics are in.
> That's my idea of it. It is interesting to see how far the requirements
> can be reasonably realised.
I think aside from the "make it available directly to unpriviledged
userspace" everything sounds reasonable and doable.
More on the process side of things, I think Jim is very much looking for
acks and tested-by by people who are interested in better drm logging
infra. That should help that things are moving in a direction that's
actually useful, even when it's not yet entirely complete.
Cheers, Sima
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
wt., 10 paź 2023 o 18:02 <[email protected]> napisał(a):
>
> On Mon, Oct 9, 2023 at 4:47 PM Łukasz Bartosik <[email protected]> wrote:
> >
> > pt., 6 paź 2023 o 22:49 <[email protected]> napisał(a):
> > >
> > > On Wed, Oct 4, 2023 at 4:55 AM Łukasz Bartosik <[email protected]> wrote:
> > > >
> > > > wt., 3 paź 2023 o 22:54 <[email protected]> napisał(a):
> > > > >
> > > > > On Tue, Oct 3, 2023 at 1:57 PM Steven Rostedt <[email protected]> wrote:
> > > > > >
> > > > > > On Mon, 2 Oct 2023 14:49:20 -0600
> > > > > > [email protected] wrote:
> > > > > >
> > > > > > > hi Lukasz,
> > > > > > >
> > > > > > > sorry my kernel-time has been in my own trees.
> > > > > > >
> > > > > > > What I dont understand is why +T is insufficient.
> > > > > > >
> > > >
> > > > We would like to be able to separate debug logs from different
> > > > subsystem (e.g. thunderbolt and usbcore).
> > > > With +T it is not possible because all debug logs will land in the same bucket.
> > > >
> > > > > > > IIUC, tracefs is intended for production use.
> > > > > > > thats why each event can be enabled / disabled
> > > > > > > - to select and minimize whats traced, and not impact the system
> > > > > > >
> > > > > > > and +T can forward all pr_debugs to trace,
> > > > > > > (by 1-few trace events defined similarly to others)
> > > > > > > or very few, giving yet another selection mechanism
> > > > > > > to choose or eliminate specific pr-debugs and reduce traffic to
> > > > > > > interesting stuff.
> > > > > > >
> > > > > > > Once your debug is in the trace-buf,
> > > > > > > shouldnt user-space be deciding what to do with it ?
> > > > > > > a smart daemon could leverage tracefs to good effect.
> > > > > > >
> > > >
> > > > Yes, a daemon could separate the debug logs but IMHO it is much
> > > > easier to separate logs by sending them directly from a given subsystem
> > > > to a separate trace instance. My proposal allows to configure different
> > > > trace instance as destination for each callsite.
> > > >
> > > > > > > IMO the main value of +T is that it allows feeding existing pr_debugs
> > > > > > > into the place where other trace-data is already integrated and managed.
> > > > > > >
> > > > > > > At this point, I dont see any extra destination handling as prudent.
> > > > > > >
> > > > > >
> > > > > >
> > > > > > I'm fine with either approach. I kind of like the creation of the instance,
> > > > > > as that allows the user to keep this debug separate from other tracing
> > > > > > going on. We are starting to have multiple applications using the tracing
> > > > > > buffer (although most are using instances, which is why I'm trying to make
> > > > > > them lighter weight with the eventfs code).
> > > > > >
> > > > > > -- Steve
> > > > > >
> > > >
> > > > Steve, thanks for commenting from the trace perspective.
> > > >
> > > > >
> > > > >
> > > > > Ok Im starting to grasp that multiple instances are good
> > > > > (and wondering how I didnt notice)
> > > > >
> > > > > What doesnt thrill me is the new _ddebug field, it enlarges the footprint.
> > > > >
> > > >
> > > > Yes it increases _ddebug structure by a pointer size.
> > > >
> > > > > can you make it go away ?
> > > >
> > > > I implemented my proposal with flexibility in mind so that if someone
> > > > would like to add
> > > > another destination in the future it should be easy to do. I
> > > > understand that adding a pointer
> > > > to the _ddebug structure increases footprint size that's why I also
> > > > added CONFIG_DYNAMIC_DEBUG_DST
> > > > kernel configuration option in order to enable/disable this functionality.
> > > >
> > > > > I have some thoughts ..
> > > >
> > > > Please share your thoughts. I'm sure we can come to an agreement how
> > > > to incorporate both +T and my proposal.
> > >
> > >
> > > So heres what Im thinking:
> > >
> > > shrink lineno, get 2-3 bits back.
> > > last I checked largest C file is <32kloc
> > > largest header is ~120kloc, but its a data only,
> > > no pr_debugs will suddenly appear there.
> > >
> > > define a dst_id field with 3 bits
> >
> > The dst_id field would be taken into account only when a callsite has
> > T flag set, is that your assumption ?
>
> Im ambivalent about the +T bit itself,
> it could as easily be another "special" value in the 0-2^N range of dst_id
>
> its a use-case tradeoff:
> +T goes to main tracebuf.
> dst_id>0 goes to separate, pre-registered "flight-recorder" tracebufs
>
> I'm not sure whether doing both independently is better than having
> 2^(n-1)-1 extra bufs.
>
Even though having +T seems more consistent I would prefer to combine
it with dst_id
because it will give us wider range.
> Actually, demoting +T to just another dest makes sense -
> theres a large population of pr-debugs, and events like vblank-*
> Nobody will send vblank to syslog on purpose either.
>
>
> either way, dyndbg will need both:
> new trace-events, so that prdbgs can get enabled as a single/few event-types
> trace_array_buf writes to flight recorders
>
Then depending on the value of dst_id (assuming dst_id consumed +T and
one more bit from class_id) we would have:
dst_id = 0 - trace disabled
dst_id = 1 - trace events for pr_debug and dev_dbg
dst_id = [2..31] - predefined trace instances
The trace instance destinations will be global but this number seems
pretty reasonable and is sufficient from our use case perspective.
>
>
>
>
>
>
> > Can one bit be taken from class_id to increase dst_id to 4 bits ?
> > Decreasing class_id length to 5 bits would still leave its range at
> > [0..31]
>
> 31 classes / categories should be enough.
> 64 was convenient, BIT* supported.
>
> > > 0 is for main tracebuf
> > > 1-7 is for other instances
> > >
> >
> > Do you want to leave trace events as originally implemented with +T flag ?
> > If yes then I would like to propose:
> > dst_id = 0 - for trace events for pr_debug and dbg_dev logs (default)
> > dst_id = 1 - for default trace instance
> > dst_id > 1 - other trace instances
> >
> > > then the alt-dest lookup is avoided except when the dst_id field is >0
> > >
> > > It might work to put the alt-dst-pointer into the classmaps,
> > > so the destination is used for the entire group of debugs
> > > forex DRM_UT_CORE etc.
> > >
> >
> > If we store dst pointers for dst_id > 1 in classmaps then is there a fast way
> > to get from callsite (_ddebug) to its corresponding classmap
> > (ddebug_class_map) in order to
> > lookup trace instance destination ? I ask because I don't see it being possible
> > without adding a new field to the _ddebug structure.
>
> I agree, theres no good enough way, compared to a small dst-id.
> The "muse" was on the grouping aspect it might bring.
>
> > > But its no better than the dst_id field, which is per-callsite,
> > > and entirely independent of classes.
> > >
> this was me agreeing with your point above :-)
>
I see :)
>
> >
> > I don't have a real life use case to configure different trace
> > instance for each callsite.
> > I just tried to be as much flexible as possible.
> >
>
> Ive come around to agree - I looked back at some old threads
> (that I was a part of, and barely remembered :-}
>
> At least Sean Paul, Lyude, Simon Ser, Pekka Paalanen
> have expressed a desire for a "flight-recorder"
> it'd be hard to say now that 2-3 such buffers would always be enough,
> esp as theres a performance reason for having your own.
>
What would you recommend to base continuation of development of this feature on,
I mean whether to use one of your private branches you mentioned (in
one of the previous replies)
or latest upstream ?
>
> > Thanks,
> > Lukasz
> >
> > >
> > > > Thanks,
> > > > Lukasz
śr., 11 paź 2023 o 11:42 Daniel Vetter <[email protected]> napisał(a):
>
> On Wed, Oct 11, 2023 at 11:48:16AM +0300, Pekka Paalanen wrote:
> > On Tue, 10 Oct 2023 10:06:02 -0600
> > [email protected] wrote:
> >
> > > since I name-dropped you all,
> >
> > Hi everyone,
> >
> > I'm really happy to see this topic being developed! I've practically
> > forgot about it myself, but the need for it has not diminished at all.
> >
It's good to hear you guys are also interested in this feature.
> > I didn't understand much of the conversation, so I'll just reiterate
> > what I would use it for, as a Wayland compositor developer.
> >
> > I added a few more cc's to get better coverage of DRM and Wayland
> > compositor developers.
> >
> > > On Tue, Oct 10, 2023 at 10:01 AM <[email protected]> wrote:
> > > >
> > > > On Mon, Oct 9, 2023 at 4:47 PM Łukasz Bartosik <[email protected]> wrote:
> >
> > ...
> >
> > > > > I don't have a real life use case to configure different trace
> > > > > instance for each callsite.
> > > > > I just tried to be as much flexible as possible.
> > > > >
> > > >
> > > > Ive come around to agree - I looked back at some old threads
> > > > (that I was a part of, and barely remembered :-}
> > > >
> > > > At least Sean Paul, Lyude, Simon Ser, Pekka Paalanen
> > > > have expressed a desire for a "flight-recorder"
> > > > it'd be hard to say now that 2-3 such buffers would always be enough,
> > > > esp as theres a performance reason for having your own.
> >
> > A Wayland compositor has roughly three important things where the kernel
> > debugs might come in handy:
> > - input
> > - DRM KMS
> > - DRM GPU rendering
> >
> > DRM KMS is the one I've been thinking of in the flight recorder context
> > the most, because KMS hardware varies a lot, and there is plenty of
> > room for both KMS drivers and KMS userspace to go wrong. The usual
> > result is your display doesn't work, so the system is practically
> > unusable to the end user. In the wild, the simplest or maybe the only
> > way out of that may be a reboot, maybe an automated one (e.g. digital
> > signage). In order to debug such problems, we would need both
> > compositor logs and the relevant kernel debug messages.
> >
> > For example, Weston already has a flight recorder framework of its own,
> > so we have the compositor debug logs. It would be useful to get the
> > selected kernel debug logs in the same place. It could be used for
> > automated or semi-manual bug reporting, for example, making the
> > administrator or end user life much easier reporting issues.
> >
> > Since this is usually a production environment, and the Wayland
> > compositor runs without root privileges, we need something that works
> > with that. We would likely want the kernel debug messages in the
> > compositor to combine and order them properly with the compositor debug
> > messages.
> >
> > It's quite likely that developers would like to pick and choose which
> > kernel debug messages might be interesting enough to record, to avoid
> > excessive log flooding. The flight recorder in Weston is fixed size to
> > avoid running out of memory or disk space. I can also see that Weston
> > could have debugging options that affect which kernel debug messages it
> > subscribes to. We can have a reasonable default setup that allows us to
> > pinpoint the problem area and figure out most problems, and if needed,
> > we could ask the administrator pass another debug option to Weston. It
> > helps if there is just one place to configure everything about the
> > compositor.
> >
> > This implies that it would be really nice to have userspace subscriber
> > specific debug message streams from the kernel, or a good way to filter
> > the messages we want. A Wayland compositor would not be interested in
> > file system or wireless debugs for example, but another system
> > component might be. There is also a security aspect of which component is
> > allowed to see which messages in case they could contain anything
> > sensitive (input debug could contain typed passwords).
> >
> > Configuring the kernel debug message selection for our debug message
> > stream can and probably should require elevated privileges, and we can
> > likely solve that in userspace with a daemon or such, to allow the
> > Wayland compositor to run as a regular user.
> >
> > Thinking of desktop systems, and especially physically multi-seat systems:
> > - there can be multiple different Wayland compositors running simultaneously
> > - each of them may want debug messages only from a specific DRM KMS
> > device instance, and not from others
> > - each of them may have a different idea of which debug messages are important
> > - because DRM KMS leasing is a thing, different compositor instances
> > could be using the same DRM KMS device instance simultaneously; since
> > this is specific to DRM KMS, and it should be harmless to get a
> > little too much DRM KMS debug (that is, from the whole device instead
> > of just the leased parts), it may not be worth to consider splitting
> > debug message streams this far.
> >
> > If userspace is offered some standardised fields in kernel debug
> > structures, then userspace could do some filtering on its own too, but I
> > guess it would be better to filter at the source and not need that.
> >
> > There is also an anti-goal. The kernel debug message contents are
> > specifically not machine-parsable. I very much do not want to impose
> > debug strings as ABI, they are for human (and AI?) readers only.
> >
> >
> > As a summary, here are the most important requirements first:
> > - usable in production as a normal thing to enable always by default
> > - final delivery to unprivileged userspace process
>
> I think this is the one that's trickiest, and I don't fully understand why
> you need it. The issues I'm seeing:
>
> - logs tend to leak a lot of kernel internal state that's useful for
> attacks. There's measures for the worst (like obfuscating kernel
> pointers by hashing them), so there's always going to be a difference
> here between what full sysadmin can get and what unpriviledged userspace
> can get. And there's always a risk we miss something that we should
> obfuscate but didn't.
>
> - handing this to userspace increases the risks it becomes uapi. Who's
> going to stop compositors from sussing out the reason an atomic commit
> failed from the logs if they can get them easily, and these logs contain
> very interesting information about why something failed?
>
> Much better if journald or a crash handler assemebles all the different
> flight recorder logs and packages it into a bug report so that the
> compositor cannot ever get at these directly. Yeah this needs some OS
> support with a dbus request or similar so that the compositor can ask
> for a crash dump with everything relevant to its session.
>
This is similar to what we plan to do in ChromeOS. We want to enable
writing debug logs of each subsystem/driver
of interest (e.g. thunderbolt) into a separate trace instance and when
an issue happens and a user submits
a feedback report we will attach the captured logs from trace
instances to the report.
> - the idea of an in-kernel flight recorder is that it's really fast. The
> entire tracing infra is built such that recording an event is really
> quick, but printing it is not - the entire string formatting is delayed
> to when userspace reads the buffers. If you constantly push the log
> messages to userspace we toss the advantage of the low-overhead
> in-kernel flight recorder. If you push logs to dmesg there's a
> substantial measureable overhead which you don't really want in
> production, and your requirement would impose the same.
>
> - I'm not sure how this is supposed to mesh with userspace log aggregators
> like journald when every compositor has it's own flight recorder on top.
> Feels a bit like a solution that ignores the entire os stack and assumes
> that the only pieces we can touch are the kernel and the compositor to
> get to such a flight recorder.
>
> You might object that events will get out-of-order if you merge multiple
> logs after the fact, but that's the case anyway if we use the tracing
> framework since that's always a ringbuffer within the kernel and not
> synchronous. The only thing we could do is allow userspace to push
> markers into that ringbuffer, which is easily done by adding more debug
> output lines (heck we could even add a logging cookie to certain ioctl
> when userspace really cares about knowing exact ordering of it's
> requests with the stuff the kernel does).
>
> - If you really want direct deliver to userspace I guess we could do
> something where sessiond opens the flight recorder fd for you, sets it
> all up and passes it to the compositor. But I'm really not a big fan of
> sending the full kms dbg spam to compositors to freely digest in real
> time.
>
> > - per debug-print selection of messages (finer or coarser, categories
> > within a kernel sub-system could be enough)
> > - per originating device (driver instance) selection of messages
>
> The dyndbg stuff can do all that already, which is why I'm so much in
> favour of relying on that framework.
>
> > - all selections tailored separately for each userspace subscriber
> > (- per open device file description selection of messages)
>
> Again this feels like a userspace problem. Sessions could register what
> kind of info they need for their session, and something like journald can
> figure out how to record it all.
>
> If you want the kernel to keep separate flight recorders I guess we could
> add that, but I don't think it currently exists for the dyndbg stuff at
> least. Maybe a flight recorder v2 feature, once the basics are in.
>
> > That's my idea of it. It is interesting to see how far the requirements
> > can be reasonably realised.
>
> I think aside from the "make it available directly to unpriviledged
> userspace" everything sounds reasonable and doable.
>
> More on the process side of things, I think Jim is very much looking for
> acks and tested-by by people who are interested in better drm logging
> infra. That should help that things are moving in a direction that's
> actually useful, even when it's not yet entirely complete.
>
> Cheers, Sima
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
On Wed, 11 Oct 2023 11:42:24 +0200
Daniel Vetter <[email protected]> wrote:
> On Wed, Oct 11, 2023 at 11:48:16AM +0300, Pekka Paalanen wrote:
> > On Tue, 10 Oct 2023 10:06:02 -0600
> > [email protected] wrote:
> >
> > > since I name-dropped you all,
> >
> > Hi everyone,
> >
> > I'm really happy to see this topic being developed! I've practically
> > forgot about it myself, but the need for it has not diminished at all.
> >
> > I didn't understand much of the conversation, so I'll just reiterate
> > what I would use it for, as a Wayland compositor developer.
> >
> > I added a few more cc's to get better coverage of DRM and Wayland
> > compositor developers.
> >
> > > On Tue, Oct 10, 2023 at 10:01 AM <[email protected]> wrote:
> > > >
> > > > On Mon, Oct 9, 2023 at 4:47 PM Łukasz Bartosik <[email protected]> wrote:
> >
> > ...
> >
> > > > > I don't have a real life use case to configure different trace
> > > > > instance for each callsite.
> > > > > I just tried to be as much flexible as possible.
> > > > >
> > > >
> > > > Ive come around to agree - I looked back at some old threads
> > > > (that I was a part of, and barely remembered :-}
> > > >
> > > > At least Sean Paul, Lyude, Simon Ser, Pekka Paalanen
> > > > have expressed a desire for a "flight-recorder"
> > > > it'd be hard to say now that 2-3 such buffers would always be enough,
> > > > esp as theres a performance reason for having your own.
> >
> > A Wayland compositor has roughly three important things where the kernel
> > debugs might come in handy:
> > - input
> > - DRM KMS
> > - DRM GPU rendering
> >
> > DRM KMS is the one I've been thinking of in the flight recorder context
> > the most, because KMS hardware varies a lot, and there is plenty of
> > room for both KMS drivers and KMS userspace to go wrong. The usual
> > result is your display doesn't work, so the system is practically
> > unusable to the end user. In the wild, the simplest or maybe the only
> > way out of that may be a reboot, maybe an automated one (e.g. digital
> > signage). In order to debug such problems, we would need both
> > compositor logs and the relevant kernel debug messages.
> >
> > For example, Weston already has a flight recorder framework of its own,
> > so we have the compositor debug logs. It would be useful to get the
> > selected kernel debug logs in the same place. It could be used for
> > automated or semi-manual bug reporting, for example, making the
> > administrator or end user life much easier reporting issues.
> >
> > Since this is usually a production environment, and the Wayland
> > compositor runs without root privileges, we need something that works
> > with that. We would likely want the kernel debug messages in the
> > compositor to combine and order them properly with the compositor debug
> > messages.
> >
> > It's quite likely that developers would like to pick and choose which
> > kernel debug messages might be interesting enough to record, to avoid
> > excessive log flooding. The flight recorder in Weston is fixed size to
> > avoid running out of memory or disk space. I can also see that Weston
> > could have debugging options that affect which kernel debug messages it
> > subscribes to. We can have a reasonable default setup that allows us to
> > pinpoint the problem area and figure out most problems, and if needed,
> > we could ask the administrator pass another debug option to Weston. It
> > helps if there is just one place to configure everything about the
> > compositor.
> >
> > This implies that it would be really nice to have userspace subscriber
> > specific debug message streams from the kernel, or a good way to filter
> > the messages we want. A Wayland compositor would not be interested in
> > file system or wireless debugs for example, but another system
> > component might be. There is also a security aspect of which component is
> > allowed to see which messages in case they could contain anything
> > sensitive (input debug could contain typed passwords).
> >
> > Configuring the kernel debug message selection for our debug message
> > stream can and probably should require elevated privileges, and we can
> > likely solve that in userspace with a daemon or such, to allow the
> > Wayland compositor to run as a regular user.
> >
> > Thinking of desktop systems, and especially physically multi-seat systems:
> > - there can be multiple different Wayland compositors running simultaneously
> > - each of them may want debug messages only from a specific DRM KMS
> > device instance, and not from others
> > - each of them may have a different idea of which debug messages are important
> > - because DRM KMS leasing is a thing, different compositor instances
> > could be using the same DRM KMS device instance simultaneously; since
> > this is specific to DRM KMS, and it should be harmless to get a
> > little too much DRM KMS debug (that is, from the whole device instead
> > of just the leased parts), it may not be worth to consider splitting
> > debug message streams this far.
> >
> > If userspace is offered some standardised fields in kernel debug
> > structures, then userspace could do some filtering on its own too, but I
> > guess it would be better to filter at the source and not need that.
> >
> > There is also an anti-goal. The kernel debug message contents are
> > specifically not machine-parsable. I very much do not want to impose
> > debug strings as ABI, they are for human (and AI?) readers only.
> >
> >
> > As a summary, here are the most important requirements first:
> > - usable in production as a normal thing to enable always by default
> > - final delivery to unprivileged userspace process
>
> I think this is the one that's trickiest, and I don't fully understand why
> you need it.
It's not an arbitrary unprivileged userspace process. It is an
unprivileged userspace process configured by the system administrator
to receive the information.
I would simply like it to be so that you don't need to have a userspace
process running as root shovelling the bulk debug message data
manually, but that the target process can read it directly e.g. by
getting passed an appropriate fd to it.
> The issues I'm seeing:
>
> - logs tend to leak a lot of kernel internal state that's useful for
> attacks. There's measures for the worst (like obfuscating kernel
> pointers by hashing them), so there's always going to be a difference
> here between what full sysadmin can get and what unpriviledged userspace
> can get. And there's always a risk we miss something that we should
> obfuscate but didn't.
>
> - handing this to userspace increases the risks it becomes uapi. Who's
> going to stop compositors from sussing out the reason an atomic commit
> failed from the logs if they can get them easily, and these logs contain
> very interesting information about why something failed?
That problem exists regardless of where in userspace the data goes to.
Compositors could as well get it from journald if they need it. Or from
the same place where 'dmesg' gets its stuff from, with a tiny helper
with the needed access.
> Much better if journald or a crash handler assemebles all the different
> flight recorder logs and packages it into a bug report so that the
> compositor cannot ever get at these directly. Yeah this needs some OS
> support with a dbus request or similar so that the compositor can ask
> for a crash dump with everything relevant to its session.
Ok.
> - the idea of an in-kernel flight recorder is that it's really fast. The
> entire tracing infra is built such that recording an event is really
> quick, but printing it is not - the entire string formatting is delayed
> to when userspace reads the buffers. If you constantly push the log
> messages to userspace we toss the advantage of the low-overhead
> in-kernel flight recorder. If you push logs to dmesg there's a
> substantial measureable overhead which you don't really want in
> production, and your requirement would impose the same.
Right.
> - I'm not sure how this is supposed to mesh with userspace log aggregators
> like journald when every compositor has it's own flight recorder on top.
> Feels a bit like a solution that ignores the entire os stack and assumes
> that the only pieces we can touch are the kernel and the compositor to
> get to such a flight recorder.
>
> You might object that events will get out-of-order if you merge multiple
> logs after the fact, but that's the case anyway if we use the tracing
> framework since that's always a ringbuffer within the kernel and not
> synchronous. The only thing we could do is allow userspace to push
> markers into that ringbuffer, which is easily done by adding more debug
> output lines (heck we could even add a logging cookie to certain ioctl
> when userspace really cares about knowing exact ordering of it's
> requests with the stuff the kernel does).
That's a good point. So we need to agree on the clock to timestamp all
messages in all components, both kernel and userspace, and then use a
log aggregator to interleave logs by timestamp. Is that clock already
defined as UABI?
The compositor flight recorder is a ring buffer too, because it would
be far too much volume to send it all to another process all the time.
>
> - If you really want direct deliver to userspace I guess we could do
> something where sessiond opens the flight recorder fd for you, sets it
> all up and passes it to the compositor. But I'm really not a big fan of
> sending the full kms dbg spam to compositors to freely digest in real
> time.
Well, something has to get the logs in userspace, be that a compositor,
systemd-journald, or another aggregator. And that needs to be available
in production, so not via debugfs. If it does not need too run as root,
even better.
It doesn't need to be real-time all the time, it is enough if it can be
triggered when needed.
> > - per debug-print selection of messages (finer or coarser, categories
> > within a kernel sub-system could be enough)
> > - per originating device (driver instance) selection of messages
>
> The dyndbg stuff can do all that already, which is why I'm so much in
> favour of relying on that framework.
Cool. I have no idea of any frameworks, and no opinion of which one
should be used, anyway.
> > - all selections tailored separately for each userspace subscriber
> > (- per open device file description selection of messages)
>
> Again this feels like a userspace problem. Sessions could register what
> kind of info they need for their session, and something like journald can
> figure out how to record it all.
Only if the kernel actually attaches all the required information to
the debug messages *in machine readable form* so that userspace
actually can do the filtering. And that makes *that* information UABI.
Maybe that's fine? I wouldn't know.
> If you want the kernel to keep separate flight recorders I guess we could
> add that, but I don't think it currently exists for the dyndbg stuff at
> least. Maybe a flight recorder v2 feature, once the basics are in.
Maybe, maybe not necessary, I'm not sure. The thing is, ring buffers
are of fixed size for a reason, they always overwrite oldest messages,
so the volume of messages with ring buffer size determines how long
into the past you can get the messages. Different message sources can
have widely different bandwidth (volume), and different use cases need
logs for different past time periods.
It's really cool if there is no need to copy the same data into
multiple ring buffers, but it is bad to not be able to get logs far
enough back when some unrelated thing is spamming the same ring buffer.
You said that turning the kernel ring buffer contents into strings is a
very heavy operation, so it is not possible to push this scope
separation to userspace, right?
> > That's my idea of it. It is interesting to see how far the requirements
> > can be reasonably realised.
>
> I think aside from the "make it available directly to unpriviledged
> userspace" everything sounds reasonable and doable.
I never said "free for all". I meant specifically chosen unprivileged
(not running as root or with caps) processes.
> More on the process side of things, I think Jim is very much looking for
> acks and tested-by by people who are interested in better drm logging
> infra. That should help that things are moving in a direction that's
> actually useful, even when it's not yet entirely complete.
I'd love to help with that, but I can barely afford to send this email.
Do let me know if my comments are not helping, so I can do other things
instead.
Thanks,
pq
On Thu, Oct 12, 2023 at 11:55:48AM +0300, Pekka Paalanen wrote:
> On Wed, 11 Oct 2023 11:42:24 +0200
> Daniel Vetter <[email protected]> wrote:
>
> > On Wed, Oct 11, 2023 at 11:48:16AM +0300, Pekka Paalanen wrote:
> > > On Tue, 10 Oct 2023 10:06:02 -0600
> > > [email protected] wrote:
> > >
> > > > since I name-dropped you all,
> > >
> > > Hi everyone,
> > >
> > > I'm really happy to see this topic being developed! I've practically
> > > forgot about it myself, but the need for it has not diminished at all.
> > >
> > > I didn't understand much of the conversation, so I'll just reiterate
> > > what I would use it for, as a Wayland compositor developer.
> > >
> > > I added a few more cc's to get better coverage of DRM and Wayland
> > > compositor developers.
> > >
> > > > On Tue, Oct 10, 2023 at 10:01 AM <[email protected]> wrote:
> > > > >
> > > > > On Mon, Oct 9, 2023 at 4:47 PM Łukasz Bartosik <[email protected]> wrote:
> > >
> > > ...
> > >
> > > > > > I don't have a real life use case to configure different trace
> > > > > > instance for each callsite.
> > > > > > I just tried to be as much flexible as possible.
> > > > > >
> > > > >
> > > > > Ive come around to agree - I looked back at some old threads
> > > > > (that I was a part of, and barely remembered :-}
> > > > >
> > > > > At least Sean Paul, Lyude, Simon Ser, Pekka Paalanen
> > > > > have expressed a desire for a "flight-recorder"
> > > > > it'd be hard to say now that 2-3 such buffers would always be enough,
> > > > > esp as theres a performance reason for having your own.
> > >
> > > A Wayland compositor has roughly three important things where the kernel
> > > debugs might come in handy:
> > > - input
> > > - DRM KMS
> > > - DRM GPU rendering
> > >
> > > DRM KMS is the one I've been thinking of in the flight recorder context
> > > the most, because KMS hardware varies a lot, and there is plenty of
> > > room for both KMS drivers and KMS userspace to go wrong. The usual
> > > result is your display doesn't work, so the system is practically
> > > unusable to the end user. In the wild, the simplest or maybe the only
> > > way out of that may be a reboot, maybe an automated one (e.g. digital
> > > signage). In order to debug such problems, we would need both
> > > compositor logs and the relevant kernel debug messages.
> > >
> > > For example, Weston already has a flight recorder framework of its own,
> > > so we have the compositor debug logs. It would be useful to get the
> > > selected kernel debug logs in the same place. It could be used for
> > > automated or semi-manual bug reporting, for example, making the
> > > administrator or end user life much easier reporting issues.
> > >
> > > Since this is usually a production environment, and the Wayland
> > > compositor runs without root privileges, we need something that works
> > > with that. We would likely want the kernel debug messages in the
> > > compositor to combine and order them properly with the compositor debug
> > > messages.
> > >
> > > It's quite likely that developers would like to pick and choose which
> > > kernel debug messages might be interesting enough to record, to avoid
> > > excessive log flooding. The flight recorder in Weston is fixed size to
> > > avoid running out of memory or disk space. I can also see that Weston
> > > could have debugging options that affect which kernel debug messages it
> > > subscribes to. We can have a reasonable default setup that allows us to
> > > pinpoint the problem area and figure out most problems, and if needed,
> > > we could ask the administrator pass another debug option to Weston. It
> > > helps if there is just one place to configure everything about the
> > > compositor.
> > >
> > > This implies that it would be really nice to have userspace subscriber
> > > specific debug message streams from the kernel, or a good way to filter
> > > the messages we want. A Wayland compositor would not be interested in
> > > file system or wireless debugs for example, but another system
> > > component might be. There is also a security aspect of which component is
> > > allowed to see which messages in case they could contain anything
> > > sensitive (input debug could contain typed passwords).
> > >
> > > Configuring the kernel debug message selection for our debug message
> > > stream can and probably should require elevated privileges, and we can
> > > likely solve that in userspace with a daemon or such, to allow the
> > > Wayland compositor to run as a regular user.
> > >
> > > Thinking of desktop systems, and especially physically multi-seat systems:
> > > - there can be multiple different Wayland compositors running simultaneously
> > > - each of them may want debug messages only from a specific DRM KMS
> > > device instance, and not from others
> > > - each of them may have a different idea of which debug messages are important
> > > - because DRM KMS leasing is a thing, different compositor instances
> > > could be using the same DRM KMS device instance simultaneously; since
> > > this is specific to DRM KMS, and it should be harmless to get a
> > > little too much DRM KMS debug (that is, from the whole device instead
> > > of just the leased parts), it may not be worth to consider splitting
> > > debug message streams this far.
> > >
> > > If userspace is offered some standardised fields in kernel debug
> > > structures, then userspace could do some filtering on its own too, but I
> > > guess it would be better to filter at the source and not need that.
> > >
> > > There is also an anti-goal. The kernel debug message contents are
> > > specifically not machine-parsable. I very much do not want to impose
> > > debug strings as ABI, they are for human (and AI?) readers only.
> > >
> > >
> > > As a summary, here are the most important requirements first:
> > > - usable in production as a normal thing to enable always by default
> > > - final delivery to unprivileged userspace process
> >
> > I think this is the one that's trickiest, and I don't fully understand why
> > you need it.
>
> It's not an arbitrary unprivileged userspace process. It is an
> unprivileged userspace process configured by the system administrator
> to receive the information.
>
> I would simply like it to be so that you don't need to have a userspace
> process running as root shovelling the bulk debug message data
> manually, but that the target process can read it directly e.g. by
> getting passed an appropriate fd to it.
Yeah I think an fd that just allows you to get at some ringbuffer is
probably fine.
> > The issues I'm seeing:
> >
> > - logs tend to leak a lot of kernel internal state that's useful for
> > attacks. There's measures for the worst (like obfuscating kernel
> > pointers by hashing them), so there's always going to be a difference
> > here between what full sysadmin can get and what unpriviledged userspace
> > can get. And there's always a risk we miss something that we should
> > obfuscate but didn't.
> >
> > - handing this to userspace increases the risks it becomes uapi. Who's
> > going to stop compositors from sussing out the reason an atomic commit
> > failed from the logs if they can get them easily, and these logs contain
> > very interesting information about why something failed?
>
> That problem exists regardless of where in userspace the data goes to.
> Compositors could as well get it from journald if they need it. Or from
> the same place where 'dmesg' gets its stuff from, with a tiny helper
> with the needed access.
I do think there's shades to this, if we make it trivially easy (like you
can set up the crash recorder with some ioctls on the drmfd) we're much
more likely to that things become uapi than if it's some
configfs/sysfs/whatever thing.
Nothing guarantees anything much, but then not screwing up uapi is largely
a statistical game, some screw up is inevitable, we just try to keep it at
manageable levels.
> > Much better if journald or a crash handler assemebles all the different
> > flight recorder logs and packages it into a bug report so that the
> > compositor cannot ever get at these directly. Yeah this needs some OS
> > support with a dbus request or similar so that the compositor can ask
> > for a crash dump with everything relevant to its session.
>
> Ok.
>
> > - the idea of an in-kernel flight recorder is that it's really fast. The
> > entire tracing infra is built such that recording an event is really
> > quick, but printing it is not - the entire string formatting is delayed
> > to when userspace reads the buffers. If you constantly push the log
> > messages to userspace we toss the advantage of the low-overhead
> > in-kernel flight recorder. If you push logs to dmesg there's a
> > substantial measureable overhead which you don't really want in
> > production, and your requirement would impose the same.
>
> Right.
>
> > - I'm not sure how this is supposed to mesh with userspace log aggregators
> > like journald when every compositor has it's own flight recorder on top.
> > Feels a bit like a solution that ignores the entire os stack and assumes
> > that the only pieces we can touch are the kernel and the compositor to
> > get to such a flight recorder.
> >
> > You might object that events will get out-of-order if you merge multiple
> > logs after the fact, but that's the case anyway if we use the tracing
> > framework since that's always a ringbuffer within the kernel and not
> > synchronous. The only thing we could do is allow userspace to push
> > markers into that ringbuffer, which is easily done by adding more debug
> > output lines (heck we could even add a logging cookie to certain ioctl
> > when userspace really cares about knowing exact ordering of it's
> > requests with the stuff the kernel does).
>
> That's a good point. So we need to agree on the clock to timestamp all
> messages in all components, both kernel and userspace, and then use a
> log aggregator to interleave logs by timestamp. Is that clock already
> defined as UABI?
I think for dmesg it's CLOCK_BOOTTIME for dmesg.
> The compositor flight recorder is a ring buffer too, because it would
> be far too much volume to send it all to another process all the time.
>
> >
> > - If you really want direct deliver to userspace I guess we could do
> > something where sessiond opens the flight recorder fd for you, sets it
> > all up and passes it to the compositor. But I'm really not a big fan of
> > sending the full kms dbg spam to compositors to freely digest in real
> > time.
>
> Well, something has to get the logs in userspace, be that a compositor,
> systemd-journald, or another aggregator. And that needs to be available
> in production, so not via debugfs. If it does not need too run as root,
> even better.
>
> It doesn't need to be real-time all the time, it is enough if it can be
> triggered when needed.
>
> > > - per debug-print selection of messages (finer or coarser, categories
> > > within a kernel sub-system could be enough)
> > > - per originating device (driver instance) selection of messages
> >
> > The dyndbg stuff can do all that already, which is why I'm so much in
> > favour of relying on that framework.
>
> Cool. I have no idea of any frameworks, and no opinion of which one
> should be used, anyway.
>
> > > - all selections tailored separately for each userspace subscriber
> > > (- per open device file description selection of messages)
> >
> > Again this feels like a userspace problem. Sessions could register what
> > kind of info they need for their session, and something like journald can
> > figure out how to record it all.
>
> Only if the kernel actually attaches all the required information to
> the debug messages *in machine readable form* so that userspace
> actually can do the filtering. And that makes *that* information UABI.
> Maybe that's fine? I wouldn't know.
Well if you configure the filters to go into separate ringbuffers for each
session (or whatever you want to split) it also becomes uapi.
Also I'd say that for the first cut just getting the logs out on demand
should be good enough, multi-gpu (or multi-compositor) systems are a step
further. We can figure those out when we get there.
> > If you want the kernel to keep separate flight recorders I guess we could
> > add that, but I don't think it currently exists for the dyndbg stuff at
> > least. Maybe a flight recorder v2 feature, once the basics are in.
>
> Maybe, maybe not necessary, I'm not sure. The thing is, ring buffers
> are of fixed size for a reason, they always overwrite oldest messages,
> so the volume of messages with ring buffer size determines how long
> into the past you can get the messages. Different message sources can
> have widely different bandwidth (volume), and different use cases need
> logs for different past time periods.
>
> It's really cool if there is no need to copy the same data into
> multiple ring buffers, but it is bad to not be able to get logs far
> enough back when some unrelated thing is spamming the same ring buffer.
>
> You said that turning the kernel ring buffer contents into strings is a
> very heavy operation, so it is not possible to push this scope
> separation to userspace, right?
I think it's the kernel that does the formatting, but honestly not sure
how this works with perf traces. Might be that it's actually userspace
doing the formatting later on so that it doesn't incur the overhead while
recording.
> > > That's my idea of it. It is interesting to see how far the requirements
> > > can be reasonably realised.
> >
> > I think aside from the "make it available directly to unpriviledged
> > userspace" everything sounds reasonable and doable.
>
> I never said "free for all". I meant specifically chosen unprivileged
> (not running as root or with caps) processes.
An idea that just crossed my mind is whether we should piggy-pack on top
of the devcoredump infrastructure. Essentially if your userspace has
enough privs to read those, it's probably also ok if it can read driver
side states when things go wrong.
That would also give us a really neat tie-in with the actual device.
> > More on the process side of things, I think Jim is very much looking for
> > acks and tested-by by people who are interested in better drm logging
> > infra. That should help that things are moving in a direction that's
> > actually useful, even when it's not yet entirely complete.
>
> I'd love to help with that, but I can barely afford to send this email.
>
> Do let me know if my comments are not helping, so I can do other things
> instead.
I think it's useful to get some consensus on what we're building.
-Sima
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
On Thu, 12 Oct 2023 11:53:52 +0200
Daniel Vetter <[email protected]> wrote:
> On Thu, Oct 12, 2023 at 11:55:48AM +0300, Pekka Paalanen wrote:
> > On Wed, 11 Oct 2023 11:42:24 +0200
> > Daniel Vetter <[email protected]> wrote:
> >
> > > On Wed, Oct 11, 2023 at 11:48:16AM +0300, Pekka Paalanen wrote:
...
> > > > - all selections tailored separately for each userspace subscriber
> > > > (- per open device file description selection of messages)
> > >
> > > Again this feels like a userspace problem. Sessions could register what
> > > kind of info they need for their session, and something like journald can
> > > figure out how to record it all.
> >
> > Only if the kernel actually attaches all the required information to
> > the debug messages *in machine readable form* so that userspace
> > actually can do the filtering. And that makes *that* information UABI.
> > Maybe that's fine? I wouldn't know.
>
> Well if you configure the filters to go into separate ringbuffers for each
> session (or whatever you want to split) it also becomes uapi.
It's a different UAPI: filter configuration vs. message structure. I
don't mind which it is, I just suspect one is easier to maintain and
extend than the other.
> Also I'd say that for the first cut just getting the logs out on demand
> should be good enough, multi-gpu (or multi-compositor) systems are a step
> further. We can figure those out when we get there.
This reminds me of what you recently said in IRC about a very different
topic:
<sima> swick[m], tell this past me roughly 10 years ago, would
have been easy to add into the design back when there was no
driver code yet
I just want to mention today everything I can see as useful. It's up to
the people doing the actual work to decide what they include and how.
Thanks,
pq
On Thu, Oct 12, 2023 at 01:39:44PM +0300, Pekka Paalanen wrote:
> On Thu, 12 Oct 2023 11:53:52 +0200
> Daniel Vetter <[email protected]> wrote:
>
> > On Thu, Oct 12, 2023 at 11:55:48AM +0300, Pekka Paalanen wrote:
> > > On Wed, 11 Oct 2023 11:42:24 +0200
> > > Daniel Vetter <[email protected]> wrote:
> > >
> > > > On Wed, Oct 11, 2023 at 11:48:16AM +0300, Pekka Paalanen wrote:
>
> ...
>
> > > > > - all selections tailored separately for each userspace subscriber
> > > > > (- per open device file description selection of messages)
> > > >
> > > > Again this feels like a userspace problem. Sessions could register what
> > > > kind of info they need for their session, and something like journald can
> > > > figure out how to record it all.
> > >
> > > Only if the kernel actually attaches all the required information to
> > > the debug messages *in machine readable form* so that userspace
> > > actually can do the filtering. And that makes *that* information UABI.
> > > Maybe that's fine? I wouldn't know.
> >
> > Well if you configure the filters to go into separate ringbuffers for each
> > session (or whatever you want to split) it also becomes uapi.
>
> It's a different UAPI: filter configuration vs. message structure. I
> don't mind which it is, I just suspect one is easier to maintain and
> extend than the other.
>
> > Also I'd say that for the first cut just getting the logs out on demand
> > should be good enough, multi-gpu (or multi-compositor) systems are a step
> > further. We can figure those out when we get there.
>
> This reminds me of what you recently said in IRC about a very different
> topic:
>
> <sima> swick[m], tell this past me roughly 10 years ago, would
> have been easy to add into the design back when there was no
> driver code yet
>
> I just want to mention today everything I can see as useful. It's up to
> the people doing the actual work to decide what they include and how.
I actually pondered this a bit more today, and I think even with hindsight
the atomic design we ended up with was probably rather close to optimal.
Sure there's a bunch of things that would have been nice to include, but
another very hard requirement of atomic was that it's feasible to convert
current drivers over to it. And I think going full free-standing state
structures with unlimited (at least at the design level) queue depth would
have been a bridge too far.
The hacks and conversion helpers are all gone by now, but "you can just
peek at the object struct to get your state" was a huge help in reducing
the conversion churn.
But it definitely resulted in a big price we're still paying.
tldr I don't think getting somewhere useful, even if somewhat deficient,
is bad.
-Sima
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
> If you want the kernel to keep separate flight recorders I guess we could
> add that, but I don't think it currently exists for the dyndbg stuff at
> least. Maybe a flight recorder v2 feature, once the basics are in.
>
dyndbg has +p writes to syslog
+T would separately independently write the same to global trace
This would allow graceful switchover to tracefs,
without removing logging from dmesg, where most folks
(and any monitor tools) would expect it.
Lukas (iiuc) wants to steer each site to just 1 destination.
Or maybe (in addition to +p > syslog) one trace destination,
either global via events, or a separate tracebuf
Im ambivalent, but thinking the smooth rollover from syslog to trace
might be worth having to ease migration / weaning off syslog.
And we have a 4 byte hole in struct _ddebug we could just use.
Unless the align 8 is optional on 32-bits,
I think we're never gonna close the hole anywhere.
is align 8 a generic expression of an architectural simplifying constraint ?
or a need for 1-7 ptr offsets ?
> > That's my idea of it. It is interesting to see how far the requirements
> > can be reasonably realised.
>
> I think aside from the "make it available directly to unpriviledged
> userspace" everything sounds reasonable and doable.
>
> More on the process side of things, I think Jim is very much looking for
> acks and tested-by by people who are interested in better drm logging
> infra. That should help that things are moving in a direction that's
> actually useful, even when it's not yet entirely complete.
>
yes, please. Now posted at
https://lore.kernel.org/lkml/[email protected]/T/#t
Lukas, I managed to miss your email in the send phase.
please consider yourself a direct recipient :-)
thanks everyone
> Cheers, Sima
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
On Thu, 12 Oct 2023 11:53:52 +0200
Daniel Vetter <[email protected]> wrote:
> > You said that turning the kernel ring buffer contents into strings is a
> > very heavy operation, so it is not possible to push this scope
> > separation to userspace, right?
>
> I think it's the kernel that does the formatting, but honestly not sure
> how this works with perf traces. Might be that it's actually userspace
> doing the formatting later on so that it doesn't incur the overhead while
> recording.
perf and trace-cmd do the formatting in user space via libtraceevent:
git://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git
It reads the format files of the events:
/sys/kernel/tracing/events/*/*/format
and uses that to read the raw data saved from the kernel into human
readable output.
Note, this means that addresses coming from kernel trace events are not
hashed!
-- Steve
czw., 12 paź 2023 o 20:48 <[email protected]> napisał(a):
>
> > If you want the kernel to keep separate flight recorders I guess we could
> > add that, but I don't think it currently exists for the dyndbg stuff at
> > least. Maybe a flight recorder v2 feature, once the basics are in.
> >
>
> dyndbg has +p writes to syslog
> +T would separately independently write the same to global trace
>
> This would allow graceful switchover to tracefs,
> without removing logging from dmesg, where most folks
> (and any monitor tools) would expect it.
>
> Lukas (iiuc) wants to steer each site to just 1 destination.
> Or maybe (in addition to +p > syslog) one trace destination,
> either global via events, or a separate tracebuf
>
> Im ambivalent, but thinking the smooth rollover from syslog to trace
> might be worth having to ease migration / weaning off syslog.
>
> And we have a 4 byte hole in struct _ddebug we could just use.
I'm glad you brought that up. This means we can leave class_id field
untouched, have separate +T in flags (for consistency)
and dst_id can be easily 8 bits wide.
Also can you point me to the latest version of writing debug logs to
trace events (+T option).
I would like to base trace instances work on that because both are
closely related.
> Unless the align 8 is optional on 32-bits,
I verified with "gcc -g -m32 ..." that the align(8) is honored on 32 bits.
> I think we're never gonna close the hole anywhere.
>
:)
> is align 8 a generic expression of an architectural simplifying constraint ?
> or a need for 1-7 ptr offsets ?
>
>
>
>
> > > That's my idea of it. It is interesting to see how far the requirements
> > > can be reasonably realised.
> >
> > I think aside from the "make it available directly to unpriviledged
> > userspace" everything sounds reasonable and doable.
> >
> > More on the process side of things, I think Jim is very much looking for
> > acks and tested-by by people who are interested in better drm logging
> > infra. That should help that things are moving in a direction that's
> > actually useful, even when it's not yet entirely complete.
> >
>
> yes, please. Now posted at
>
> https://lore.kernel.org/lkml/[email protected]/T/#t
>
> Lukas, I managed to miss your email in the send phase.
> please consider yourself a direct recipient :-)
>
> thanks everyone
>
> > Cheers, Sima
> > --
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > http://blog.ffwll.ch
adding in Jason, not sure how he got missed.
On Mon, Oct 16, 2023 at 9:13 AM Łukasz Bartosik <[email protected]> wrote:
>
> czw., 12 paź 2023 o 20:48 <[email protected]> napisał(a):
> >
> > > If you want the kernel to keep separate flight recorders I guess we could
> > > add that, but I don't think it currently exists for the dyndbg stuff at
> > > least. Maybe a flight recorder v2 feature, once the basics are in.
> > >
> >
> > dyndbg has +p writes to syslog
> > +T would separately independently write the same to global trace
> >
> > This would allow graceful switchover to tracefs,
> > without removing logging from dmesg, where most folks
> > (and any monitor tools) would expect it.
> >
> > Lukas (iiuc) wants to steer each site to just 1 destination.
> > Or maybe (in addition to +p > syslog) one trace destination,
> > either global via events, or a separate tracebuf
> >
> > Im ambivalent, but thinking the smooth rollover from syslog to trace
> > might be worth having to ease migration / weaning off syslog.
> >
> > And we have a 4 byte hole in struct _ddebug we could just use.
>
> I'm glad you brought that up. This means we can leave class_id field
> untouched, have separate +T in flags (for consistency)
> and dst_id can be easily 8 bits wide.
>
> Also can you point me to the latest version of writing debug logs to
> trace events (+T option).
> I would like to base trace instances work on that because both are
> closely related.
>
Ive got 3 branches, all stacked up on rc6
(last I checked, they were clean on drm-tip)
https://github.com/jimc/linux/tree/dd-fix-7c
the regression fix, reposting this version next.
it also grabs another flag bit: _DPRINTK_FLAGS_INCL_LOOKUP
https://github.com/jimc/linux/tree/dd-shrink-3b
split modname,filename,function to new __dyndbg_sites section
loads the 3 column values and their intervals into 3 maple trees
and implements 3 accessor functions to retrieve the vals
from the descriptor addresses.
it "works" but doesnt erase intervals properly on rmmod
it also claims another flag - _DPRINTK_FLAGS_PREFIX_CACHED
and uses it to mark cached prefix fragment that get created for enabled calls.
https://github.com/jimc/linux/tree/dd-kitchen-sink
this adds the +T flag stuff.
its still a little fuzzy, esp around the descriptor arg -
using it to render the prefix str at buffer-render time
got UNSAFE warnings - likely due to loadable module
descriptors which could or did go away between
capture and render (after rmmod)
> > Unless the align 8 is optional on 32-bits,
>
> I verified with "gcc -g -m32 ..." that the align(8) is honored on 32 bits.
>
this is sorta the opposite of what I was probing, but I think result
is the same.
istm align(8) is only for JUMP_LABEL, nothing else in the struct
appears to need it
so moving it to the top trades the hole for padding.
> > I think we're never gonna close the hole anywhere.
> >
>
> :)
>
> > is align 8 a generic expression of an architectural simplifying constraint ?
> > or a need for 1-7 ptr offsets ?
> >
> >
> >
> >
> > > > That's my idea of it. It is interesting to see how far the requirements
> > > > can be reasonably realised.
> > >
> > > I think aside from the "make it available directly to unpriviledged
> > > userspace" everything sounds reasonable and doable.
> > >
> > > More on the process side of things, I think Jim is very much looking for
> > > acks and tested-by by people who are interested in better drm logging
> > > infra. That should help that things are moving in a direction that's
> > > actually useful, even when it's not yet entirely complete.
> > >
> >
> > yes, please. Now posted at
> >
> > https://lore.kernel.org/lkml/[email protected]/T/#t
> >
> > Lukas, I managed to miss your email in the send phase.
> > please consider yourself a direct recipient :-)
> >
> > thanks everyone
> >
> > > Cheers, Sima
> > > --
> > > Daniel Vetter
> > > Software Engineer, Intel Corporation
> > > http://blog.ffwll.ch
śr., 18 paź 2023 o 05:08 <[email protected]> napisał(a):
>
> adding in Jason, not sure how he got missed.
>
> On Mon, Oct 16, 2023 at 9:13 AM Łukasz Bartosik <[email protected]> wrote:
> >
> > czw., 12 paź 2023 o 20:48 <[email protected]> napisał(a):
> > >
> > > > If you want the kernel to keep separate flight recorders I guess we could
> > > > add that, but I don't think it currently exists for the dyndbg stuff at
> > > > least. Maybe a flight recorder v2 feature, once the basics are in.
> > > >
> > >
> > > dyndbg has +p writes to syslog
> > > +T would separately independently write the same to global trace
> > >
> > > This would allow graceful switchover to tracefs,
> > > without removing logging from dmesg, where most folks
> > > (and any monitor tools) would expect it.
> > >
> > > Lukas (iiuc) wants to steer each site to just 1 destination.
> > > Or maybe (in addition to +p > syslog) one trace destination,
> > > either global via events, or a separate tracebuf
> > >
> > > Im ambivalent, but thinking the smooth rollover from syslog to trace
> > > might be worth having to ease migration / weaning off syslog.
> > >
> > > And we have a 4 byte hole in struct _ddebug we could just use.
> >
> > I'm glad you brought that up. This means we can leave class_id field
> > untouched, have separate +T in flags (for consistency)
> > and dst_id can be easily 8 bits wide.
> >
> > Also can you point me to the latest version of writing debug logs to
> > trace events (+T option).
> > I would like to base trace instances work on that because both are
> > closely related.
> >
>
> Ive got 3 branches, all stacked up on rc6
> (last I checked, they were clean on drm-tip)
>
> https://github.com/jimc/linux/tree/dd-fix-7c
> the regression fix, reposting this version next.
> it also grabs another flag bit: _DPRINTK_FLAGS_INCL_LOOKUP
>
>
> https://github.com/jimc/linux/tree/dd-shrink-3b
> split modname,filename,function to new __dyndbg_sites section
> loads the 3 column values and their intervals into 3 maple trees
> and implements 3 accessor functions to retrieve the vals
> from the descriptor addresses.
> it "works" but doesnt erase intervals properly on rmmod
> it also claims another flag - _DPRINTK_FLAGS_PREFIX_CACHED
> and uses it to mark cached prefix fragment that get created for enabled calls.
>
> https://github.com/jimc/linux/tree/dd-kitchen-sink
> this adds the +T flag stuff.
> its still a little fuzzy, esp around the descriptor arg -
> using it to render the prefix str at buffer-render time
> got UNSAFE warnings - likely due to loadable module
> descriptors which could or did go away between
> capture and render (after rmmod)
>
Great, I will use commits related to adding +T from this tree as a
baseline for adding trace instances.
>
>
>
> > > Unless the align 8 is optional on 32-bits,
> >
> > I verified with "gcc -g -m32 ..." that the align(8) is honored on 32 bits.
> >
>
> this is sorta the opposite of what I was probing, but I think result
> is the same.
> istm align(8) is only for JUMP_LABEL, nothing else in the struct
> appears to need it
> so moving it to the top trades the hole for padding.
>
Ahh I see what you meant now. Although I have to admit that looking
at the jump label code and static key usage cases
around the kernel code I don't see where align(8) requirement for jump
label comes from.
>
>
> > > I think we're never gonna close the hole anywhere.
> > >
> >
> > :)
> >
> > > is align 8 a generic expression of an architectural simplifying constraint ?
> > > or a need for 1-7 ptr offsets ?
> > >
> > >
> > >
> > >
> > > > > That's my idea of it. It is interesting to see how far the requirements
> > > > > can be reasonably realised.
> > > >
> > > > I think aside from the "make it available directly to unpriviledged
> > > > userspace" everything sounds reasonable and doable.
> > > >
> > > > More on the process side of things, I think Jim is very much looking for
> > > > acks and tested-by by people who are interested in better drm logging
> > > > infra. That should help that things are moving in a direction that's
> > > > actually useful, even when it's not yet entirely complete.
> > > >
> > >
> > > yes, please. Now posted at
> > >
> > > https://lore.kernel.org/lkml/[email protected]/T/#t
> > >
> > > Lukas, I managed to miss your email in the send phase.
> > > please consider yourself a direct recipient :-)
> > >
> > > thanks everyone
> > >
> > > > Cheers, Sima
> > > > --
> > > > Daniel Vetter
> > > > Software Engineer, Intel Corporation
> > > > http://blog.ffwll.ch
On 9/15/23 11:48 AM, Łukasz Bartosik wrote:
> Add support for selection of dynamic debug logs destination.
> When enabled it allows to configure destination of each callsite
> individually. The option adds a framework (described by struct
> ddebug_dst_ops) which provides flexible means to add a new destination
> for debug logs. On top of the framework support for trace instance as
> destination is added. By default destination of debug logs is syslog.
> Syslog can be set explicitly by using dst keyword or is selected by
> default when dst keyword is omitted, for example:
>
> localhost ~ # echo -n "module usbcore dst syslog =p" >
> <debugfs>/dynamic_debug/control
> or
>
> localhost ~ # echo -n "module usbcore =p" >
> <debugfs>/dynamic_debug/control
>
> To enable a trace instance as a destination of debug logs a user
> can for example issue the command:
>
> localhost ~ # echo -n "module usbcore dst trace:usb =p" >
> <debugfs>/dynamic_debug/control
Sorry for the late reply here. So I normally thing of the dynamic debug
keywords- func,file,line,etc... as 'selectors'. Like these control which
sites are selected. Whereas the 'dst' keyword is where the output of
what is selected goes to. So it feels like it should be more like something:
echo "module usbcore =T:usb" > <debugfs>/dyanmic_debug/control
And then if there are other flags maybe we need a comma?
echo "module usbcore =T:usb,ptm" > <debugfs>/dyanmic_debug/control
or
echo "module usbcore =ptmT:usb" > <debugfs>/dyanmic_debug/control
And if you just have 'T' like:
echo "module usbcore =ptmT" > <debugfs>/dyanmic_debug/control
That means use the 'default' trace buffer. And 'p' continues to mean the
printk buffer. So you could send lines to both. But you can't send
output to more than one trace buffer (although we could relax that in
the future, if needed with another ':' separator?).
Thanks,
-Jason
>
> The command will write debug logs from usbcore subsystem to the
> trace instance named "usb" and the captured logs can be read
> later with the command:
>
> localhost ~ # cat <debugfs>/tracing/instances/usb/trace
>
> When trace instance name is omitted the debug logs will be written
> to "ddebug_default" trace instance:
>
> localhost ~ # cat <debugfs>/tracing/instances/ddebug_default/trace
>
> The DYNAMIC_DEBUG_DST option adds destination information
> in the dynamic debug control:
>
> localhost ~ # cat <debugfs>/dynamic_debug/control
> drivers/usb/core/usb-acpi.c:143 [usbcore]usb_acpi_set_power_state =p "acpi: power failed to be set\n" trace:usb
> drivers/usb/core/usb-acpi.c:141 [usbcore]usb_acpi_set_power_state =p "acpi: power was set to %d\n" trace:usb
>
> There are two modes of operation for dynamic debug logs destination
> which can be selected during configuration stage of a kernel:
>
> * Dynamic - callsites configuration and destination of debug logs
> can be dynamically changed during runtime. This has slight impact
> on performance because it requires SRCU to be used for synchronization.
>
> * Static - callsites configuration and destination of debug logs
> can be configured only from a kernel boot command line. This option
> hides <debugfs>/dynamic_debug/control interface. It does not require
> SRCU synchronization because of the static configuration. It also
> provides convenient means to capture debug logs on production systems
> for issues which are difficult to reproduce but on the other hand
> do not expose <debugfs>/dynamic_debug/control interface for security
> reasons.
>
> For example to capture thunderbolt subsystem debug logs on production
> system to trace instance named "thunderbolt":
> * enable kernel configuration option CONFIG_DYNAMIC_DEBUG_CORE,
> * enable kernel configuration option DYNAMIC_DEBUG_DST and set its
> operation mode to Static.
> * add the option to the thunderbolt's drivers/thunderbolt/Makefile
> in order to compile in debug logs:
> CFLAGS_nhi.o := -DDYNAMIC_DEBUG_MODULE
> * compile the kernel and update it on a target device,
> * finally append the entry to the kernel boot command line:
> thunderbolt.dyndbg="dst trace:thunderbolt =p"
>
> The DYNAMIC_DEBUG_DST option increases each callsite size by
> the pointer size.
>
> Signed-off-by: Łukasz Bartosik <[email protected]>
> ---
> Changelog v1:
> - initial creation
> ---
> include/linux/dynamic_debug.h | 30 ++
> lib/Kconfig.debug | 103 ++++++
> lib/dynamic_debug.c | 659 +++++++++++++++++++++++++++++++++-
> 3 files changed, 786 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
> index 41682278d2e8..8dee10f66e72 100644
> --- a/include/linux/dynamic_debug.h
> +++ b/include/linux/dynamic_debug.h
> @@ -38,8 +38,18 @@ struct _ddebug {
> #define _DPRINTK_FLAGS_INCL_LINENO (1<<3)
> #define _DPRINTK_FLAGS_INCL_TID (1<<4)
> #define _DPRINTK_FLAGS_INCL_SOURCENAME (1<<5)
>
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> + /*
> + * The 6th and 7th bits of the flags are used to determine
> + * destination of debug logs, currently supported destinations
> + * are defined in ddebug_dst_type enumeration
> + */
> +#define _DPRINTK_FLAGS_DST_SHIFT 6
> +#define _DPRINTK_FLAGS_DST_MASK (3<<_DPRINTK_FLAGS_DST_SHIFT)
> +#endif
> +
> #define _DPRINTK_FLAGS_INCL_ANY \
> (_DPRINTK_FLAGS_INCL_MODNAME | _DPRINTK_FLAGS_INCL_FUNCNAME |\
> _DPRINTK_FLAGS_INCL_LINENO | _DPRINTK_FLAGS_INCL_TID |\
> _DPRINTK_FLAGS_INCL_SOURCENAME)
> @@ -54,6 +64,10 @@ struct _ddebug {
> struct static_key_false dd_key_false;
> } key;
> #endif
> +
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> + struct ddebug_dst *dst;
> +#endif
> } __attribute__((aligned(8)));
>
> enum class_map_type {
> @@ -280,10 +294,26 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
> _dynamic_func_call(fmt, __dynamic_ibdev_dbg, \
> dev, fmt, ##__VA_ARGS__)
>
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> +
> +void __print_hex_dump_dst(const struct _ddebug *descriptor, const char *level,
> + const char *prefix_str, int prefix_type, int rowsize,
> + int groupsize, const void *buf, size_t len,
> + bool ascii);
> +
> +#define dynamic_hex_dump(prefix_str, prefix_type, rowsize, \
> + groupsize, buf, len, ascii) \
> + _dynamic_func_call(__builtin_constant_p(prefix_str) ? prefix_str : "hexdump", \
> + __print_hex_dump_dst, \
> + KERN_DEBUG, prefix_str, prefix_type, \
> + rowsize, groupsize, buf, len, ascii)
> +#else
> +
> #define dynamic_hex_dump(prefix_str, prefix_type, rowsize, \
> groupsize, buf, len, ascii) \
> _dynamic_func_call_no_desc(__builtin_constant_p(prefix_str) ? prefix_str : "hexdump", \
> print_hex_dump, \
> KERN_DEBUG, prefix_str, prefix_type, \
> rowsize, groupsize, buf, len, ascii)
> +#endif
>
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index f0c30ad83f22..4abfa126bde5 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -188,6 +188,109 @@ config DYNAMIC_DEBUG_CORE
> the case of embedded system where the kernel image size is
> sensitive for people.
>
> +menuconfig DYNAMIC_DEBUG_DST
> + bool "Enable selection of dynamic debug logs destination"
> + depends on DYNAMIC_DEBUG_CORE
> + select TRACING
> + help
> + Add support for selecting destination of dynamic debug logs.
> + When enabled it allows to configure destination of each callsite
> + individually. The option adds a framework (described by struct
> + ddebug_dst_ops) which provides flexible means to add a new destination
> + for debug logs. On top of the framework support for trace instance as
> + destination is added. By default destination of debug logs is syslog.
> + Syslog can be set explicitly by using dst keyword or is selected by
> + default when dst keyword is omitted, for example:
> +
> + localhost ~ # echo -n "module usbcore dst syslog =p" >
> + <debugfs>/dynamic_debug/control
> + or
> +
> + localhost ~ # echo -n "module usbcore =p" >
> + <debugfs>/dynamic_debug/control
> +
> + To enable a trace instance as a destination of debug logs a user
> + can for example issue the command:
> +
> + localhost ~ # echo -n "module usbcore dst trace:usb =p" >
> + <debugfs>/dynamic_debug/control
> +
> + The command will write debug logs from usbcore subsystem to the
> + trace instance named "usb" and the captured logs can be read
> + later with the command:
> +
> + localhost ~ # cat <debugfs>/tracing/instances/usb/trace
> +
> + When trace instance name is omitted the debug logs will be written
> + to "ddebug_default" trace instance:
> +
> + localhost ~ # cat <debugfs>/tracing/instances/ddebug_default/trace
> +
> + The DYNAMIC_DEBUG_DST option adds destination information
> + in the dynamic debug control:
> +
> + localhost ~ # cat <debugfs>/dynamic_debug/control
> + # filename:lineno [module]function flags format destination
> + drivers/usb/core/usb-acpi.c:143 [usbcore]usb_acpi_set_power_state =p "acpi: power failed to be set\n" trace:usb
> + drivers/usb/core/usb-acpi.c:141 [usbcore]usb_acpi_set_power_state =p "acpi: power was set to %d\n" trace:usb
> +
> + The DYNAMIC_DEBUG_DST option increases each callsite size by
> + the pointer size.
> +
> +if DYNAMIC_DEBUG_DST
> +
> +choice
> + prompt "Destination mode"
> + default DYNAMIC_DEBUG_DST_DYNAMIC
> + help
> + There are two modes of operation for dynamic debug logs destination
> + which can be selected during configuration stage of a kernel:
> +
> + * Dynamic - callsites configuration and destination of debug logs
> + can be dynamically changed during runtime. This has slight impact
> + on performance because it requires SRCU to be used for synchronization.
> +
> + * Static - callsites configuration and destination of debug logs
> + can be configured only from a kernel boot command line. This option
> + hides <debugfs>/dynamic_debug/control interface. It does not require
> + SRCU synchronization because of the static configuration. It also
> + provides convenient means to capture debug logs on production systems
> + for issues which are difficult to reproduce and at the same time
> + do not expose <debugfs>/dynamic_debug/control interface for security
> + reasons.
> +
> +config DYNAMIC_DEBUG_DST_DYNAMIC
> + bool "Dynamic"
> + select SRCU
> + help
> + Callsites configuration and destination of debug logs can be
> + dynamically changed during runtime. This has slight impact on
> + performance because it requires SRCU to be used for synchronization.
> +
> + The interface which allows to dynamically change configuration is:
> + <debugfs>/dynamic_debug/control
> +
> +config DYNAMIC_DEBUG_DST_STATIC
> + bool "Static"
> + help
> + Callsites configuration and destination of debug logs can be
> + configured only from a kernel boot command line. This option
> + does not expose <debugfs>/dynamic_debug/control interface and
> + therefore does not require SRCU synchronization. It also provides
> + convenient means to capture debug logs on production systems
> + for issues which are difficult to reproduce and at the same time
> + do not expose <debugfs>/dynamic_debug/control interface
> + for security reasons.
> +
> + For example to configure usbcore module to output its debug logs
> + to trace instance named "usb" the following line would have to be
> + appended to a kernel boot command line:
> +
> + usbcore.dyndbg="dst trace:usb =p"
> +
> +endchoice
> +endif
> +
> config SYMBOLIC_ERRNAME
> bool "Support symbolic error names in printf"
> default y if PRINTK
> diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
> index 009f2ead09c1..0cb69b553554 100644
> --- a/lib/dynamic_debug.c
> +++ b/lib/dynamic_debug.c
> @@ -44,6 +44,87 @@ extern struct _ddebug __stop___dyndbg[];
> extern struct ddebug_class_map __start___dyndbg_classes[];
> extern struct ddebug_class_map __stop___dyndbg_classes[];
>
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> +#include <linux/trace.h>
> +
> +enum ddebug_dst_type {
> + DDEBUG_DST_SYSLOG, /* destination syslog */
> + DDEBUG_DST_TRACE, /* destination trace instance */
> + /* DDEBUG_DST_NOT_USED_1 */
> + /* DDEBUG_DST_NOT_USED_2 */
> + DDEBUG_DST_MAX,
> +};
> +
> +/*
> + * The structure holds information related to debug logs output
> + * destination other than syslog. Currently only trace instance
> + * is supported.
> + */
> +struct ddebug_dst {
> + unsigned int use_count;
> + enum ddebug_dst_type type;
> + struct ddebug_dst_ops *ops;
> + union {
> + struct {
> + const char *instance;
> + struct trace_array *arr;
> + } trace;
> + };
> +};
> +
> +#ifndef CONFIG_DYNAMIC_DEBUG_DST_STATIC
> +
> +/*
> + * Instead of slicing code with #ifdefs create
> + * wrappers around SRCU functions we use
> + */
> +DEFINE_STATIC_SRCU(dst_srcu);
> +
> +#define dd_srcu_dereference(p, ssp) srcu_dereference(p, ssp)
> +#define dd_rcu_assign_pointer(p, v) rcu_assign_pointer(p, v)
> +
> +static inline int dd_srcu_read_lock(struct srcu_struct *ssp)
> +{
> + return srcu_read_lock(ssp);
> +}
> +
> +static inline void dd_srcu_read_unlock(struct srcu_struct *ssp, int idx)
> +{
> + return srcu_read_unlock(ssp, idx);
> +}
> +
> +static void dd_synchronize_srcu(struct srcu_struct *ssp)
> +{
> + synchronize_srcu(ssp);
> +}
> +
> +#else
> +
> +struct dd_srcu_struct {
> +};
> +
> +static struct dd_srcu_struct dst_srcu;
> +
> +#define dd_srcu_dereference(p, ssp) (p)
> +#define dd_rcu_assign_pointer(p, v) ((p) = (v))
> +
> +static inline int dd_srcu_read_lock(struct dd_srcu_struct *ssp)
> +{
> + return 0;
> +}
> +
> +static inline void dd_srcu_read_unlock(struct dd_srcu_struct *ssp, int idx)
> +{
> +}
> +
> +void dd_synchronize_srcu(struct dd_srcu_struct *ssp)
> +{
> +}
> +
> +#endif /* CONFIG_DYNAMIC_DEBUG_DST_STATIC */
> +
> +#endif /* CONFIG_DYNAMIC_DEBUG_DST */
> +
> struct ddebug_table {
> struct list_head link, maps;
> const char *mod_name;
> @@ -58,6 +139,7 @@ struct ddebug_query {
> const char *format;
> const char *class_string;
> unsigned int first_lineno, last_lineno;
> + struct ddebug_dst *dst;
> };
>
> struct ddebug_iter {
> @@ -126,10 +208,402 @@ do { \
> #define v3pr_info(fmt, ...) vnpr_info(3, fmt, ##__VA_ARGS__)
> #define v4pr_info(fmt, ...) vnpr_info(4, fmt, ##__VA_ARGS__)
>
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> +
> +static inline enum ddebug_dst_type get_dst_type(const struct _ddebug *dp)
> +{
> + return (dp->flags & _DPRINTK_FLAGS_DST_MASK) >>
> + _DPRINTK_FLAGS_DST_SHIFT;
> +}
> +
> +static void set_dst_type(unsigned int *newflags,
> + enum ddebug_dst_type type)
> +{
> + *newflags &= ~_DPRINTK_FLAGS_DST_MASK;
> + *newflags |= (type << _DPRINTK_FLAGS_DST_SHIFT);
> +}
> +
> +#define DST_SYSLOG_STR "syslog"
> +#define DST_TRACE_STR "trace"
> +#define DST_INVALID_STR "invalid"
> +#define DST_DST_STR "destination"
> +
> +struct dstbuf {
> +#define DST_DESC_MAX_SIZE 32
> + char buf[DST_DESC_MAX_SIZE];
> +};
> +
> +static char *describe_dst_type(const enum ddebug_dst_type type)
> +{
> + switch (type) {
> + case DDEBUG_DST_SYSLOG:
> + return DST_SYSLOG_STR;
> + case DDEBUG_DST_TRACE:
> + return DST_TRACE_STR;
> + default:
> + return DST_INVALID_STR;
> + }
> +}
> +
> +static char *ddebug_describe_dst(const struct ddebug_dst *dst,
> + struct dstbuf *dstb)
> +{
> + if (!dst)
> + return DST_SYSLOG_STR;
> +
> + switch (dst->type) {
> + case DDEBUG_DST_SYSLOG:
> + return DST_SYSLOG_STR;
> + case DDEBUG_DST_TRACE:
> + snprintf(dstb->buf, DST_DESC_MAX_SIZE, "%s:%s",
> + DST_TRACE_STR, dst->trace.instance);
> + break;
> + default:
> + return DST_INVALID_STR;
> + }
> +
> + return dstb->buf;
> +}
> +
> +static char *ddebug_describe_dst_dp(struct _ddebug *dp, struct dstbuf *dstb)
> +{
> + if (!get_dst_type(dp))
> + return DST_SYSLOG_STR;
> +
> + return ddebug_describe_dst(dp->dst, dstb);
> +}
> +
> +/*
> + * struct ddebug_dst_ops - dynamic debug logs destination operations
> + *
> + * @parse_and_init - parses and initializes destination
> + * @has_changed - checks whether destination has changed for a callsite
> + * @print - prints message for a callsite
> + * @dev_print - prints device message for a callsite
> + * @free - frees memory associated with a destination
> + */
> +struct ddebug_dst_ops {
> + int (*parse_and_init)(struct ddebug_query *query, const char *arg);
> + bool (*has_changed)(const struct ddebug_query *query,
> + const struct _ddebug *dp);
> + void (*print)(const struct ddebug_dst *dst, const char *fmt, ...);
> + void (*dev_print)(const struct ddebug_dst *dst,
> + const struct device *dev, const char *fmt, ...);
> + void (*free)(struct ddebug_dst *dst, int arg);
> +};
> +
> +static int tr_parse_and_init(struct ddebug_query *query,
> + const char *arg)
> +{
> + char *instance = strchr(arg, ':');
> + struct trace_array *arr;
> + int ret = -EINVAL;
> +
> + if (instance)
> + instance++;
> + else
> + instance = "ddebug_default";
> +
> + arr = trace_array_get_by_name(instance);
> + if (!arr)
> + goto err;
> +
> + ret = trace_array_init_printk(arr);
> + if (ret)
> + goto err_trace;
> +
> + query->dst->trace.instance = kstrdup(instance, GFP_KERNEL);
> + if (!query->dst->trace.instance)
> + goto err_trace;
> +
> + query->dst->trace.arr = arr;
> +
> + v3pr_info("parsed trace dst: instance name=%s, arrray=%p\n",
> + query->dst->trace.instance, arr);
> + return 0;
> +err_trace:
> + trace_array_put(arr);
> + trace_array_destroy(arr);
> +err:
> + return ret;
> +}
> +
> +bool tr_has_changed(const struct ddebug_query *query, const struct _ddebug *dp)
> +{
> + if (!dp->dst)
> + return true;
> + else if (dp->dst->type != DDEBUG_DST_TRACE)
> + return true;
> +
> + return query->dst->trace.arr != dp->dst->trace.arr;
> +}
> +
> +__printf(2, 3)
> +static void tr_print(const struct ddebug_dst *dst, const char *fmt, ...)
> +{
> + va_list args;
> + struct va_format vaf;
> +
> + va_start(args, fmt);
> + vaf.fmt = fmt;
> + vaf.va = &args;
> +
> + trace_array_printk(dst->trace.arr, _THIS_IP_, "%pV", &vaf);
> +
> + va_end(args);
> +}
> +
> +__printf(3, 4)
> +static void tr_dev_print(const struct ddebug_dst *dst,
> + const struct device *dev, const char *fmt, ...)
> +{
> + va_list args;
> + struct va_format vaf;
> +
> + va_start(args, fmt);
> + vaf.fmt = fmt;
> + vaf.va = &args;
> +
> + trace_array_printk(dst->trace.arr, _THIS_IP_, "%pV", &vaf);
> + va_end(args);
> +}
> +
> +static void tr_free(struct ddebug_dst *dst, int tr_arr_destroy)
> +{
> + WARN_ON(!dst);
> +
> + v2pr_info("freed dst: type=trace, instance=%s\n", dst->trace.instance);
> +
> + kfree(dst->trace.instance);
> + dst->trace.instance = NULL;
> +
> + trace_array_put(dst->trace.arr);
> + /*
> + * destroy trace instance in case when query was not applied to any
> + * of the callsites, otherwise don't try to destroy it because it will
> + * be removed from sysfs together with captured debug logs, let a user
> + * to delete it manually later at convenient time
> + */
> + if (tr_arr_destroy)
> + trace_array_destroy(dst->trace.arr);
> + dst->trace.arr = NULL;
> +}
> +
> +bool syslog_has_changed(const struct ddebug_query *query,
> + const struct _ddebug *dp)
> +{
> + if (!dp->dst)
> + return false;
> +
> + return dp->dst->type != DDEBUG_DST_SYSLOG;
> +}
> +
> +static void syslog_free(struct ddebug_dst *dst, int arg)
> +{
> + v2pr_info("freed dst: type=syslog\n");
> +}
> +
> +static struct ddebug_dst_ops dst_ops[] = {
> + // syslog destination ops
> + {
> + .parse_and_init = NULL,
> + .has_changed = syslog_has_changed,
> + .print = NULL,
> + .dev_print = NULL,
> + .free = syslog_free,
> + },
> + // trace destination ops
> + {
> + .parse_and_init = tr_parse_and_init,
> + .has_changed = tr_has_changed,
> + .print = tr_print,
> + .dev_print = tr_dev_print,
> + .free = tr_free,
> + },
> +};
> +
> +static int dst_parse_and_init(struct ddebug_query *query, const char *arg)
> +{
> + int ret = -EINVAL;
> +
> + if (!arg)
> + goto err;
> +
> + if (query->dst)
> + goto err;
> +
> + query->dst = kzalloc(sizeof(struct ddebug_dst), GFP_KERNEL);
> + if (!query->dst) {
> + ret = -ENOMEM;
> + goto err;
> + }
> +
> + if (!strcmp(arg, "syslog")) {
> + query->dst->type = DDEBUG_DST_SYSLOG;
> + query->dst->ops = &dst_ops[DDEBUG_DST_SYSLOG];
> + } else if (!strcmp(arg, "trace") ||
> + !strncmp(arg, "trace:", 6)) {
> + query->dst->type = DDEBUG_DST_TRACE;
> + query->dst->ops = &dst_ops[DDEBUG_DST_TRACE];
> + } else
> + goto err_mem;
> +
> + if (query->dst->ops->parse_and_init) {
> + ret = query->dst->ops->parse_and_init(query, arg);
> + if (ret)
> + goto err_mem;
> + }
> +
> + v3pr_info("parsed dst: type=%s\n", describe_dst_type(query->dst->type));
> + return 0;
> +err_mem:
> + kfree(query->dst);
> +err:
> + return ret;
> +}
> +
> +static void dst_apply_change(const struct ddebug_query *query,
> + struct _ddebug *dp)
> +{
> + /* store old destination pointer */
> + struct ddebug_dst *old_dst = dp->dst;
> + struct ddebug_dst *new_dst;
> +
> + /* increase use count */
> + query->dst->use_count++;
> +
> + /*
> + * syslog destination does not require additional description
> + * however we use it to simplify control path processing
> + */
> + new_dst = (query->dst->type == DDEBUG_DST_SYSLOG) ? NULL : query->dst;
> +
> + /* update destination pointer */
> + dd_rcu_assign_pointer(dp->dst, new_dst);
> +
> + if (!old_dst)
> + return;
> +
> + old_dst->use_count--;
> + /* check if old destination pointer is still being used */
> + if (!old_dst->use_count) {
> + /*
> + * wait for all read rcu critical sections
> + * in progress to finish
> + */
> + dd_synchronize_srcu(&dst_srcu);
> + /* call destination specific free function */
> + old_dst->ops->free(old_dst, 0);
> + /* free old destination pointer */
> + kfree(old_dst);
> + }
> +}
> +
> +static void dst_try_free(struct ddebug_query *query)
> +{
> + if (!query)
> + return;
> +
> + /* free destination if it wasn't applied to any callsite */
> + if (!query->dst->use_count) {
> + if (query->dst->ops->free)
> + query->dst->ops->free(query->dst, 1);
> + /*
> + * for syslog we always free its destination because
> + * it is used only to simplify control path processing
> + */
> + if (query->dst->type)
> + v2pr_info("freed dst: didn't apply to any of the callsites\n");
> + kfree(query->dst);
> + query->dst = NULL;
> + }
> +}
> +
> +void __print_hex_dump_dst(const struct _ddebug *descriptor, const char *level,
> + const char *prefix_str, int prefix_type,
> + int rowsize, int groupsize, const void *buf,
> + size_t len, bool ascii)
> +{
> + if (get_dst_type(descriptor)) {
> + const u8 *ptr = buf;
> + struct ddebug_dst *dst;
> + int i, linelen, remaining = len, idx;
> + unsigned char linebuf[32 * 3 + 2 + 32 + 1];
> +
> + if (rowsize != 16 && rowsize != 32)
> + rowsize = 16;
> +
> + idx = dd_srcu_read_lock(&dst_srcu);
> + dst = dd_srcu_dereference(descriptor->dst, &dst_srcu);
> + WARN_ON(!dst);
> + for (i = 0; i < len; i += rowsize) {
> + linelen = min(remaining, rowsize);
> + remaining -= rowsize;
> +
> + hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
> + linebuf, sizeof(linebuf), ascii);
> +
> + switch (prefix_type) {
> + case DUMP_PREFIX_ADDRESS:
> + dst->ops->print(descriptor->dst, "%s%s%p: %s\n",
> + level, prefix_str, ptr + i, linebuf);
> + break;
> + case DUMP_PREFIX_OFFSET:
> + dst->ops->print(descriptor->dst, "%s%s%.8x: %s\n",
> + level, prefix_str, i, linebuf);
> + break;
> + default:
> + dst->ops->print(descriptor->dst, "%s%s%s\n",
> + level, prefix_str, linebuf);
> + break;
> + }
> + }
> +
> + dd_srcu_read_unlock(&dst_srcu, idx);
> + } else
> + print_hex_dump(level, prefix_str, prefix_type, rowsize,
> + groupsize, buf, len, ascii);
> +}
> +EXPORT_SYMBOL(__print_hex_dump_dst);
> +
> +#else
> +
> +struct ddebug_dst {
> +};
> +
> +struct dstbuf {
> +};
> +
> +static inline unsigned int get_dst_type(const struct _ddebug *dp)
> +{
> + return 0;
> +}
> +
> +static char *ddebug_describe_dst(const struct ddebug_dst *dst,
> + struct dstbuf *dstb)
> +{
> + return "";
> +}
> +
> +static char *ddebug_describe_dst_dp(struct _ddebug *dp,
> + struct dstbuf *dstb)
> +{
> + return "";
> +}
> +
> +static void dst_try_free(struct ddebug_query *query)
> +{
> +}
> +
> +#define DST_DST_STR ""
> +
> +#endif /* CONFIG_DYNAMIC_DEBUG_DST */
> +
> static void vpr_info_dq(const struct ddebug_query *query, const char *msg)
> {
> /* trim any trailing newlines */
> int fmtlen = 0;
> + struct dstbuf dstb;
>
> if (query->format) {
> fmtlen = strlen(query->format);
> @@ -137,13 +611,14 @@ static void vpr_info_dq(const struct ddebug_query *query, const char *msg)
> fmtlen--;
> }
>
> - v3pr_info("%s: func=\"%s\" file=\"%s\" module=\"%s\" format=\"%.*s\" lineno=%u-%u class=%s\n",
> + v3pr_info("%s: func=\"%s\" file=\"%s\" module=\"%s\" format=\"%.*s\" lineno=%u-%u class=%s %s=%s\n",
> msg,
> query->function ?: "",
> query->filename ?: "",
> query->module ?: "",
> fmtlen, query->format ?: "",
> - query->first_lineno, query->last_lineno, query->class_string);
> + query->first_lineno, query->last_lineno, query->class_string,
> + DST_DST_STR, ddebug_describe_dst(query->dst, &dstb));
> }
>
> static struct ddebug_class_map *ddebug_find_valid_class(struct ddebug_table const *dt,
> @@ -178,8 +653,12 @@ static int ddebug_change(const struct ddebug_query *query,
> unsigned int newflags;
> unsigned int nfound = 0;
> struct flagsbuf fbuf, nbuf;
> + struct dstbuf dstbc, dstbn;
> struct ddebug_class_map *map = NULL;
> int __outvar valid_class;
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> + struct ddebug_dst_ops *dst_ops = query->dst->ops;
> +#endif
>
> /* search for matching ddebugs */
> mutex_lock(&ddebug_lock);
> @@ -243,8 +722,19 @@ static int ddebug_change(const struct ddebug_query *query,
> nfound++;
>
> newflags = (dp->flags & modifiers->mask) | modifiers->flags;
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> + /* set destination type in newflags */
> + set_dst_type(&newflags, query->dst->type);
> + if (newflags == dp->flags &&
> + (!dst_ops->has_changed ||
> + !dst_ops->has_changed(query, dp)))
> + /* nothing changed */
> + continue;
> +#else
> if (newflags == dp->flags)
> continue;
> +#endif
> +
> #ifdef CONFIG_JUMP_LABEL
> if (dp->flags & _DPRINTK_FLAGS_PRINT) {
> if (!(newflags & _DPRINTK_FLAGS_PRINT))
> @@ -253,14 +743,24 @@ static int ddebug_change(const struct ddebug_query *query,
> static_branch_enable(&dp->key.dd_key_true);
> }
> #endif
> - v4pr_info("changed %s:%d [%s]%s %s => %s\n",
> + v4pr_info("changed %s:%d [%s]%s %s %s => %s %s\n",
> trim_prefix(dp->filename), dp->lineno,
> dt->mod_name, dp->function,
> ddebug_describe_flags(dp->flags, &fbuf),
> - ddebug_describe_flags(newflags, &nbuf));
> + ddebug_describe_dst_dp(dp, &dstbc),
> + ddebug_describe_flags(newflags, &nbuf),
> + ddebug_describe_dst(query->dst, &dstbn));
> dp->flags = newflags;
> +
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> + if (dst_ops->has_changed &&
> + dst_ops->has_changed(query, dp))
> + /* destination changed, apply it */
> + dst_apply_change(query, dp);
> +#endif
> }
> }
> +
> mutex_unlock(&ddebug_lock);
>
> if (!nfound && verbose)
> @@ -456,6 +956,12 @@ static int ddebug_parse_query(char *words[], int nwords,
> return -EINVAL;
> } else if (!strcmp(keyword, "class")) {
> rc = check_set(&query->class_string, arg, "class");
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> + } else if (!strcmp(keyword, "dst")) {
> + rc = dst_parse_and_init(query, arg);
> + if (rc)
> + return rc;
> +#endif
> } else {
> pr_err("unknown keyword \"%s\"\n", keyword);
> return -EINVAL;
> @@ -463,6 +969,15 @@ static int ddebug_parse_query(char *words[], int nwords,
> if (rc)
> return rc;
> }
> +
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> + /* set destination to syslog by default if it was not explicitly set */
> + if (!query->dst) {
> + rc = dst_parse_and_init(query, "syslog");
> + if (rc)
> + return rc;
> + }
> +#endif
> if (!query->module && modname)
> /*
> * support $modname.dyndbg=<multiple queries>, when
> @@ -549,11 +1064,14 @@ static int ddebug_exec_query(char *query_string, const char *modname)
> }
> if (ddebug_parse_query(words, nwords-1, &query, modname)) {
> pr_err("query parse failed\n");
> + dst_try_free(&query);
> return -EINVAL;
> }
> +
> /* actually go and implement the change */
> nfound = ddebug_change(&query, &modifiers);
> vpr_info_dq(&query, nfound ? "applied" : "no-match");
> + dst_try_free(&query);
>
> return nfound;
> }
> @@ -856,6 +1374,10 @@ static inline char *dynamic_emit_prefix(struct _ddebug *desc, char *buf)
>
> void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...)
> {
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> + struct ddebug_dst *dst;
> + int idx;
> +#endif
> va_list args;
> struct va_format vaf;
> char buf[PREFIX_SIZE] = "";
> @@ -868,6 +1390,21 @@ void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...)
> vaf.fmt = fmt;
> vaf.va = &args;
>
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> + if (!get_dst_type(descriptor))
> + goto syslog;
> +
> + idx = dd_srcu_read_lock(&dst_srcu);
> + dst = dd_srcu_dereference(descriptor->dst, &dst_srcu);
> + WARN_ON(!dst);
> + dst->ops->print(dst, "%s%pV", dynamic_emit_prefix(descriptor, buf),
> + &vaf);
> +
> + dd_srcu_read_unlock(&dst_srcu, idx);
> + va_end(args);
> + return;
> +syslog:
> +#endif
> printk(KERN_DEBUG "%s%pV", dynamic_emit_prefix(descriptor, buf), &vaf);
>
> va_end(args);
> @@ -877,6 +1414,10 @@ EXPORT_SYMBOL(__dynamic_pr_debug);
> void __dynamic_dev_dbg(struct _ddebug *descriptor,
> const struct device *dev, const char *fmt, ...)
> {
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> + struct ddebug_dst *dst;
> + int idx;
> +#endif
> struct va_format vaf;
> va_list args;
>
> @@ -888,6 +1429,29 @@ void __dynamic_dev_dbg(struct _ddebug *descriptor,
> vaf.fmt = fmt;
> vaf.va = &args;
>
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> + if (!get_dst_type(descriptor))
> + goto syslog;
> +
> + idx = dd_srcu_read_lock(&dst_srcu);
> + dst = dd_srcu_dereference(descriptor->dst, &dst_srcu);
> + WARN_ON(!dst);
> + if (!dev) {
> + dst->ops->print(dst, "(NULL device *): %pV", &vaf);
> + } else {
> + char buf[PREFIX_SIZE] = "";
> +
> + dst->ops->dev_print(dst, dev, "%s%s %s: %pV",
> + dynamic_emit_prefix(descriptor, buf),
> + dev_driver_string(dev), dev_name(dev),
> + &vaf);
> + }
> +
> + dd_srcu_read_unlock(&dst_srcu, idx);
> + va_end(args);
> + return;
> +syslog:
> +#endif
> if (!dev) {
> printk(KERN_DEBUG "(NULL device *): %pV", &vaf);
> } else {
> @@ -908,6 +1472,10 @@ EXPORT_SYMBOL(__dynamic_dev_dbg);
> void __dynamic_netdev_dbg(struct _ddebug *descriptor,
> const struct net_device *dev, const char *fmt, ...)
> {
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> + struct ddebug_dst *dst;
> + int idx;
> +#endif
> struct va_format vaf;
> va_list args;
>
> @@ -919,6 +1487,35 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor,
> vaf.fmt = fmt;
> vaf.va = &args;
>
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> + if (!get_dst_type(descriptor))
> + goto syslog;
> +
> + idx = dd_srcu_read_lock(&dst_srcu);
> + dst = dd_srcu_dereference(descriptor->dst, &dst_srcu);
> + WARN_ON(!dst);
> + if (dev && dev->dev.parent) {
> + char buf[PREFIX_SIZE] = "";
> +
> + dst->ops->dev_print(dst, dev->dev.parent,
> + "%s%s %s %s%s: %pV",
> + dynamic_emit_prefix(descriptor, buf),
> + dev_driver_string(dev->dev.parent),
> + dev_name(dev->dev.parent),
> + netdev_name(dev), netdev_reg_state(dev),
> + &vaf);
> + } else if (dev) {
> + dst->ops->print(dst, "%s%s: %pV", netdev_name(dev),
> + netdev_reg_state(dev), &vaf);
> + } else {
> + dst->ops->print(dst, "(NULL net_device *): %pV", &vaf);
> + }
> +
> + dd_srcu_read_unlock(&dst_srcu, idx);
> + va_end(args);
> + return;
> +syslog:
> +#endif
> if (dev && dev->dev.parent) {
> char buf[PREFIX_SIZE] = "";
>
> @@ -947,6 +1544,10 @@ EXPORT_SYMBOL(__dynamic_netdev_dbg);
> void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
> const struct ib_device *ibdev, const char *fmt, ...)
> {
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> + struct ddebug_dst *dst;
> + int idx;
> +#endif
> struct va_format vaf;
> va_list args;
>
> @@ -955,6 +1556,34 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
> vaf.fmt = fmt;
> vaf.va = &args;
>
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> + if (!get_dst_type(descriptor))
> + goto syslog;
> +
> + idx = dd_srcu_read_lock(&dst_srcu);
> + dst = dd_srcu_dereference(descriptor->dst, &dst_srcu);
> + WARN_ON(!dst);
> + if (ibdev && ibdev->dev.parent) {
> + char buf[PREFIX_SIZE] = "";
> +
> + dst->ops->dev_print(dst, ibdev->dev.parent,
> + "%s%s %s %s: %pV",
> + dynamic_emit_prefix(descriptor, buf),
> + dev_driver_string(ibdev->dev.parent),
> + dev_name(ibdev->dev.parent),
> + dev_name(&ibdev->dev),
> + &vaf);
> + } else if (ibdev) {
> + dst->ops->print(dst, "%s: %pV", dev_name(&ibdev->dev), &vaf);
> + } else {
> + dst->ops->print(dst, "(NULL ib_device): %pV", &vaf);
> + }
> +
> + dd_srcu_read_unlock(&dst_srcu, idx);
> + va_end(args);
> + return;
> +syslog:
> +#endif
> if (ibdev && ibdev->dev.parent) {
> char buf[PREFIX_SIZE] = "";
>
> @@ -1127,11 +1756,16 @@ static int ddebug_proc_show(struct seq_file *m, void *p)
> struct ddebug_iter *iter = m->private;
> struct _ddebug *dp = p;
> struct flagsbuf flags;
> + struct dstbuf dstb;
> char const *class;
>
> if (p == SEQ_START_TOKEN) {
> - seq_puts(m,
> - "# filename:lineno [module]function flags format\n");
> + seq_puts(m, "# filename:lineno [module]function flags format");
> +#if defined CONFIG_DYNAMIC_DEBUG_DST
> + seq_puts(m, " destination\n");
> +#else
> + seq_puts(m, "\n");
> +#endif
> return 0;
> }
>
> @@ -1149,6 +1783,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p)
> else
> seq_printf(m, " class unknown, _id:%d", dp->class_id);
> }
> + seq_printf(m, " %s", ddebug_describe_dst_dp(dp, &dstb));
> seq_puts(m, "\n");
>
> return 0;
> @@ -1351,6 +1986,9 @@ static void ddebug_remove_all_tables(void)
>
> static __initdata int ddebug_init_success;
>
> +
> +#ifndef CONFIG_DYNAMIC_DEBUG_DST_STATIC
> +
> static int __init dynamic_debug_init_control(void)
> {
> struct proc_dir_entry *procfs_dir;
> @@ -1374,6 +2012,15 @@ static int __init dynamic_debug_init_control(void)
> return 0;
> }
>
> +#else
> +
> +static int __init dynamic_debug_init_control(void)
> +{
> + return 0;
> +}
> +
> +#endif
> +
> static int __init dynamic_debug_init(void)
> {
> struct _ddebug *iter, *iter_mod_start;
wt., 24 paź 2023 o 18:24 Jason Baron <[email protected]> napisał(a):
>
>
>
> On 9/15/23 11:48 AM, Łukasz Bartosik wrote:
> > Add support for selection of dynamic debug logs destination.
> > When enabled it allows to configure destination of each callsite
> > individually. The option adds a framework (described by struct
> > ddebug_dst_ops) which provides flexible means to add a new destination
> > for debug logs. On top of the framework support for trace instance as
> > destination is added. By default destination of debug logs is syslog.
> > Syslog can be set explicitly by using dst keyword or is selected by
> > default when dst keyword is omitted, for example:
> >
> > localhost ~ # echo -n "module usbcore dst syslog =p" >
> > <debugfs>/dynamic_debug/control
> > or
> >
> > localhost ~ # echo -n "module usbcore =p" >
> > <debugfs>/dynamic_debug/control
> >
> > To enable a trace instance as a destination of debug logs a user
> > can for example issue the command:
> >
> > localhost ~ # echo -n "module usbcore dst trace:usb =p" >
> > <debugfs>/dynamic_debug/control
>
>
> Sorry for the late reply here. So I normally thing of the dynamic debug
> keywords- func,file,line,etc... as 'selectors'. Like these control which
> sites are selected. Whereas the 'dst' keyword is where the output of
> what is selected goes to. So it feels like it should be more like something:
>
> echo "module usbcore =T:usb" > <debugfs>/dyanmic_debug/control
>
> And then if there are other flags maybe we need a comma?
>
> echo "module usbcore =T:usb,ptm" > <debugfs>/dyanmic_debug/control
>
> or
>
> echo "module usbcore =ptmT:usb" > <debugfs>/dyanmic_debug/control
>
> And if you just have 'T' like:
>
> echo "module usbcore =ptmT" > <debugfs>/dyanmic_debug/control
>
> That means use the 'default' trace buffer. And 'p' continues to mean the
> printk buffer. So you could send lines to both. But you can't send
> output to more than one trace buffer (although we could relax that in
> the future, if needed with another ':' separator?).
>
Thanks for the comment. I will follow your suggestions.
Lukasz
> Thanks,
>
> -Jason
>
> >
> > The command will write debug logs from usbcore subsystem to the
> > trace instance named "usb" and the captured logs can be read
> > later with the command:
> >
> > localhost ~ # cat <debugfs>/tracing/instances/usb/trace
> >
> > When trace instance name is omitted the debug logs will be written
> > to "ddebug_default" trace instance:
> >
> > localhost ~ # cat <debugfs>/tracing/instances/ddebug_default/trace
> >
> > The DYNAMIC_DEBUG_DST option adds destination information
> > in the dynamic debug control:
> >
> > localhost ~ # cat <debugfs>/dynamic_debug/control
> > drivers/usb/core/usb-acpi.c:143 [usbcore]usb_acpi_set_power_state =p "acpi: power failed to be set\n" trace:usb
> > drivers/usb/core/usb-acpi.c:141 [usbcore]usb_acpi_set_power_state =p "acpi: power was set to %d\n" trace:usb
> >
> > There are two modes of operation for dynamic debug logs destination
> > which can be selected during configuration stage of a kernel:
> >
> > * Dynamic - callsites configuration and destination of debug logs
> > can be dynamically changed during runtime. This has slight impact
> > on performance because it requires SRCU to be used for synchronization.
> >
> > * Static - callsites configuration and destination of debug logs
> > can be configured only from a kernel boot command line. This option
> > hides <debugfs>/dynamic_debug/control interface. It does not require
> > SRCU synchronization because of the static configuration. It also
> > provides convenient means to capture debug logs on production systems
> > for issues which are difficult to reproduce but on the other hand
> > do not expose <debugfs>/dynamic_debug/control interface for security
> > reasons.
> >
> > For example to capture thunderbolt subsystem debug logs on production
> > system to trace instance named "thunderbolt":
> > * enable kernel configuration option CONFIG_DYNAMIC_DEBUG_CORE,
> > * enable kernel configuration option DYNAMIC_DEBUG_DST and set its
> > operation mode to Static.
> > * add the option to the thunderbolt's drivers/thunderbolt/Makefile
> > in order to compile in debug logs:
> > CFLAGS_nhi.o := -DDYNAMIC_DEBUG_MODULE
> > * compile the kernel and update it on a target device,
> > * finally append the entry to the kernel boot command line:
> > thunderbolt.dyndbg="dst trace:thunderbolt =p"
> >
> > The DYNAMIC_DEBUG_DST option increases each callsite size by
> > the pointer size.
> >
> > Signed-off-by: Łukasz Bartosik <[email protected]>
> > ---
> > Changelog v1:
> > - initial creation
> > ---
> > include/linux/dynamic_debug.h | 30 ++
> > lib/Kconfig.debug | 103 ++++++
> > lib/dynamic_debug.c | 659 +++++++++++++++++++++++++++++++++-
> > 3 files changed, 786 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
> > index 41682278d2e8..8dee10f66e72 100644
> > --- a/include/linux/dynamic_debug.h
> > +++ b/include/linux/dynamic_debug.h
> > @@ -38,8 +38,18 @@ struct _ddebug {
> > #define _DPRINTK_FLAGS_INCL_LINENO (1<<3)
> > #define _DPRINTK_FLAGS_INCL_TID (1<<4)
> > #define _DPRINTK_FLAGS_INCL_SOURCENAME (1<<5)
> >
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > + /*
> > + * The 6th and 7th bits of the flags are used to determine
> > + * destination of debug logs, currently supported destinations
> > + * are defined in ddebug_dst_type enumeration
> > + */
> > +#define _DPRINTK_FLAGS_DST_SHIFT 6
> > +#define _DPRINTK_FLAGS_DST_MASK (3<<_DPRINTK_FLAGS_DST_SHIFT)
> > +#endif
> > +
> > #define _DPRINTK_FLAGS_INCL_ANY \
> > (_DPRINTK_FLAGS_INCL_MODNAME | _DPRINTK_FLAGS_INCL_FUNCNAME |\
> > _DPRINTK_FLAGS_INCL_LINENO | _DPRINTK_FLAGS_INCL_TID |\
> > _DPRINTK_FLAGS_INCL_SOURCENAME)
> > @@ -54,6 +64,10 @@ struct _ddebug {
> > struct static_key_false dd_key_false;
> > } key;
> > #endif
> > +
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > + struct ddebug_dst *dst;
> > +#endif
> > } __attribute__((aligned(8)));
> >
> > enum class_map_type {
> > @@ -280,10 +294,26 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
> > _dynamic_func_call(fmt, __dynamic_ibdev_dbg, \
> > dev, fmt, ##__VA_ARGS__)
> >
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > +
> > +void __print_hex_dump_dst(const struct _ddebug *descriptor, const char *level,
> > + const char *prefix_str, int prefix_type, int rowsize,
> > + int groupsize, const void *buf, size_t len,
> > + bool ascii);
> > +
> > +#define dynamic_hex_dump(prefix_str, prefix_type, rowsize, \
> > + groupsize, buf, len, ascii) \
> > + _dynamic_func_call(__builtin_constant_p(prefix_str) ? prefix_str : "hexdump", \
> > + __print_hex_dump_dst, \
> > + KERN_DEBUG, prefix_str, prefix_type, \
> > + rowsize, groupsize, buf, len, ascii)
> > +#else
> > +
> > #define dynamic_hex_dump(prefix_str, prefix_type, rowsize, \
> > groupsize, buf, len, ascii) \
> > _dynamic_func_call_no_desc(__builtin_constant_p(prefix_str) ? prefix_str : "hexdump", \
> > print_hex_dump, \
> > KERN_DEBUG, prefix_str, prefix_type, \
> > rowsize, groupsize, buf, len, ascii)
> > +#endif
> >
> > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> > index f0c30ad83f22..4abfa126bde5 100644
> > --- a/lib/Kconfig.debug
> > +++ b/lib/Kconfig.debug
> > @@ -188,6 +188,109 @@ config DYNAMIC_DEBUG_CORE
> > the case of embedded system where the kernel image size is
> > sensitive for people.
> >
> > +menuconfig DYNAMIC_DEBUG_DST
> > + bool "Enable selection of dynamic debug logs destination"
> > + depends on DYNAMIC_DEBUG_CORE
> > + select TRACING
> > + help
> > + Add support for selecting destination of dynamic debug logs.
> > + When enabled it allows to configure destination of each callsite
> > + individually. The option adds a framework (described by struct
> > + ddebug_dst_ops) which provides flexible means to add a new destination
> > + for debug logs. On top of the framework support for trace instance as
> > + destination is added. By default destination of debug logs is syslog.
> > + Syslog can be set explicitly by using dst keyword or is selected by
> > + default when dst keyword is omitted, for example:
> > +
> > + localhost ~ # echo -n "module usbcore dst syslog =p" >
> > + <debugfs>/dynamic_debug/control
> > + or
> > +
> > + localhost ~ # echo -n "module usbcore =p" >
> > + <debugfs>/dynamic_debug/control
> > +
> > + To enable a trace instance as a destination of debug logs a user
> > + can for example issue the command:
> > +
> > + localhost ~ # echo -n "module usbcore dst trace:usb =p" >
> > + <debugfs>/dynamic_debug/control
> > +
> > + The command will write debug logs from usbcore subsystem to the
> > + trace instance named "usb" and the captured logs can be read
> > + later with the command:
> > +
> > + localhost ~ # cat <debugfs>/tracing/instances/usb/trace
> > +
> > + When trace instance name is omitted the debug logs will be written
> > + to "ddebug_default" trace instance:
> > +
> > + localhost ~ # cat <debugfs>/tracing/instances/ddebug_default/trace
> > +
> > + The DYNAMIC_DEBUG_DST option adds destination information
> > + in the dynamic debug control:
> > +
> > + localhost ~ # cat <debugfs>/dynamic_debug/control
> > + # filename:lineno [module]function flags format destination
> > + drivers/usb/core/usb-acpi.c:143 [usbcore]usb_acpi_set_power_state =p "acpi: power failed to be set\n" trace:usb
> > + drivers/usb/core/usb-acpi.c:141 [usbcore]usb_acpi_set_power_state =p "acpi: power was set to %d\n" trace:usb
> > +
> > + The DYNAMIC_DEBUG_DST option increases each callsite size by
> > + the pointer size.
> > +
> > +if DYNAMIC_DEBUG_DST
> > +
> > +choice
> > + prompt "Destination mode"
> > + default DYNAMIC_DEBUG_DST_DYNAMIC
> > + help
> > + There are two modes of operation for dynamic debug logs destination
> > + which can be selected during configuration stage of a kernel:
> > +
> > + * Dynamic - callsites configuration and destination of debug logs
> > + can be dynamically changed during runtime. This has slight impact
> > + on performance because it requires SRCU to be used for synchronization.
> > +
> > + * Static - callsites configuration and destination of debug logs
> > + can be configured only from a kernel boot command line. This option
> > + hides <debugfs>/dynamic_debug/control interface. It does not require
> > + SRCU synchronization because of the static configuration. It also
> > + provides convenient means to capture debug logs on production systems
> > + for issues which are difficult to reproduce and at the same time
> > + do not expose <debugfs>/dynamic_debug/control interface for security
> > + reasons.
> > +
> > +config DYNAMIC_DEBUG_DST_DYNAMIC
> > + bool "Dynamic"
> > + select SRCU
> > + help
> > + Callsites configuration and destination of debug logs can be
> > + dynamically changed during runtime. This has slight impact on
> > + performance because it requires SRCU to be used for synchronization.
> > +
> > + The interface which allows to dynamically change configuration is:
> > + <debugfs>/dynamic_debug/control
> > +
> > +config DYNAMIC_DEBUG_DST_STATIC
> > + bool "Static"
> > + help
> > + Callsites configuration and destination of debug logs can be
> > + configured only from a kernel boot command line. This option
> > + does not expose <debugfs>/dynamic_debug/control interface and
> > + therefore does not require SRCU synchronization. It also provides
> > + convenient means to capture debug logs on production systems
> > + for issues which are difficult to reproduce and at the same time
> > + do not expose <debugfs>/dynamic_debug/control interface
> > + for security reasons.
> > +
> > + For example to configure usbcore module to output its debug logs
> > + to trace instance named "usb" the following line would have to be
> > + appended to a kernel boot command line:
> > +
> > + usbcore.dyndbg="dst trace:usb =p"
> > +
> > +endchoice
> > +endif
> > +
> > config SYMBOLIC_ERRNAME
> > bool "Support symbolic error names in printf"
> > default y if PRINTK
> > diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
> > index 009f2ead09c1..0cb69b553554 100644
> > --- a/lib/dynamic_debug.c
> > +++ b/lib/dynamic_debug.c
> > @@ -44,6 +44,87 @@ extern struct _ddebug __stop___dyndbg[];
> > extern struct ddebug_class_map __start___dyndbg_classes[];
> > extern struct ddebug_class_map __stop___dyndbg_classes[];
> >
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > +#include <linux/trace.h>
> > +
> > +enum ddebug_dst_type {
> > + DDEBUG_DST_SYSLOG, /* destination syslog */
> > + DDEBUG_DST_TRACE, /* destination trace instance */
> > + /* DDEBUG_DST_NOT_USED_1 */
> > + /* DDEBUG_DST_NOT_USED_2 */
> > + DDEBUG_DST_MAX,
> > +};
> > +
> > +/*
> > + * The structure holds information related to debug logs output
> > + * destination other than syslog. Currently only trace instance
> > + * is supported.
> > + */
> > +struct ddebug_dst {
> > + unsigned int use_count;
> > + enum ddebug_dst_type type;
> > + struct ddebug_dst_ops *ops;
> > + union {
> > + struct {
> > + const char *instance;
> > + struct trace_array *arr;
> > + } trace;
> > + };
> > +};
> > +
> > +#ifndef CONFIG_DYNAMIC_DEBUG_DST_STATIC
> > +
> > +/*
> > + * Instead of slicing code with #ifdefs create
> > + * wrappers around SRCU functions we use
> > + */
> > +DEFINE_STATIC_SRCU(dst_srcu);
> > +
> > +#define dd_srcu_dereference(p, ssp) srcu_dereference(p, ssp)
> > +#define dd_rcu_assign_pointer(p, v) rcu_assign_pointer(p, v)
> > +
> > +static inline int dd_srcu_read_lock(struct srcu_struct *ssp)
> > +{
> > + return srcu_read_lock(ssp);
> > +}
> > +
> > +static inline void dd_srcu_read_unlock(struct srcu_struct *ssp, int idx)
> > +{
> > + return srcu_read_unlock(ssp, idx);
> > +}
> > +
> > +static void dd_synchronize_srcu(struct srcu_struct *ssp)
> > +{
> > + synchronize_srcu(ssp);
> > +}
> > +
> > +#else
> > +
> > +struct dd_srcu_struct {
> > +};
> > +
> > +static struct dd_srcu_struct dst_srcu;
> > +
> > +#define dd_srcu_dereference(p, ssp) (p)
> > +#define dd_rcu_assign_pointer(p, v) ((p) = (v))
> > +
> > +static inline int dd_srcu_read_lock(struct dd_srcu_struct *ssp)
> > +{
> > + return 0;
> > +}
> > +
> > +static inline void dd_srcu_read_unlock(struct dd_srcu_struct *ssp, int idx)
> > +{
> > +}
> > +
> > +void dd_synchronize_srcu(struct dd_srcu_struct *ssp)
> > +{
> > +}
> > +
> > +#endif /* CONFIG_DYNAMIC_DEBUG_DST_STATIC */
> > +
> > +#endif /* CONFIG_DYNAMIC_DEBUG_DST */
> > +
> > struct ddebug_table {
> > struct list_head link, maps;
> > const char *mod_name;
> > @@ -58,6 +139,7 @@ struct ddebug_query {
> > const char *format;
> > const char *class_string;
> > unsigned int first_lineno, last_lineno;
> > + struct ddebug_dst *dst;
> > };
> >
> > struct ddebug_iter {
> > @@ -126,10 +208,402 @@ do { \
> > #define v3pr_info(fmt, ...) vnpr_info(3, fmt, ##__VA_ARGS__)
> > #define v4pr_info(fmt, ...) vnpr_info(4, fmt, ##__VA_ARGS__)
> >
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > +
> > +static inline enum ddebug_dst_type get_dst_type(const struct _ddebug *dp)
> > +{
> > + return (dp->flags & _DPRINTK_FLAGS_DST_MASK) >>
> > + _DPRINTK_FLAGS_DST_SHIFT;
> > +}
> > +
> > +static void set_dst_type(unsigned int *newflags,
> > + enum ddebug_dst_type type)
> > +{
> > + *newflags &= ~_DPRINTK_FLAGS_DST_MASK;
> > + *newflags |= (type << _DPRINTK_FLAGS_DST_SHIFT);
> > +}
> > +
> > +#define DST_SYSLOG_STR "syslog"
> > +#define DST_TRACE_STR "trace"
> > +#define DST_INVALID_STR "invalid"
> > +#define DST_DST_STR "destination"
> > +
> > +struct dstbuf {
> > +#define DST_DESC_MAX_SIZE 32
> > + char buf[DST_DESC_MAX_SIZE];
> > +};
> > +
> > +static char *describe_dst_type(const enum ddebug_dst_type type)
> > +{
> > + switch (type) {
> > + case DDEBUG_DST_SYSLOG:
> > + return DST_SYSLOG_STR;
> > + case DDEBUG_DST_TRACE:
> > + return DST_TRACE_STR;
> > + default:
> > + return DST_INVALID_STR;
> > + }
> > +}
> > +
> > +static char *ddebug_describe_dst(const struct ddebug_dst *dst,
> > + struct dstbuf *dstb)
> > +{
> > + if (!dst)
> > + return DST_SYSLOG_STR;
> > +
> > + switch (dst->type) {
> > + case DDEBUG_DST_SYSLOG:
> > + return DST_SYSLOG_STR;
> > + case DDEBUG_DST_TRACE:
> > + snprintf(dstb->buf, DST_DESC_MAX_SIZE, "%s:%s",
> > + DST_TRACE_STR, dst->trace.instance);
> > + break;
> > + default:
> > + return DST_INVALID_STR;
> > + }
> > +
> > + return dstb->buf;
> > +}
> > +
> > +static char *ddebug_describe_dst_dp(struct _ddebug *dp, struct dstbuf *dstb)
> > +{
> > + if (!get_dst_type(dp))
> > + return DST_SYSLOG_STR;
> > +
> > + return ddebug_describe_dst(dp->dst, dstb);
> > +}
> > +
> > +/*
> > + * struct ddebug_dst_ops - dynamic debug logs destination operations
> > + *
> > + * @parse_and_init - parses and initializes destination
> > + * @has_changed - checks whether destination has changed for a callsite
> > + * @print - prints message for a callsite
> > + * @dev_print - prints device message for a callsite
> > + * @free - frees memory associated with a destination
> > + */
> > +struct ddebug_dst_ops {
> > + int (*parse_and_init)(struct ddebug_query *query, const char *arg);
> > + bool (*has_changed)(const struct ddebug_query *query,
> > + const struct _ddebug *dp);
> > + void (*print)(const struct ddebug_dst *dst, const char *fmt, ...);
> > + void (*dev_print)(const struct ddebug_dst *dst,
> > + const struct device *dev, const char *fmt, ...);
> > + void (*free)(struct ddebug_dst *dst, int arg);
> > +};
> > +
> > +static int tr_parse_and_init(struct ddebug_query *query,
> > + const char *arg)
> > +{
> > + char *instance = strchr(arg, ':');
> > + struct trace_array *arr;
> > + int ret = -EINVAL;
> > +
> > + if (instance)
> > + instance++;
> > + else
> > + instance = "ddebug_default";
> > +
> > + arr = trace_array_get_by_name(instance);
> > + if (!arr)
> > + goto err;
> > +
> > + ret = trace_array_init_printk(arr);
> > + if (ret)
> > + goto err_trace;
> > +
> > + query->dst->trace.instance = kstrdup(instance, GFP_KERNEL);
> > + if (!query->dst->trace.instance)
> > + goto err_trace;
> > +
> > + query->dst->trace.arr = arr;
> > +
> > + v3pr_info("parsed trace dst: instance name=%s, arrray=%p\n",
> > + query->dst->trace.instance, arr);
> > + return 0;
> > +err_trace:
> > + trace_array_put(arr);
> > + trace_array_destroy(arr);
> > +err:
> > + return ret;
> > +}
> > +
> > +bool tr_has_changed(const struct ddebug_query *query, const struct _ddebug *dp)
> > +{
> > + if (!dp->dst)
> > + return true;
> > + else if (dp->dst->type != DDEBUG_DST_TRACE)
> > + return true;
> > +
> > + return query->dst->trace.arr != dp->dst->trace.arr;
> > +}
> > +
> > +__printf(2, 3)
> > +static void tr_print(const struct ddebug_dst *dst, const char *fmt, ...)
> > +{
> > + va_list args;
> > + struct va_format vaf;
> > +
> > + va_start(args, fmt);
> > + vaf.fmt = fmt;
> > + vaf.va = &args;
> > +
> > + trace_array_printk(dst->trace.arr, _THIS_IP_, "%pV", &vaf);
> > +
> > + va_end(args);
> > +}
> > +
> > +__printf(3, 4)
> > +static void tr_dev_print(const struct ddebug_dst *dst,
> > + const struct device *dev, const char *fmt, ...)
> > +{
> > + va_list args;
> > + struct va_format vaf;
> > +
> > + va_start(args, fmt);
> > + vaf.fmt = fmt;
> > + vaf.va = &args;
> > +
> > + trace_array_printk(dst->trace.arr, _THIS_IP_, "%pV", &vaf);
> > + va_end(args);
> > +}
> > +
> > +static void tr_free(struct ddebug_dst *dst, int tr_arr_destroy)
> > +{
> > + WARN_ON(!dst);
> > +
> > + v2pr_info("freed dst: type=trace, instance=%s\n", dst->trace.instance);
> > +
> > + kfree(dst->trace.instance);
> > + dst->trace.instance = NULL;
> > +
> > + trace_array_put(dst->trace.arr);
> > + /*
> > + * destroy trace instance in case when query was not applied to any
> > + * of the callsites, otherwise don't try to destroy it because it will
> > + * be removed from sysfs together with captured debug logs, let a user
> > + * to delete it manually later at convenient time
> > + */
> > + if (tr_arr_destroy)
> > + trace_array_destroy(dst->trace.arr);
> > + dst->trace.arr = NULL;
> > +}
> > +
> > +bool syslog_has_changed(const struct ddebug_query *query,
> > + const struct _ddebug *dp)
> > +{
> > + if (!dp->dst)
> > + return false;
> > +
> > + return dp->dst->type != DDEBUG_DST_SYSLOG;
> > +}
> > +
> > +static void syslog_free(struct ddebug_dst *dst, int arg)
> > +{
> > + v2pr_info("freed dst: type=syslog\n");
> > +}
> > +
> > +static struct ddebug_dst_ops dst_ops[] = {
> > + // syslog destination ops
> > + {
> > + .parse_and_init = NULL,
> > + .has_changed = syslog_has_changed,
> > + .print = NULL,
> > + .dev_print = NULL,
> > + .free = syslog_free,
> > + },
> > + // trace destination ops
> > + {
> > + .parse_and_init = tr_parse_and_init,
> > + .has_changed = tr_has_changed,
> > + .print = tr_print,
> > + .dev_print = tr_dev_print,
> > + .free = tr_free,
> > + },
> > +};
> > +
> > +static int dst_parse_and_init(struct ddebug_query *query, const char *arg)
> > +{
> > + int ret = -EINVAL;
> > +
> > + if (!arg)
> > + goto err;
> > +
> > + if (query->dst)
> > + goto err;
> > +
> > + query->dst = kzalloc(sizeof(struct ddebug_dst), GFP_KERNEL);
> > + if (!query->dst) {
> > + ret = -ENOMEM;
> > + goto err;
> > + }
> > +
> > + if (!strcmp(arg, "syslog")) {
> > + query->dst->type = DDEBUG_DST_SYSLOG;
> > + query->dst->ops = &dst_ops[DDEBUG_DST_SYSLOG];
> > + } else if (!strcmp(arg, "trace") ||
> > + !strncmp(arg, "trace:", 6)) {
> > + query->dst->type = DDEBUG_DST_TRACE;
> > + query->dst->ops = &dst_ops[DDEBUG_DST_TRACE];
> > + } else
> > + goto err_mem;
> > +
> > + if (query->dst->ops->parse_and_init) {
> > + ret = query->dst->ops->parse_and_init(query, arg);
> > + if (ret)
> > + goto err_mem;
> > + }
> > +
> > + v3pr_info("parsed dst: type=%s\n", describe_dst_type(query->dst->type));
> > + return 0;
> > +err_mem:
> > + kfree(query->dst);
> > +err:
> > + return ret;
> > +}
> > +
> > +static void dst_apply_change(const struct ddebug_query *query,
> > + struct _ddebug *dp)
> > +{
> > + /* store old destination pointer */
> > + struct ddebug_dst *old_dst = dp->dst;
> > + struct ddebug_dst *new_dst;
> > +
> > + /* increase use count */
> > + query->dst->use_count++;
> > +
> > + /*
> > + * syslog destination does not require additional description
> > + * however we use it to simplify control path processing
> > + */
> > + new_dst = (query->dst->type == DDEBUG_DST_SYSLOG) ? NULL : query->dst;
> > +
> > + /* update destination pointer */
> > + dd_rcu_assign_pointer(dp->dst, new_dst);
> > +
> > + if (!old_dst)
> > + return;
> > +
> > + old_dst->use_count--;
> > + /* check if old destination pointer is still being used */
> > + if (!old_dst->use_count) {
> > + /*
> > + * wait for all read rcu critical sections
> > + * in progress to finish
> > + */
> > + dd_synchronize_srcu(&dst_srcu);
> > + /* call destination specific free function */
> > + old_dst->ops->free(old_dst, 0);
> > + /* free old destination pointer */
> > + kfree(old_dst);
> > + }
> > +}
> > +
> > +static void dst_try_free(struct ddebug_query *query)
> > +{
> > + if (!query)
> > + return;
> > +
> > + /* free destination if it wasn't applied to any callsite */
> > + if (!query->dst->use_count) {
> > + if (query->dst->ops->free)
> > + query->dst->ops->free(query->dst, 1);
> > + /*
> > + * for syslog we always free its destination because
> > + * it is used only to simplify control path processing
> > + */
> > + if (query->dst->type)
> > + v2pr_info("freed dst: didn't apply to any of the callsites\n");
> > + kfree(query->dst);
> > + query->dst = NULL;
> > + }
> > +}
> > +
> > +void __print_hex_dump_dst(const struct _ddebug *descriptor, const char *level,
> > + const char *prefix_str, int prefix_type,
> > + int rowsize, int groupsize, const void *buf,
> > + size_t len, bool ascii)
> > +{
> > + if (get_dst_type(descriptor)) {
> > + const u8 *ptr = buf;
> > + struct ddebug_dst *dst;
> > + int i, linelen, remaining = len, idx;
> > + unsigned char linebuf[32 * 3 + 2 + 32 + 1];
> > +
> > + if (rowsize != 16 && rowsize != 32)
> > + rowsize = 16;
> > +
> > + idx = dd_srcu_read_lock(&dst_srcu);
> > + dst = dd_srcu_dereference(descriptor->dst, &dst_srcu);
> > + WARN_ON(!dst);
> > + for (i = 0; i < len; i += rowsize) {
> > + linelen = min(remaining, rowsize);
> > + remaining -= rowsize;
> > +
> > + hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
> > + linebuf, sizeof(linebuf), ascii);
> > +
> > + switch (prefix_type) {
> > + case DUMP_PREFIX_ADDRESS:
> > + dst->ops->print(descriptor->dst, "%s%s%p: %s\n",
> > + level, prefix_str, ptr + i, linebuf);
> > + break;
> > + case DUMP_PREFIX_OFFSET:
> > + dst->ops->print(descriptor->dst, "%s%s%.8x: %s\n",
> > + level, prefix_str, i, linebuf);
> > + break;
> > + default:
> > + dst->ops->print(descriptor->dst, "%s%s%s\n",
> > + level, prefix_str, linebuf);
> > + break;
> > + }
> > + }
> > +
> > + dd_srcu_read_unlock(&dst_srcu, idx);
> > + } else
> > + print_hex_dump(level, prefix_str, prefix_type, rowsize,
> > + groupsize, buf, len, ascii);
> > +}
> > +EXPORT_SYMBOL(__print_hex_dump_dst);
> > +
> > +#else
> > +
> > +struct ddebug_dst {
> > +};
> > +
> > +struct dstbuf {
> > +};
> > +
> > +static inline unsigned int get_dst_type(const struct _ddebug *dp)
> > +{
> > + return 0;
> > +}
> > +
> > +static char *ddebug_describe_dst(const struct ddebug_dst *dst,
> > + struct dstbuf *dstb)
> > +{
> > + return "";
> > +}
> > +
> > +static char *ddebug_describe_dst_dp(struct _ddebug *dp,
> > + struct dstbuf *dstb)
> > +{
> > + return "";
> > +}
> > +
> > +static void dst_try_free(struct ddebug_query *query)
> > +{
> > +}
> > +
> > +#define DST_DST_STR ""
> > +
> > +#endif /* CONFIG_DYNAMIC_DEBUG_DST */
> > +
> > static void vpr_info_dq(const struct ddebug_query *query, const char *msg)
> > {
> > /* trim any trailing newlines */
> > int fmtlen = 0;
> > + struct dstbuf dstb;
> >
> > if (query->format) {
> > fmtlen = strlen(query->format);
> > @@ -137,13 +611,14 @@ static void vpr_info_dq(const struct ddebug_query *query, const char *msg)
> > fmtlen--;
> > }
> >
> > - v3pr_info("%s: func=\"%s\" file=\"%s\" module=\"%s\" format=\"%.*s\" lineno=%u-%u class=%s\n",
> > + v3pr_info("%s: func=\"%s\" file=\"%s\" module=\"%s\" format=\"%.*s\" lineno=%u-%u class=%s %s=%s\n",
> > msg,
> > query->function ?: "",
> > query->filename ?: "",
> > query->module ?: "",
> > fmtlen, query->format ?: "",
> > - query->first_lineno, query->last_lineno, query->class_string);
> > + query->first_lineno, query->last_lineno, query->class_string,
> > + DST_DST_STR, ddebug_describe_dst(query->dst, &dstb));
> > }
> >
> > static struct ddebug_class_map *ddebug_find_valid_class(struct ddebug_table const *dt,
> > @@ -178,8 +653,12 @@ static int ddebug_change(const struct ddebug_query *query,
> > unsigned int newflags;
> > unsigned int nfound = 0;
> > struct flagsbuf fbuf, nbuf;
> > + struct dstbuf dstbc, dstbn;
> > struct ddebug_class_map *map = NULL;
> > int __outvar valid_class;
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > + struct ddebug_dst_ops *dst_ops = query->dst->ops;
> > +#endif
> >
> > /* search for matching ddebugs */
> > mutex_lock(&ddebug_lock);
> > @@ -243,8 +722,19 @@ static int ddebug_change(const struct ddebug_query *query,
> > nfound++;
> >
> > newflags = (dp->flags & modifiers->mask) | modifiers->flags;
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > + /* set destination type in newflags */
> > + set_dst_type(&newflags, query->dst->type);
> > + if (newflags == dp->flags &&
> > + (!dst_ops->has_changed ||
> > + !dst_ops->has_changed(query, dp)))
> > + /* nothing changed */
> > + continue;
> > +#else
> > if (newflags == dp->flags)
> > continue;
> > +#endif
> > +
> > #ifdef CONFIG_JUMP_LABEL
> > if (dp->flags & _DPRINTK_FLAGS_PRINT) {
> > if (!(newflags & _DPRINTK_FLAGS_PRINT))
> > @@ -253,14 +743,24 @@ static int ddebug_change(const struct ddebug_query *query,
> > static_branch_enable(&dp->key.dd_key_true);
> > }
> > #endif
> > - v4pr_info("changed %s:%d [%s]%s %s => %s\n",
> > + v4pr_info("changed %s:%d [%s]%s %s %s => %s %s\n",
> > trim_prefix(dp->filename), dp->lineno,
> > dt->mod_name, dp->function,
> > ddebug_describe_flags(dp->flags, &fbuf),
> > - ddebug_describe_flags(newflags, &nbuf));
> > + ddebug_describe_dst_dp(dp, &dstbc),
> > + ddebug_describe_flags(newflags, &nbuf),
> > + ddebug_describe_dst(query->dst, &dstbn));
> > dp->flags = newflags;
> > +
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > + if (dst_ops->has_changed &&
> > + dst_ops->has_changed(query, dp))
> > + /* destination changed, apply it */
> > + dst_apply_change(query, dp);
> > +#endif
> > }
> > }
> > +
> > mutex_unlock(&ddebug_lock);
> >
> > if (!nfound && verbose)
> > @@ -456,6 +956,12 @@ static int ddebug_parse_query(char *words[], int nwords,
> > return -EINVAL;
> > } else if (!strcmp(keyword, "class")) {
> > rc = check_set(&query->class_string, arg, "class");
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > + } else if (!strcmp(keyword, "dst")) {
> > + rc = dst_parse_and_init(query, arg);
> > + if (rc)
> > + return rc;
> > +#endif
> > } else {
> > pr_err("unknown keyword \"%s\"\n", keyword);
> > return -EINVAL;
> > @@ -463,6 +969,15 @@ static int ddebug_parse_query(char *words[], int nwords,
> > if (rc)
> > return rc;
> > }
> > +
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > + /* set destination to syslog by default if it was not explicitly set */
> > + if (!query->dst) {
> > + rc = dst_parse_and_init(query, "syslog");
> > + if (rc)
> > + return rc;
> > + }
> > +#endif
> > if (!query->module && modname)
> > /*
> > * support $modname.dyndbg=<multiple queries>, when
> > @@ -549,11 +1064,14 @@ static int ddebug_exec_query(char *query_string, const char *modname)
> > }
> > if (ddebug_parse_query(words, nwords-1, &query, modname)) {
> > pr_err("query parse failed\n");
> > + dst_try_free(&query);
> > return -EINVAL;
> > }
> > +
> > /* actually go and implement the change */
> > nfound = ddebug_change(&query, &modifiers);
> > vpr_info_dq(&query, nfound ? "applied" : "no-match");
> > + dst_try_free(&query);
> >
> > return nfound;
> > }
> > @@ -856,6 +1374,10 @@ static inline char *dynamic_emit_prefix(struct _ddebug *desc, char *buf)
> >
> > void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...)
> > {
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > + struct ddebug_dst *dst;
> > + int idx;
> > +#endif
> > va_list args;
> > struct va_format vaf;
> > char buf[PREFIX_SIZE] = "";
> > @@ -868,6 +1390,21 @@ void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...)
> > vaf.fmt = fmt;
> > vaf.va = &args;
> >
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > + if (!get_dst_type(descriptor))
> > + goto syslog;
> > +
> > + idx = dd_srcu_read_lock(&dst_srcu);
> > + dst = dd_srcu_dereference(descriptor->dst, &dst_srcu);
> > + WARN_ON(!dst);
> > + dst->ops->print(dst, "%s%pV", dynamic_emit_prefix(descriptor, buf),
> > + &vaf);
> > +
> > + dd_srcu_read_unlock(&dst_srcu, idx);
> > + va_end(args);
> > + return;
> > +syslog:
> > +#endif
> > printk(KERN_DEBUG "%s%pV", dynamic_emit_prefix(descriptor, buf), &vaf);
> >
> > va_end(args);
> > @@ -877,6 +1414,10 @@ EXPORT_SYMBOL(__dynamic_pr_debug);
> > void __dynamic_dev_dbg(struct _ddebug *descriptor,
> > const struct device *dev, const char *fmt, ...)
> > {
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > + struct ddebug_dst *dst;
> > + int idx;
> > +#endif
> > struct va_format vaf;
> > va_list args;
> >
> > @@ -888,6 +1429,29 @@ void __dynamic_dev_dbg(struct _ddebug *descriptor,
> > vaf.fmt = fmt;
> > vaf.va = &args;
> >
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > + if (!get_dst_type(descriptor))
> > + goto syslog;
> > +
> > + idx = dd_srcu_read_lock(&dst_srcu);
> > + dst = dd_srcu_dereference(descriptor->dst, &dst_srcu);
> > + WARN_ON(!dst);
> > + if (!dev) {
> > + dst->ops->print(dst, "(NULL device *): %pV", &vaf);
> > + } else {
> > + char buf[PREFIX_SIZE] = "";
> > +
> > + dst->ops->dev_print(dst, dev, "%s%s %s: %pV",
> > + dynamic_emit_prefix(descriptor, buf),
> > + dev_driver_string(dev), dev_name(dev),
> > + &vaf);
> > + }
> > +
> > + dd_srcu_read_unlock(&dst_srcu, idx);
> > + va_end(args);
> > + return;
> > +syslog:
> > +#endif
> > if (!dev) {
> > printk(KERN_DEBUG "(NULL device *): %pV", &vaf);
> > } else {
> > @@ -908,6 +1472,10 @@ EXPORT_SYMBOL(__dynamic_dev_dbg);
> > void __dynamic_netdev_dbg(struct _ddebug *descriptor,
> > const struct net_device *dev, const char *fmt, ...)
> > {
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > + struct ddebug_dst *dst;
> > + int idx;
> > +#endif
> > struct va_format vaf;
> > va_list args;
> >
> > @@ -919,6 +1487,35 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor,
> > vaf.fmt = fmt;
> > vaf.va = &args;
> >
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > + if (!get_dst_type(descriptor))
> > + goto syslog;
> > +
> > + idx = dd_srcu_read_lock(&dst_srcu);
> > + dst = dd_srcu_dereference(descriptor->dst, &dst_srcu);
> > + WARN_ON(!dst);
> > + if (dev && dev->dev.parent) {
> > + char buf[PREFIX_SIZE] = "";
> > +
> > + dst->ops->dev_print(dst, dev->dev.parent,
> > + "%s%s %s %s%s: %pV",
> > + dynamic_emit_prefix(descriptor, buf),
> > + dev_driver_string(dev->dev.parent),
> > + dev_name(dev->dev.parent),
> > + netdev_name(dev), netdev_reg_state(dev),
> > + &vaf);
> > + } else if (dev) {
> > + dst->ops->print(dst, "%s%s: %pV", netdev_name(dev),
> > + netdev_reg_state(dev), &vaf);
> > + } else {
> > + dst->ops->print(dst, "(NULL net_device *): %pV", &vaf);
> > + }
> > +
> > + dd_srcu_read_unlock(&dst_srcu, idx);
> > + va_end(args);
> > + return;
> > +syslog:
> > +#endif
> > if (dev && dev->dev.parent) {
> > char buf[PREFIX_SIZE] = "";
> >
> > @@ -947,6 +1544,10 @@ EXPORT_SYMBOL(__dynamic_netdev_dbg);
> > void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
> > const struct ib_device *ibdev, const char *fmt, ...)
> > {
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > + struct ddebug_dst *dst;
> > + int idx;
> > +#endif
> > struct va_format vaf;
> > va_list args;
> >
> > @@ -955,6 +1556,34 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
> > vaf.fmt = fmt;
> > vaf.va = &args;
> >
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > + if (!get_dst_type(descriptor))
> > + goto syslog;
> > +
> > + idx = dd_srcu_read_lock(&dst_srcu);
> > + dst = dd_srcu_dereference(descriptor->dst, &dst_srcu);
> > + WARN_ON(!dst);
> > + if (ibdev && ibdev->dev.parent) {
> > + char buf[PREFIX_SIZE] = "";
> > +
> > + dst->ops->dev_print(dst, ibdev->dev.parent,
> > + "%s%s %s %s: %pV",
> > + dynamic_emit_prefix(descriptor, buf),
> > + dev_driver_string(ibdev->dev.parent),
> > + dev_name(ibdev->dev.parent),
> > + dev_name(&ibdev->dev),
> > + &vaf);
> > + } else if (ibdev) {
> > + dst->ops->print(dst, "%s: %pV", dev_name(&ibdev->dev), &vaf);
> > + } else {
> > + dst->ops->print(dst, "(NULL ib_device): %pV", &vaf);
> > + }
> > +
> > + dd_srcu_read_unlock(&dst_srcu, idx);
> > + va_end(args);
> > + return;
> > +syslog:
> > +#endif
> > if (ibdev && ibdev->dev.parent) {
> > char buf[PREFIX_SIZE] = "";
> >
> > @@ -1127,11 +1756,16 @@ static int ddebug_proc_show(struct seq_file *m, void *p)
> > struct ddebug_iter *iter = m->private;
> > struct _ddebug *dp = p;
> > struct flagsbuf flags;
> > + struct dstbuf dstb;
> > char const *class;
> >
> > if (p == SEQ_START_TOKEN) {
> > - seq_puts(m,
> > - "# filename:lineno [module]function flags format\n");
> > + seq_puts(m, "# filename:lineno [module]function flags format");
> > +#if defined CONFIG_DYNAMIC_DEBUG_DST
> > + seq_puts(m, " destination\n");
> > +#else
> > + seq_puts(m, "\n");
> > +#endif
> > return 0;
> > }
> >
> > @@ -1149,6 +1783,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p)
> > else
> > seq_printf(m, " class unknown, _id:%d", dp->class_id);
> > }
> > + seq_printf(m, " %s", ddebug_describe_dst_dp(dp, &dstb));
> > seq_puts(m, "\n");
> >
> > return 0;
> > @@ -1351,6 +1986,9 @@ static void ddebug_remove_all_tables(void)
> >
> > static __initdata int ddebug_init_success;
> >
> > +
> > +#ifndef CONFIG_DYNAMIC_DEBUG_DST_STATIC
> > +
> > static int __init dynamic_debug_init_control(void)
> > {
> > struct proc_dir_entry *procfs_dir;
> > @@ -1374,6 +2012,15 @@ static int __init dynamic_debug_init_control(void)
> > return 0;
> > }
> >
> > +#else
> > +
> > +static int __init dynamic_debug_init_control(void)
> > +{
> > + return 0;
> > +}
> > +
> > +#endif
> > +
> > static int __init dynamic_debug_init(void)
> > {
> > struct _ddebug *iter, *iter_mod_start;