2003-07-08 17:18:38

by Jim Keniston

[permalink] [raw]
Subject: [PATCH - RFC] [1/2] 2.6 must-fix list - kernel error reporting

diff -Naur linux.org/include/linux/kerror.h linux.kerror.patched/include/linux/kerror.h
--- linux.org/include/linux/kerror.h Wed Dec 31 16:00:00 1969
+++ linux.kerror.patched/include/linux/kerror.h Thu Jul 3 14:18:46 2003
@@ -0,0 +1,27 @@
+#ifndef _KERROR_H
+#define _KERROR_H
+
+#ifdef __KERNEL__
+#include <linux/config.h>
+#include <linux/uio.h>
+#include <asm/types.h>
+
+#ifdef CONFIG_NET
+extern int kernel_error_event(void *data, size_t len, __u32 groups);
+extern int kernel_error_event_iov(const struct iovec *iov,
+ unsigned int nseg, __u32 groups);
+#else
+static inline int kernel_error_event(void *data, size_t len, __u32 groups)
+ { return -ENOSYS; }
+static inline int kernel_error_event_iov(const struct iovec *iov,
+ unsigned int nseg, __u32 groups)
+ { return -ENOSYS; }
+#endif /* CONFIG_NET */
+#endif /* __KERNEL__ */
+
+#define KERROR_GROUP_RAW 0x00000001
+#define KERROR_GROUP_EVLOG 0x00000002
+
+#define KERROR_GROUP_ALL (~(u32)0)
+
+#endif /* _KERROR_H */
diff -Naur linux.org/include/linux/netlink.h linux.kerror.patched/include/linux/netlink.h
--- linux.org/include/linux/netlink.h Thu Jul 3 14:18:46 2003
+++ linux.kerror.patched/include/linux/netlink.h Thu Jul 3 14:18:46 2003
@@ -10,6 +10,7 @@
#define NETLINK_TCPDIAG 4 /* TCP socket monitoring */
#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */
#define NETLINK_XFRM 6 /* ipsec */
+#define NETLINK_KERROR 7 /* kernel error event facility */
#define NETLINK_ARPD 8
#define NETLINK_ROUTE6 11 /* af_inet6 route comm channel */
#define NETLINK_IP6_FW 13
diff -Naur linux.org/net/netlink/Makefile linux.kerror.patched/net/netlink/Makefile
--- linux.org/net/netlink/Makefile Thu Jul 3 14:18:46 2003
+++ linux.kerror.patched/net/netlink/Makefile Thu Jul 3 14:18:46 2003
@@ -2,5 +2,5 @@
# Makefile for the netlink driver.
#

-obj-y := af_netlink.o
+obj-y := af_netlink.o kerror.o
obj-$(CONFIG_NETLINK_DEV) += netlink_dev.o
diff -Naur linux.org/net/netlink/kerror.c linux.kerror.patched/net/netlink/kerror.c
--- linux.org/net/netlink/kerror.c Wed Dec 31 16:00:00 1969
+++ linux.kerror.patched/net/netlink/kerror.c Thu Jul 3 14:18:46 2003
@@ -0,0 +1,110 @@
+/* kerror.c: Kernel error event logging facility.
+ *
+ * Copyright (C) 2003 David S. Miller ([email protected])
+ * June 2003 - Jim Keniston and Dan Stekloff (kenistoj and [email protected])
+ * Fixed a couple of bugs and added iovec interface.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/netlink.h>
+#include <linux/kerror.h>
+#include <linux/init.h>
+#include <linux/uio.h>
+#include <net/sock.h>
+
+static struct sock *kerror_nl;
+
+static void kerror_netlink_rcv(struct sock *sk, int len)
+{
+ do {
+ struct sk_buff *skb;
+
+ while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+ /* Just ignore all the messages, they cannot be
+ * destined for the kernel.
+ */
+ kfree_skb(skb);
+ }
+ } while (kerror_nl && kerror_nl->sk_receive_queue.qlen);
+}
+
+/**
+ * kernel_error_event_iov() - Broadcast packet to NETLINK_KERROR sockets.
+ * @iov: the packet's data
+ * @nseg: number of segments in iov[]
+ * @groups: as with kernel_error_event()
+ */
+int kernel_error_event_iov(const struct iovec *iov, unsigned int nseg,
+ u32 groups)
+{
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+ unsigned char *b, *p;
+ size_t len;
+ unsigned int seg;
+
+ if (!groups)
+ return -EINVAL;
+
+ len = iov_length(iov, nseg);
+ skb = alloc_skb(NLMSG_SPACE(len), GFP_ATOMIC);
+ if (skb == NULL)
+ return -ENOMEM;
+
+ b = skb->tail;
+
+ nlh = NLMSG_PUT(skb, current->pid, 0, 0, len);
+ nlh->nlmsg_flags = 0;
+
+ p = NLMSG_DATA(nlh);
+ for (seg = 0; seg < nseg; seg++) {
+ memcpy(p, (const void*)iov[seg].iov_base, iov[seg].iov_len);
+ p += iov[seg].iov_len;
+ }
+ nlh->nlmsg_len = skb->tail - b;
+
+ NETLINK_CB(skb).dst_groups = groups;
+
+ return netlink_broadcast(kerror_nl, skb, 0, ~0, GFP_ATOMIC);
+
+nlmsg_failure:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+/**
+ * kernel_error_event() - Broadcast packet to NETLINK_KERROR sockets.
+ * @data, @len: the packet's data
+ * @groups: the group(s) to which the packet pertains -- e.g.,
+ * KERROR_GROUP_EVLOG. On a recvmsg(), this shows up in
+ * ((struct sockaddr_nl*)(msg->msg_name))->nl_groups.
+ */
+int kernel_error_event(void *data, size_t len, u32 groups)
+{
+ struct iovec iov;
+ iov.iov_base = data;
+ iov.iov_len = len;
+ return kernel_error_event_iov(&iov, 1, groups);
+}
+
+static int __init kerror_init(void)
+{
+ printk(KERN_INFO "Initializing KERROR netlink socket\n");
+
+ kerror_nl = netlink_kernel_create(NETLINK_KERROR, kerror_netlink_rcv);
+ if (kerror_nl == NULL)
+ panic("kerror_init: cannot initialize kerror_nl\n");
+
+ return 0;
+}
+
+static void __exit kerror_exit(void)
+{
+ sock_release(kerror_nl->sk_socket);
+}
+
+module_init(kerror_init);
+module_exit(kerror_exit);
diff -Naur linux.org/net/netsyms.c linux.kerror.patched/net/netsyms.c
--- linux.org/net/netsyms.c Thu Jul 3 14:18:46 2003
+++ linux.kerror.patched/net/netsyms.c Thu Jul 3 14:18:46 2003
@@ -83,6 +83,7 @@
#endif

#include <linux/rtnetlink.h>
+#include <linux/kerror.h>

#ifdef CONFIG_IPX_MODULE
extern struct datalink_proto *make_EII_client(void);
@@ -505,6 +506,8 @@
EXPORT_SYMBOL(netlink_set_nonroot);
EXPORT_SYMBOL(netlink_register_notifier);
EXPORT_SYMBOL(netlink_unregister_notifier);
+EXPORT_SYMBOL(kernel_error_event);
+EXPORT_SYMBOL(kernel_error_event_iov);
#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
EXPORT_SYMBOL(netlink_attach);
EXPORT_SYMBOL(netlink_detach);


Attachments:
kerror-2.5.74.patch (5.49 kB)

2003-07-08 17:50:54

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH - RFC] [1/2] 2.6 must-fix list - kernel error reporting

Jim Keniston <[email protected]> wrote:
>
> The enclosed patches provide a mechanism for reporting error events
> to user-mode applications via netlink.

Seems sane to me.

It needs to handle %z as well as %Z.

The layout of `struct kern_log_entry' may be problematic. Think of the
situation where a 64-bit kernel constructs one of these and sends it up to
32-bit userspace. Will the structure layout be the same under the 32-bit
compiler as under the 64-bit one?

How do you actually intend to use this? Will it be by adding new
evt_printf() calls to particular drivers, or replacing existing printk's or
both?

Would it make sense for evt_printf() to fall back to printk() if nobody is
listening to the log stream?

2003-07-08 18:59:29

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH - RFC] [1/2] 2.6 must-fix list - kernel error reporting

Andrew Morton <[email protected]> writes:

> The layout of `struct kern_log_entry' may be problematic. Think of the
> situation where a 64-bit kernel constructs one of these and sends it up to
> 32-bit userspace. Will the structure layout be the same under the 32-bit
> compiler as under the 64-bit one?

No it won't. Best is to order the fields by size (arrays ordered by their
subtype). This should always give compatible alignment.

-Andi

2003-07-08 19:10:02

by Jim Keniston

[permalink] [raw]
Subject: Re: [PATCH - RFC] [1/2] 2.6 must-fix list - kernel error reporting

Andrew Morton wrote:
>
> Jim Keniston <[email protected]> wrote:
> >
> > The enclosed patches provide a mechanism for reporting error events
> > to user-mode applications via netlink.
>
> Seems sane to me.
>
> It needs to handle %z as well as %Z.

Yes, thanks. I missed that change to vsnprintf().

>
> The layout of `struct kern_log_entry' may be problematic. Think of the
> situation where a 64-bit kernel constructs one of these and sends it up to
> 32-bit userspace. Will the structure layout be the same under the 32-bit
> compiler as under the 64-bit one?

I think so. Nothing is bigger than 4 bytes except log_facility[] (16-byte
array of char, which doesn't induce padding at all on i386). But I will find a
64K/32U ppc machine and check that.

>
> How do you actually intend to use this?

I envision it being used by a configuration/status-monitoring system that monitors
hotplug events, sysfs, etc. for configuration changes, and listens to the
proposed interface for error events. Binary-only events (logged with evl_write())
would have to be interpreted based on knowledge existing entirely in user space (either
coded into the monitor program, or provided as supplementary information via a formatting
template or some such). PRINTF-format events can carry and/or be supplemented with
similar info, but have the error message built in.

> Will it be by adding new
> evt_printf() calls to particular drivers, or replacing existing printk's or
> both?

There have been a variety of suggestions for how error reporting could be improved.
Two common ones are:
1. Leave printks alone, and log additional info in whatever format you want via netlink.
(E.g., Dave Miller recommended something like this.) This proposal supports that.

2. Migrate from raw printks to "smarter" versions -- e.g., dev_printk and friends,
or the proposed netdev_printk. Such macros now call just printk (after adding
relevant info), but could be modified to call evl_printk as well with the same args.

There are a zillion variations on this, of course...

>
> Would it make sense for evt_printf() to fall back to printk() if nobody is
> listening to the log stream?

That certainly makes sense for evl_printf. For evl_write, just do a hex dump or something.
So evlog.c would query kerror.c to see if anybody's listening. Would kerror.c
consult nl_table[] directly, or is there an anybody_listening() function that
does this?

Jim

2003-07-08 19:32:50

by Jim Keniston

[permalink] [raw]
Subject: Re: [PATCH - RFC] [1/2] 2.6 must-fix list - kernel error reporting

If you're going to reply to this, please change "[email protected]" to "[email protected]" in your cc list. My apologies for the error.

Jim

2003-07-10 04:28:20

by James Morris

[permalink] [raw]
Subject: Re: [PATCH - RFC] [1/2] 2.6 must-fix list - kernel error reporting

On Tue, 8 Jul 2003, Jim Keniston wrote:

+ kerror_nl = netlink_kernel_create(NETLINK_KERROR, kerror_netlink_rcv);
+ if (kerror_nl == NULL)
+ panic("kerror_init: cannot initialize kerror_nl\n");

You can simply use NULL instead of passing the dummy kerror_netlink_rcv
function.

+struct kern_log_entry {
+ __u16 log_kmagic; /* always LOGREC_KMAGIC */
+ __u16 log_kversion; /* which version of this struct? */
+ char log_facility[FACILITY_MAXLEN]; /* e.g., driver name */

These fields should generally be specified in ascending order to help with
alignment.

It may also be worth looking at how the ULOG code batches messages to
improve peformance.




- James
--
James Morris
<[email protected]>




2003-07-10 18:58:38

by Jim Keniston

[permalink] [raw]
Subject: Re: [PATCH - RFC] [1/2] 2.6 must-fix list - kernel error reporting

James Morris wrote:
>
> On Tue, 8 Jul 2003, Jim Keniston wrote:
>
> + kerror_nl = netlink_kernel_create(NETLINK_KERROR, kerror_netlink_rcv);
> + if (kerror_nl == NULL)
> + panic("kerror_init: cannot initialize kerror_nl\n");
>
> You can simply use NULL instead of passing the dummy kerror_netlink_rcv
> function.

That begs the question: do we trust that nobody but the kernel will send
packets to a NETLINK_KERROR socket? Ordinary users can't, but any root
application can. Without kerror_netlink_rcv(), such packets don't get
dequeued.

>
> +struct kern_log_entry {
> + __u16 log_kmagic; /* always LOGREC_KMAGIC */
> + __u16 log_kversion; /* which version of this struct? */
> + char log_facility[FACILITY_MAXLEN]; /* e.g., driver name */
>
> These fields should generally be specified in ascending order to help with
> alignment.

We include log_kmagic and log_kversion so the receiving app (e.g. the evlog
daemon) can figure out which version of the kernel header it's getting. Note
that we're up to #3 (going on #4, with the changes you and others have
suggested). As long as we have to include log_kmagic and log_kversion, they
need to be first.

That said, I get the impression that people would be more comfortable if
log_facility[] were moved to the end. Other than that, can anybody point
out a specific area where there's likely to be an alignment problem? The
various members' offsets are the same on i386, ppc32, and ppc64. This should
also be true for s390 and s390x. I'd think it'd really matter only when kernel
and user on the same system are different architectures. ppc64K/ppc32U,
at least, works.

>
> It may also be worth looking at how the ULOG code batches messages to
> improve peformance.

Thanks for the pointer. Looks nice. If performance turns out to be an issue
(e.g., if for some reason people want to cc all printk messages through this
interface), I'll keep that in mind.

>
> - James
> --
> James Morris
> <[email protected]>

Jim K.

2003-07-11 15:23:21

by James Morris

[permalink] [raw]
Subject: Re: [PATCH - RFC] [1/2] 2.6 must-fix list - kernel error reporting

On Thu, 10 Jul 2003, Jim Keniston wrote:

> James Morris wrote:
> >
> > On Tue, 8 Jul 2003, Jim Keniston wrote:
> >
> > + kerror_nl = netlink_kernel_create(NETLINK_KERROR, kerror_netlink_rcv);
> > + if (kerror_nl == NULL)
> > + panic("kerror_init: cannot initialize kerror_nl\n");
> >
> > You can simply use NULL instead of passing the dummy kerror_netlink_rcv
> > function.
>
> That begs the question: do we trust that nobody but the kernel will send
> packets to a NETLINK_KERROR socket? Ordinary users can't, but any root
> application can. Without kerror_netlink_rcv(), such packets don't get
> dequeued.

Indeed, the kernel socket buffer fills up.

I think this needs to be addressed in the netlink code, per the patch
below.

Comments?


- James
--
James Morris
<[email protected]>

diff -NurX dontdiff linux-2.5.75.orig/net/netlink/af_netlink.c linux-2.5.75.w1/net/netlink/af_netlink.c
--- linux-2.5.75.orig/net/netlink/af_netlink.c 2003-06-26 12:43:45.000000000 +1000
+++ linux-2.5.75.w1/net/netlink/af_netlink.c 2003-07-12 01:23:49.708254261 +1000
@@ -430,6 +430,10 @@
goto no_dst;
nlk = nlk_sk(sk);

+ /* Don't bother queuing skb if kernel socket has no input function */
+ if (nlk->pid == 0 && !nlk->data_ready)
+ goto no_dst;
+
#ifdef NL_EMULATE_DEV
if (nlk->handler) {
skb_orphan(skb);

2003-07-12 05:04:09

by David Miller

[permalink] [raw]
Subject: Re: [PATCH - RFC] [1/2] 2.6 must-fix list - kernel error reporting

On Sat, 12 Jul 2003 01:37:44 +1000 (EST)
James Morris <[email protected]> wrote:

> On Thu, 10 Jul 2003, Jim Keniston wrote:
>
> > That begs the question: do we trust that nobody but the kernel will send
> > packets to a NETLINK_KERROR socket? Ordinary users can't, but any root
> > application can. Without kerror_netlink_rcv(), such packets don't get
> > dequeued.
>
> Indeed, the kernel socket buffer fills up.
>
> I think this needs to be addressed in the netlink code, per the patch
> below.

Looks good, I'll apply this.

2003-07-12 05:35:59

by David Miller

[permalink] [raw]
Subject: Re: [PATCH - RFC] [1/2] 2.6 must-fix list - kernel error reporting

On Sat, 12 Jul 2003 01:37:44 +1000 (EST)
James Morris <[email protected]> wrote:

> Indeed, the kernel socket buffer fills up.
>
> I think this needs to be addressed in the netlink code, per the patch
> below.
...
> + /* Don't bother queuing skb if kernel socket has no input function */
> + if (nlk->pid == 0 && !nlk->data_ready)
> + goto no_dst;
> +

Oops, turns out this doesn't work. data_ready is never NULL, look at
how netlink_kernel_create() works.

Also, the broadcast case probably needs to be handled
too?

As an aside, to be honest what's so wrong with the socket receive
buffer filling up? The damage is limited to the receive buffer size
of the kernel netlink socket, but that's it.

2003-07-13 01:03:11

by James Morris

[permalink] [raw]
Subject: Re: [PATCH - RFC] [1/2] 2.6 must-fix list - kernel error reporting

On Fri, 11 Jul 2003, David S. Miller wrote:

> > + /* Don't bother queuing skb if kernel socket has no input function */
> > + if (nlk->pid == 0 && !nlk->data_ready)
> > + goto no_dst;
> > +
>
> Oops, turns out this doesn't work. data_ready is never NULL, look at
> how netlink_kernel_create() works.

It's ok: sk->data_ready is never null, but nlk_sk(sk)->data_ready will be
null unless an input function is provided there.

> Also, the broadcast case probably needs to be handled
> too?

Netlink sockets created by netlink_kernel_create() do not subscribe to any
groups and are not broadcast to.

> As an aside, to be honest what's so wrong with the socket receive
> buffer filling up? The damage is limited to the receive buffer size
> of the kernel netlink socket, but that's it.

Agreed, it's not really harmful, but it's sloppy. Better to let the
application know that it can't send to the socket rather than let it keep
sending (with successful return codes) until the kernel socket buffer
fills up.


- James
--
James Morris
<[email protected]>

2003-07-13 05:29:07

by David Miller

[permalink] [raw]
Subject: Re: [PATCH - RFC] [1/2] 2.6 must-fix list - kernel error reporting

On Sun, 13 Jul 2003 11:17:35 +1000 (EST)
James Morris <[email protected]> wrote:

> On Fri, 11 Jul 2003, David S. Miller wrote:
>
> > Oops, turns out this doesn't work. data_ready is never NULL, look at
> > how netlink_kernel_create() works.
>
> It's ok: sk->data_ready is never null, but nlk_sk(sk)->data_ready will be
> null unless an input function is provided there.
>
> > Also, the broadcast case probably needs to be handled
> > too?
>
> Netlink sockets created by netlink_kernel_create() do not subscribe to any
> groups and are not broadcast to.

Oops, you're right on both counts, I brainfarted here.

I'll apply your original patch, thanks.