diff -Naur linux.org/include/linux/kerror.h linux.kerror.patched/include/linux/kerror.h
--- linux.org/include/linux/kerror.h Wed Dec 31 16:00:00 1969
+++ linux.kerror.patched/include/linux/kerror.h Mon Jul 14 09:53:00 2003
@@ -0,0 +1,27 @@
+#ifndef _KERROR_H
+#define _KERROR_H
+
+#ifdef __KERNEL__
+#include <linux/config.h>
+#include <linux/uio.h>
+#include <asm/types.h>
+
+#ifdef CONFIG_NET
+extern int kernel_error_event(void *data, size_t len, __u32 groups);
+extern int kernel_error_event_iov(const struct iovec *iov,
+ unsigned int nseg, __u32 groups);
+#else
+static inline int kernel_error_event(void *data, size_t len, __u32 groups)
+ { return -ENOSYS; }
+static inline int kernel_error_event_iov(const struct iovec *iov,
+ unsigned int nseg, __u32 groups)
+ { return -ENOSYS; }
+#endif /* CONFIG_NET */
+#endif /* __KERNEL__ */
+
+#define KERROR_GROUP_RAW 0x00000001
+#define KERROR_GROUP_EVLOG 0x00000002
+
+#define KERROR_GROUP_ALL (~(u32)0)
+
+#endif /* _KERROR_H */
diff -Naur linux.org/include/linux/netlink.h linux.kerror.patched/include/linux/netlink.h
--- linux.org/include/linux/netlink.h Mon Jul 14 09:53:00 2003
+++ linux.kerror.patched/include/linux/netlink.h Mon Jul 14 09:53:00 2003
@@ -10,6 +10,7 @@
#define NETLINK_TCPDIAG 4 /* TCP socket monitoring */
#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */
#define NETLINK_XFRM 6 /* ipsec */
+#define NETLINK_KERROR 7 /* kernel error event facility */
#define NETLINK_ARPD 8
#define NETLINK_ROUTE6 11 /* af_inet6 route comm channel */
#define NETLINK_IP6_FW 13
diff -Naur linux.org/net/netlink/Makefile linux.kerror.patched/net/netlink/Makefile
--- linux.org/net/netlink/Makefile Mon Jul 14 09:53:00 2003
+++ linux.kerror.patched/net/netlink/Makefile Mon Jul 14 09:53:00 2003
@@ -2,5 +2,5 @@
# Makefile for the netlink driver.
#
-obj-y := af_netlink.o
+obj-y := af_netlink.o kerror.o
obj-$(CONFIG_NETLINK_DEV) += netlink_dev.o
diff -Naur linux.org/net/netlink/kerror.c linux.kerror.patched/net/netlink/kerror.c
--- linux.org/net/netlink/kerror.c Wed Dec 31 16:00:00 1969
+++ linux.kerror.patched/net/netlink/kerror.c Mon Jul 14 09:53:00 2003
@@ -0,0 +1,97 @@
+/* kerror.c: Kernel error event logging facility.
+ *
+ * Copyright (C) 2003 David S. Miller ([email protected])
+ * June 2003 - Jim Keniston and Dan Stekloff (kenistoj and [email protected])
+ * Fixed a couple of bugs and added iovec interface.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/netlink.h>
+#include <linux/kerror.h>
+#include <linux/init.h>
+#include <linux/uio.h>
+#include <net/sock.h>
+
+static struct sock *kerror_nl;
+
+/**
+ * kernel_error_event_iov() - Broadcast packet to NETLINK_KERROR sockets.
+ * @iov: the packet's data
+ * @nseg: number of segments in iov[]
+ * @groups: as with kernel_error_event()
+ */
+int kernel_error_event_iov(const struct iovec *iov, unsigned int nseg,
+ u32 groups)
+{
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+ unsigned char *b, *p;
+ size_t len;
+ unsigned int seg;
+
+ if (!groups)
+ return -EINVAL;
+
+ len = iov_length(iov, nseg);
+ skb = alloc_skb(NLMSG_SPACE(len), GFP_ATOMIC);
+ if (skb == NULL)
+ return -ENOMEM;
+
+ b = skb->tail;
+
+ nlh = NLMSG_PUT(skb, current->pid, 0, 0, len);
+ nlh->nlmsg_flags = 0;
+
+ p = NLMSG_DATA(nlh);
+ for (seg = 0; seg < nseg; seg++) {
+ memcpy(p, (const void*)iov[seg].iov_base, iov[seg].iov_len);
+ p += iov[seg].iov_len;
+ }
+ nlh->nlmsg_len = skb->tail - b;
+
+ NETLINK_CB(skb).dst_groups = groups;
+
+ return netlink_broadcast(kerror_nl, skb, 0, ~0, GFP_ATOMIC);
+
+nlmsg_failure:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+/**
+ * kernel_error_event() - Broadcast packet to NETLINK_KERROR sockets.
+ * @data, @len: the packet's data
+ * @groups: the group(s) to which the packet pertains -- e.g.,
+ * KERROR_GROUP_EVLOG. On a recvmsg(), this shows up in
+ * ((struct sockaddr_nl*)(msg->msg_name))->nl_groups.
+ */
+int kernel_error_event(void *data, size_t len, u32 groups)
+{
+ struct iovec iov;
+ iov.iov_base = data;
+ iov.iov_len = len;
+ return kernel_error_event_iov(&iov, 1, groups);
+}
+
+static int __init kerror_init(void)
+{
+ printk(KERN_INFO "Initializing KERROR netlink socket\n");
+
+ /* Note that we ignore all incoming messages on this socket. */
+ kerror_nl = netlink_kernel_create(NETLINK_KERROR, NULL);
+ if (kerror_nl == NULL)
+ panic("kerror_init: cannot initialize kerror_nl\n");
+
+ return 0;
+}
+
+static void __exit kerror_exit(void)
+{
+ sock_release(kerror_nl->sk_socket);
+}
+
+module_init(kerror_init);
+module_exit(kerror_exit);
diff -Naur linux.org/net/netsyms.c linux.kerror.patched/net/netsyms.c
--- linux.org/net/netsyms.c Mon Jul 14 09:53:00 2003
+++ linux.kerror.patched/net/netsyms.c Mon Jul 14 09:53:00 2003
@@ -83,6 +83,7 @@
#endif
#include <linux/rtnetlink.h>
+#include <linux/kerror.h>
#ifdef CONFIG_IPX_MODULE
extern struct datalink_proto *make_EII_client(void);
@@ -505,6 +506,8 @@
EXPORT_SYMBOL(netlink_set_nonroot);
EXPORT_SYMBOL(netlink_register_notifier);
EXPORT_SYMBOL(netlink_unregister_notifier);
+EXPORT_SYMBOL(kernel_error_event);
+EXPORT_SYMBOL(kernel_error_event_iov);
#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
EXPORT_SYMBOL(netlink_attach);
EXPORT_SYMBOL(netlink_detach);
Jim Keniston <[email protected]> wrote:
>
> +int kernel_error_event_iov(const struct iovec *iov, unsigned int nseg,
> + u32 groups)
> +{
> ...
> +
> + return netlink_broadcast(kerror_nl, skb, 0, ~0, GFP_ATOMIC);
This appears to be deadlocky when called from interrupt handlers.
netlink_broadcast() does read_lock(&nl_table_lock). But nl_table_lock is
not an irq-safe lock.
Possibly netlink_broadcast() can be made callable from hardirq context, but
it looks to be non trivial. The various error and delivery handlers need
to be reviewed, the kfree_skb() calls should be thought about, etc.
Hello!
> netlink_broadcast() does read_lock(&nl_table_lock). But nl_table_lock is
> not an irq-safe lock.
Just as reminder, there are _no_ irq safe locks in net/*. A few of
local_irq_disable()s are segregated in interface to device drivers.
> Possibly netlink_broadcast() can be made callable from hardirq context, but
> it looks to be non trivial.
Trivial or non-trivial, before all this is highly not desired.
net/* is better to remain in the form free of knowledge of hardirqs.
Alexey
--- kerror.c.old Thu Jul 17 10:53:19 2003
+++ kerror.c Thu Jul 17 10:54:55 2003
@@ -3,10 +3,12 @@
* Copyright (C) 2003 David S. Miller ([email protected])
* June 2003 - Jim Keniston and Dan Stekloff (kenistoj and [email protected])
* Fixed a couple of bugs and added iovec interface.
+ * July 2003 - Jim Keniston - Added handling of packets logged from IRQ context.
*/
#include <linux/kernel.h>
#include <linux/types.h>
+#include <linux/interrupt.h>
#include <linux/skbuff.h>
#include <linux/socket.h>
#include <linux/netlink.h>
@@ -17,6 +19,33 @@
static struct sock *kerror_nl;
+/* Packets logged from IRQ context are queued for broadcast by a tasklet. */
+static struct sk_buff_head delayed_pkts;
+static void broadcast_delayed_pkts(unsigned long);
+static DECLARE_TASKLET(delayed_pkts_tasklet, broadcast_delayed_pkts, 0);
+
+/**
+ * delayed_broadcast() - Schedule a tasklet to broadcast a packet.
+ * We want to broadcast the indicated packet, but can't because we're
+ * in a hardware interrupt and so can't call netlink_broadcast().
+ * Schedule a tasklet to do the job.
+ *
+ * @skb: the socket buffer to broadcast
+ */
+static void delayed_broadcast(struct sk_buff *skb)
+{
+ skb_queue_tail(&delayed_pkts, skb);
+ tasklet_schedule(&delayed_pkts_tasklet);
+}
+
+static void broadcast_delayed_pkts(unsigned long ignored)
+{
+ struct sk_buff *skb;
+ while ((skb = skb_dequeue(&delayed_pkts)) != NULL) {
+ (void) netlink_broadcast(kerror_nl, skb, 0, ~0, GFP_ATOMIC);
+ }
+}
+
/**
* kernel_error_event_iov() - Broadcast packet to NETLINK_KERROR sockets.
* @iov: the packet's data
@@ -54,6 +83,11 @@
NETLINK_CB(skb).dst_groups = groups;
+ if (in_irq()) {
+ delayed_broadcast(skb);
+ return -EINPROGRESS;
+ }
+
return netlink_broadcast(kerror_nl, skb, 0, ~0, GFP_ATOMIC);
nlmsg_failure:
@@ -85,6 +119,7 @@
if (kerror_nl == NULL)
panic("kerror_init: cannot initialize kerror_nl\n");
+ skb_queue_head_init(&delayed_pkts);
return 0;
}
On Thu, 17 Jul 2003, Jim Keniston wrote:
> 3. Given the above, what should the evlog.c caller do when
> kernel_error_event_iov() returns -EINPROGRESS?
> a. Nothing. Figure the packet will probably get logged.
> b. Just to be safe, report it via printk, the same way we report dropped
> packets.
> We currently do (a). (b) would mean that every event logged from IRQ
> context would be cc-ed to printk.
I don't think this irq detection logic should be added at all here, let
the caller reschedule its logging if running in irq context.
- James
--
James Morris
<[email protected]>
James Morris wrote:
>
> On Thu, 17 Jul 2003, Jim Keniston wrote:
>
> > 3. Given the above, what should the evlog.c caller do when
> > kernel_error_event_iov() returns -EINPROGRESS?
> > a. Nothing. Figure the packet will probably get logged.
> > b. Just to be safe, report it via printk, the same way we report dropped
> > packets.
> > We currently do (a). (b) would mean that every event logged from IRQ
> > context would be cc-ed to printk.
>
> I don't think this irq detection logic should be added at all here, let
> the caller reschedule its logging if running in irq context.
>
> - James
> --
> James Morris
> <[email protected]>
Yes, this makes sense. At the kerror.c level, just return -EDEADLK if in_irq().
Delay packet delivery (via a tasklet, as before) at the evlog.c level instead.
That way, we know at the evlog.c level (in the tasklet) whether the event packet
was delivered to anybody, and can paraphrase it to printk if it wasn't.
Is this the sort of thing you had in mind?
Jim K
Jim Keniston wrote:
> James Morris wrote:
> >
> > On Thu, 17 Jul 2003, Jim Keniston wrote:
> >
> > > 3. Given the above, what should the evlog.c caller do when
> > > kernel_error_event_iov() returns -EINPROGRESS?
> > > a. Nothing. Figure the packet will probably get logged.
> > > b. Just to be safe, report it via printk, the same way we report dropped
> > > packets.
> > > We currently do (a). (b) would mean that every event logged from IRQ
> > > context would be cc-ed to printk.
> >
> > I don't think this irq detection logic should be added at all here, let
> > the caller reschedule its logging if running in irq context.
> >
> > - James
> > --
> > James Morris
> > <[email protected]>
>
> Yes, this makes sense. At the kerror.c level, just return -EDEADLK if in_irq().
> Delay packet delivery (via a tasklet, as before) at the evlog.c level instead.
> That way, we know at the evlog.c level (in the tasklet) whether the event packet
> was delivered to anybody, and can paraphrase it to printk if it wasn't.
>
> Is this the sort of thing you had in mind?
> Jim K
I implemented the above change. Now, an event logged from an interrupt
handler when nobody's listening to our socket (e.g., during boot) is
paraphrased to printk. Here are the updated patches:
http://prdownloads.sourceforge.net/evlog/kerror-2.5.75.patch?download
http://prdownloads.sourceforge.net/evlog/evlog-2.5.75.patch?download
http://prdownloads.sourceforge.net/evlog/kerrord.tar.gz?download
Jim K
On Fri, 18 Jul 2003, Jim Keniston wrote:
> > Yes, this makes sense. At the kerror.c level, just return -EDEADLK if in_irq().
> > Delay packet delivery (via a tasklet, as before) at the evlog.c level instead.
> > That way, we know at the evlog.c level (in the tasklet) whether the event packet
> > was delivered to anybody, and can paraphrase it to printk if it wasn't.
> >
> > Is this the sort of thing you had in mind?
Not exactly -- I don't think the logging framework should do any irq
detection. The caller should either know if its in an interrupt, or do
the detection itself.
- James
--
James Morris
<[email protected]>