Hi,
This is v3 of the tentative patch set I am sending for feedback.
It implements a network I/O backend for KDB/KGDB, the kernel
debugger.
NetKGDB is very similar to netconsole, on which it is based, but with
a crucial difference - netkgdb is designed to accept connections
from any hosts, i.e. it is not necessary to specify these ahead
of time. This makes it that much more useful in a "this host crashed
and a I want a developer to take a look at it" scenario, common to
large scale test/QA farms and automated testing harnesses. This
involves a minor change to the netpoll rx_hook, namely, to provide
source connection information.
This differs from v2 in adding the flush() callback in kgdb_io,
which would be necessary for correct behavior in gdbstub mode,
and enhances the debug core to support registering multiple
I/O ops. The later allows both kgdboc, netkgdb and any other
providers to coexist peacefully, and allow debugging both
interactively and via the network.
V2 cleaned up a lot of the strange cruft in v1 surrounding
dealing with netdev notifiers.
[PATCHv3 1/3] NETPOLL: Extend rx_hook support.
[PATCHv3 2/3] NETKGDB: Ethernet/UDP/IP KDB transport.
[PATCHv3 3/3] KGDB: Allow registering multiple I/O ops.
Thanks,
A
From: Andrei Warkentin <[email protected]>
Pass down source information to rx_hook, useful
for accepting connections from unspecified clients.
Cc: [email protected]
Cc: Jason Wessel <[email protected]>
Cc: Matt Mackall <[email protected]>
Signed-off-by: Andrei Warkentin <[email protected]>
Signed-off-by: Andrei Warkentin <[email protected]>
---
include/linux/netpoll.h | 10 +++++++++-
net/core/netpoll.c | 10 ++++------
2 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 5dfa091..9a9cfa1 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -11,12 +11,19 @@
#include <linux/interrupt.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
+#include <linux/if_ether.h>
+#include <net/tcp.h>
+#include <net/udp.h>
struct netpoll {
struct net_device *dev;
char dev_name[IFNAMSIZ];
const char *name;
- void (*rx_hook)(struct netpoll *, int, char *, int);
+ void (*rx_hook)(struct netpoll *,
+ u8 *h_source,
+ __be32 saddr,
+ struct udphdr *,
+ char *, int);
__be32 local_ip, remote_ip;
u16 local_port, remote_port;
@@ -40,6 +47,7 @@ struct netpoll_info {
struct netpoll *netpoll;
};
+void netpoll_poll_dev(struct net_device *dev);
void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
void netpoll_print_options(struct netpoll *np);
int netpoll_parse_options(struct netpoll *np, char *opt);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 3d84fb9..c182bb2 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -26,8 +26,6 @@
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/export.h>
-#include <net/tcp.h>
-#include <net/udp.h>
#include <asm/unaligned.h>
#include <trace/events/napi.h>
@@ -189,7 +187,7 @@ static void service_arp_queue(struct netpoll_info *npi)
}
}
-static void netpoll_poll_dev(struct net_device *dev)
+void netpoll_poll_dev(struct net_device *dev)
{
const struct net_device_ops *ops;
@@ -615,9 +613,9 @@ int __netpoll_rx(struct sk_buff *skb)
if (np->local_port && np->local_port != ntohs(uh->dest))
continue;
- np->rx_hook(np, ntohs(uh->source),
- (char *)(uh+1),
- ulen - sizeof(struct udphdr));
+ np->rx_hook(np, eth_hdr(skb)->h_source,
+ iph->saddr, uh, (char *)(uh+1),
+ ulen - sizeof(struct udphdr));
hits++;
}
--
1.7.8.3
This allows multiple I/O ops, which is useful, if you want
to be able to support debugging say both via console and
via network (or 1394, dbgp, etc.)
Tested with kgdboc and netkgdb.
Signed-off-by: Andrei Warkentin <[email protected]>
---
drivers/usb/early/ehci-dbgp.c | 4 +-
include/linux/kgdb.h | 6 ++-
kernel/debug/debug_core.c | 130 +++++++++++++++++++++++++++--------------
kernel/debug/gdbstub.c | 43 ++++++-------
kernel/debug/kdb/kdb_io.c | 26 ++++-----
5 files changed, 125 insertions(+), 84 deletions(-)
diff --git a/drivers/usb/early/ehci-dbgp.c b/drivers/usb/early/ehci-dbgp.c
index 1fc8f12..e5db14a 100644
--- a/drivers/usb/early/ehci-dbgp.c
+++ b/drivers/usb/early/ehci-dbgp.c
@@ -97,7 +97,8 @@ static inline u32 dbgp_len_update(u32 x, u32 len)
#ifdef CONFIG_KGDB
static struct kgdb_io kgdbdbgp_io_ops;
-#define dbgp_kgdb_mode (dbg_io_ops == &kgdbdbgp_io_ops)
+static int kgdb_registered = 0;
+#define dbgp_kgdb_mode (kgdb_registered)
#else
#define dbgp_kgdb_mode (0)
#endif
@@ -1051,6 +1052,7 @@ static int __init kgdbdbgp_parse_config(char *str)
kgdbdbgp_wait_time = simple_strtoul(ptr, &ptr, 10);
}
kgdb_register_io_module(&kgdbdbgp_io_ops);
+ kgdb_registered = 1;
kgdbdbgp_io_ops.is_console = early_dbgp_console.index != -1;
return 0;
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index fa39183..c92cd30 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -17,6 +17,7 @@
#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/atomic.h>
+#include <linux/list.h>
#ifdef CONFIG_HAVE_ARCH_KGDB
#include <asm/kgdb.h>
#endif
@@ -276,6 +277,7 @@ struct kgdb_io {
void (*pre_exception) (void);
void (*post_exception) (void);
int is_console;
+ struct list_head list;
};
extern struct kgdb_arch arch_kgdb_ops;
@@ -284,7 +286,9 @@ extern unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs);
extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops);
extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops);
-extern struct kgdb_io *dbg_io_ops;
+extern int dbg_io_get_char(void);
+extern void dbg_io_put_char(u8, bool);
+extern void dbg_io_flush(void);
extern int kgdb_hex2long(char **ptr, unsigned long *long_val);
extern char *kgdb_mem2hex(char *mem, char *buf, int count);
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 0d7c087..bd29f92 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -72,8 +72,8 @@ int kgdb_io_module_registered;
/* Guard for recursive entry */
static int exception_level;
-struct kgdb_io *dbg_io_ops;
-static DEFINE_SPINLOCK(kgdb_registration_lock);
+static DEFINE_MUTEX(kgdb_registration_lock);
+static LIST_HEAD(dbg_io_list);
/* kgdb console driver is loaded */
static int kgdb_con_registered;
@@ -384,7 +384,7 @@ setundefined:
*/
static int kgdb_io_ready(int print_wait)
{
- if (!dbg_io_ops)
+ if (list_empty(&dbg_io_list))
return 0;
if (kgdb_connected)
return 1;
@@ -455,6 +455,26 @@ static void dbg_touch_watchdogs(void)
rcu_cpu_stall_reset();
}
+void dbg_io_run_pre(void)
+{
+ struct kgdb_io *kio;
+
+ list_for_each_entry(kio, &dbg_io_list, list) {
+ if (kio->pre_exception)
+ kio->pre_exception();
+ }
+}
+
+void dbg_io_run_post(void)
+{
+ struct kgdb_io *kio;
+
+ list_for_each_entry_reverse(kio, &dbg_io_list, list) {
+ if (kio->post_exception)
+ kio->post_exception();
+ }
+}
+
static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs,
int exception_state)
{
@@ -565,8 +585,7 @@ return_normal:
goto kgdb_restore;
/* Call the I/O driver's pre_exception routine */
- if (dbg_io_ops->pre_exception)
- dbg_io_ops->pre_exception();
+ dbg_io_run_pre();
/*
* Get the passive CPU lock which will hold all the non-primary
@@ -625,8 +644,7 @@ cpu_master_loop:
}
/* Call the I/O driver's post_exception routine */
- if (dbg_io_ops->post_exception)
- dbg_io_ops->post_exception();
+ dbg_io_run_post();
if (!kgdb_single_step) {
raw_spin_unlock(&dbg_slave_lock);
@@ -734,7 +752,7 @@ static struct console kgdbcons = {
#ifdef CONFIG_MAGIC_SYSRQ
static void sysrq_handle_dbg(int key)
{
- if (!dbg_io_ops) {
+ if (list_empty(&dbg_io_list)) {
printk(KERN_CRIT "ERROR: No KGDB I/O module available\n");
return;
}
@@ -867,38 +885,35 @@ static void kgdb_initial_breakpoint(void)
int kgdb_register_io_module(struct kgdb_io *new_dbg_io_ops)
{
int err;
+ int first;
- spin_lock(&kgdb_registration_lock);
-
- if (dbg_io_ops) {
- spin_unlock(&kgdb_registration_lock);
-
- printk(KERN_ERR "kgdb: Another I/O driver is already "
- "registered with KGDB.\n");
- return -EBUSY;
- }
+ BUG_ON(!new_dbg_io_ops->read_char);
+ BUG_ON(!new_dbg_io_ops->write_char);
+ mutex_lock(&kgdb_registration_lock);
+ first = list_empty(&dbg_io_list);
if (new_dbg_io_ops->init) {
err = new_dbg_io_ops->init();
if (err) {
- spin_unlock(&kgdb_registration_lock);
+ mutex_unlock(&kgdb_registration_lock);
return err;
}
}
- dbg_io_ops = new_dbg_io_ops;
+ list_add(&new_dbg_io_ops->list, &dbg_io_list);
+ if (first) {
+ /* Arm KGDB now. */
+ kgdb_register_callbacks();
- spin_unlock(&kgdb_registration_lock);
+ if (kgdb_break_asap)
+ kgdb_initial_breakpoint();
+ }
+
+ mutex_unlock(&kgdb_registration_lock);
printk(KERN_INFO "kgdb: Registered I/O driver %s.\n",
new_dbg_io_ops->name);
- /* Arm KGDB now. */
- kgdb_register_callbacks();
-
- if (kgdb_break_asap)
- kgdb_initial_breakpoint();
-
return 0;
}
EXPORT_SYMBOL_GPL(kgdb_register_io_module);
@@ -913,37 +928,64 @@ void kgdb_unregister_io_module(struct kgdb_io *old_dbg_io_ops)
{
BUG_ON(kgdb_connected);
- /*
- * KGDB is no longer able to communicate out, so
- * unregister our callbacks and reset state.
- */
- kgdb_unregister_callbacks();
+ mutex_lock(&kgdb_registration_lock);
+ if (list_is_singular(&dbg_io_list)) {
+ /*
+ * KGDB is no longer able to communicate out, so
+ * unregister our callbacks and reset state.
+ */
+ kgdb_unregister_callbacks();
+ printk(KERN_INFO
+ "kgdb: debugger disabled.\n");
+ }
- spin_lock(&kgdb_registration_lock);
+ list_del(&old_dbg_io_ops->list);
+ mutex_unlock(&kgdb_registration_lock);
- WARN_ON_ONCE(dbg_io_ops != old_dbg_io_ops);
- dbg_io_ops = NULL;
+ printk(KERN_INFO
+ "kgdb: Unregistered I/O driver %s.\n",
+ old_dbg_io_ops->name);
- spin_unlock(&kgdb_registration_lock);
- printk(KERN_INFO
- "kgdb: Unregistered I/O driver %s, debugger disabled.\n",
- old_dbg_io_ops->name);
}
EXPORT_SYMBOL_GPL(kgdb_unregister_io_module);
int dbg_io_get_char(void)
{
- int ret = dbg_io_ops->read_char();
- if (ret == NO_POLL_CHAR)
- return -1;
- if (!dbg_kdb_mode)
+ struct kgdb_io *kio;
+ int ret = NO_POLL_CHAR;
+
+ list_for_each_entry(kio, &dbg_io_list, list) {
+ ret = kio->read_char();
+ if (ret == NO_POLL_CHAR)
+ continue;
+ if (!dbg_kdb_mode)
+ return ret;
+ if (ret == 127)
+ return 8;
return ret;
- if (ret == 127)
- return 8;
+ }
return ret;
}
+void dbg_io_flush(void)
+{
+ struct kgdb_io *kio;
+
+ list_for_each_entry(kio, &dbg_io_list, list)
+ if (kio->flush)
+ kio->flush();
+}
+
+void dbg_io_put_char(u8 data, bool skip_con)
+{
+ struct kgdb_io *kio;
+
+ list_for_each_entry(kio, &dbg_io_list, list)
+ if (!kio->is_console || !skip_con)
+ kio->write_char(data);
+}
+
/**
* kgdb_breakpoint - generate breakpoint exception
*
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index c22d8c2..cf4fdfd 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -79,9 +79,9 @@ static int gdbstub_read_wait(void)
#else
static int gdbstub_read_wait(void)
{
- int ret = dbg_io_ops->read_char();
+ int ret = dbg_io_get_char();
while (ret == NO_POLL_CHAR)
- ret = dbg_io_ops->read_char();
+ ret = dbg_io_get_char();
return ret;
}
#endif
@@ -125,12 +125,11 @@ static void get_packet(char *buffer)
if (checksum != xmitcsum)
/* failed checksum */
- dbg_io_ops->write_char('-');
+ dbg_io_put_char('-', false);
else
/* successful transfer */
- dbg_io_ops->write_char('+');
- if (dbg_io_ops->flush)
- dbg_io_ops->flush();
+ dbg_io_put_char('+', false);
+ dbg_io_flush();
}
buffer[count] = 0;
} while (checksum != xmitcsum);
@@ -150,21 +149,20 @@ static void put_packet(char *buffer)
* $<packet info>#<checksum>.
*/
while (1) {
- dbg_io_ops->write_char('$');
+ dbg_io_put_char('$', false);
checksum = 0;
count = 0;
while ((ch = buffer[count])) {
- dbg_io_ops->write_char(ch);
+ dbg_io_put_char(ch, false);
checksum += ch;
count++;
}
- dbg_io_ops->write_char('#');
- dbg_io_ops->write_char(hex_asc_hi(checksum));
- dbg_io_ops->write_char(hex_asc_lo(checksum));
- if (dbg_io_ops->flush)
- dbg_io_ops->flush();
+ dbg_io_put_char('#', false);
+ dbg_io_put_char(hex_asc_hi(checksum), false);
+ dbg_io_put_char(hex_asc_lo(checksum), false);
+ dbg_io_flush();
/* Now see what we get in reply. */
ch = gdbstub_read_wait();
@@ -183,9 +181,8 @@ static void put_packet(char *buffer)
* packet.
*/
if (ch == '$') {
- dbg_io_ops->write_char('-');
- if (dbg_io_ops->flush)
- dbg_io_ops->flush();
+ dbg_io_put_char('-', false);
+ dbg_io_flush();
return;
}
}
@@ -1097,7 +1094,7 @@ int gdbstub_state(struct kgdb_state *ks, char *cmd)
gdbstub_prev_in_buf_pos = 0;
return 0;
}
- dbg_io_ops->write_char('+');
+ dbg_io_put_char('+', false);
put_packet(remcom_out_buffer);
return 0;
}
@@ -1115,19 +1112,19 @@ void gdbstub_exit(int status)
buffer[1] = hex_asc_hi(status);
buffer[2] = hex_asc_lo(status);
- dbg_io_ops->write_char('$');
+ dbg_io_put_char('$', false);
checksum = 0;
for (loop = 0; loop < 3; loop++) {
ch = buffer[loop];
checksum += ch;
- dbg_io_ops->write_char(ch);
+ dbg_io_put_char(ch, false);
}
- dbg_io_ops->write_char('#');
- dbg_io_ops->write_char(hex_asc_hi(checksum));
- dbg_io_ops->write_char(hex_asc_lo(checksum));
+ dbg_io_put_char('#', false);
+ dbg_io_put_char(hex_asc_hi(checksum), false);
+ dbg_io_put_char(hex_asc_lo(checksum), false);
/* make sure the output is flushed, lest the bootloader clobber it */
- dbg_io_ops->flush();
+ dbg_io_flush();
}
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 5eb7e23..35ef3cb 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -689,14 +689,11 @@ kdb_printit:
if (!dbg_kdb_mode && kgdb_connected) {
gdbstub_msg_write(kdb_buffer, retlen);
} else {
- if (!dbg_io_ops->is_console) {
- len = strlen(kdb_buffer);
- cp = kdb_buffer;
- while (len--) {
- dbg_io_ops->write_char(*cp);
- cp++;
- }
- }
+ len = strlen(kdb_buffer);
+ cp = kdb_buffer;
+ while (len--)
+ dbg_io_put_char(*cp++, true);
+
while (c) {
c->write(c, kdb_buffer, retlen);
touch_nmi_watchdog();
@@ -743,14 +740,13 @@ kdb_printit:
kdb_input_flush();
c = console_drivers;
- if (!dbg_io_ops->is_console) {
- len = strlen(moreprompt);
- cp = moreprompt;
- while (len--) {
- dbg_io_ops->write_char(*cp);
- cp++;
- }
+ len = strlen(moreprompt);
+ cp = moreprompt;
+ while (len--) {
+ dbg_io_put_char(*cp, true);
+ cp++;
}
+
while (c) {
c->write(c, moreprompt, strlen(moreprompt));
touch_nmi_watchdog();
--
1.7.8.3
From: Andrei Warkentin <[email protected]>
Allows debugging a crashed kernel, and breaking into a
live kernel via a network interface.
Useful in testing/QA farms where physical access is not
necessarily easy/desired, and iLO-like solutions end up
using USB HID and not i8042-based keyboard.
Cc: [email protected]
Cc: Jason Wessel <[email protected]>
Cc: Matt Mackall <[email protected]>
Signed-off-by: Andrei Warkentin <[email protected]>
Signed-off-by: Andrei Warkentin <[email protected]>
---
Documentation/networking/netkgdb.txt | 104 +++++
drivers/net/Kconfig | 8 +-
drivers/net/Makefile | 1 +
drivers/net/netkgdb.c | 729 ++++++++++++++++++++++++++++++++++
4 files changed, 841 insertions(+), 1 deletions(-)
create mode 100644 Documentation/networking/netkgdb.txt
create mode 100644 drivers/net/netkgdb.c
diff --git a/Documentation/networking/netkgdb.txt b/Documentation/networking/netkgdb.txt
new file mode 100644
index 0000000..b2c70c1
--- /dev/null
+++ b/Documentation/networking/netkgdb.txt
@@ -0,0 +1,104 @@
+(based off netconsole.txt)
+
+Started by Andrei Warkentin <[email protected]>, 2012.02.24
+
+Please send bug reports to Andrey Warkentin <[email protected]>
+
+Introduction:
+=============
+
+This module allows debugging a crashed kernel over the network,
+where other means are not practical.
+
+It can be used either built-in or as a module. As a built-in,
+netkgdb initializes immediately after NIC cards and will bring up
+the specified interface as soon as possible. While this doesn't allow
+early kernel debugging, it's useful for handling random crashes and
+such.
+
+Sender and receiver configuration:
+==================================
+
+It takes an optional string configuration parameter "netkgdb" in the
+following format:
+
+ netkgdb=[src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr]
+
+ where
+ src-port source for UDP packets (defaults to 7777)
+ src-ip source IP to use (interface address)
+ dev network interface (eth0)
+ tgt-port port for remote agent (7777)
+ tgt-ip IP address for remote agent
+ tgt-macaddr ethernet MAC address for remote agent (broadcast)
+
+Examples:
+
+ linux [email protected]/eth1,[email protected]/12:34:56:78:9a:bc
+
+ or
+
+ insmod netkgdb [email protected]/,@10.0.0.2/
+
+Note: the parameter is optional and largely unneeded unless you
+are running a listen server - netkgdb will accept connection from any
+IP on all interfaces and will reconfigure itself appropriately if
+the assigned interface IP address changes. This makes it useful
+in an environment where it's not known ahead of time what computer
+will connect to perform the crash analysis.
+
+Note: The format for the string is not netkgdb specific, and its
+parsing is part of Linux netpoll support. The netpoll code does
+need at least a local IP assigned, so the following will result
+in an error and netkgdb will not load if you don't already have
+an IP address assigned.
+
+Note: If you specify a local/remote netkgdb string as a kernel
+parameter, with the idea of having a remote server listen on
+a socket and wait for the kdb crash connection, don't expect
+to be able to connect from the remote host. This is peculiar
+behavior of the netpoll code - the interface is not *really*
+configured and your packets will never reach netkgdb. You
+will need to "ifconfig eth0 x.y.z.w' first. Or just wait
+for the crash :-).
+
+insmod netkgdb netkgdb=@/,@10.0.0.2/
+
+The remote host can run either 'netcat -u -l -p <port>' or 'nc -l -u <port>'.
+
+Using:
+======
+
+If the kernel is running, then any input will result in following string
+to be sent back -
+ Alive - '!!!BREAK!!!' to break in.
+
+This lets you know the machine is alive.
+
+Sending "!!!BREAK!!!", followed by a '\n' will result in breaking into
+KGDB/KDB.
+
+Miscellaneous notes:
+====================
+
+The following notes are only relevant if you want the debugged
+host to automatically send the KDB/KGDB data on a crash to a
+host preconfigured with the netkgdb= parameter.
+
+WARNING: the default target ethernet setting uses the broadcast
+ethernet address to send packets, which can cause increased load on
+other systems on the same ethernet segment. After the first reply
+connection, the target ethernet address is updated to match the source.
+
+TIP: some LAN switches may be configured to suppress ethernet broadcasts
+so it is advised to explicitly specify the remote agents' MAC addresses
+from the config parameters passed to netkgdb.
+
+TIP: to find out the MAC address of, say, 10.0.0.2, you may try using:
+
+ ping -c 1 10.0.0.2 ; /sbin/arp -n | grep 10.0.0.2
+
+TIP: in case the remote agent is on a separate LAN subnet than
+the sender, it is suggested to try specifying the MAC address of the
+default gateway (you may use /sbin/route -n to find it out) as the
+remote MAC address instead.
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index b982854..8deb605 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -160,6 +160,12 @@ config NETCONSOLE
If you want to log kernel messages over the network, enable this.
See <file:Documentation/networking/netconsole.txt> for details.
+config NETKGDB
+ tristate "Network kgdb I/O backend"
+ ---help---
+ If you want to debug the kernel over the network, enable this.
+ See <file:Documentation/networking/netkgdb.txt> for details.
+
config NETCONSOLE_DYNAMIC
bool "Dynamic reconfiguration of logging targets"
depends on NETCONSOLE && SYSFS && CONFIGFS_FS && \
@@ -171,7 +177,7 @@ config NETCONSOLE_DYNAMIC
See <file:Documentation/networking/netconsole.txt> for details.
config NETPOLL
- def_bool NETCONSOLE
+ def_bool NETCONSOLE || NETKGDB
config NETPOLL_TRAP
bool "Netpoll traffic trapping"
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index a6b8ce1..6eaa21f 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_MII) += mii.o
obj-$(CONFIG_MDIO) += mdio.o
obj-$(CONFIG_NET) += Space.o loopback.o
obj-$(CONFIG_NETCONSOLE) += netconsole.o
+obj-$(CONFIG_NETKGDB) += netkgdb.o
obj-$(CONFIG_PHYLIB) += phy/
obj-$(CONFIG_RIONET) += rionet.o
obj-$(CONFIG_NET_TEAM) += team/
diff --git a/drivers/net/netkgdb.c b/drivers/net/netkgdb.c
new file mode 100644
index 0000000..bd03cd7
--- /dev/null
+++ b/drivers/net/netkgdb.c
@@ -0,0 +1,729 @@
+/*
+ * linux/drivers/net/netkgdb.c
+ *
+ * Copyright (C) 2012 Andrei Warkentin <[email protected]>
+ *
+ * Based on Matt Mackall's netconsole and
+ * my FIQ/KGDB support code.
+ *
+ */
+
+#define pr_fmt(fmt) "netkgdb: " fmt
+
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/moduleparam.h>
+#include <linux/string.h>
+#include <linux/netpoll.h>
+#include <linux/workqueue.h>
+#include <linux/inet.h>
+#include <linux/inetdevice.h>
+#include <linux/kgdb.h>
+
+MODULE_AUTHOR("Maintainer: Andrei Warkentin <[email protected]>");
+MODULE_DESCRIPTION("KGDB I/O driver for network interfaces");
+MODULE_LICENSE("GPL");
+
+#define DEFAULT_PORT 7777
+#define MAX_PARAM_LENGTH 256
+#define MAX_RING_SIZE PAGE_SIZE
+
+static char config[MAX_PARAM_LENGTH];
+module_param_string(netkgdb, config, MAX_PARAM_LENGTH, 0);
+MODULE_PARM_DESC(netkgdb, " netkgdb=[src-port]@[src-ip]/[dev],[tgt-port]@<tgt-ip>/[tgt-macaddr]");
+
+static LIST_HEAD(netkgdb_nts);
+
+/*
+ * Protects netkgdb_nts list and access to stored netkgdb targets
+ * between the net notifiers.
+ */
+static DEFINE_MUTEX(netkgdb_nts_lock);
+
+/**
+ * struct netkgdb_target - Represents a configured netkgdb target.
+ * @list: Links this target into the netkgdb_nts.
+ * @tx_np: netpoll structue used for KGDB/KDB traffic.
+ * @rx_np: netpoll structure used to listed to inbound connections.
+ * @bp: work_struct to break into KGDB/KDB.
+ * @ping: work_struct to return alive message.
+ * @tx_ready: set if there is an outgoing IP address assigned.
+ */
+struct netkgdb_target {
+ struct list_head list;
+ struct netpoll tx_np;
+ struct netpoll rx_np;
+ struct work_struct bp;
+ struct work_struct ping;
+ bool tx_ready;
+};
+
+static struct netkgdb_ring {
+ u8 buf[MAX_RING_SIZE];
+ int head;
+ int tail;
+} netkgdb_rx_ring, netkgdb_tx_ring;
+static int netkgdb_trapped = 0;
+
+/*
+ * Returns the offset of the first unconsumed character.
+ */
+static inline u8 *netkgdb_ring_off(struct netkgdb_ring *ring)
+{
+ return &ring->buf[ring->tail];
+}
+
+/*
+ * Returns the length of a contiguous buffer of unconsumed
+ * characters, to deal with ring buffer wraparound.
+ */
+static inline int netkgdb_ring_cont(struct netkgdb_ring *ring)
+{
+ if (ring->head < ring->tail)
+ return MAX_RING_SIZE - ring->tail;
+ else
+ return ring->head - ring->tail;
+}
+
+/*
+ * Returns the number of characters present in the ring.
+ */
+static inline int netkgdb_ring_level(struct netkgdb_ring *ring)
+{
+ int level = ring->head - ring->tail;
+
+ if (level < 0)
+ level = MAX_RING_SIZE + level;
+
+ return level;
+}
+
+/*
+ * Returns the number of characters that could be added to the ring.
+ */
+static inline int netkgdb_ring_room(struct netkgdb_ring *ring)
+{
+ return MAX_RING_SIZE - netkgdb_ring_level(ring) - 1;
+}
+
+/*
+ * Returns the character at a specific position in the ring,
+ * without consuming it.
+ */
+static inline u8 netkgdb_ring_peek(struct netkgdb_ring *ring, int i)
+{
+ return ring->buf[(ring->tail + i) % MAX_RING_SIZE];
+}
+
+/*
+ * Consumes a number of characters (up to count) in the ring.
+ */
+static inline int ring_consume(struct netkgdb_ring *ring, int count)
+{
+ count = min(count, netkgdb_ring_level(ring));
+
+ ring->tail = (ring->tail + count) % MAX_RING_SIZE;
+ smp_mb();
+ return count;
+}
+
+/*
+ * Adds a character to the ring, updating the number of unconsumed
+ * characters.
+ */
+static inline int ring_push(struct netkgdb_ring *ring, u8 data)
+{
+ if (netkgdb_ring_room(ring) == 0)
+ return 0;
+
+ ring->buf[ring->head] = data;
+ smp_mb();
+ ring->head = (ring->head + 1) % MAX_RING_SIZE;
+ smp_mb();
+
+ return 1;
+}
+
+/*
+ * Flushes the contents of the outgoing (TX) ring
+ * to all netkgdb targets that are up and have a remote
+ * host configured.
+ */
+static inline void netkgdb_tx_flush(void)
+{
+ int frag;
+ u8 *start;
+ struct netkgdb_target *nt;
+
+ /* In interrupt context on one cpu. Forget about the locks. */
+ while (netkgdb_ring_level(&netkgdb_tx_ring))
+ {
+ start = netkgdb_ring_off(&netkgdb_tx_ring);
+ frag = netkgdb_ring_cont(&netkgdb_tx_ring);
+
+ list_for_each_entry(nt, &netkgdb_nts, list) {
+ if (nt->tx_ready &&
+ nt->tx_np.dev &&
+ netif_running(nt->tx_np.dev))
+ netpoll_send_udp(&nt->tx_np, start, frag);
+ }
+
+ ring_consume(&netkgdb_tx_ring, frag);
+ }
+}
+
+/*
+ * Called from KGDB/KDB, returns the the next character read
+ * from the network interface(s), while processing any
+ * connect() requests from remote hosts and flushing
+ * the TX ring.
+ */
+static int netkgdb_char_get(void)
+{
+ u8 c;
+ struct netkgdb_target *nt;
+
+ /* In interrupt context on ONE cpu. Forget about the locks. */
+ list_for_each_entry(nt, &netkgdb_nts, list) {
+
+ /*
+ * Polls the devices, both for KGDB I/O and
+ * new incoming connections.
+ */
+ if (netif_running(nt->rx_np.dev))
+ netpoll_poll_dev(nt->rx_np.dev);
+ }
+
+ /* Flush any output we didn't get to in char_put. */
+ netkgdb_tx_flush();
+
+ if (!netkgdb_ring_level(&netkgdb_rx_ring))
+ return NO_POLL_CHAR;
+
+ c = netkgdb_ring_peek(&netkgdb_rx_ring, 0);
+ if (c == '\n')
+ c = '\r';
+ ring_consume(&netkgdb_rx_ring, 1);
+ return c;
+}
+
+/*
+ * Sends a character to remote hosts, taking care to
+ * avoid singe-character packets. Only flush the ring
+ * on a full ring or at the end of the line. Whatever
+ * didn't get flushed will get flushed in netkgdb_char_get.
+ * There is, of course, some potential for missed data,
+ * like if KDB/KGDB crashed while sending data, but it's
+ * probably insignificant given the likelyhood and the
+ * amount of lost data (less than a line).
+ */
+static void netkgdb_char_put(u8 c)
+{
+ while (!ring_push(&netkgdb_tx_ring, c))
+ netkgdb_tx_flush();
+
+ if (c == '\n')
+ netkgdb_tx_flush();
+}
+
+/*
+ * Invoked before KDB/KGDB takes control.
+ */
+static void netkgdb_exp_pre(void)
+{
+ netpoll_set_trap(1);
+ netkgdb_trapped = 1;
+}
+
+/*
+ * Invoked on KDB/KGDB exit.
+ */
+static void netkgdb_exp_post(void)
+{
+ netkgdb_trapped = 0;
+ netpoll_set_trap(0);
+}
+
+static struct kgdb_io netkgdb_io_ops = {
+ .name = "netkgdb",
+ .flush = netkgdb_tx_flush,
+ .read_char = netkgdb_char_get,
+ .write_char = netkgdb_char_put,
+ .pre_exception = netkgdb_exp_pre,
+ .post_exception = netkgdb_exp_post,
+};
+
+/*
+ * Handles receiving the '!!!BREAK!!!' string from
+ * a remote host. We cannot do this from the rx_hook
+ * itself, otherwise we'll deadlock due to reentering
+ * netpoll.
+ */
+void netkgdb_work_bp(struct work_struct *work)
+{
+ struct netkgdb_target *nt = container_of(work,
+ struct netkgdb_target,
+ bp);
+
+ pr_emerg("breaking into KGDB from %pI4:%d\n",
+ &nt->tx_np.remote_ip,
+ nt->tx_np.remote_port);
+ kgdb_breakpoint();
+}
+
+/*
+ * Handles sending an alive message to a remote host,
+ * as a response to network messages while the local
+ * host is not crashed. Once again, cannot be done
+ * from rx_hook context, due to netpoll reentrance
+ * issue.
+ */
+void netkgdb_work_ping(struct work_struct *work)
+{
+ char break_string[] = "Alive - '!!!BREAK!!!' to break in.\n";
+ struct netkgdb_target *nt = container_of(work,
+ struct netkgdb_target,
+ ping);
+
+ if (nt->tx_ready &&
+ nt->tx_np.dev &&
+ netif_running(nt->tx_np.dev))
+ netpoll_send_udp(&nt->tx_np, break_string,
+ sizeof(break_string) - 1);
+}
+
+/*
+ * Invoked on every received UDP/IP message received from
+ * a known host, setup in netkgdb_nt_initial, or by
+ * netkgdb_in_rx_hook. If the host is not crashed, it will
+ * return an alive message and allow breaking in, otherwise
+ * it fills the RX ring with received data. Note that there
+ * is no locking on the RX ring - when we crash we are guaranteed
+ * to run on one CP.
+ */
+void netkgdb_io_rx_hook(struct netpoll *np,
+ u8 *h_source,
+ __be32 saddr,
+ struct udphdr *uh,
+ char *data,
+ int len)
+{
+ int count = 0;
+ char break_string[] = "!!!BREAK!!!\n";
+ struct netkgdb_target *nt = container_of(np,
+ struct netkgdb_target,
+ tx_np);
+
+ if (!netkgdb_trapped) {
+ if ((len == sizeof(break_string) - 1) &&
+ !memcmp(data, break_string,
+ sizeof(break_string) - 1))
+ schedule_work(&nt->bp);
+ else
+ schedule_work(&nt->ping);
+ return;
+ }
+
+ while (count < len)
+ ring_push(&netkgdb_rx_ring, data[count++]);
+}
+
+/*
+ * Invoked on every UDP/IP message received from a remote
+ * host. For a known host, which we configured for, does nothing.
+ * For an unknown host, modifies the KGDB I/O netpoll struct
+ * with the new remote address, so that netkgdb_io_rx_hook can
+ * receive messages and KDB can send data to the new adddress.
+ */
+void netkgdb_in_rx_hook(struct netpoll *np,
+ u8 *h_source,
+ __be32 saddr,
+ struct udphdr *uh,
+ char *data,
+ int len)
+{
+ struct netkgdb_target *nt = container_of(np,
+ struct netkgdb_target,
+ rx_np);
+
+ if (!nt->tx_np.dev)
+ return;
+
+ if (nt->tx_np.remote_ip != saddr ||
+ nt->tx_np.remote_port != ntohs(uh->source) ||
+ memcmp(np->remote_mac, h_source, ETH_ALEN)) {
+
+ /*
+ * This is safe because npinfo->rx_lock is taken
+ * and is shared with tx_np. The npinfo->rx_lock
+ * also means this is safe with any up() or down()
+ * work going on.
+ */
+ nt->tx_np.remote_ip = saddr;
+ nt->tx_np.remote_port = ntohs(uh->source);
+ memcpy(np->remote_mac, h_source, ETH_ALEN);
+ nt->tx_ready = true;
+
+ pr_info("accepted from %pI4:%d\n",
+ &saddr, ntohs(uh->source));
+
+ netkgdb_io_rx_hook(&nt->tx_np, h_source, saddr, uh, data, len);
+ }
+}
+
+/*
+ * Cleans up a netkgdb_target structure.
+ */
+static void netkgdb_nt_free(struct netkgdb_target *nt)
+{
+ nt->tx_ready = false;
+ flush_work(&nt->bp);
+ flush_work(&nt->ping);
+ if (nt->rx_np.dev)
+ netpoll_cleanup(&nt->rx_np);
+ if (nt->tx_np.dev)
+ netpoll_cleanup(&nt->tx_np);
+ kfree(nt);
+}
+
+/*
+ * Creates a new netkgdb_target structure, filling in
+ * defaults. Does not add it to target list or register
+ * with netpoll.
+ */
+static struct netkgdb_target *netkgdb_nt_alloc(char *dev_name)
+{
+ struct netkgdb_target *nt;
+
+ nt = kzalloc(sizeof(*nt), GFP_KERNEL);
+ if (!nt) {
+ pr_err("failed to allocate memory\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ INIT_WORK(&nt->bp, netkgdb_work_bp);
+ INIT_WORK(&nt->ping, netkgdb_work_ping);
+ nt->tx_np.name = "netkgdb-io";
+ strlcpy(nt->tx_np.dev_name, dev_name, IFNAMSIZ);
+ nt->tx_np.local_port = DEFAULT_PORT;
+ nt->tx_np.remote_port = DEFAULT_PORT;
+ nt->tx_np.rx_hook = netkgdb_io_rx_hook;
+ memset(nt->tx_np.remote_mac, 0xff, ETH_ALEN);
+
+ nt->rx_np.name = "netkgdb-inbound";
+ strlcpy(nt->rx_np.dev_name, dev_name, IFNAMSIZ);
+ nt->rx_np.local_port = DEFAULT_PORT;
+ nt->rx_np.rx_hook = netkgdb_in_rx_hook;
+ memset(nt->rx_np.remote_mac, 0xff, ETH_ALEN);
+ return nt;
+}
+
+/*
+ * Called in response to network interface going down.
+ * __netpoll_cleanup must be used over netpoll_cleanup, due
+ * rtnl_lock being taken by network dev notifiers.
+ */
+static void netkgdb_nt_down(struct netkgdb_target *nt)
+{
+
+ if (nt->tx_np.dev ||
+ nt->rx_np.dev)
+ pr_info("down %s\n", nt->tx_np.dev->name);
+
+ nt->tx_ready = false;
+ if (nt->tx_np.dev) {
+ __netpoll_cleanup(&nt->tx_np);
+ dev_put(nt->tx_np.dev);
+ nt->tx_np.dev = NULL;
+ }
+ if (nt->rx_np.dev) {
+ __netpoll_cleanup(&nt->rx_np);
+ dev_put(nt->rx_np.dev);
+ nt->rx_np.dev = NULL;
+ }
+}
+
+/*
+ * Called in response to network interface going up.
+ * __netpoll_setup must be used over netpoll_setup, due
+ * rtnl_lock being taken by network dev notifiers.
+ */
+static int netkgdb_nt_up(struct netkgdb_target *nt,
+ struct in_ifaddr *ifa)
+{
+ int err;
+
+ nt->tx_np.local_ip = ifa->ifa_local;
+ nt->tx_np.dev = ifa->ifa_dev->dev;
+ dev_hold(nt->tx_np.dev);
+ err = __netpoll_setup(&nt->tx_np);
+ if (err)
+ goto done;
+
+ nt->rx_np.local_ip = ifa->ifa_local;
+ nt->rx_np.dev = ifa->ifa_dev->dev;
+ dev_hold(nt->rx_np.dev);
+ err = __netpoll_setup(&nt->rx_np);
+ if (err) {
+ netpoll_cleanup(&nt->tx_np);
+ goto done;
+ }
+
+ if (nt->tx_np.remote_ip && nt->tx_np.remote_port)
+ nt->tx_ready = true;
+
+done:
+ if (!err)
+ pr_info("up %s (%pI4:%d)\n",
+ ifa->ifa_dev->dev->name,
+ &ifa->ifa_local,
+ nt->tx_np.local_port);
+ else {
+ pr_err("couldn't configure %s (%pI4)\n",
+ ifa->ifa_dev->dev->name,
+ &ifa->ifa_local);
+ if (nt->tx_np.dev) {
+ dev_put(nt->tx_np.dev);
+ nt->tx_np.dev = NULL;
+ }
+ if (nt->rx_np.dev) {
+ dev_put(nt->rx_np.dev);
+ nt->rx_np.dev = NULL;
+ }
+ }
+ return err;
+}
+
+/*
+ * Allocates a netkgdb_target and sets it up
+ * for a new network interface coming up.
+ */
+static void netkgdb_nt_late(struct in_ifaddr *ifa)
+{
+ int err;
+ struct netkgdb_target *nt;
+
+ nt = netkgdb_nt_alloc(ifa->ifa_dev->dev->name);
+ if (IS_ERR(nt))
+ return;
+
+ err = netkgdb_nt_up(nt, ifa);
+
+ if (!err)
+ list_add(&nt->list, &netkgdb_nts);
+
+ if (err)
+ netkgdb_nt_free(nt);
+}
+
+/*
+ * Allocate a target (from boot/module param) and
+ * setup netpoll for it
+ */
+static int netkgdb_nt_initial(char *target_config)
+{
+ int err;
+ struct netkgdb_target *nt;
+
+ nt = netkgdb_nt_alloc("eth0");
+ if (IS_ERR(nt))
+ return PTR_ERR(nt);
+
+ /* Parse parameters and setup netpoll */
+ err = netpoll_parse_options(&nt->tx_np, target_config);
+ if (err)
+ goto done;
+
+ if (nt->tx_np.remote_ip && nt->tx_np.remote_port)
+ nt->tx_ready = true;
+
+ err = netpoll_setup(&nt->tx_np);
+ if (err)
+ goto done;
+
+ /* Synchronize inbound np with I/O np options. */
+ strlcpy(nt->rx_np.dev_name, nt->tx_np.dev_name, IFNAMSIZ);
+ nt->rx_np.local_port = nt->tx_np.local_port;
+ nt->rx_np.local_ip = nt->tx_np.local_ip;
+
+ err = netpoll_setup(&nt->rx_np);
+ if (err)
+ goto done;
+
+ mutex_lock(&netkgdb_nts_lock);
+ list_add(&nt->list, &netkgdb_nts);
+ mutex_unlock(&netkgdb_nts_lock);
+
+done:
+ if (err)
+ netkgdb_nt_free(nt);
+ return err;
+}
+
+/*
+ * Handle network address notifications.
+ */
+static int netkgdb_inetaddr_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct netkgdb_target *nt;
+ struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
+ struct net_device *dev = ifa->ifa_dev->dev;
+ bool found = false;
+
+ mutex_lock(&netkgdb_nts_lock);
+ switch (event) {
+ case NETDEV_UP:
+ list_for_each_entry(nt, &netkgdb_nts, list) {
+ if (!strcmp(nt->tx_np.dev_name, dev->name)) {
+ found = true;
+ netkgdb_nt_down(nt);
+ netkgdb_nt_up(nt, ifa);
+ break;
+ }
+ }
+ if (!found)
+ netkgdb_nt_late(ifa);
+ break;
+ case NETDEV_DOWN:
+ list_for_each_entry(nt, &netkgdb_nts, list) {
+ if (nt->tx_np.dev == dev)
+ netkgdb_nt_down(nt);
+ }
+ break;
+ }
+ mutex_unlock(&netkgdb_nts_lock);
+
+ return NOTIFY_DONE;
+}
+
+/*
+ * Handle network interface device notifications.
+ */
+static int netkgdb_netdev_event(struct notifier_block *this,
+ unsigned long event,
+ void *ptr)
+{
+ struct netkgdb_target *nt;
+ struct net_device *dev = ptr;
+
+ if (!(event == NETDEV_CHANGENAME ||
+ event == NETDEV_UNREGISTER ||
+ event == NETDEV_RELEASE ||
+ event == NETDEV_JOIN))
+ return NOTIFY_DONE;
+
+ mutex_lock(&netkgdb_nts_lock);
+ list_for_each_entry(nt, &netkgdb_nts, list) {
+ if (nt->tx_np.dev == dev) {
+ switch (event) {
+ case NETDEV_CHANGENAME:
+ strlcpy(nt->tx_np.dev_name, dev->name, IFNAMSIZ);
+ strlcpy(nt->rx_np.dev_name, dev->name, IFNAMSIZ);
+ break;
+ case NETDEV_RELEASE:
+ case NETDEV_JOIN:
+ case NETDEV_UNREGISTER:
+ /*
+ * rtnl_lock already held
+ */
+ netkgdb_nt_down(nt);
+ break;
+ }
+ }
+ }
+ mutex_unlock(&netkgdb_nts_lock);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block netkgdb_netdev_notifier = {
+ .notifier_call = netkgdb_netdev_event,
+};
+
+static struct notifier_block netkgdb_inetaddr_notifier = {
+ .notifier_call = netkgdb_inetaddr_event,
+};
+
+static int __init netkgdb_init(void)
+{
+ int err;
+ struct netkgdb_target *nt, *tmp;
+ char *input = config;
+
+ /*
+ * Not having a netkgdb= parameter itself is not
+ * a mistake, asn netkgdb_nt_late will take care
+ * of things. However, passing a bad parameter
+ * is a mistake (like not having a local IP address).
+ */
+ if (strnlen(input, MAX_PARAM_LENGTH)) {
+ err = netkgdb_nt_initial(input);
+ if (err)
+ goto fail;
+ }
+
+ err = register_netdevice_notifier(&netkgdb_netdev_notifier);
+ if (err)
+ goto fail;
+
+ err = register_inetaddr_notifier(&netkgdb_inetaddr_notifier);
+ if (err)
+ goto undo_netdev;
+
+ err = kgdb_register_io_module(&netkgdb_io_ops);
+ if (err) {
+ pr_err("failed to register I/O ops\n");
+ goto undo_inetaddr;
+ }
+
+ pr_info("started\n");
+ return err;
+
+undo_inetaddr:
+ unregister_inetaddr_notifier(&netkgdb_inetaddr_notifier);
+
+undo_netdev:
+ unregister_netdevice_notifier(&netkgdb_netdev_notifier);
+
+fail:
+ pr_err("cleaning up\n");
+ list_for_each_entry_safe(nt, tmp, &netkgdb_nts, list) {
+ list_del(&nt->list);
+ netkgdb_nt_free(nt);
+ }
+
+ return err;
+}
+
+static void __exit netkgdb_cleanup(void)
+{
+ struct netkgdb_target *nt, *tmp;
+
+ kgdb_register_io_module(&netkgdb_io_ops);
+ unregister_netdevice_notifier(&netkgdb_netdev_notifier);
+
+ list_for_each_entry_safe(nt, tmp, &netkgdb_nts, list) {
+ list_del(&nt->list);
+ netkgdb_nt_free(nt);
+ }
+}
+
+/*
+ * Use late_initcall to ensure netkgdb is
+ * initialized after network device driver if built-in.
+ *
+ * late_initcall() and module_init() are identical if built as module.
+ */
+late_initcall(netkgdb_init);
+module_exit(netkgdb_cleanup);
+
+#ifndef MODULE
+static int __init option_setup(char *opt)
+{
+ strlcpy(config, opt, MAX_PARAM_LENGTH);
+ return 1;
+}
+__setup("netkgdb=", option_setup);
+#endif /* MODULE */
--
1.7.8.3
It's pretty pointless to post two difference versions merely an hour
apart.
I've personally written off reviewing your work until things settle
down a bit.
2012/2/26 David Miller <[email protected]>:
>
> It's pretty pointless to post two difference versions merely an hour
> apart.
>
> I've personally written off reviewing your work until things settle
> down a bit.
Hi David,
Sorry about the spam! I swear I'm done... I just didn't feel like
getting flamed for something I missed out on in v2 and fixed (namely,
flush() callback for gdbstub), but looks like I got flamed nonetheless
;-).
Take care,
A
On Sun, 26 Feb 2012 22:30:10 EST, Andrei Warkentin said:
> +Note: the parameter is optional and largely unneeded unless you
> +are running a listen server - netkgdb will accept connection from any
> +IP on all interfaces and will reconfigure itself appropriately if
This *really* needs a discussion of the security implications of this. Do
you *really* want to have a kgdb that will accept connections from *anywhere*?
Sounds like an insta-root waiting to happen.
> +the assigned interface IP address changes. This makes it useful
> +in an environment where it's not known ahead of time what computer
> +will connect to perform the crash analysis.
Exactly. You don't know ahead of time who's going to connect. That's the
problem...
Hi,
----- Original Message -----
> From: "Valdis Kletnieks" <[email protected]>
> To: "Andrei Warkentin" <[email protected]>
> Cc: [email protected], [email protected], "Andrei Warkentin" <[email protected]>,
> [email protected], "Jason Wessel" <[email protected]>, "Matt Mackall" <[email protected]>
> Sent: Monday, February 27, 2012 12:29:03 AM
> Subject: Re: [PATCHv3 2/3] NETKGDB: Ethernet/UDP/IP KDB transport.
>
> This *really* needs a discussion of the security implications of
> this. Do
> you *really* want to have a kgdb that will accept connections from
> *anywhere*?
> Sounds like an insta-root waiting to happen.
>
It does *really* need documentation. It's not meant for general purpose use.
Just like Android devices come with KDB compiled out, this shouldn't be
built in. That means that netkgdb should be default off and come with a
huge warning. It is insta-root. And that's the message it should carry with it.
It's no different from running kgdboc - anyone has root access if they press
Alt-PrtScrn-g, right?
> > +the assigned interface IP address changes. This makes it useful
> > +in an environment where it's not known ahead of time what computer
> > +will connect to perform the crash analysis.
>
> Exactly. You don't know ahead of time who's going to connect. That's
> the
> problem...
It's not a problem unless you make it one. If I am running a private test farm
where I provision 250+ machines or VMs, and I have 10-15 devs that could possibly
investigate the issue, it's not realistic to know ahead of times what internal
IP address the connection could come from.
I will update the docs and the Kconfig (default=off). That should be
sufficient for PEBCAK failures.
Cheers,
A
On 02/26/2012 09:30 PM, Andrei Warkentin wrote:
> From: Andrei Warkentin <[email protected]>
>
> Pass down source information to rx_hook, useful
> for accepting connections from unspecified clients.
>
> Cc: [email protected]
> Cc: Jason Wessel <[email protected]>
> Cc: Matt Mackall <[email protected]>
> Signed-off-by: Andrei Warkentin <[email protected]>
> Signed-off-by: Andrei Warkentin <[email protected]>
> ---
> include/linux/netpoll.h | 10 +++++++++-
> net/core/netpoll.c | 10 ++++------
> 2 files changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
> index 5dfa091..9a9cfa1 100644
> --- a/include/linux/netpoll.h
> +++ b/include/linux/netpoll.h
> @@ -11,12 +11,19 @@
> #include <linux/interrupt.h>
> #include <linux/rcupdate.h>
> #include <linux/list.h>
> +#include <linux/if_ether.h>
> +#include <net/tcp.h>
> +#include <net/udp.h>
>
> struct netpoll {
> struct net_device *dev;
> char dev_name[IFNAMSIZ];
> const char *name;
> - void (*rx_hook)(struct netpoll *, int, char *, int);
> + void (*rx_hook)(struct netpoll *,
> + u8 *h_source,
> + __be32 saddr,
> + struct udphdr *,
> + char *, int);
I am just now starting to look how this patch set compares to kgdboe. For the kgdboe the patch is a bit different. The kgdboe opted to just pass the skb so as to cut down on the number of arguments to the function call.
>From the kgdboe patch:
- void (*rx_hook)(struct netpoll *, int, char *, int);
+ void (*rx_hook)(struct netpoll *, int, char *, int, struct sk_buff *);
>
> __be32 local_ip, remote_ip;
> u16 local_port, remote_port;
> @@ -40,6 +47,7 @@ struct netpoll_info {
> struct netpoll *netpoll;
> };
>
> +void netpoll_poll_dev(struct net_device *dev);
> void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
> void netpoll_print_options(struct netpoll *np);
> int netpoll_parse_options(struct netpoll *np, char *opt);
> diff --git a/net/core/netpoll.c b/net/core/netpoll.c
> index 3d84fb9..c182bb2 100644
> --- a/net/core/netpoll.c
> +++ b/net/core/netpoll.c
> @@ -26,8 +26,6 @@
> #include <linux/workqueue.h>
> #include <linux/slab.h>
> #include <linux/export.h>
> -#include <net/tcp.h>
> -#include <net/udp.h>
> #include <asm/unaligned.h>
> #include <trace/events/napi.h>
>
> @@ -189,7 +187,7 @@ static void service_arp_queue(struct netpoll_info *npi)
> }
> }
>
> -static void netpoll_poll_dev(struct net_device *dev)
> +void netpoll_poll_dev(struct net_device *dev)
This is interesting and I will have to look into this further... A large part of the reason kgdboe never went anywhere was all around the locking problems the ability to safely use the network hardware and restore the state when it was done. It appears you made this change so as to make a lockless call directly instead of going through netpoll_poll(). I am not entirely sure you could safely do this.
In kgdboe we always had:
+static int eth_get_char(void)
+{
+ int chr;
+
+ while (atomic_read(&in_count) == 0)
+ netpoll_poll(&np);
If it is the case that you really can safely call the netpoll_poll_dev() without the locks then the horrible sync irq state etc... could go away in kgdboe, and then it would be worth considering digging up all the ethernet polling errata fixes that live of out the mainline and perhaps submit some for review.
Jason.
Hi,
Thank you for the review, Jason. Comments inline.
----- Original Message -----
> From: "Jason Wessel" <[email protected]>
> To: "Andrei Warkentin" <[email protected]>
> Cc: [email protected], [email protected], "Andrei Warkentin" <[email protected]>,
> [email protected], "Matt Mackall" <[email protected]>
> Sent: Monday, February 27, 2012 6:17:12 PM
> Subject: Re: [PATCHv3 1/3] NETPOLL: Extend rx_hook support.
>
>
> I am just now starting to look how this patch set compares to kgdboe.
> For the kgdboe the patch is a bit different. The kgdboe opted to
> just pass the skb so as to cut down on the number of arguments to
> the function call.
>
> From the kgdboe patch:
>
> - void (*rx_hook)(struct netpoll *, int, char *, int);
> + void (*rx_hook)(struct netpoll *, int, char *, int, struct
> sk_buff *);
>
>
Interesting, I thought about passing the skb, but decided I didn't
want to copy and paste the skb parsing code, especially given
that it's always UDP anyway. I still have reservations about
passing the physical address, but I don't think anyone tried
to use netpoll or a non-ethernet device anyway.
> >
> > +void netpoll_poll_dev(struct net_device *dev);
> > void netpoll_send_udp(struct netpoll *np, const char *msg, int
> >
> > -static void netpoll_poll_dev(struct net_device *dev)
> > +void netpoll_poll_dev(struct net_device *dev)
>
>
> This is interesting and I will have to look into this further... A
> large part of the reason kgdboe never went anywhere was all around
> the locking problems the ability to safely use the network hardware
> and restore the state when it was done. It appears you made this
> change so as to make a lockless call directly instead of going
> through netpoll_poll(). I am not entirely sure you could safely do
> this.
>
> In kgdboe we always had:
>
> +static int eth_get_char(void)
> +{
> + int chr;
> +
> + while (atomic_read(&in_count) == 0)
> + netpoll_poll(&np);
>
>
> If it is the case that you really can safely call the
> netpoll_poll_dev() without the locks then the horrible sync irq
> state etc... could go away in kgdboe, and then it would be worth
> considering digging up all the ethernet polling errata fixes that
> live of out the mainline and perhaps submit some for review.
>
I didn't look deeply at kgdboe (probably should have...). Anyway, netpoll_poll
doesn't seem to exist. netpoll_poll_dev is called from netpoll_send_skb_on_dev
and the only contract I see is running with the interrupts disabled - something
that is satisfied by running in the context of KDB.
This is slight OT, but...are WiFi drivers sufficiently similar that netpoll "just works?"
Thanks again.
A
On Sun, 26 Feb 2012 22:30:08 -0500
Andrei Warkentin <[email protected]> wrote:
> netkgdb is designed to accept connections
> from any hosts, i.e. it is not necessary to specify these ahead
> of time. This makes it that much more useful in a "this host crashed
> and a I want a developer to take a look at it" scenario, common to
> large scale test/QA farms and automated testing harnesses.
And opens a security hole so wide that no production network
or distro should ever enable it!
On 02/27/2012 05:33 PM, Andrei Warkentin wrote:
>> From: "Jason Wessel" <[email protected]>
>> To: "Andrei Warkentin" <[email protected]>
>> Cc: [email protected], [email protected], "Andrei Warkentin" <[email protected]>,
>> [email protected], "Matt Mackall" <[email protected]>
>> Sent: Monday, February 27, 2012 6:17:12 PM
>> Subject: Re: [PATCHv3 1/3] NETPOLL: Extend rx_hook support.
>>
>>
>> I am just now starting to look how this patch set compares to kgdboe.
>> For the kgdboe the patch is a bit different. The kgdboe opted to
>> just pass the skb so as to cut down on the number of arguments to
>> the function call.
>>
>> From the kgdboe patch:
>>
>> - void (*rx_hook)(struct netpoll *, int, char *, int);
>> + void (*rx_hook)(struct netpoll *, int, char *, int, struct
>> sk_buff *);
>>
>>
>
> Interesting, I thought about passing the skb, but decided I didn't
> want to copy and paste the skb parsing code, especially given
> that it's always UDP anyway. I still have reservations about
> passing the physical address, but I don't think anyone tried
> to use netpoll or a non-ethernet device anyway.
With some input from the netdev folks we should decide what makes the most sense and stick with that implementation. The previous kgdboe change to just pass the skb was signed off long ago as something ok.
>> + while (atomic_read(&in_count) == 0)
>> + netpoll_poll(&np);
>>
>>
>> If it is the case that you really can safely call the
>> netpoll_poll_dev() without the locks then the horrible sync irq
>> state etc... could go away in kgdboe, and then it would be worth
>> considering digging up all the ethernet polling errata fixes that
>> live of out the mainline and perhaps submit some for review.
>>
The code has changed since the last time that kgdboe actually worked.
The v3.1 commit 234b921dbcf killed off the netpoll_poll() function
as well as the exports needed to allow kgdboe to work as a kernel
module. The patch you created here also needs to once again export
the netpoll_poll_dev() so that we can call it from a kernel module
because kgdboe or the ethernet kdb patch you submitted should be able
to be used a loadable module as well as a builtin.
>
> I didn't look deeply at kgdboe (probably should have...). Anyway, netpoll_poll
> doesn't seem to exist. netpoll_poll_dev is called from netpoll_send_skb_on_dev
> and the only contract I see is running with the interrupts disabled - something
> that is satisfied by running in the context of KDB.
All that netpoll_poll() did was to call netpoll_poll_dev(). I have
not yet looked at the differences between kgdboe and the netkdb code
you proposed but I would have suspected it also falls victim to the
ethernet preemption problem which prevented kgdboe from ever being
considered for a mainline merge. Certainly there are ways to fix this
problem but most involved changes to scheduling, core net code, or
substantial driver specific changes.
I almost have kgdboe working against the 3.3 kernel so we can have
both a code and functional comparison. I would like to do some house
cleaning and merging of all the other out of tree patches for things
like kgdb over usb as well as kdb usb keyboard, so we can see if any
of it makes sense to submit to the mainline.
>
> This is slight OT, but...are WiFi drivers sufficiently similar that netpoll "just works?"
Yes and no. Yes, you could use the NET_POLL api to transmit packets
if the driver implemented polling hooks, but no in the sense that most
of the time you need a user space driver to manage the keys which are
time sensitive for things like WPA (usually the job of wpa
supplicant). This is going to prevent you from having WiFi work at
all while the kernel is in the critical exception state.
Jason.
Hi,
----- Original Message -----
> From: "Jason Wessel" <[email protected]>
> To: "Andrei Warkentin" <[email protected]>
> Cc: [email protected], [email protected], "Andrei Warkentin" <[email protected]>,
> [email protected], "Matt Mackall" <[email protected]>, "Andrei Warkentin"
> <[email protected]>
> Sent: Tuesday, February 28, 2012 11:06:44 AM
> Subject: Re: [PATCHv3 1/3] NETPOLL: Extend rx_hook support.
>
>
> All that netpoll_poll() did was to call netpoll_poll_dev(). I have
> not yet looked at the differences between kgdboe and the netkdb code
> you proposed but I would have suspected it also falls victim to the
> ethernet preemption problem which prevented kgdboe from ever being
> considered for a mainline merge. Certainly there are ways to fix
> this
> problem but most involved changes to scheduling, core net code, or
> substantial driver specific changes.
>
I see, I read up on the issues w.r.t. preemption. Could this be worked
around by modifiying affected drivers to bypass locking if they are
used in KDB context? Make some accessor netdev-specific lock/unlocks
that won't do anything if running in KDB context.
>
> >
> > This is slight OT, but...are WiFi drivers sufficiently similar that
> > netpoll "just works?"
>
> Yes and no. Yes, you could use the NET_POLL api to transmit packets
> if the driver implemented polling hooks, but no in the sense that
> most
> of the time you need a user space driver to manage the keys which are
> time sensitive for things like WPA (usually the job of wpa
> supplicant). This is going to prevent you from having WiFi work at
> all while the kernel is in the critical exception state.
>
Ah right. I was thinking just open networking. I should give that a try...
A