Linus, please pull the kgdb git tree for 2.6.33
git://git.kernel.org/pub/scm/linux/kernel/git/jwessel/linux-2.6-kgdb.git for_linus
Summary:
The patches in this series have been in linux-next and thoroughly
tested over the last two kernel cycles. There are no new features
here, everything is just a fix against a problem found in the use of
kgdb.
The perf event API merge has caused kgdb's HW breakpoint support to
cease working. This makes the kgdb test suite fail and hang the
kernel in most cases (you would see this with a allyesconfig). I have
an experimental patch which I will work with the perf event
maintainers to repair the kgdb hw breakpoint support during the
2.6.33 cycle.
Thanks,
Jason.
Short log follows:
---
Geert Uytterhoeven (1):
kgdb: Replace strstr() by strchr() for single-character needles
Jason Wessel (6):
kgdb: Read buffer overflow
kgdb,i386: Fix corner case access to ss with NMI watch dog exception
kgdb: allow for cpu switch when single stepping
kgdb,x86: do not set kgdb_single_step on x86
kgdb: continue and warn on signal passing from gdb
kgdb: Always process the whole breakpoint list on activate or deactivate
Roel Kluin (2):
kgdb,x86: remove redundant test
kgdbts: Read buffer overflow
arch/x86/kernel/kgdb.c | 14 +++++++----
drivers/misc/kgdbts.c | 14 ++++++++---
kernel/kgdb.c | 56 ++++++++++++++++++++++++++++++++++-------------
3 files changed, 59 insertions(+), 25 deletions(-)
From: Roel Kluin <[email protected]>
The for loop starts with a breakno of 0, and ends when it's 4. so
this test is always true.
CC: Ingo Molnar <[email protected]>
Signed-off-by: Roel Kluin <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Jason Wessel <[email protected]>
---
arch/x86/kernel/kgdb.c | 3 +--
1 files changed, 1 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 20a5b36..f93d015 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -220,8 +220,7 @@ static void kgdb_correct_hw_break(void)
dr7 |= ((breakinfo[breakno].len << 2) |
breakinfo[breakno].type) <<
((breakno << 2) + 16);
- if (breakno >= 0 && breakno <= 3)
- set_debugreg(breakinfo[breakno].addr, breakno);
+ set_debugreg(breakinfo[breakno].addr, breakno);
} else {
if ((dr7 & breakbit) && !breakinfo[breakno].enabled) {
--
1.6.4.rc1
Roel Kluin reported an error found with Parfait. Where we want to
ensure that that kgdb_info[-1] never gets accessed.
Also check to ensure any negative tid does not exceed the size of the
shadow CPU array, else report critical debug context because it is an
internal kgdb failure.
Reported-by: Roel Kluin <[email protected]>
Signed-off-by: Jason Wessel <[email protected]>
---
kernel/kgdb.c | 7 ++++++-
1 files changed, 6 insertions(+), 1 deletions(-)
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 7d70146..29357a9 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -541,12 +541,17 @@ static struct task_struct *getthread(struct pt_regs *regs, int tid)
*/
if (tid == 0 || tid == -1)
tid = -atomic_read(&kgdb_active) - 2;
- if (tid < 0) {
+ if (tid < -1 && tid > -NR_CPUS - 2) {
if (kgdb_info[-tid - 2].task)
return kgdb_info[-tid - 2].task;
else
return idle_task(-tid - 2);
}
+ if (tid <= 0) {
+ printk(KERN_ERR "KGDB: Internal thread select error\n");
+ dump_stack();
+ return NULL;
+ }
/*
* find_task_by_pid_ns() does not take the tasklist lock anymore
--
1.6.4.rc1
From: Roel Kluin <[email protected]>
Prevent write to put_buf[BUFMAX] in kgdb test suite.
If put_buf_cnt was BUFMAX - 1 at the earlier test,
`\0' is written to put_buf[BUFMAX].
Signed-off-by: Roel Kluin <[email protected]>
Signed-off-by: Jason Wessel <[email protected]>
---
drivers/misc/kgdbts.c | 6 ++++++
1 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
index e4ff50b..2ab0492 100644
--- a/drivers/misc/kgdbts.c
+++ b/drivers/misc/kgdbts.c
@@ -712,6 +712,12 @@ static int run_simple_test(int is_get_char, int chr)
/* End of packet == #XX so look for the '#' */
if (put_buf_cnt > 3 && put_buf[put_buf_cnt - 3] == '#') {
+ if (put_buf_cnt >= BUFMAX) {
+ eprintk("kgdbts: ERROR: put buffer overflow on"
+ " '%s' line %i\n", ts.name, ts.idx);
+ put_buf_cnt = 0;
+ return 0;
+ }
put_buf[put_buf_cnt] = '\0';
v2printk("put%i: %s\n", ts.idx, put_buf);
/* Trigger check here */
--
1.6.4.rc1
From: Geert Uytterhoeven <[email protected]>
Some versions of gcc replace calls to strstr() with single-character
"needle" string parameters by calls to strchr() behind our back.
This causes linking errors if strchr() is defined as an inline function
in <asm/string.h> (e.g. on m68k, which BTW doesn't have kgdb support).
Prevent this by explicitly calling strchr() instead.
Signed-off-by: Geert Uytterhoeven <[email protected]>
Signed-off-by: Jason Wessel <[email protected]>
---
drivers/misc/kgdbts.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
index 2ab0492..fcb6ec1 100644
--- a/drivers/misc/kgdbts.c
+++ b/drivers/misc/kgdbts.c
@@ -891,16 +891,16 @@ static void kgdbts_run_tests(void)
int nmi_sleep = 0;
int i;
- ptr = strstr(config, "F");
+ ptr = strchr(config, 'F');
if (ptr)
fork_test = simple_strtol(ptr + 1, NULL, 10);
- ptr = strstr(config, "S");
+ ptr = strchr(config, 'S');
if (ptr)
do_sys_open_test = simple_strtol(ptr + 1, NULL, 10);
- ptr = strstr(config, "N");
+ ptr = strchr(config, 'N');
if (ptr)
nmi_sleep = simple_strtol(ptr+1, NULL, 10);
- ptr = strstr(config, "I");
+ ptr = strchr(config, 'I');
if (ptr)
sstep_test = simple_strtol(ptr+1, NULL, 10);
--
1.6.4.rc1
It is possible for the user_mode_vm(regs) check to return true on the
i368 arch for a non master kgdb cpu or when the master kgdb cpu
handles the NMI watch dog exception.
The solution is simply to select the correct gdb_ss location
based on the check to user_mode_vm(regs).
CC: Ingo Molnar <[email protected]>
Acked-by: H. Peter Anvin <[email protected]>
Signed-off-by: Jason Wessel <[email protected]>
---
arch/x86/kernel/kgdb.c | 10 ++++++++--
1 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index f93d015..aefae46 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -86,9 +86,15 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs[GDB_DS] = regs->ds;
gdb_regs[GDB_ES] = regs->es;
gdb_regs[GDB_CS] = regs->cs;
- gdb_regs[GDB_SS] = __KERNEL_DS;
gdb_regs[GDB_FS] = 0xFFFF;
gdb_regs[GDB_GS] = 0xFFFF;
+ if (user_mode_vm(regs)) {
+ gdb_regs[GDB_SS] = regs->ss;
+ gdb_regs[GDB_SP] = regs->sp;
+ } else {
+ gdb_regs[GDB_SS] = __KERNEL_DS;
+ gdb_regs[GDB_SP] = kernel_stack_pointer(regs);
+ }
#else
gdb_regs[GDB_R8] = regs->r8;
gdb_regs[GDB_R9] = regs->r9;
@@ -101,8 +107,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs32[GDB_PS] = regs->flags;
gdb_regs32[GDB_CS] = regs->cs;
gdb_regs32[GDB_SS] = regs->ss;
-#endif
gdb_regs[GDB_SP] = kernel_stack_pointer(regs);
+#endif
}
/**
--
1.6.4.rc1
The kgdb core should not assume that a single step operation of a
kernel thread will complete on the same CPU. The single step flag is
set at the "thread" level and it is possible in a multi cpu system
that a kernel thread can get scheduled on another cpu the next time it
is run.
As a further safety net in case a slave cpu is hung, the debug master
cpu will try 100 times before giving up and assuming control of the
slave cpus is no longer possible. It is more useful to be able to get
some information out of kgdb instead of spinning forever.
Signed-off-by: Jason Wessel <[email protected]>
---
kernel/kgdb.c | 20 +++++++++++++++-----
1 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 29357a9..ca21fe9 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -129,6 +129,7 @@ struct task_struct *kgdb_usethread;
struct task_struct *kgdb_contthread;
int kgdb_single_step;
+pid_t kgdb_sstep_pid;
/* Our I/O buffers. */
static char remcom_in_buffer[BUFMAX];
@@ -1400,6 +1401,7 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
struct kgdb_state kgdb_var;
struct kgdb_state *ks = &kgdb_var;
unsigned long flags;
+ int sstep_tries = 100;
int error = 0;
int i, cpu;
@@ -1430,13 +1432,14 @@ acquirelock:
cpu_relax();
/*
- * Do not start the debugger connection on this CPU if the last
- * instance of the exception handler wanted to come into the
- * debugger on a different CPU via a single step
+ * For single stepping, try to only enter on the processor
+ * that was single stepping. To gaurd against a deadlock, the
+ * kernel will only try for the value of sstep_tries before
+ * giving up and continuing on.
*/
if (atomic_read(&kgdb_cpu_doing_single_step) != -1 &&
- atomic_read(&kgdb_cpu_doing_single_step) != cpu) {
-
+ (kgdb_info[cpu].task &&
+ kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) {
atomic_set(&kgdb_active, -1);
touch_softlockup_watchdog();
clocksource_touch_watchdog();
@@ -1529,6 +1532,13 @@ acquirelock:
}
kgdb_restore:
+ if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
+ int sstep_cpu = atomic_read(&kgdb_cpu_doing_single_step);
+ if (kgdb_info[sstep_cpu].task)
+ kgdb_sstep_pid = kgdb_info[sstep_cpu].task->pid;
+ else
+ kgdb_sstep_pid = 0;
+ }
/* Free kgdb_active */
atomic_set(&kgdb_active, -1);
touch_softlockup_watchdog();
--
1.6.4.rc1
On an SMP system the kgdb_single_step flag has the possibility to
indefinitely hang the system in the case. Consider the case where,
CPU 1 has the schedule lock and CPU 0 is set to single step, there is
no way for CPU 0 to run another task.
The easy way to observe the problem is to make 2 cpus busy, and run
the kgdb test suite. You will see that it hangs the system very
quickly.
while [ 1 ] ; do find /proc > /dev/null 2>&1 ; done &
while [ 1 ] ; do find /proc > /dev/null 2>&1 ; done &
echo V1 > /sys/module/kgdbts/parameters/kgdbts
The side effect of this patch is that there is the possibility
to miss a breakpoint in the case that a single step operation
was executed to step over a breakpoint in common code.
The trade off of the missed breakpoint is preferred to
hanging the kernel. This can be fixed in the future by
using kprobes or another strategy to step over planted
breakpoints with out of line execution.
CC: Ingo Molnar <[email protected]>
Signed-off-by: Jason Wessel <[email protected]>
---
arch/x86/kernel/kgdb.c | 1 -
1 files changed, 0 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index aefae46..dd74fe7 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -400,7 +400,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
/* set the trace bit if we're stepping */
if (remcomInBuffer[0] == 's') {
linux_regs->flags |= X86_EFLAGS_TF;
- kgdb_single_step = 1;
atomic_set(&kgdb_cpu_doing_single_step,
raw_smp_processor_id());
}
--
1.6.4.rc1
On some architectures for the segv trap, gdb wants to pass the signal
back on continue. For kgdb this is not the default behavior, because
it can cause the kernel to crash if you arbitrarily pass back a
exception outside of kgdb.
Instead of causing instability, pass a message back to gdb about the
supported kgdb signal passing and execute a standard kgdb continue
operation.
Signed-off-by: Jason Wessel <[email protected]>
---
kernel/kgdb.c | 6 ++++--
1 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index ca21fe9..8584eac 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -1210,8 +1210,10 @@ static int gdb_cmd_exception_pass(struct kgdb_state *ks)
return 1;
} else {
- error_packet(remcom_out_buffer, -EINVAL);
- return 0;
+ kgdb_msg_write("KGDB only knows signal 9 (pass)"
+ " and 15 (pass and disconnect)\n"
+ "Executing a continue without signal passing\n", 0);
+ remcom_in_buffer[0] = 'c';
}
/* Indicate fall through */
--
1.6.4.rc1
This patch fixes 2 edge cases in using kgdb in conjunction with gdb.
1) kgdb_deactivate_sw_breakpoints() should process the entire array of
breakpoints. The failure to do so results in breakpoints that you
cannot remove, because a break point can only be removed if its
state flag is set to BP_SET.
The easy way to duplicate this problem is to plant a break point in
a kernel module and then unload the kernel module.
2) kgdb_activate_sw_breakpoints() should process the entire array of
breakpoints. The failure to do so results in missed breakpoints
when a breakpoint cannot be activated.
Signed-off-by: Jason Wessel <[email protected]>
---
kernel/kgdb.c | 23 +++++++++++++++--------
1 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 8584eac..2eb517e 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -625,7 +625,8 @@ static void kgdb_flush_swbreak_addr(unsigned long addr)
static int kgdb_activate_sw_breakpoints(void)
{
unsigned long addr;
- int error = 0;
+ int error;
+ int ret = 0;
int i;
for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
@@ -635,13 +636,16 @@ static int kgdb_activate_sw_breakpoints(void)
addr = kgdb_break[i].bpt_addr;
error = kgdb_arch_set_breakpoint(addr,
kgdb_break[i].saved_instr);
- if (error)
- return error;
+ if (error) {
+ ret = error;
+ printk(KERN_INFO "KGDB: BP install failed: %lx", addr);
+ continue;
+ }
kgdb_flush_swbreak_addr(addr);
kgdb_break[i].state = BP_ACTIVE;
}
- return 0;
+ return ret;
}
static int kgdb_set_sw_break(unsigned long addr)
@@ -688,7 +692,8 @@ static int kgdb_set_sw_break(unsigned long addr)
static int kgdb_deactivate_sw_breakpoints(void)
{
unsigned long addr;
- int error = 0;
+ int error;
+ int ret = 0;
int i;
for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
@@ -697,13 +702,15 @@ static int kgdb_deactivate_sw_breakpoints(void)
addr = kgdb_break[i].bpt_addr;
error = kgdb_arch_remove_breakpoint(addr,
kgdb_break[i].saved_instr);
- if (error)
- return error;
+ if (error) {
+ printk(KERN_INFO "KGDB: BP remove failed: %lx\n", addr);
+ ret = error;
+ }
kgdb_flush_swbreak_addr(addr);
kgdb_break[i].state = BP_SET;
}
- return 0;
+ return ret;
}
static int kgdb_remove_sw_break(unsigned long addr)
--
1.6.4.rc1