2004-10-18 08:41:56

by S. P. Prasanna

[permalink] [raw]
Subject: [0/2] PATCH Kernel watchpoint interface-2.6.9-rc4-mm1

Hi,

Kernel developers often need to monitor global variables.
These patches provide a simple interface for monitoring global variables.
Please see the comments in the watchpoint patch for details.
These patches can be applied over 2.6.9-rc4-mm1.

Please provide your comments.

Thanks
Prasanna
--

Prasanna S Panchamukhi
Linux Technology Center
India Software Labs, IBM Bangalore
Ph: 91-80-25044636
<[email protected]>


2004-10-18 08:46:39

by S. P. Prasanna

[permalink] [raw]
Subject: Re: [1/2] PATCH Kernel watchpoint interface-2.6.9-rc4-mm1

This patch provides global debug register settings.
Used by kernel watchpoint interface patch.
---
Signed-off-by: Prasanna S Panchamukhi <[email protected]>


---

linux-2.6.9-rc4-prasanna/arch/i386/Kconfig.debug | 7
linux-2.6.9-rc4-prasanna/arch/i386/kernel/Makefile | 1
linux-2.6.9-rc4-prasanna/arch/i386/kernel/debugreg.c | 178 +++++++++++++++++++
linux-2.6.9-rc4-prasanna/arch/i386/kernel/process.c | 28 ++
linux-2.6.9-rc4-prasanna/arch/i386/kernel/ptrace.c | 5
linux-2.6.9-rc4-prasanna/arch/i386/kernel/signal.c | 3
linux-2.6.9-rc4-prasanna/arch/i386/kernel/traps.c | 4
linux-2.6.9-rc4-prasanna/include/asm-i386/debugreg.h | 162 +++++++++++++++++
8 files changed, 383 insertions(+), 5 deletions(-)

diff -puN arch/i386/Kconfig.debug~kprobes-debug-regs-2.6.9-rc4-mm1 arch/i386/Kconfig.debug
--- linux-2.6.9-rc4/arch/i386/Kconfig.debug~kprobes-debug-regs-2.6.9-rc4-mm1 2004-10-18 13:49:18.000000000 +0530
+++ linux-2.6.9-rc4-prasanna/arch/i386/Kconfig.debug 2004-10-18 13:49:28.000000000 +0530
@@ -29,6 +29,13 @@ config KPROBES
for kernel debugging, non-intrusive instrumentation and testing.
If in doubt, say "N".

+config DEBUGREG
+ bool "Global Debug Registers"
+ depends on DEBUG_KERNEL
+ help
+ Global debug register settings will be honoured if this is turned on.
+ If in doubt, say "N".
+
config DEBUG_STACK_USAGE
bool "Stack utilization instrumentation"
depends on DEBUG_KERNEL
diff -puN /dev/null arch/i386/kernel/debugreg.c
--- /dev/null 2003-01-30 15:54:37.000000000 +0530
+++ linux-2.6.9-rc4-prasanna/arch/i386/kernel/debugreg.c 2004-10-18 13:49:18.000000000 +0530
@@ -0,0 +1,178 @@
+/*
+ * This provides a debug register allocation mechanism, to be
+ * used by all debuggers, which need debug registers.
+ *
+ * Author: [email protected]
+ * [email protected]
+ */
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <asm/system.h>
+#include <asm/debugreg.h>
+
+/* Allocation state of each debug register, indexed by register number. */
+struct debugreg dr_list[DR_MAX];
+/* DR7 bits owned by globally allocated registers; see set/clear_dr7_global_mask(). */
+unsigned long dr7_global_mask = 0;
+/* Taken by dr_alloc(), dr_free(), dr_inc_use_count() and dr_dec_use_count(). */
+static spinlock_t dr_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Add the DR7 bits that control debug register @regnum to dr7_global_mask.
+ * Register numbers outside 0..3 leave the mask untouched.
+ */
+static inline void set_dr7_global_mask(int regnum)
+{
+	static const unsigned long dr7_bits[] = {
+		DR7_DR0_BITS, DR7_DR1_BITS, DR7_DR2_BITS, DR7_DR3_BITS,
+	};
+
+	if (regnum >= 0 && regnum <= 3)
+		dr7_global_mask |= dr7_bits[regnum];
+}
+
+/*
+ * Remove the DR7 bits that control debug register @regnum from
+ * dr7_global_mask.  Register numbers outside 0..3 leave the mask untouched.
+ */
+static inline void clear_dr7_global_mask(int regnum)
+{
+	static const unsigned long dr7_bits[] = {
+		DR7_DR0_BITS, DR7_DR1_BITS, DR7_DR2_BITS, DR7_DR3_BITS,
+	};
+
+	if (regnum >= 0 && regnum <= 3)
+		dr7_global_mask &= ~dr7_bits[regnum];
+}
+
+/*
+ * Try to allocate the specific debug register @regnum.
+ *
+ * @regnum: register index; the caller (dr_alloc()) has already range-checked it.
+ * @flag:   DR_ALLOC_GLOBAL for exclusive, system-wide use; anything else is
+ *          treated as a local (per-task, shareable) request.
+ *
+ * Returns @regnum on success, -1 if the register cannot be granted.
+ * Called with dr_lock held.
+ */
+static int get_dr(int regnum, int flag)
+{
+	struct debugreg *dr = &dr_list[regnum];
+
+	if (flag == DR_ALLOC_GLOBAL) {
+		/*
+		 * A global owner needs the register exclusively.  Fail when it
+		 * is already in use instead of falling through and silently
+		 * handing out a shared local reference.
+		 */
+		if (dr->flag != DR_UNUSED)
+			return -1;
+		dr->flag = DR_GLOBAL;
+		set_dr7_global_mask(regnum);
+		return regnum;
+	}
+
+	/* Local users may share a register that is free or already local. */
+	if (dr->flag == DR_UNUSED || dr->flag == DR_LOCAL) {
+		dr->use_count++;
+		dr->flag = DR_LOCAL;
+		return regnum;
+	}
+	return -1;
+}
+
+/*
+ * Pick any suitable debug register.
+ *
+ * Local requests scan upwards from register 0 and take the first register
+ * that is already local (sharing it) or unused.  All other requests scan
+ * downwards from the highest register and claim the first unused one as
+ * global.
+ *
+ * Returns the register number, or -1 when nothing suitable is available.
+ */
+static int get_any_dr(int flag)
+{
+	int regnum;
+
+	if (flag != DR_ALLOC_LOCAL) {
+		for (regnum = DR_MAX - 1; regnum >= 0; regnum--) {
+			if (dr_list[regnum].flag != DR_UNUSED)
+				continue;
+			dr_list[regnum].flag = DR_GLOBAL;
+			set_dr7_global_mask(regnum);
+			return regnum;
+		}
+		return -1;
+	}
+
+	for (regnum = 0; regnum < DR_MAX; regnum++) {
+		struct debugreg *dr = &dr_list[regnum];
+
+		switch (dr->flag) {
+		case DR_LOCAL:
+			dr->use_count++;
+			return regnum;
+		case DR_UNUSED:
+			dr->flag = DR_LOCAL;
+			dr->use_count = 1;
+			return regnum;
+		}
+	}
+	return -1;
+}
+
+/*
+ * Drop one local reference on debug register @regnum and mark the register
+ * unused once the last reference is gone.  Callers hold dr_lock.
+ *
+ * dr_dec_use_count() calls this for every local-enable bit found in a task's
+ * dr7, whether or not the allocator actually granted that register locally.
+ * Ignore registers that are not locally allocated, and never let the
+ * unsigned use_count wrap below zero.
+ */
+static inline void dr_free_local(int regnum)
+{
+	struct debugreg *dr = &dr_list[regnum];
+
+	if (dr->flag != DR_LOCAL)
+		return;
+	if (dr->use_count)
+		dr->use_count--;
+	if (!dr->use_count)
+		dr->flag = DR_UNUSED;
+}
+
+/*
+ * Release a globally allocated debug register: give its DR7 bits back,
+ * reset the use count and mark the slot unused.
+ */
+static inline void dr_free_global(int regnum)
+{
+	struct debugreg *dr = &dr_list[regnum];
+
+	clear_dr7_global_mask(regnum);
+	dr->use_count = 0;
+	dr->flag = DR_UNUSED;
+}
+
+/*
+ * Allocate a debug register.
+ *
+ * @regnum: a specific register (0 .. DR_MAX-1) or DR_ANY.
+ * @flag:   DR_ALLOC_GLOBAL or DR_ALLOC_LOCAL.
+ *
+ * Returns the allocated register number, or -1 if @regnum is out of range
+ * or the register is not available.
+ */
+int dr_alloc(int regnum, int flag)
+{
+	int ret;
+
+	spin_lock(&dr_lock);
+	if (regnum == DR_ANY)
+		ret = get_any_dr(flag);
+	else if (regnum < 0 || regnum >= DR_MAX)
+		ret = -1;	/* also rejects negative indices */
+	else
+		ret = get_dr(regnum, flag);
+	spin_unlock(&dr_lock);
+	return ret;
+}
+
+/*
+ * Release debug register @regnum.
+ *
+ * A locally allocated register loses one reference; a globally allocated
+ * one is released outright.
+ *
+ * Returns 0 on success, -1 if @regnum is out of range (including negative
+ * values) or the register is not currently allocated.
+ */
+int dr_free(int regnum)
+{
+	int ret = -1;
+
+	spin_lock(&dr_lock);
+	if (regnum < 0 || regnum >= DR_MAX)
+		goto out;
+	if (dr_list[regnum].flag == DR_UNUSED)
+		goto out;
+
+	if (dr_list[regnum].flag == DR_LOCAL)
+		dr_free_local(regnum);
+	else
+		dr_free_global(regnum);
+	ret = 0;
+out:
+	spin_unlock(&dr_lock);
+	return ret;
+}
+
+/*
+ * Take one extra reference on every debug register whose local-enable bit
+ * is set in the dr7 value @mask.
+ */
+void dr_inc_use_count(unsigned long mask)
+{
+	int regnum = 0;
+
+	spin_lock(&dr_lock);
+	while (regnum < DR_MAX) {
+		if (DR_IS_LOCAL(mask, regnum))
+			dr_list[regnum].use_count++;
+		regnum++;
+	}
+	spin_unlock(&dr_lock);
+}
+
+/*
+ * Drop one reference on every debug register whose local-enable bit is set
+ * in the dr7 value @mask.
+ */
+void dr_dec_use_count(unsigned long mask)
+{
+	int regnum = 0;
+
+	spin_lock(&dr_lock);
+	while (regnum < DR_MAX) {
+		if (DR_IS_LOCAL(mask, regnum))
+			dr_free_local(regnum);
+		regnum++;
+	}
+	spin_unlock(&dr_lock);
+}
+
+/*
+ * Reconcile the debug register allocator with a ptrace write to dr7.
+ *
+ * Every register whose local-enable bit goes 0 -> 1 between @old_dr7 and
+ * @new_dr7 is allocated with dr_alloc(); every register whose bit goes
+ * 1 -> 0 is released with dr_free().
+ *
+ * gdb uses ptrace to write to debug registers.  It assumes that writing to
+ * a debug register always succeeds and it doesn't check the return value of
+ * ptrace.  With global debug register allocation, a ptrace request for a
+ * local debug register can fail if the required register is already
+ * globally allocated.  Since gdb fails to notice this failure, it may later
+ * try to free a debug register that was never allocated for it; dr_free()
+ * failures are therefore ignored here.
+ *
+ * Returns 0 if the new dr7 value may be stored (including when no enable
+ * bit changed), or -1 if a requested register could not be allocated.  On
+ * failure, allocations made by this call are undone, so the caller can
+ * simply leave the old dr7 in place.
+ */
+int enable_debugreg(unsigned long old_dr7, unsigned long new_dr7)
+{
+	unsigned long changed = old_dr7 ^ new_dr7;
+	unsigned long bit;
+	int i;
+
+	/* Claim all newly enabled registers first, while we can still back out. */
+	for (i = 0; i < DR_MAX; i++) {
+		bit = 1UL << (i * 2);
+		if (!(changed & new_dr7 & bit))
+			continue;
+		if (dr_alloc(i, DR_ALLOC_LOCAL) < 0) {
+			/* Roll back what this call has allocated so far. */
+			while (--i >= 0) {
+				bit = 1UL << (i * 2);
+				if (changed & new_dr7 & bit)
+					dr_free(i);
+			}
+			return -1;
+		}
+	}
+
+	/* Nothing can fail from here on: release the registers being disabled. */
+	for (i = 0; i < DR_MAX; i++) {
+		bit = 1UL << (i * 2);
+		if (changed & ~new_dr7 & bit)
+			dr_free(i);
+	}
+	return 0;
+}
+
+EXPORT_SYMBOL(dr_alloc);
+EXPORT_SYMBOL(dr_free);
diff -puN arch/i386/kernel/Makefile~kprobes-debug-regs-2.6.9-rc4-mm1 arch/i386/kernel/Makefile
--- linux-2.6.9-rc4/arch/i386/kernel/Makefile~kprobes-debug-regs-2.6.9-rc4-mm1 2004-10-18 13:49:18.000000000 +0530
+++ linux-2.6.9-rc4-prasanna/arch/i386/kernel/Makefile 2004-10-18 13:49:28.000000000 +0530
@@ -35,6 +35,7 @@ obj-$(CONFIG_ACPI_SRAT) += srat.o
obj-$(CONFIG_HPET_TIMER) += time_hpet.o
obj-$(CONFIG_EFI) += efi.o efi_stub.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+obj-$(CONFIG_DEBUGREG) += debugreg.o

EXTRA_AFLAGS := -traditional

diff -puN arch/i386/kernel/process.c~kprobes-debug-regs-2.6.9-rc4-mm1 arch/i386/kernel/process.c
--- linux-2.6.9-rc4/arch/i386/kernel/process.c~kprobes-debug-regs-2.6.9-rc4-mm1 2004-10-18 13:49:18.000000000 +0530
+++ linux-2.6.9-rc4-prasanna/arch/i386/kernel/process.c 2004-10-18 13:49:18.000000000 +0530
@@ -50,6 +50,7 @@
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
#endif
+#include <asm/debugreg.h>

#include <linux/irq.h>
#include <linux/err.h>
@@ -317,6 +318,8 @@ void exit_thread(void)
tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
put_cpu();
}
+ if (tsk->thread.debugreg[7])
+ dr_dec_use_count(tsk->thread.debugreg[7]);
perfctr_exit_thread(&tsk->thread);
}

@@ -324,6 +327,8 @@ void flush_thread(void)
{
struct task_struct *tsk = current;

+ if (tsk->thread.debugreg[7])
+ dr_dec_use_count(tsk->thread.debugreg[7]);
memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
@@ -416,6 +421,9 @@ int copy_thread(int nr, unsigned long cl
desc->b = LDT_entry_b(&info);
}

+ if (tsk->thread.debugreg[7])
+ dr_inc_use_count(tsk->thread.debugreg[7]);
+
err = 0;
out:
if (err && p->thread.io_bitmap_ptr) {
@@ -593,6 +601,24 @@ struct task_struct fastcall * __switch_t
/*
* Now maybe reload the debug registers
*/
+#ifdef CONFIG_DEBUGREG
+{
+ /*
+ * Don't reload global debug registers. Don't touch the global debug
+ * register settings in dr7.
+ */
+ unsigned long next_dr7 = next->debugreg[7];
+ if (unlikely(next_dr7)) {
+ if (DR7_L0(next_dr7)) loaddebug(next, 0);
+ if (DR7_L1(next_dr7)) loaddebug(next, 1);
+ if (DR7_L2(next_dr7)) loaddebug(next, 2);
+ if (DR7_L3(next_dr7)) loaddebug(next, 3);
+ /* no 4 and 5 */
+ loaddebug(next, 6);
+ load_process_dr7(next_dr7);
+ }
+}
+#else
if (unlikely(next->debugreg[7])) {
loaddebug(next, 0);
loaddebug(next, 1);
@@ -602,7 +628,7 @@ struct task_struct fastcall * __switch_t
loaddebug(next, 6);
loaddebug(next, 7);
}
-
+#endif
if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
handle_io_bitmap(next, tss);

diff -puN arch/i386/kernel/ptrace.c~kprobes-debug-regs-2.6.9-rc4-mm1 arch/i386/kernel/ptrace.c
--- linux-2.6.9-rc4/arch/i386/kernel/ptrace.c~kprobes-debug-regs-2.6.9-rc4-mm1 2004-10-18 13:49:18.000000000 +0530
+++ linux-2.6.9-rc4-prasanna/arch/i386/kernel/ptrace.c 2004-10-18 13:49:18.000000000 +0530
@@ -353,6 +353,11 @@ asmlinkage int sys_ptrace(long request,

addr -= (long) &dummy->u_debugreg;
addr = addr >> 2;
+
+ if (addr == 7 && (enable_debugreg(child->thread.debugreg[addr], data)) < 0) {
+ ret = -EBUSY;
+ break;
+ }
child->thread.debugreg[addr] = data;
ret = 0;
}
diff -puN arch/i386/kernel/signal.c~kprobes-debug-regs-2.6.9-rc4-mm1 arch/i386/kernel/signal.c
--- linux-2.6.9-rc4/arch/i386/kernel/signal.c~kprobes-debug-regs-2.6.9-rc4-mm1 2004-10-18 13:49:18.000000000 +0530
+++ linux-2.6.9-rc4-prasanna/arch/i386/kernel/signal.c 2004-10-18 13:49:18.000000000 +0530
@@ -25,6 +25,7 @@
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
+#include <asm/debugreg.h>
#include "sigframe.h"

#define DEBUG_SIG 0
@@ -600,7 +601,7 @@ int fastcall do_signal(struct pt_regs *r
* have been cleared if the watchpoint triggered
* inside the kernel.
*/
- __asm__("movl %0,%%db7" : : "r" (current->thread.debugreg[7]));
+ load_process_dr7(current->thread.debugreg[7]);

/* Whee! Actually deliver the signal. */
handle_signal(signr, &info, &ka, oldset, regs);
diff -puN arch/i386/kernel/traps.c~kprobes-debug-regs-2.6.9-rc4-mm1 arch/i386/kernel/traps.c
--- linux-2.6.9-rc4/arch/i386/kernel/traps.c~kprobes-debug-regs-2.6.9-rc4-mm1 2004-10-18 13:49:18.000000000 +0530
+++ linux-2.6.9-rc4-prasanna/arch/i386/kernel/traps.c 2004-10-18 13:49:28.000000000 +0530
@@ -812,9 +812,7 @@ asmlinkage void do_debug(struct pt_regs
* the signal is delivered.
*/
clear_dr7:
- __asm__("movl %0,%%db7"
- : /* no output */
- : "r" (0));
+ load_process_dr7(0);
CHK_REMOTE_DEBUG(1,SIGTRAP,error_code,regs,)
return;

diff -puN include/asm-i386/debugreg.h~kprobes-debug-regs-2.6.9-rc4-mm1 include/asm-i386/debugreg.h
--- linux-2.6.9-rc4/include/asm-i386/debugreg.h~kprobes-debug-regs-2.6.9-rc4-mm1 2004-10-18 13:49:18.000000000 +0530
+++ linux-2.6.9-rc4-prasanna/include/asm-i386/debugreg.h 2004-10-18 13:49:18.000000000 +0530
@@ -61,4 +61,166 @@
#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */

+/* Allocation state of one debug register, as kept in dr_list[]. */
+struct debugreg {
+ unsigned long flag; /* DR_UNUSED, DR_LOCAL or DR_GLOBAL */
+ unsigned long use_count; /* references held by local users */
+};
+
+/* debugreg flags */
+#define DR_UNUSED 0
+#define DR_LOCAL 1
+#define DR_GLOBAL 2
+
+#define DR_MAX 4
+/* Parenthesized so the expansion stays one operand inside larger expressions. */
+#define DR_ANY (DR_MAX + 1)
+
+/* global or local allocation requests */
+#define DR_ALLOC_GLOBAL 0
+#define DR_ALLOC_LOCAL 1
+
+/*
+ * DR7 field helpers.  Register N has a 2-bit R/W field at bit 16 + 4*N and
+ * a 2-bit LEN field at bit 18 + 4*N.  The constants are unsigned long so
+ * that shifting into bit 31 (LEN field of register 3) is well defined, and
+ * every macro argument is parenthesized.
+ */
+
+/* Store the access type @rw (DR_TYPE_*) for register @regnum in @dr. */
+#define DR7_RW_SET(dr, regnum, rw) do { \
+		(dr) &= ~(0x3UL << (16 + (4 * (regnum)))); \
+		(dr) |= (((rw) & 0x3UL) << (16 + (4 * (regnum)))); \
+	} while (0)
+
+/* Extract the access type of register @regnum from @dr. */
+#define DR7_RW_VAL(dr, regnum) \
+	(((dr) >> (16 + (4 * (regnum)))) & 0x3)
+
+/* Store the length @len in bytes (encoded as len - 1) for @regnum in @dr. */
+#define DR7_LEN_SET(dr, regnum, len) do { \
+		(dr) &= ~(0x3UL << (18 + (4 * (regnum)))); \
+		(dr) |= ((((len) - 1) & 0x3UL) << (18 + (4 * (regnum)))); \
+	} while (0)
+
+/* Extract the encoded length field of register @regnum from @dr. */
+#define DR7_LEN_VAL(dr, regnum) \
+	(((dr) >> (18 + (4 * (regnum)))) & 0x3)
+
+/* Local-enable bit of debug register 0..3. */
+#define DR7_L0(dr) (((dr))&0x1)
+#define DR7_L1(dr) (((dr)>>2)&0x1)
+#define DR7_L2(dr) (((dr)>>4)&0x1)
+#define DR7_L3(dr) (((dr)>>6)&0x1)
+
+/* Non-zero if the local-enable bit of register @num is set in @dr. */
+#define DR_IS_LOCAL(dr, num) ((dr) & (1UL << ((num) << 1)))
+
+/* Set the rw, len and global flag in dr7 for a debug register */
+#define SET_DR7(dr, regnum, access, len) do { \
+		DR7_RW_SET(dr, regnum, access); \
+		DR7_LEN_SET(dr, regnum, len); \
+		(dr) |= (2UL << ((regnum) * 2)); \
+	} while (0)
+
+/* Disable a debug register by clearing the global/local flag in dr7 */
+#define RESET_DR7(dr, regnum) ((dr) &= ~(3UL << ((regnum) * 2)))
+
+#define DR7_DR0_BITS 0x000F0003
+#define DR7_DR1_BITS 0x00F0000C
+#define DR7_DR2_BITS 0x0F000030
+#define DR7_DR3_BITS 0xF00000C0
+
+#define DR_TRAP_MASK 0xF
+
+#define DR_TYPE_EXECUTE 0x0
+#define DR_TYPE_WRITE 0x1
+#define DR_TYPE_IO 0x2
+#define DR_TYPE_RW 0x3
+
+/*
+ * Read debug register %dbN for a literal N.  The asm is volatile: the
+ * register contents change outside the compiler's view (hardware and
+ * write_dr()), so reads must not be merged, dropped or moved across writes.
+ */
+#define get_dr(regnum, val) \
+	__asm__ __volatile__("movl %%db" #regnum ", %0" \
+			     : "=r" (val))
+
+/*
+ * Return the current value of debug register @regnum (0-3, 6 or 7).
+ * Any other register number yields 0.
+ */
+static inline unsigned long read_dr(int regnum)
+{
+	unsigned long val = 0;
+
+	switch (regnum) {
+	case 0: get_dr(0, val); break;
+	case 1: get_dr(1, val); break;
+	case 2: get_dr(2, val); break;
+	case 3: get_dr(3, val); break;
+	case 6: get_dr(6, val); break;
+	case 7: get_dr(7, val); break;
+	}
+	return val;
+}
+#undef get_dr
+
+/* Write @val into debug register %dbN for a literal N; only used by write_dr(). */
+#define set_dr(regnum, val) \
+ __asm__("movl %0,%%db" #regnum \
+ : /* no output */ \
+ :"r" (val))
+/*
+ * Write @val to debug register @regnum.  Only registers 0-3 and 7 are
+ * handled; any other number (including 6) is silently ignored.
+ */
+static inline void write_dr(int regnum, unsigned long val)
+{
+ switch (regnum) {
+ case 0: set_dr(0, val); break;
+ case 1: set_dr(1, val); break;
+ case 2: set_dr(2, val); break;
+ case 3: set_dr(3, val); break;
+ case 7: set_dr(7, val); break;
+ }
+ return;
+}
+#undef set_dr
+
+#ifdef CONFIG_DEBUGREG
+/*
+ * Map a debug status value to the number of the debug register that
+ * triggered the trap: the lowest of bits 0..DR_MAX-1 set in @condition.
+ * Returns -1 when none of those bits is set.
+ */
+static inline int dr_trap(unsigned int condition)
+{
+	int regnum;
+
+	for (regnum = 0; regnum < DR_MAX; regnum++) {
+		if (condition & (1U << regnum))
+			return regnum;
+	}
+	return -1;
+}
+
+/*
+ * Given the debug status register, return the address held in the debug
+ * register that caused the trap, or (unsigned long)-1 if no debug register
+ * is flagged in @condition.
+ */
+static inline unsigned long dr_trap_addr(unsigned int condition)
+{
+	const int regnum = dr_trap(condition);
+
+	return (regnum == -1) ? (unsigned long)-1 : read_dr(regnum);
+}
+
+/*
+ * Given the debug status register, return the access type (execute, write,
+ * io or read/write) configured in dr7 for the debug register that caused
+ * the trap, or -1 if no debug register is flagged in @condition.
+ */
+static inline int dr_trap_type(unsigned int condition)
+{
+	const int regnum = dr_trap(condition);
+
+	if (regnum != -1)
+		return DR7_RW_VAL(read_dr(7), regnum);
+	return -1;
+}
+
+/* Function declarations */
+
+extern int dr_alloc(int regnum, int flag);
+extern int dr_free(int regnum);
+extern void dr_inc_use_count(unsigned long mask);
+extern void dr_dec_use_count(unsigned long mask);
+extern struct debugreg dr_list[DR_MAX];
+extern unsigned long dr7_global_mask;
+extern int enable_debugreg(unsigned long old_dr7, unsigned long new_dr7);
+
+/*
+ * Load a task's dr7 value while keeping the bits of the live dr7 that
+ * belong to globally allocated debug registers.
+ */
+static inline void load_process_dr7(unsigned long curr_dr7)
+{
+	unsigned long global_bits = read_dr(7) & dr7_global_mask;
+
+	write_dr(7, global_bits | curr_dr7);
+}
+#else
+/*
+ * !CONFIG_DEBUGREG stubs.  All of them are static inline so that including
+ * this header never emits unused-function warnings or per-file copies.
+ */
+
+/* No allocator: every dr7 write is allowed. */
+static inline int enable_debugreg(unsigned long old_dr7, unsigned long new_dr7) { return 0; }
+
+/* No global registers to preserve: load the task's dr7 as is. */
+static inline void load_process_dr7(unsigned long curr_dr7)
+{
+	write_dr(7, curr_dr7);
+}
+
+static inline void dr_inc_use_count(unsigned long mask) { }
+static inline void dr_dec_use_count(unsigned long mask) { }
+
+
+#endif /* CONFIG_DEBUGREG */
#endif

_
Thanks
Prasanna
--

Prasanna S Panchamukhi
Linux Technology Center
India Software Labs, IBM Bangalore
Ph: 91-80-25044636
<[email protected]>

2004-10-18 08:47:16

by S. P. Prasanna

[permalink] [raw]
Subject: Re: [2/2] PATCH Kernel watchpoint interface-2.6.9-rc4-mm1


This patch provides a simple interface for kernel-space watchpoints
using the processor's debug registers. Using the kwatch interface, users can
monitor kernel global variables and dump debugging information such
as the kernel stack, global variables and processor registers.

int register_kwatch(unsigned long addr, u8 length, u8 type,
kwatch_handler_t handler)

-length of the breakpoint can be 1,2 or 4 bytes long.
-type can be read, write, execute.
0 Break on instruction execution only.
1 Break on data writes only.
3 Break on data reads or writes but not instruction fetches.

-return value is the debug register number allocated/used
for setting up this watch point.

Sample code:

This sample code sets a watchpoint on instruction
execution at do_fork.

struct kwatch kp;
void kwatch_handler(struct kwatch *p, struct pt_regs *regs)
{
.......<do-any-thing>........
}

debug_regs_num = register_kwatch(do_fork, 1, 0, kwatch_handler);

Signed-off-by: Prasanna S Panchamukhi <[email protected]>
---


---

linux-2.6.9-rc4-prasanna/arch/i386/Kconfig.debug | 7
linux-2.6.9-rc4-prasanna/arch/i386/kernel/Makefile | 1
linux-2.6.9-rc4-prasanna/arch/i386/kernel/kwatch.c | 171 +++++++++++++++++++++
linux-2.6.9-rc4-prasanna/arch/i386/kernel/traps.c | 1
linux-2.6.9-rc4-prasanna/include/asm-i386/kwatch.h | 31 +++
5 files changed, 211 insertions(+)

diff -puN arch/i386/Kconfig.debug~kprobes-kernel-watchpoint-2.6.9-rc4-mm1 arch/i386/Kconfig.debug
--- linux-2.6.9-rc4/arch/i386/Kconfig.debug~kprobes-kernel-watchpoint-2.6.9-rc4-mm1 2004-10-18 13:50:25.000000000 +0530
+++ linux-2.6.9-rc4-prasanna/arch/i386/Kconfig.debug 2004-10-18 13:50:25.000000000 +0530
@@ -36,6 +36,13 @@ config DEBUGREG
Global debug register settings will be honoured if this is turned on.
If in doubt, say "N".

+config KWATCH
+ bool "Kwatch points"
+ depends on DEBUG_KERNEL && DEBUGREG
+ help
+ This enables kernel-space watchpoints using processor's debug
+ registers. If in doubt, say "N".
+
config DEBUG_STACK_USAGE
bool "Stack utilization instrumentation"
depends on DEBUG_KERNEL
diff -puN /dev/null arch/i386/kernel/kwatch.c
--- /dev/null 2003-01-30 15:54:37.000000000 +0530
+++ linux-2.6.9-rc4-prasanna/arch/i386/kernel/kwatch.c 2004-10-18 13:50:25.000000000 +0530
@@ -0,0 +1,171 @@
+/*
+ * Support for kernel watchpoints.
+ * (C) 2002 Vamsi Krishna S <[email protected]>.
+ */
+#include <linux/config.h>
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <asm/kwatch.h>
+#include <asm/kdebug.h>
+#include <asm/debugreg.h>
+#include <asm/bitops.h>
+
+/* One slot per debug register, indexed by the register number from dr_alloc(). */
+static struct kwatch kwatch_list[DR_MAX];
+/* Taken around updates of kwatch_list and around handler invocation. */
+static spinlock_t kwatch_lock = SPIN_LOCK_UNLOCKED;
+static unsigned long kwatch_in_progress; /* currently being handled */
+
+/* Argument block passed from sync_dr() to write_smp_dr() on the other CPUs. */
+struct dr_info {
+ int debugreg; /* debug register number to write */
+ unsigned long addr; /* value to store in that register */
+ int type; /* watchpoint type; DR_TYPE_IO also enables CR4.DE */
+};
+
+/*
+ * Cross-CPU callback: apply one debug register update, described by the
+ * struct dr_info in @info, on the CPU it runs on.  I/O watchpoints also
+ * turn on CR4.DE when the CPU supports it.
+ */
+static inline void write_smp_dr(void *info)
+{
+	const struct dr_info *req = info;
+
+	if (cpu_has_de && req->type == DR_TYPE_IO)
+		set_in_cr4(X86_CR4_DE);
+	write_dr(req->debugreg, req->addr);
+}
+
+/*
+ * Update the debug register on all other CPUs.
+ *
+ * The request lives on this function's stack, so we must wait for the
+ * remote CPUs to finish write_smp_dr() (wait = 1) before returning;
+ * otherwise they could read the structure after the stack frame is gone.
+ */
+static void sync_dr(int debugreg, unsigned long addr, int type)
+{
+	struct dr_info dr = {
+		.debugreg = debugreg,
+		.addr = addr,
+		.type = type,
+	};
+
+	smp_call_function(write_smp_dr, &dr, 0, 1);
+}
+
+/*
+ * Debug-trap entry point for kernel watchpoints.
+ *
+ * @condition: debug status value; only the DR_TRAP0..DR_TRAP3 bits are examined.
+ * @regs:      register state at the time of the trap.
+ *
+ * Returns 0 if the trap was not caused by a debug register, or after the
+ * normal (non-recursive) path has run -- retval is never changed from 0 --
+ * and 1 if another kwatch trap was already being handled.
+ *
+ * Interrupts are disabled on entry as trap1 is an interrupt gate and they
+ * remain disabled throughout this function.
+ */
+int kwatch_handler(unsigned long condition, struct pt_regs *regs)
+{
+ int debugreg = dr_trap(condition);
+ unsigned long addr = dr_trap_addr(condition);
+ int retval = 0;
+
+ /* Not a debug-register trap: nothing for kwatch to do. */
+ if (!(condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3))) {
+ return 0;
+ }
+
+ /* We're in an interrupt, but this is clear and BUG()-safe. */
+ preempt_disable();
+
+ /* If we are recursing, we already hold the lock. */
+ if (kwatch_in_progress) {
+ goto recursed;
+ }
+ set_bit(debugreg, &kwatch_in_progress);
+
+ spin_lock(&kwatch_lock);
+ /* The register no longer holds the address registered for this slot. */
+ if (kwatch_list[debugreg].addr != addr)
+ goto out;
+
+ if (kwatch_list[debugreg].handler) {
+ kwatch_list[debugreg].handler(&kwatch_list[debugreg], regs);
+ }
+
+ /*
+  * NOTE(review): presumably RF is set so the execute breakpoint does not
+  * fire again on the same instruction when it is resumed -- confirm.
+  */
+ if (kwatch_list[debugreg].type == DR_TYPE_EXECUTE)
+ regs->eflags |= RF_MASK;
+out:
+ clear_bit(debugreg, &kwatch_in_progress);
+ spin_unlock(&kwatch_lock);
+ preempt_enable_no_resched();
+ return retval;
+
+recursed:
+ /* Nested trap: skip the user handler, only arrange to resume. */
+ if (kwatch_list[debugreg].type == DR_TYPE_EXECUTE)
+ regs->eflags |= RF_MASK;
+ preempt_enable_no_resched();
+ return 1;
+}
+
+/*
+ * Install a kernel watchpoint.
+ *
+ * @addr:    address to watch.
+ * @length:  size of the watched range in bytes: 1, 2 or 4.
+ * @type:    DR_TYPE_EXECUTE, DR_TYPE_WRITE, DR_TYPE_IO or DR_TYPE_RW.
+ * @handler: callback invoked from kwatch_handler() when the watchpoint hits.
+ *
+ * Returns the debug register number used for the watchpoint (to be passed
+ * to unregister_kwatch()), or -1 if the arguments are invalid or no debug
+ * register is free.
+ */
+int register_kwatch(unsigned long addr, u8 length, u8 type,
+		    kwatch_handler_t handler)
+{
+	int debugreg;
+	unsigned long dr7, flags;
+
+	/*
+	 * DR7 can only encode 1, 2 or 4 byte ranges and a 2-bit access type;
+	 * reject anything else before it is silently encoded as something
+	 * the caller did not ask for.
+	 */
+	if ((length != 1 && length != 2 && length != 4) || type > DR_TYPE_RW)
+		return -1;
+
+	debugreg = dr_alloc(DR_ANY, DR_ALLOC_GLOBAL);
+	if (debugreg < 0)
+		return -1;
+
+	spin_lock_irqsave(&kwatch_lock, flags);
+	kwatch_list[debugreg].addr = addr;
+	kwatch_list[debugreg].length = length;
+	kwatch_list[debugreg].type = type;
+	kwatch_list[debugreg].handler = handler;
+	spin_unlock_irqrestore(&kwatch_lock, flags);
+
+	/* Program the address on this CPU, then on all the others. */
+	write_dr(debugreg, addr);
+	sync_dr(debugreg, addr, type);
+	if (cpu_has_de && type == DR_TYPE_IO)
+		set_in_cr4(X86_CR4_DE);
+
+	/* Enable the watchpoint in dr7 only after the address is in place. */
+	dr7 = read_dr(7);
+	SET_DR7(dr7, debugreg, type, length);
+	write_dr(7, dr7);
+	sync_dr(7, dr7, 0);
+	return debugreg;
+}
+
+/*
+ * Remove the kernel watchpoint that uses debug register @debugreg, i.e. the
+ * value previously returned by register_kwatch().
+ *
+ * Out-of-range values are ignored, so passing on the -1 from a failed
+ * register_kwatch() is harmless instead of shifting by a negative count and
+ * writing outside kwatch_list[].
+ */
+void unregister_kwatch(int debugreg)
+{
+	unsigned long flags;
+	unsigned long dr7;
+
+	if (debugreg < 0 || debugreg >= DR_MAX)
+		return;
+
+	/* Disable the watchpoint on every CPU before releasing the register. */
+	dr7 = read_dr(7);
+	RESET_DR7(dr7, debugreg);
+	write_dr(7, dr7);
+	sync_dr(7, dr7, 0);
+	dr_free(debugreg);
+
+	spin_lock_irqsave(&kwatch_lock, flags);
+	kwatch_list[debugreg].addr = 0;
+	kwatch_list[debugreg].handler = NULL;
+	spin_unlock_irqrestore(&kwatch_lock, flags);
+}
+
+/*
+ * Die-notifier callback: hand DIE_DEBUG events to kwatch_handler() and stop
+ * the notifier chain when it reports a non-zero result.  Every other event
+ * is passed on untouched.
+ */
+int kwatch_exceptions_notify(struct notifier_block *self, unsigned long val,
+			     void *data)
+{
+	struct die_args *args = data;
+
+	if (val == DIE_DEBUG && kwatch_handler(args->err, args->regs))
+		return NOTIFY_STOP;
+	return NOTIFY_DONE;
+}
+
+/* Notifier block that init_kwatch() registers on the die notifier chain. */
+static struct notifier_block kwatch_exceptions_nb = {
+ .notifier_call = kwatch_exceptions_notify,
+ .priority = 0x7ffffffe /* we need to be notified second */
+};
+
+/* Boot-time setup: register the kwatch die notifier and return its result. */
+static int __init init_kwatch(void)
+{
+	return register_die_notifier(&kwatch_exceptions_nb);
+}
+
+__initcall(init_kwatch);
+
+EXPORT_SYMBOL_GPL(register_kwatch);
+EXPORT_SYMBOL_GPL(unregister_kwatch);
diff -puN arch/i386/kernel/Makefile~kprobes-kernel-watchpoint-2.6.9-rc4-mm1 arch/i386/kernel/Makefile
--- linux-2.6.9-rc4/arch/i386/kernel/Makefile~kprobes-kernel-watchpoint-2.6.9-rc4-mm1 2004-10-18 13:50:25.000000000 +0530
+++ linux-2.6.9-rc4-prasanna/arch/i386/kernel/Makefile 2004-10-18 13:50:25.000000000 +0530
@@ -36,6 +36,7 @@ obj-$(CONFIG_HPET_TIMER) += time_hpet.o
obj-$(CONFIG_EFI) += efi.o efi_stub.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_DEBUGREG) += debugreg.o
+obj-$(CONFIG_KWATCH) += kwatch.o

EXTRA_AFLAGS := -traditional

diff -puN arch/i386/kernel/traps.c~kprobes-kernel-watchpoint-2.6.9-rc4-mm1 arch/i386/kernel/traps.c
--- linux-2.6.9-rc4/arch/i386/kernel/traps.c~kprobes-kernel-watchpoint-2.6.9-rc4-mm1 2004-10-18 13:50:25.000000000 +0530
+++ linux-2.6.9-rc4-prasanna/arch/i386/kernel/traps.c 2004-10-18 13:50:25.000000000 +0530
@@ -43,6 +43,7 @@
#include <asm/io.h>
#include <asm/atomic.h>
#include <asm/debugreg.h>
+#include <asm/kwatch.h>
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/nmi.h>
diff -puN /dev/null include/asm-i386/kwatch.h
--- /dev/null 2003-01-30 15:54:37.000000000 +0530
+++ linux-2.6.9-rc4-prasanna/include/asm-i386/kwatch.h 2004-10-18 13:50:25.000000000 +0530
@@ -0,0 +1,31 @@
+#ifndef _ASM_KWATCH_H
+#define _ASM_KWATCH_H
+/*
+ * Dynamic Probes (kwatch points) support
+ * Vamsi Krishna S <[email protected]>, Oct, 2002
+ */
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+struct kwatch;
+typedef void (*kwatch_handler_t)(struct kwatch *, struct pt_regs *);
+
+/* Description of one kernel watchpoint, as filled in by register_kwatch(). */
+struct kwatch {
+ unsigned long addr; /* location of watchpoint */
+ u8 length; /* range of address */
+ u8 type; /* type of watchpoint */
+ kwatch_handler_t handler; /* called with this kwatch and the trap registers */
+};
+
+#define RF_MASK 0x00010000
+
+#ifdef CONFIG_KWATCH
+extern int register_kwatch(unsigned long addr, u8 length, u8 type, kwatch_handler_t handler);
+extern void unregister_kwatch(int debugreg);
+extern int kwatch_handler(unsigned long condition, struct pt_regs *regs);
+#else
+static inline int register_kwatch(unsigned long addr, u8 length, u8 type, kwatch_handler_t handler) { return -ENOSYS; }
+static inline void unregister_kwatch(int debugreg) { }
+static inline int kwatch_handler(unsigned long condition, struct pt_regs *regs) { return 0; }
+#endif
+#endif /* _ASM_KWATCH_H */

_
Thanks
Prasanna
--

Prasanna S Panchamukhi
Linux Technology Center
India Software Labs, IBM Bangalore
Ph: 91-80-25044636
<[email protected]>

2004-10-18 09:22:24

by Keith Owens

[permalink] [raw]
Subject: Re: [1/2] PATCH Kernel watchpoint interface-2.6.9-rc4-mm1

On Mon, 18 Oct 2004 14:15:25 +0530,
Prasanna S Panchamukhi <[email protected]> wrote:
>This patch provides global debug register settings.
>Used by kernel watchpoint interface patch.

>+config DEBUGREG
>+ bool "Global Debug Registers"
>+ depends on DEBUG_KERNEL
>+ help
>+ Global debug register settings will be honoured if this is turned on.
>+ If in doubt, say "N".
>+

I like most of the patch, but Kconfig is wrong. This option should not
be exposed to end users, instead CONFIG_DEBUGREG should be selected by
the debug code that calls debugreg. IOW, kgdb, kdb or kwatch select
debugreg, not the other way around.

2004-10-18 12:46:08

by Andi Kleen

[permalink] [raw]
Subject: Re: [1/2] PATCH Kernel watchpoint interface-2.6.9-rc4-mm1

> +config DEBUGREG
> + bool "Global Debug Registers"

I agree with Keith that it shouldn't be user visible. I would always
enable it in fact.

> +{
> + int i;
> + if (flag == DR_ALLOC_LOCAL) {

[...] This all would be simpler if you used lib/idr.c, no?

> +int dr_free(int regnum)
> +{
> + spin_lock(&dr_lock);
> + if (regnum >= DR_MAX || dr_list[regnum].flag == DR_UNUSED) {
> + spin_unlock(&dr_lock);
> + return -1;

This should printk

> +#ifdef CONFIG_DEBUGREG
> +{
> + /*
> + * Don't reload global debug registers. Don't touch the global debug
> + * register settings in dr7.
> + */
> + unsigned long next_dr7 = next->debugreg[7];
> + if (unlikely(next_dr7)) {
> + if (DR7_L0(next_dr7)) loaddebug(next, 0);
> + if (DR7_L1(next_dr7)) loaddebug(next, 1);
> + if (DR7_L2(next_dr7)) loaddebug(next, 2);
> + if (DR7_L3(next_dr7)) loaddebug(next, 3);

I would do this differently - check instead if the registers
are different between the tasks and only reload when different.
This will make updating/freeing more expensive because
you will need to change all tasks, but imho it's worth it.

And then no ifdefs please.

> */
> clear_dr7:
> - __asm__("movl %0,%%db7"
> - : /* no output */
> - : "r" (0));
> + load_process_dr7(0);
> CHK_REMOTE_DEBUG(1,SIGTRAP,error_code,regs,)

That's mm (and should go away anyways because debug notifiers are better)
I would do the patch against mainline so that it can be actually merged.

-Andi

2004-10-22 06:17:15

by S. P. Prasanna

[permalink] [raw]
Subject: Re: [1/2] PATCH Kernel watchpoint interface-2.6.9-rc4-mm1

Hi,

I have updated the patch with Andi Kleen's and Keith Owens' suggestions.
This patch can be applied over 2.6.9-final.
Kindly review and provide your feedback.

Thanks
Prasanna


This patch provides debug register allocation mechanism.
Useful for debuggers such as kgdb, kdb and the kernel watchpoint interface.
---
Signed-off-by: Prasanna S Panchamukhi <[email protected]>

---


---

linux-2.6.9-final-prasanna/arch/i386/Kconfig.debug | 8
linux-2.6.9-final-prasanna/arch/i386/kernel/Makefile | 1
linux-2.6.9-final-prasanna/arch/i386/kernel/debugreg.c | 288 +++++++++++++++++
linux-2.6.9-final-prasanna/arch/i386/kernel/process.c | 25 +
linux-2.6.9-final-prasanna/arch/i386/kernel/ptrace.c | 5
linux-2.6.9-final-prasanna/arch/i386/kernel/signal.c | 3
linux-2.6.9-final-prasanna/arch/i386/kernel/traps.c | 4
linux-2.6.9-final-prasanna/include/asm-i386/debugreg.h | 162 +++++++++
8 files changed, 486 insertions(+), 10 deletions(-)

diff -puN arch/i386/Kconfig.debug~kprobes-debug-regs-2.6.9-final arch/i386/Kconfig.debug
--- linux-2.6.9-final/arch/i386/Kconfig.debug~kprobes-debug-regs-2.6.9-final 2004-10-22 11:09:34.000000000 +0530
+++ linux-2.6.9-final-prasanna/arch/i386/Kconfig.debug 2004-10-22 11:09:34.000000000 +0530
@@ -29,6 +29,14 @@ config KPROBES
for kernel debugging, non-intrusive instrumentation and testing.
If in doubt, say "N".

+config DEBUGREG
+ bool "Global Debug Registers"
+ depends on DEBUG_KERNEL
+ default off
+ help
+ Global debug register settings need to be enabled by the
+ debuggers using it.
+
config DEBUG_STACK_USAGE
bool "Stack utilization instrumentation"
depends on DEBUG_KERNEL
diff -puN /dev/null arch/i386/kernel/debugreg.c
--- /dev/null 2003-01-30 15:54:37.000000000 +0530
+++ linux-2.6.9-final-prasanna/arch/i386/kernel/debugreg.c 2004-10-22 11:09:34.000000000 +0530
@@ -0,0 +1,288 @@
+/*
+ * Debug register
+ * arch/i386/kernel/debugreg.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * 2002-Oct Created by Vamsi Krishna S <[email protected]> and
+ * Bharata Rao <[email protected]> to provide debug register
+ * allocation mechanism.
+ * 2004-Oct Updated by Prasanna S Panchamukhi <[email protected]> with
+ * idr_allocations mechanism as suggested by Andi Kleen.
+ */
+/*
+ * This provides a debug register allocation mechanism, to be
+ * used by all debuggers, which need debug registers.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/idr.h>
+#include <asm/system.h>
+#include <asm/debugreg.h>
+
+struct debugreg dr_list[DR_MAX]; /* allocation state (flag, use_count) of each debug register */
+unsigned long dr7_global_mask = 0; /* dr7 bits belonging to globally allocated debug registers */
+static spinlock_t dr_lock = SPIN_LOCK_UNLOCKED; /* taken by dr_alloc(), dr_free() and the use-count helpers */
+static DEFINE_IDR(debugreg_idr); /* idr recording which debug register numbers are taken */
+static spinlock_t debugreg_idr_lock = SPIN_LOCK_UNLOCKED; /* guards debugreg_idr */
+
+/*
+ * Add the dr7 bits that belong to debug register 'regnum' to
+ * dr7_global_mask. Register numbers outside 0..3 are ignored.
+ */
+static inline void set_dr7_global_mask(int regnum)
+{
+	static const unsigned long dr7_bits[] = {
+		DR7_DR0_BITS, DR7_DR1_BITS, DR7_DR2_BITS, DR7_DR3_BITS
+	};
+
+	if (regnum < 0 || regnum > 3)
+		return;
+	dr7_global_mask |= dr7_bits[regnum];
+}
+
+/*
+ * Remove the dr7 bits that belong to debug register 'regnum' from
+ * dr7_global_mask. Register numbers outside 0..3 are ignored.
+ */
+static inline void clear_dr7_global_mask(int regnum)
+{
+	static const unsigned long dr7_bits[] = {
+		DR7_DR0_BITS, DR7_DR1_BITS, DR7_DR2_BITS, DR7_DR3_BITS
+	};
+
+	if (regnum < 0 || regnum > 3)
+		return;
+	dr7_global_mask &= ~dr7_bits[regnum];
+}
+
+/*
+ * Reserve one specific debug register in the idr.
+ *
+ * Returns 0 when 'regnum' was free and is now reserved, -EINVAL when
+ * 'regnum' is not a valid debug register number, -EBUSY when it is
+ * already reserved, -ENOMEM when the idr could not be grown, or the
+ * error code handed back by idr_get_new_above().
+ */
+static int specific_debugreg(unsigned int regnum)
+{
+	int r, n;
+
+	if (regnum >= DR_MAX)
+		return -EINVAL;
+
+	spin_lock(&debugreg_idr_lock);
+
+	if (idr_find(&debugreg_idr, regnum)) {
+		r = -EBUSY;
+		goto out;
+	}
+
+	/*
+	 * We are inside debugreg_idr_lock (and dr_alloc() additionally holds
+	 * dr_lock), so the preallocation must not sleep: GFP_ATOMIC, not
+	 * GFP_KERNEL.
+	 */
+	if (!idr_pre_get(&debugreg_idr, GFP_ATOMIC)) {
+		r = -ENOMEM;
+		goto out;
+	}
+
+	/* The stored pointer only has to be non-NULL so idr_find() sees the id. */
+	r = idr_get_new_above(&debugreg_idr, specific_debugreg, regnum, &n);
+	if (r)
+		goto out;
+
+	/* We asked for "regnum or above"; anything but regnum itself is no use. */
+	if ((unsigned int)n != regnum) {
+		idr_remove(&debugreg_idr, n);
+		r = -EBUSY;
+	}
+
+out:
+	spin_unlock(&debugreg_idr_lock);
+	return r;
+}
+
+/*
+ * Reserve the lowest free debug register in the idr.
+ *
+ * On success returns 0 and stores the register number in *regnum.
+ * Returns -ENOMEM when the idr could not be grown, -ENOSPC when all
+ * DR_MAX registers are taken, or the error code from idr_get_new().
+ * *regnum is left untouched on failure.
+ */
+static int next_free_debugreg(unsigned int *regnum)
+{
+	int r, n;	/* idr_get_new() wants a plain int for the new id */
+
+	spin_lock(&debugreg_idr_lock);
+
+	/*
+	 * Spinlocks are held here (and in the callers), so the
+	 * preallocation must not sleep: GFP_ATOMIC, not GFP_KERNEL.
+	 */
+	if (!idr_pre_get(&debugreg_idr, GFP_ATOMIC)) {
+		r = -ENOMEM;
+		goto out;
+	}
+
+	/* The stored pointer only has to be non-NULL so idr_find() sees the id. */
+	r = idr_get_new(&debugreg_idr, next_free_debugreg, &n);
+	if (r)
+		goto out;
+
+	/* The idr itself is unbounded; ids past the last register are useless. */
+	if (n >= DR_MAX) {
+		idr_remove(&debugreg_idr, n);
+		r = -ENOSPC;
+		goto out;
+	}
+
+	*regnum = n;
+
+out:
+	spin_unlock(&debugreg_idr_lock);
+	return r;
+}
+
+/*
+ * Drop the idr reservation of debug register 'regnum'.
+ * Always returns 0 (the function previously fell off the end without
+ * returning a value although it is declared to return int).
+ */
+static int free_debugreg(int regnum)
+{
+	spin_lock(&debugreg_idr_lock);
+	idr_remove(&debugreg_idr, regnum);
+	spin_unlock(&debugreg_idr_lock);
+	return 0;
+}
+
+/*
+ * Allocate the specific debug register 'regnum'.
+ *
+ * DR_ALLOC_GLOBAL needs the register exclusively and fails if anybody,
+ * including a local user, already holds it. Any other 'flag' value is
+ * treated as a local request, which may share a register that is
+ * already in local use, or claim a free one.
+ *
+ * Returns 'regnum' on success, -1 on failure.
+ */
+static int get_dr(int regnum, int flag)
+{
+	if (regnum < 0 || regnum >= DR_MAX)
+		return -1;
+
+	if (flag == DR_ALLOC_GLOBAL) {
+		/*
+		 * Do not fall through to the local case on failure: that
+		 * would report success for a register somebody else owns.
+		 */
+		if (specific_debugreg(regnum) < 0)
+			return -1;
+		dr_list[regnum].flag = DR_GLOBAL;
+		set_dr7_global_mask(regnum);
+		return regnum;
+	}
+
+	if (dr_list[regnum].flag == DR_LOCAL || specific_debugreg(regnum) >= 0) {
+		dr_list[regnum].use_count++;
+		dr_list[regnum].flag = DR_LOCAL;
+		return regnum;
+	}
+	return -1;
+}
+
+/*
+ * Allocate whichever debug register is available.
+ *
+ * For DR_ALLOC_LOCAL the first register already in local use is shared
+ * (its use count is bumped); otherwise, and for every other 'flag'
+ * value, the lowest free register is reserved.
+ *
+ * Returns the register number on success, -1 when none is available.
+ */
+static int get_any_dr(int flag)
+{
+	unsigned int regnum;	/* matches next_free_debugreg()'s out parameter */
+	int i;
+
+	if (flag == DR_ALLOC_LOCAL) {
+		for (i = 0; i < DR_MAX; i++) {
+			if (dr_list[i].flag == DR_LOCAL) {
+				dr_list[i].use_count++;
+				return i;
+			}
+		}
+		if (next_free_debugreg(&regnum) < 0)
+			return -1;
+		dr_list[regnum].flag = DR_LOCAL;
+		dr_list[regnum].use_count = 1;
+		return regnum;
+	}
+
+	if (next_free_debugreg(&regnum) < 0)
+		return -1;
+	dr_list[regnum].flag = DR_GLOBAL;
+	set_dr7_global_mask(regnum);
+	return regnum;
+}
+
+/*
+ * Drop one local reference on debug register 'regnum' and release the
+ * register once the last reference is gone.
+ */
+static inline void dr_free_local(int regnum)
+{
+	/* use_count is unsigned: never decrement it below zero. */
+	if (!dr_list[regnum].use_count)
+		return;
+
+	if (--dr_list[regnum].use_count)
+		return;
+
+	free_debugreg(regnum);
+	dr_list[regnum].flag = DR_UNUSED;
+}
+
+/*
+ * Release a globally allocated debug register: drop its idr
+ * reservation, reset its bookkeeping and remove its bits from
+ * dr7_global_mask.
+ */
+static inline void dr_free_global(int regnum)
+{
+	struct debugreg *reg = &dr_list[regnum];
+
+	free_debugreg(regnum);
+	reg->use_count = 0;
+	reg->flag = DR_UNUSED;
+	clear_dr7_global_mask(regnum);
+}
+
+/*
+ * Allocate a debug register.
+ *
+ * @regnum: register number (0 .. DR_MAX-1), or DR_ANY to take whichever
+ *          register is available.
+ * @flag:   DR_ALLOC_GLOBAL or DR_ALLOC_LOCAL.
+ *
+ * Returns the allocated register number, or -1 on failure.
+ */
+int dr_alloc(int regnum, int flag)
+{
+	int ret;
+
+	spin_lock(&dr_lock);
+	if (regnum == DR_ANY) {
+		ret = get_any_dr(flag);
+	} else if (regnum < 0 || regnum >= DR_MAX) {
+		/* Negative numbers must be rejected too: they would index dr_list. */
+		printk("dr_alloc:Cannot allocate debug register %d\n",regnum);
+		ret = -1;
+	} else {
+		ret = get_dr(regnum, flag);
+	}
+	spin_unlock(&dr_lock);
+	return ret;
+}
+
+/*
+ * Free a debug register previously obtained from dr_alloc().
+ *
+ * A local register only loses one reference; a global register is
+ * released outright.
+ *
+ * Returns 0 on success, -1 when 'regnum' is out of range or unused.
+ */
+int dr_free(int regnum)
+{
+	spin_lock(&dr_lock);
+	/* Check the range (both ends) before regnum is used as an index. */
+	if (regnum < 0 || regnum >= DR_MAX ||
+	    dr_list[regnum].flag == DR_UNUSED) {
+		spin_unlock(&dr_lock);
+		printk("dr_free:Cannot free debug register %d\n",regnum);
+		return -1;
+	}
+	if (dr_list[regnum].flag == DR_LOCAL)
+		dr_free_local(regnum);
+	else
+		dr_free_global(regnum);
+	spin_unlock(&dr_lock);
+	return 0;
+}
+
+/*
+ * Take an extra reference on every local debug register enabled in the
+ * dr7 value 'mask'.
+ *
+ * Only registers that really are in local use are counted. A dr7 value
+ * can carry a local-enable bit for a register that is unused or
+ * globally owned; counting such a register would let a later
+ * dr_dec_use_count() drop the count to zero and release a register the
+ * task never owned.
+ */
+void dr_inc_use_count(unsigned long mask)
+{
+	int i;
+
+	spin_lock(&dr_lock);
+	for (i = 0; i < DR_MAX; i++) {
+		if (DR_IS_LOCAL(mask, i) && dr_list[i].flag == DR_LOCAL)
+			dr_list[i].use_count++;
+	}
+	spin_unlock(&dr_lock);
+}
+
+/*
+ * Drop one reference on every local debug register enabled in the dr7
+ * value 'mask', releasing registers whose last reference goes away.
+ *
+ * Registers that are not in local use are skipped: a local-enable bit
+ * in 'mask' does not prove the register was ever allocated locally, and
+ * a globally owned register must not be released from here.
+ */
+void dr_dec_use_count(unsigned long mask)
+{
+	int i;
+
+	spin_lock(&dr_lock);
+	for (i = 0; i < DR_MAX; i++) {
+		if (DR_IS_LOCAL(mask, i) && dr_list[i].flag == DR_LOCAL)
+			dr_free_local(i);
+	}
+	spin_unlock(&dr_lock);
+}
+
+/*
+ * This routine works out which debug registers a ptrace write to dr7
+ * enables or disables, and accordingly calls dr_alloc() or dr_free().
+ *
+ * gdb uses ptrace to write to debug registers. It assumes that writing to
+ * a debug register always succeeds and it doesn't check the return value
+ * of ptrace. With global debug register allocation, a ptrace request for
+ * a local debug register can fail if the required debug register is
+ * already globally allocated. Since gdb fails to notice this failure, it
+ * sometimes tries to free a debug register which is not allocated for it.
+ *
+ * Every local-enable bit that differs between 'old_dr7' and 'new_dr7' is
+ * handled, not just the first one. Registers are claimed before any are
+ * released so that a failed claim can be undone completely.
+ *
+ * Returns 0 when all newly enabled registers could be allocated (which
+ * includes the case where no enable bit changed at all), -1 otherwise.
+ */
+int enable_debugreg(unsigned long old_dr7, unsigned long new_dr7)
+{
+	unsigned long changed = old_dr7 ^ new_dr7;
+	unsigned long turned_on = changed & new_dr7;
+	unsigned long turned_off = changed & old_dr7;
+	int i;
+
+	for (i = 0; i < DR_MAX; i++) {
+		if (!DR_IS_LOCAL(turned_on, i))
+			continue;
+		if (dr_alloc(i, DR_ALLOC_LOCAL) < 0)
+			goto undo;
+	}
+
+	for (i = 0; i < DR_MAX; i++) {
+		if (DR_IS_LOCAL(turned_off, i))
+			dr_free(i);
+	}
+	return 0;
+
+undo:
+	/* Give back what this call had already claimed before failing. */
+	while (--i >= 0) {
+		if (DR_IS_LOCAL(turned_on, i))
+			dr_free(i);
+	}
+	return -1;
+}
+
+EXPORT_SYMBOL(dr_alloc);
+EXPORT_SYMBOL(dr_free);
diff -puN arch/i386/kernel/Makefile~kprobes-debug-regs-2.6.9-final arch/i386/kernel/Makefile
--- linux-2.6.9-final/arch/i386/kernel/Makefile~kprobes-debug-regs-2.6.9-final 2004-10-22 11:09:34.000000000 +0530
+++ linux-2.6.9-final-prasanna/arch/i386/kernel/Makefile 2004-10-22 11:09:34.000000000 +0530
@@ -32,6 +32,7 @@ obj-$(CONFIG_ACPI_SRAT) += srat.o
obj-$(CONFIG_HPET_TIMER) += time_hpet.o
obj-$(CONFIG_EFI) += efi.o efi_stub.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+obj-$(CONFIG_DEBUGREG) += debugreg.o

EXTRA_AFLAGS := -traditional

diff -puN arch/i386/kernel/process.c~kprobes-debug-regs-2.6.9-final arch/i386/kernel/process.c
--- linux-2.6.9-final/arch/i386/kernel/process.c~kprobes-debug-regs-2.6.9-final 2004-10-22 11:09:34.000000000 +0530
+++ linux-2.6.9-final-prasanna/arch/i386/kernel/process.c 2004-10-22 11:11:26.000000000 +0530
@@ -49,6 +49,7 @@
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
#endif
+#include <asm/debugreg.h>

#include <linux/irq.h>
#include <linux/err.h>
@@ -316,12 +317,16 @@ void exit_thread(void)
tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
put_cpu();
}
+ if (tsk->thread.debugreg[7])
+ dr_dec_use_count(tsk->thread.debugreg[7]);
}

void flush_thread(void)
{
struct task_struct *tsk = current;

+ if (tsk->thread.debugreg[7])
+ dr_dec_use_count(tsk->thread.debugreg[7]);
memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
@@ -413,6 +418,9 @@ int copy_thread(int nr, unsigned long cl
desc->b = LDT_entry_b(&info);
}

+ if (tsk->thread.debugreg[7])
+ dr_inc_use_count(tsk->thread.debugreg[7]);
+
err = 0;
out:
if (err && p->thread.io_bitmap_ptr) {
@@ -588,14 +596,19 @@ struct task_struct fastcall * __switch_t
/*
* Now maybe reload the debug registers
*/
- if (unlikely(next->debugreg[7])) {
- loaddebug(next, 0);
- loaddebug(next, 1);
- loaddebug(next, 2);
- loaddebug(next, 3);
+ /*
+ * Don't reload global debug registers. Don't touch the global debug
+ * register settings in dr7.
+ */
+ unsigned long next_dr7 = next->debugreg[7];
+ if (unlikely(next_dr7)) {
+ if (prev->debugreg[0] != next->debugreg[0]) loaddebug(next, 0);
+ if (prev->debugreg[1] != next->debugreg[1]) loaddebug(next, 1);
+ if (prev->debugreg[2] != next->debugreg[2]) loaddebug(next, 2);
+ if (prev->debugreg[3] != next->debugreg[3]) loaddebug(next, 3);
/* no 4 and 5 */
loaddebug(next, 6);
- loaddebug(next, 7);
+ load_process_dr7(next_dr7);
}

if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
diff -puN arch/i386/kernel/ptrace.c~kprobes-debug-regs-2.6.9-final arch/i386/kernel/ptrace.c
--- linux-2.6.9-final/arch/i386/kernel/ptrace.c~kprobes-debug-regs-2.6.9-final 2004-10-22 11:09:34.000000000 +0530
+++ linux-2.6.9-final-prasanna/arch/i386/kernel/ptrace.c 2004-10-22 11:09:34.000000000 +0530
@@ -353,6 +353,11 @@ asmlinkage int sys_ptrace(long request,

addr -= (long) &dummy->u_debugreg;
addr = addr >> 2;
+
+ if (addr == 7 && (enable_debugreg(child->thread.debugreg[addr], data)) < 0) {
+ ret = -EBUSY;
+ break;
+ }
child->thread.debugreg[addr] = data;
ret = 0;
}
diff -puN arch/i386/kernel/signal.c~kprobes-debug-regs-2.6.9-final arch/i386/kernel/signal.c
--- linux-2.6.9-final/arch/i386/kernel/signal.c~kprobes-debug-regs-2.6.9-final 2004-10-22 11:09:34.000000000 +0530
+++ linux-2.6.9-final-prasanna/arch/i386/kernel/signal.c 2004-10-22 11:09:34.000000000 +0530
@@ -25,6 +25,7 @@
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
+#include <asm/debugreg.h>
#include "sigframe.h"

#define DEBUG_SIG 0
@@ -600,7 +601,7 @@ int fastcall do_signal(struct pt_regs *r
* have been cleared if the watchpoint triggered
* inside the kernel.
*/
- __asm__("movl %0,%%db7" : : "r" (current->thread.debugreg[7]));
+ load_process_dr7(current->thread.debugreg[7]);

/* Whee! Actually deliver the signal. */
handle_signal(signr, &info, &ka, oldset, regs);
diff -puN arch/i386/kernel/traps.c~kprobes-debug-regs-2.6.9-final arch/i386/kernel/traps.c
--- linux-2.6.9-final/arch/i386/kernel/traps.c~kprobes-debug-regs-2.6.9-final 2004-10-22 11:09:34.000000000 +0530
+++ linux-2.6.9-final-prasanna/arch/i386/kernel/traps.c 2004-10-22 11:09:34.000000000 +0530
@@ -760,9 +760,7 @@ asmlinkage void do_debug(struct pt_regs
* the signal is delivered.
*/
clear_dr7:
- __asm__("movl %0,%%db7"
- : /* no output */
- : "r" (0));
+ load_process_dr7(0);
return;

debug_vm86:
diff -puN include/asm-i386/debugreg.h~kprobes-debug-regs-2.6.9-final include/asm-i386/debugreg.h
--- linux-2.6.9-final/include/asm-i386/debugreg.h~kprobes-debug-regs-2.6.9-final 2004-10-22 11:09:34.000000000 +0530
+++ linux-2.6.9-final-prasanna/include/asm-i386/debugreg.h 2004-10-22 11:09:34.000000000 +0530
@@ -61,4 +61,166 @@
#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */

+struct debugreg { /* allocation state of one debug register */
+ unsigned long flag; /* DR_UNUSED, DR_LOCAL or DR_GLOBAL */
+ unsigned long use_count; /* number of references held on a DR_LOCAL register */
+};
+
+/* debugreg flags: who currently owns a debug register */
+#define DR_UNUSED 0
+#define DR_LOCAL 1
+#define DR_GLOBAL 2
+
+/* number of allocatable debug registers (dr0 .. dr3) */
+#define DR_MAX 4
+/* "any register" request; parenthesized so it expands safely in expressions */
+#define DR_ANY (DR_MAX + 1)
+
+/* global or local allocation requests */
+#define DR_ALLOC_GLOBAL 0
+#define DR_ALLOC_LOCAL 1
+
+/*
+ * Accessors for the per-register fields in the upper half of dr7: each
+ * register 'regnum' has a 2-bit R/W field at bit 16 + 4*regnum and a
+ * 2-bit LEN field at bit 18 + 4*regnum.
+ *
+ * The masks are built from unsigned long constants: with a plain int,
+ * 0x3 << 30 (LEN field of register 3) overflows a signed int.
+ */
+#define DR7_RW_SET(dr, regnum, rw) do { \
+		(dr) &= ~(0x3UL << (16 + (4 * (regnum)))); \
+		(dr) |= (((rw) & 0x3UL) << (16 + (4 * (regnum)))); \
+	} while (0)
+
+#define DR7_RW_VAL(dr, regnum) \
+	(((dr) >> (16 + (4 * (regnum)))) & 0x3)
+
+/* 'len' is a byte count; the field stores len - 1 */
+#define DR7_LEN_SET(dr, regnum, len) do { \
+		(dr) &= ~(0x3UL << (18 + (4 * (regnum)))); \
+		(dr) |= ((((len) - 1) & 0x3UL) << (18 + (4 * (regnum)))); \
+	} while (0)
+
+#define DR7_LEN_VAL(dr, regnum) \
+	(((dr) >> (18 + (4 * (regnum)))) & 0x3)
+
+/* Local-enable bit of debug register 0..3 in a dr7 value (bits 0, 2, 4, 6) */
+#define DR7_L0(dr) (((dr))&0x1)
+#define DR7_L1(dr) (((dr)>>2)&0x1)
+#define DR7_L2(dr) (((dr)>>4)&0x1)
+#define DR7_L3(dr) (((dr)>>6)&0x1)
+
+/* Non-zero when the local-enable bit of register 'num' is set in 'dr' */
+#define DR_IS_LOCAL(dr, num) ((dr) & (1UL << ((num) << 1)))
+
+/*
+ * Set the rw, len and global flag in dr7 for a debug register.
+ * 'dr' must be an lvalue holding the dr7 value being built.
+ */
+#define SET_DR7(dr, regnum, access, len) do { \
+		DR7_RW_SET(dr, regnum, access); \
+		DR7_LEN_SET(dr, regnum, len); \
+		(dr) |= (2UL << ((regnum) * 2)); \
+	} while (0)
+
+/* Disable a debug register by clearing the global/local flag in dr7 */
+#define RESET_DR7(dr, regnum) ((dr) &= ~(3UL << ((regnum) * 2)))
+
+#define DR7_DR0_BITS 0x000F0003 /* dr7 bits of register 0: enable bits 0-1, R/W and LEN bits 16-19 */
+#define DR7_DR1_BITS 0x00F0000C /* dr7 bits of register 1: enable bits 2-3, R/W and LEN bits 20-23 */
+#define DR7_DR2_BITS 0x0F000030 /* dr7 bits of register 2: enable bits 4-5, R/W and LEN bits 24-27 */
+#define DR7_DR3_BITS 0xF00000C0 /* dr7 bits of register 3: enable bits 6-7, R/W and LEN bits 28-31 */
+
+#define DR_TRAP_MASK 0xF /* presumably the four per-register trap bits of the debug status value - confirm */
+
+#define DR_TYPE_EXECUTE 0x0 /* break on instruction execution only */
+#define DR_TYPE_WRITE 0x1 /* break on data writes only */
+#define DR_TYPE_IO 0x2 /* I/O watchpoint; the kwatch code sets CR4.DE for this type when the CPU has DE */
+#define DR_TYPE_RW 0x3 /* break on data reads or writes but not instruction fetches */
+
+/*
+ * read_dr - return the current contents of debug register 'regnum'.
+ *
+ * Handles registers 0-3, 6 and 7; any other number yields 0. The
+ * register number has to be pasted into the instruction text, hence the
+ * temporary get_dr() helper macro and one branch per register.
+ */
+#define get_dr(regnum, val) \
+	__asm__("movl %%db" #regnum ", %0" \
+		:"=r" (val))
+static inline unsigned long read_dr(int regnum)
+{
+	unsigned long contents = 0;
+
+	if (regnum == 0)
+		get_dr(0, contents);
+	else if (regnum == 1)
+		get_dr(1, contents);
+	else if (regnum == 2)
+		get_dr(2, contents);
+	else if (regnum == 3)
+		get_dr(3, contents);
+	else if (regnum == 6)
+		get_dr(6, contents);
+	else if (regnum == 7)
+		get_dr(7, contents);
+
+	return contents;
+}
+#undef get_dr
+
+/*
+ * write_dr - load 'val' into debug register 'regnum'.
+ *
+ * Handles registers 0-3 and 7; any other number is silently ignored.
+ * The register number has to be pasted into the instruction text, hence
+ * the temporary set_dr() helper macro and one branch per register.
+ */
+#define set_dr(regnum, val) \
+	__asm__("movl %0,%%db" #regnum \
+		: /* no output */ \
+		:"r" (val))
+static inline void write_dr(int regnum, unsigned long val)
+{
+	if (regnum == 0)
+		set_dr(0, val);
+	else if (regnum == 1)
+		set_dr(1, val);
+	else if (regnum == 2)
+		set_dr(2, val);
+	else if (regnum == 3)
+		set_dr(3, val);
+	else if (regnum == 7)
+		set_dr(7, val);
+}
+#undef set_dr
+
+#ifdef CONFIG_DEBUGREG
+/*
+ * Map a debug status value to the debug register that trapped.
+ *
+ * Returns the number of the lowest register whose bit (bit n for
+ * register n) is set in 'condition', or -1 when none of the DR_MAX
+ * bits is set.
+ */
+static inline int dr_trap(unsigned int condition)
+{
+	int regnum;
+
+	for (regnum = 0; regnum < DR_MAX; regnum++) {
+		if (condition & (1U << regnum))
+			return regnum;
+	}
+	return -1;
+}
+
+/*
+ * Given the debug status register, return the address that caused the
+ * debug trap, i.e. the contents of the debug register that fired.
+ * Returns (unsigned long)-1 when no trap bit is set in 'condition'.
+ */
+static inline unsigned long dr_trap_addr(unsigned int condition)
+{
+	int fired = dr_trap(condition);
+
+	return (fired == -1) ? (unsigned long)-1 : read_dr(fired);
+}
+
+/*
+ * Given the debug status register, return the type of debug trap
+ * (execute, write, io or read/write) as stored in the R/W field of dr7
+ * for the register that fired. Returns -1 when no trap bit is set in
+ * 'condition'.
+ */
+static inline int dr_trap_type(unsigned int condition)
+{
+	int fired = dr_trap(condition);
+
+	if (fired != -1)
+		return DR7_RW_VAL(read_dr(7), fired);
+	return -1;
+}
+
+/* Function declarations */
+
+extern int dr_alloc(int regnum, int flag);
+extern int dr_free(int regnum);
+extern void dr_inc_use_count(unsigned long mask);
+extern void dr_dec_use_count(unsigned long mask);
+extern struct debugreg dr_list[DR_MAX];
+extern unsigned long dr7_global_mask;
+extern int enable_debugreg(unsigned long old_dr7, unsigned long new_dr7);
+
+/*
+ * Load a task's dr7 value while keeping the bits of globally allocated
+ * debug registers (those in dr7_global_mask) as they currently are in
+ * the hardware register.
+ */
+static inline void load_process_dr7(unsigned long curr_dr7)
+{
+	unsigned long global_bits = read_dr(7) & dr7_global_mask;
+
+	write_dr(7, global_bits | curr_dr7);
+}
+#else
+/*
+ * Stubs for kernels built without CONFIG_DEBUGREG: there is no global
+ * allocation to respect, so dr7 is written as given and the use-count
+ * hooks do nothing. All of them are static inline so that including
+ * this header does not emit unused static functions.
+ */
+static inline int enable_debugreg(unsigned long old_dr7, unsigned long new_dr7)
+{
+	return 0;
+}
+
+static inline void load_process_dr7(unsigned long curr_dr7)
+{
+	write_dr(7, curr_dr7);
+}
+
+static inline void dr_inc_use_count(unsigned long mask) { }
+static inline void dr_dec_use_count(unsigned long mask) { }
+
+#endif /* CONFIG_DEBUGREG */
#endif

_
--
Prasanna S Panchamukhi
Linux Technology Center
India Software Labs, IBM Bangalore
Ph: 91-80-25044636
<[email protected]>

2004-10-22 06:46:30

by S. P. Prasanna

[permalink] [raw]
Subject: Re: [2/2] PATCH Kernel watchpoint interface-2.6.9-rc4-mm1

Hi All,

Please find the updated kernel watchpoint patch below. This patch can
be applied over 2.6.9-final and is helpful for setting hardware breakpoints.

Thanks
Prasanna


This patch provides a simple interface for kernel-space watchpoints
using the processor's debug registers. Using the kwatch interface, users can
monitor kernel global variables and dump debugging information such
as the kernel stack, global variables and processor registers.

int register_kwatch(unsigned long addr, u8 length, u8 type,
kwatch_handler_t handler)

-length of the watchpoint can be 1, 2 or 4 bytes.
-type can be execute, write, or read/write:
0 Break on instruction execution only.
1 Break on data writes only.
3 Break on data reads or writes but not instruction fetches.

-return value is the debug register number allocated/used
for setting up this watch point.

Sample code:

This sample code sets a watchpoint on instruction
execution at do_fork.

struct kwatch kp;
void kwatch_handler(struct kwatch *p, struct pt_regs *regs)
{
.......<do-any-thing>........
}

debug_regs_num = register_kwatch(do_fork, 1, 0, kwatch_handler);

Signed-off-by: Prasanna S Panchamukhi <[email protected]>
---


---

linux-2.6.9-final-prasanna/arch/i386/Kconfig.debug | 8
linux-2.6.9-final-prasanna/arch/i386/kernel/Makefile | 1
linux-2.6.9-final-prasanna/arch/i386/kernel/kwatch.c | 190 +++++++++++++++++++
linux-2.6.9-final-prasanna/include/asm-i386/kwatch.h | 50 +++++
4 files changed, 249 insertions(+)

diff -puN arch/i386/Kconfig.debug~kprobes-kernel-watchpoint-2.6.9-final arch/i386/Kconfig.debug
--- linux-2.6.9-final/arch/i386/Kconfig.debug~kprobes-kernel-watchpoint-2.6.9-final 2004-10-22 12:01:09.000000000 +0530
+++ linux-2.6.9-final-prasanna/arch/i386/Kconfig.debug 2004-10-22 12:07:24.000000000 +0530
@@ -29,6 +29,14 @@ config KPROBES
for kernel debugging, non-intrusive instrumentation and testing.
If in doubt, say "N".

+config KWATCH
+ bool "Kwatch points"
+ depends on DEBUG_KERNEL
+ select DEBUGREG
+ help
+ This enables kernel-space watchpoints using processor's debug
+ registers. If in doubt, say "N".
+
config DEBUGREG
bool "Global Debug Registers"
depends on DEBUG_KERNEL
diff -puN /dev/null arch/i386/kernel/kwatch.c
--- /dev/null 2003-01-30 15:54:37.000000000 +0530
+++ linux-2.6.9-final-prasanna/arch/i386/kernel/kwatch.c 2004-10-22 12:01:09.000000000 +0530
@@ -0,0 +1,190 @@
+/*
+ * Kernel Watchpoint interface.
+ * arch/i386/kernel/kwatch.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * 2002-Oct Created by Vamsi Krishna S <[email protected]> for
+ * Kernel Watchpoint implementation.
+ */
+#include <linux/config.h>
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <asm/kwatch.h>
+#include <asm/kdebug.h>
+#include <asm/debugreg.h>
+#include <asm/bitops.h>
+
+static struct kwatch kwatch_list[DR_MAX]; /* one slot per debug register, indexed by register number */
+static spinlock_t kwatch_lock = SPIN_LOCK_UNLOCKED; /* guards kwatch_list */
+static unsigned long kwatch_in_progress; /* bitmask of debug registers whose watchpoint is currently being handled */
+
+struct dr_info { /* argument block handed to write_smp_dr() */
+ int debugreg; /* number of the debug register to write */
+ unsigned long addr; /* value to load into that register */
+ int type; /* watchpoint type (DR_TYPE_*) */
+};
+
+/*
+ * Cross-CPU callback for sync_dr(): apply the debug register write
+ * described by the struct dr_info passed in 'info' on the executing
+ * CPU. For I/O watchpoints CR4.DE is switched on first when the CPU
+ * supports it.
+ */
+static inline void write_smp_dr(void *info)
+{
+	struct dr_info *req = info;
+	int wants_io = (req->type == DR_TYPE_IO);
+
+	if (cpu_has_de && wants_io)
+		set_in_cr4(X86_CR4_DE);
+
+	write_dr(req->debugreg, req->addr);
+}
+
+/*
+ * Update the debug register on all other CPUs.
+ *
+ * The request lives on this function's stack, so we must wait for the
+ * other CPUs to finish running write_smp_dr() before returning;
+ * otherwise they could read the request after the stack frame is gone.
+ */
+static void sync_dr(int debugreg, unsigned long addr, int type)
+{
+	struct dr_info dr = {
+		.debugreg = debugreg,
+		.addr = addr,
+		.type = type,
+	};
+
+	smp_call_function(write_smp_dr, &dr, 0, 1);
+}
+
+/*
+ * Debug-trap handler for kernel watchpoints; kwatch_exceptions_notify()
+ * calls it with the 'err' field of the die arguments as 'condition'.
+ *
+ * Returns 0 when none of DR_TRAP0..DR_TRAP3 is set in 'condition' and
+ * also after the normal handling path (retval is never changed from 0);
+ * returns 1 only when another watchpoint is already being handled.
+ *
+ * Interrupts are disabled on entry as trap1 is an interrupt gate and they
+ * remain disabled throughout this function.
+ */
+int kwatch_handler(unsigned long condition, struct pt_regs *regs)
+{
+ int debugreg = dr_trap(condition); /* register that fired, -1 if none */
+ unsigned long addr = dr_trap_addr(condition); /* contents of that register */
+ int retval = 0;
+
+ if (!(condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3))) {
+ return 0;
+ }
+
+ /* We're in an interrupt, but this is clear and BUG()-safe. */
+ preempt_disable();
+
+ /* If we are recursing, we already hold the lock. */
+ if (kwatch_in_progress) {
+ goto recursed;
+ }
+ set_bit(debugreg, &kwatch_in_progress);
+
+ spin_lock(&kwatch_lock);
+ if (kwatch_list[debugreg].addr != addr) /* register does not hold the address recorded for this slot */
+ goto out;
+
+ if (kwatch_list[debugreg].handler) {
+ kwatch_list[debugreg].handler(&kwatch_list[debugreg], regs); /* user callback runs with kwatch_lock held */
+ }
+
+ if (kwatch_list[debugreg].type == DR_TYPE_EXECUTE)
+ regs->eflags |= RF_MASK; /* presumably so the trapping instruction can resume without re-triggering - confirm */
+out:
+ clear_bit(debugreg, &kwatch_in_progress);
+ spin_unlock(&kwatch_lock);
+ preempt_enable_no_resched();
+ return retval;
+
+recursed:
+ if (kwatch_list[debugreg].type == DR_TYPE_EXECUTE)
+ regs->eflags |= RF_MASK;
+ preempt_enable_no_resched();
+ return 1;
+}
+
+/*
+ * Register a kernel watchpoint.
+ *
+ * @addr:    address to watch.
+ * @length:  number of bytes to watch: 1, 2 or 4.
+ * @type:    DR_TYPE_EXECUTE, DR_TYPE_WRITE, DR_TYPE_IO or DR_TYPE_RW.
+ * @handler: callback invoked by kwatch_handler() when the watchpoint
+ *           fires.
+ *
+ * Returns the debug register number allocated for the watchpoint, or -1
+ * when the arguments cannot be encoded in dr7 or no debug register is
+ * available.
+ */
+int register_kwatch(unsigned long addr, u8 length, u8 type,
+		    kwatch_handler_t handler)
+{
+	int debugreg;
+	unsigned long dr7, flags;
+
+	/*
+	 * Validate before allocating so that a bad request neither leaks
+	 * a register nor programs an undefined LEN or R/W encoding.
+	 */
+	if (length != 1 && length != 2 && length != 4)
+		return -1;
+	if (type > DR_TYPE_RW)
+		return -1;
+	/* Instruction breakpoints are only defined for a length of one byte. */
+	if (type == DR_TYPE_EXECUTE && length != 1)
+		return -1;
+
+	debugreg = dr_alloc(DR_ANY, DR_ALLOC_GLOBAL);
+	if (debugreg < 0)
+		return -1;
+
+	spin_lock_irqsave(&kwatch_lock, flags);
+	kwatch_list[debugreg].addr = addr;
+	kwatch_list[debugreg].length = length;
+	kwatch_list[debugreg].type = type;
+	kwatch_list[debugreg].handler = handler;
+	spin_unlock_irqrestore(&kwatch_lock, flags);
+
+	/* Load the address on this CPU and on all others ... */
+	write_dr(debugreg, (unsigned long)addr);
+	sync_dr(debugreg, (unsigned long)addr, type);
+	if (cpu_has_de && type == DR_TYPE_IO)
+		set_in_cr4(X86_CR4_DE);
+
+	/* ... and only then arm the watchpoint through dr7. */
+	dr7 = read_dr(7);
+	SET_DR7(dr7, debugreg, type, length);
+	write_dr(7, dr7);
+	sync_dr(7, dr7, 0);
+	return debugreg;
+}
+
+/*
+ * Remove the watchpoint that register_kwatch() set up on debug register
+ * 'debugreg'. Out-of-range register numbers are ignored.
+ */
+void unregister_kwatch(int debugreg)
+{
+	unsigned long flags;
+	unsigned long dr7;
+
+	/* The number indexes kwatch_list and is used as a shift count in dr7. */
+	if (debugreg < 0 || debugreg >= DR_MAX)
+		return;
+
+	/* Disarm the watchpoint on this CPU and on all others. */
+	dr7 = read_dr(7);
+	RESET_DR7(dr7, debugreg);
+	write_dr(7, dr7);
+	sync_dr(7, dr7, 0);
+
+	/*
+	 * Clear our slot before the register goes back to the allocator:
+	 * once it is free, a concurrent register_kwatch() may be handed
+	 * the same register and fill in the slot, which we must not wipe.
+	 */
+	spin_lock_irqsave(&kwatch_lock, flags);
+	kwatch_list[debugreg].addr = 0;
+	kwatch_list[debugreg].handler = NULL;
+	spin_unlock_irqrestore(&kwatch_lock, flags);
+
+	dr_free(debugreg);
+}
+
+/*
+ * Die-notifier callback: passes DIE_DEBUG events to kwatch_handler()
+ * and stops the notifier chain when the handler returns non-zero.
+ * Every other event is left alone.
+ */
+int kwatch_exceptions_notify(struct notifier_block *self, unsigned long val,
+			     void *data)
+{
+	struct die_args *die = data;
+
+	if (val == DIE_DEBUG && kwatch_handler(die->err, die->regs))
+		return NOTIFY_STOP;
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block kwatch_exceptions_nb = {
+ .notifier_call = kwatch_exceptions_notify,
+ .priority = 0x7ffffffe /* we need to be notified second */
+};
+
+/*
+ * Boot-time setup: hook the kwatch notifier into the die-notifier
+ * chain and report whatever register_die_notifier() returns.
+ */
+static int __init init_kwatch(void)
+{
+	return register_die_notifier(&kwatch_exceptions_nb);
+}
+
+__initcall(init_kwatch);
+
+EXPORT_SYMBOL_GPL(register_kwatch);
+EXPORT_SYMBOL_GPL(unregister_kwatch);
diff -puN arch/i386/kernel/Makefile~kprobes-kernel-watchpoint-2.6.9-final arch/i386/kernel/Makefile
--- linux-2.6.9-final/arch/i386/kernel/Makefile~kprobes-kernel-watchpoint-2.6.9-final 2004-10-22 12:01:09.000000000 +0530
+++ linux-2.6.9-final-prasanna/arch/i386/kernel/Makefile 2004-10-22 12:01:09.000000000 +0530
@@ -33,6 +33,7 @@ obj-$(CONFIG_HPET_TIMER) += time_hpet.o
obj-$(CONFIG_EFI) += efi.o efi_stub.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_DEBUGREG) += debugreg.o
+obj-$(CONFIG_KWATCH) += kwatch.o

EXTRA_AFLAGS := -traditional

diff -puN /dev/null include/asm-i386/kwatch.h
--- /dev/null 2003-01-30 15:54:37.000000000 +0530
+++ linux-2.6.9-final-prasanna/include/asm-i386/kwatch.h 2004-10-22 12:01:09.000000000 +0530
@@ -0,0 +1,50 @@
+#ifndef _ASM_KWATCH_H
+#define _ASM_KWATCH_H
+/*
+ * Kernel Watchpoint interface.
+ * include/asm-i386/kwatch.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * 2002-Oct Created by Vamsi Krishna S <[email protected]> for
+ * Kernel Watchpoint implementation.
+ */
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+struct kwatch;
+typedef void (*kwatch_handler_t)(struct kwatch *, struct pt_regs *); /* callback run when a watchpoint fires */
+
+struct kwatch {
+ unsigned long addr; /* location of watchpoint */
+ u8 length; /* number of bytes watched: 1, 2 or 4 */
+ u8 type; /* type of watchpoint (DR_TYPE_* value) */
+ kwatch_handler_t handler; /* called by kwatch_handler() with this entry and the trap registers */
+};
+
+#define RF_MASK 0x00010000 /* eflags bit set by kwatch_handler() for DR_TYPE_EXECUTE watchpoints */
+
+#ifdef CONFIG_KWATCH
+extern int register_kwatch(unsigned long addr, u8 length, u8 type, kwatch_handler_t handler);
+extern void unregister_kwatch(int debugreg);
+extern int kwatch_handler(unsigned long condition, struct pt_regs *regs);
+#else
+/* Stubs used when CONFIG_KWATCH is not set: registration is refused. */
+static inline int register_kwatch(unsigned long addr, u8 length, u8 type,
+				  kwatch_handler_t handler)
+{
+	return -ENOSYS;
+}
+
+static inline void unregister_kwatch(int debugreg)
+{
+}
+
+static inline int kwatch_handler(unsigned long condition, struct pt_regs *regs)
+{
+	return 0;
+}
+#endif
+#endif /* _ASM_KWATCH_H */

_
--

Prasanna S Panchamukhi
Linux Technology Center
India Software Labs, IBM Bangalore
Ph: 91-80-25044636
<[email protected]>