2013-04-18 18:47:31

by Oleg Nesterov

[permalink] [raw]
Subject: [PATCH v2 0/6] ptrace/x86: hw_breakpoints fixes/cleanups

Hello.

On top of "[PATCH 0/5] kill ptrace_{get,put}_breakpoints()".

V2:
- 2/6 was updated as Frederic suggested,

- if (!second_pass && rc)
+ if (rc && !WARN_ON(second_pass))

and the changelog was rewritten. I preserved your ack,
hopefully this is fine.

- 3-6 are new. 4/6 fixes the regression we discussed in V1.

Jan, this fixes watchpoint-zeroaddr test, but that test looks
"too simple", it doesn't check that bp actually works. I created
another test, see below.

Oleg.

------------------------------------------------------------------------------
#define _GNU_SOURCE 1
#include <stdio.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/debugreg.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <assert.h>
#include <assert.h>
#include <stddef.h>

#define DR_GLOBAL_ENABLE 0x2
#define DR_LOCAL_ENABLE 0x1

unsigned long encode_dr7(int drnum, int enable, unsigned int type, unsigned int len)
{
unsigned long dr7;

dr7 = ((len | type) & 0xf)
<< (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
if (enable)
dr7 |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));

return dr7;
}

int write_dr(int pid, int dr, unsigned long val)
{
return ptrace(PTRACE_POKEUSER, pid,
offsetof (struct user, u_debugreg[dr]),
val);
}

#define GET_REG(pid, reg) \
ptrace(PTRACE_PEEKUSER, (pid), \
offsetof(struct user, regs.reg), 0)

void *get_ip(int pid)
{
return (void*)GET_REG(pid, rip);
}


void func1(void)
{
printf("HERE_1\n");
}
void func2(void)
{
printf("HERE_2\n");
}

int main(void)
{
int pid, stat;
unsigned long dr7;

pid = fork();
if (!pid) {
assert(ptrace(PTRACE_TRACEME, 0,0,0) == 0);
kill(getpid(), SIGHUP);

func1();
func2();

return 0x13;
}

assert(pid == waitpid(-1, &stat, 0));
assert(WSTOPSIG(stat) == SIGHUP);

dr7 = 0;
dr7 |= encode_dr7(0, 1, DR_RW_EXECUTE, DR_LEN_1);
dr7 |= encode_dr7(1, 1, DR_RW_EXECUTE, DR_LEN_1);
assert(write_dr(pid, 7, dr7) == 0);

assert(write_dr(pid, 0, (long)func1) == 0);
assert(write_dr(pid, 1, (long)func2) == 0);

assert(ptrace(PTRACE_CONT, pid, 0,0) == 0);
assert(pid == waitpid(-1, &stat, 0));
assert(WSTOPSIG(stat) == SIGTRAP);
assert(get_ip(pid) == func1);

assert(ptrace(PTRACE_CONT, pid, 0,0) == 0);
assert(pid == waitpid(-1, &stat, 0));
assert(WSTOPSIG(stat) == SIGTRAP);
assert(get_ip(pid) == func2);

assert(ptrace(PTRACE_CONT, pid, 0,0) == 0);
assert(pid == waitpid(-1, &stat, 0));
assert(stat == 0x1300);

return 0;
}


2013-04-18 18:47:06

by Oleg Nesterov

[permalink] [raw]
Subject: [PATCH 1/6] ptrace/x86: simplify the "disable" logic in ptrace_write_dr7()

ptrace_write_dr7() looks unnecessarily overcomplicated. We can
factor out ptrace_modify_breakpoint() and do not do "continue"
twice, just we need to pass the proper "disabled" argument to
ptrace_modify_breakpoint().

Signed-off-by: Oleg Nesterov <[email protected]>
Acked-by: Frederic Weisbecker <[email protected]>
---
arch/x86/kernel/ptrace.c | 40 +++++++++++++++-------------------------
1 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 7a98b21..0649f16 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -637,9 +637,7 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
struct thread_struct *thread = &(tsk->thread);
unsigned long old_dr7;
int i, orig_ret = 0, rc = 0;
- int enabled, second_pass = 0;
- unsigned len, type;
- struct perf_event *bp;
+ int second_pass = 0;

data &= ~DR_CONTROL_RESERVED;
old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
@@ -649,30 +647,22 @@ restore:
* appropriate changes to each.
*/
for (i = 0; i < HBP_NUM; i++) {
- enabled = decode_dr7(data, i, &len, &type);
- bp = thread->ptrace_bps[i];
-
- if (!enabled) {
- if (bp) {
- /*
- * Don't unregister the breakpoints right-away,
- * unless all register_user_hw_breakpoint()
- * requests have succeeded. This prevents
- * any window of opportunity for debug
- * register grabbing by other users.
- */
- if (!second_pass)
- continue;
-
- rc = ptrace_modify_breakpoint(bp, len, type,
- tsk, 1);
- if (rc)
- break;
- }
- continue;
+ unsigned len, type;
+ bool disabled = !decode_dr7(data, i, &len, &type);
+ struct perf_event *bp = thread->ptrace_bps[i];
+
+ if (disabled) {
+ /*
+ * Don't unregister the breakpoints right-away, unless
+ * all register_user_hw_breakpoint() requests have
+ * succeeded. This prevents any window of opportunity
+ * for debug register grabbing by other users.
+ */
+ if (!bp || !second_pass)
+ continue;
}

- rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
+ rc = ptrace_modify_breakpoint(bp, len, type, tsk, disabled);
if (rc)
break;
}
--
1.5.5.1

2013-04-18 18:47:18

by Oleg Nesterov

[permalink] [raw]
Subject: [PATCH 2/6] ptrace/x86: dont delay "disable" till second pass in ptrace_write_dr7()

ptrace_write_dr7() skips ptrace_modify_breakpoint(disabled => true)
unless second_pass, this buys nothing but complicates the code and
means that we always do the main loop twice even if "disabled" was
never true.

The comment says:

Don't unregister the breakpoints right-away,
unless all register_user_hw_breakpoint()
requests have succeeded.

Firstly, we do not do register_user_hw_breakpoint(), it was removed
by 24f1e32c "hw-breakpoints: Rewrite the hw-breakpoints layer on top
of perf events".

We are going to restore register_user_hw_breakpoint() (see the next
patch) but this doesn't matter, after 44234adc "hw-breakpoints: Modify
breakpoints without unregistering them" perf_event_disable() can not
hurt, hw_breakpoint_del() does not free the slot.

Remove the "second_pass" check from the main loop and simplify the
code. Since we have to check "bp != NULL" anyway, the patch also
removes the same check in ptrace_modify_breakpoint() and moves the
comment into ptrace_write_dr7().

With this patch the second pass is only needed to restore the saved
old_dr7. This should never fail, so the patch adds WARN_ON() to catch
the potential problems as Frederic suggested.

Signed-off-by: Oleg Nesterov <[email protected]>
Acked-by: Frederic Weisbecker <[email protected]>
---
arch/x86/kernel/ptrace.c | 53 +++++++++++++++++----------------------------
1 files changed, 20 insertions(+), 33 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0649f16..98b0a2c 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -609,14 +609,6 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
int gen_len, gen_type;
struct perf_event_attr attr;

- /*
- * We should have at least an inactive breakpoint at this
- * slot. It means the user is writing dr7 without having
- * written the address register first
- */
- if (!bp)
- return -EINVAL;
-
err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
if (err)
return err;
@@ -634,52 +626,47 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
*/
static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
{
- struct thread_struct *thread = &(tsk->thread);
+ struct thread_struct *thread = &tsk->thread;
unsigned long old_dr7;
- int i, orig_ret = 0, rc = 0;
- int second_pass = 0;
+ bool second_pass = false;
+ int i, rc, ret = 0;

data &= ~DR_CONTROL_RESERVED;
old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
+
restore:
- /*
- * Loop through all the hardware breakpoints, making the
- * appropriate changes to each.
- */
+ rc = 0;
for (i = 0; i < HBP_NUM; i++) {
unsigned len, type;
bool disabled = !decode_dr7(data, i, &len, &type);
struct perf_event *bp = thread->ptrace_bps[i];

- if (disabled) {
+ if (!bp) {
+ if (disabled)
+ continue;
/*
- * Don't unregister the breakpoints right-away, unless
- * all register_user_hw_breakpoint() requests have
- * succeeded. This prevents any window of opportunity
- * for debug register grabbing by other users.
+ * We should have at least an inactive breakpoint at
+ * this slot. It means the user is writing dr7 without
+ * having written the address register first.
*/
- if (!bp || !second_pass)
- continue;
+ rc = -EINVAL;
+ break;
}

rc = ptrace_modify_breakpoint(bp, len, type, tsk, disabled);
if (rc)
break;
}
- /*
- * Make a second pass to free the remaining unused breakpoints
- * or to restore the original breakpoints if an error occurred.
- */
- if (!second_pass) {
- second_pass = 1;
- if (rc < 0) {
- orig_ret = rc;
- data = old_dr7;
- }
+
+ /* Restore if the first pass failed, second_pass shouldn't fail. */
+ if (rc && !WARN_ON(second_pass)) {
+ ret = rc;
+ data = old_dr7;
+ second_pass = true;
goto restore;
}

- return orig_ret < 0 ? orig_ret : rc;
+ return ret;
}

/*
--
1.5.5.1

2013-04-18 18:47:59

by Oleg Nesterov

[permalink] [raw]
Subject: [PATCH 6/6] ptrace: PTRACE_DETACH should do flush_ptrace_hw_breakpoint(child)

Change ptrace_detach() to call flush_ptrace_hw_breakpoint(child).
This frees the slots for non-ptrace PERF_TYPE_BREAKPOINT users, and
this ensures that the tracee won't be killed by SIGTRAP triggered by
the active breakpoints.

Test-case:

unsigned long encode_dr7(int drnum, int enable, unsigned int type, unsigned int len)
{
unsigned long dr7;

dr7 = ((len | type) & 0xf)
<< (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
if (enable)
dr7 |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));

return dr7;
}

int write_dr(int pid, int dr, unsigned long val)
{
return ptrace(PTRACE_POKEUSER, pid,
offsetof (struct user, u_debugreg[dr]),
val);
}

void func(void)
{
}

int main(void)
{
int pid, stat;
unsigned long dr7;

pid = fork();
if (!pid) {
assert(ptrace(PTRACE_TRACEME, 0,0,0) == 0);
kill(getpid(), SIGHUP);

func();
return 0x13;
}

assert(pid == waitpid(-1, &stat, 0));
assert(WSTOPSIG(stat) == SIGHUP);

assert(write_dr(pid, 0, (long)func) == 0);
dr7 = encode_dr7(0, 1, DR_RW_EXECUTE, DR_LEN_1);
assert(write_dr(pid, 7, dr7) == 0);

assert(ptrace(PTRACE_DETACH, pid, 0,0) == 0);
assert(pid == waitpid(-1, &stat, 0));
assert(stat == 0x1300);

return 0;
}

Before this patch the child is killed after PTRACE_DETACH.

Signed-off-by: Oleg Nesterov <[email protected]>
---
kernel/ptrace.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 776ab3b..33752d9 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -467,6 +467,7 @@ static int ptrace_detach(struct task_struct *child, unsigned int data)
/* Architecture-specific hardware disable .. */
ptrace_disable(child);
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+ flush_ptrace_hw_breakpoint(child);

write_lock_irq(&tasklist_lock);
/*
--
1.5.5.1

2013-04-18 18:48:02

by Oleg Nesterov

[permalink] [raw]
Subject: [PATCH 4/6] ptrace/x86: ptrace_write_dr7() should create bp if !disabled

24f1e32c "hw-breakpoints: Rewrite the hw-breakpoints layer on top
of perf events" introduced the minor regression. Before this commit

PTRACE_POKEUSER DR7, enableDR0
PTRACE_POKEUSER DR0, address

was perfectly valid, now PTRACE_POKEUSER(DR7) fails if DR0 was not
previously initialized by PTRACE_POKEUSER(DR0).

Change ptrace_write_dr7() to do ptrace_register_breakpoint(addr => 0)
if !bp && !disabled. This fixes watchpoint-zeroaddr from ptrace-tests,
see https://bugzilla.redhat.com/show_bug.cgi?id=660204.

Reported-by: Jan Kratochvil <[email protected]>
Signed-off-by: Oleg Nesterov <[email protected]>
---
arch/x86/kernel/ptrace.c | 17 ++++++++++-------
1 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0526368..5c387b3 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -670,13 +670,16 @@ restore:
if (!bp) {
if (disabled)
continue;
- /*
- * We should have at least an inactive breakpoint at
- * this slot. It means the user is writing dr7 without
- * having written the address register first.
- */
- rc = -EINVAL;
- break;
+
+ bp = ptrace_register_breakpoint(tsk,
+ len, type, 0, disabled);
+ if (IS_ERR(bp)) {
+ rc = PTR_ERR(bp);
+ break;
+ }
+
+ thread->ptrace_bps[i] = bp;
+ continue;
}

rc = ptrace_modify_breakpoint(bp, len, type, disabled);
--
1.5.5.1

2013-04-18 18:48:13

by Oleg Nesterov

[permalink] [raw]
Subject: [PATCH 5/6] ptrace/x86: cleanup ptrace_set_debugreg()

ptrace_set_debugreg() is trivial but looks horrible. Kill the
unnecessary goto's and return's to cleanup the code.

This matches ptrace_get_debugreg() which also needs the trivial
whitespace cleanups.

Signed-off-by: Oleg Nesterov <[email protected]>
---
arch/x86/kernel/ptrace.c | 26 ++++++++------------------
1 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 5c387b3..7461f50 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -703,7 +703,7 @@ restore:
*/
static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
{
- struct thread_struct *thread = &(tsk->thread);
+ struct thread_struct *thread = &tsk->thread;
unsigned long val = 0;

if (n < HBP_NUM) {
@@ -713,7 +713,7 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
val = bp->hw.info.address;
} else if (n == 6) {
val = thread->debugreg6;
- } else if (n == 7) {
+ } else if (n == 7) {
val = thread->ptrace_dr7;
}
return val;
@@ -761,30 +761,20 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
static int ptrace_set_debugreg(struct task_struct *tsk, int n,
unsigned long val)
{
- struct thread_struct *thread = &(tsk->thread);
- int rc = 0;
-
+ struct thread_struct *thread = &tsk->thread;
/* There are no DR4 or DR5 registers */
- if (n == 4 || n == 5)
- return -EIO;
+ int rc = -EIO;

- if (n == 6) {
- thread->debugreg6 = val;
- goto ret_path;
- }
if (n < HBP_NUM) {
rc = ptrace_set_breakpoint_addr(tsk, n, val);
- if (rc)
- return rc;
- }
- /* All that's left is DR7 */
- if (n == 7) {
+ } else if (n == 6) {
+ thread->debugreg6 = val;
+ rc = 0;
+ } else if (n == 7) {
rc = ptrace_write_dr7(tsk, val);
if (!rc)
thread->ptrace_dr7 = val;
}
-
-ret_path:
return rc;
}

--
1.5.5.1

2013-04-18 18:48:50

by Oleg Nesterov

[permalink] [raw]
Subject: [PATCH 3/6] ptrace/x86: introduce ptrace_register_breakpoint()

No functional changes, preparation.

Extract the "register breakpoint" code from ptrace_get_debugreg()
into the new/generic helper, ptrace_register_breakpoint(). It will
have more users.

The patch also adds another simple helper, ptrace_fill_bp_fields(),
to factor out the arch_bp_generic_fields() logic in register/modify.

Signed-off-by: Oleg Nesterov <[email protected]>
---
arch/x86/kernel/ptrace.c | 86 ++++++++++++++++++++++++++-------------------
1 files changed, 50 insertions(+), 36 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 98b0a2c..0526368 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -601,22 +601,48 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[])
return dr7;
}

-static int
-ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
- struct task_struct *tsk, int disabled)
+static int ptrace_fill_bp_fields(struct perf_event_attr *attr,
+ int len, int type, bool disabled)
+{
+ int err, bp_len, bp_type;
+
+ err = arch_bp_generic_fields(len, type, &bp_len, &bp_type);
+ if (!err) {
+ attr->bp_len = bp_len;
+ attr->bp_type = bp_type;
+ attr->disabled = disabled;
+ }
+
+ return err;
+}
+
+static struct perf_event *
+ptrace_register_breakpoint(struct task_struct *tsk, int len, int type,
+ unsigned long addr, bool disabled)
{
- int err;
- int gen_len, gen_type;
struct perf_event_attr attr;
+ int err;
+
+ ptrace_breakpoint_init(&attr);
+ attr.bp_addr = addr;

- err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
+ err = ptrace_fill_bp_fields(&attr, len, type, disabled);
if (err)
- return err;
+ return ERR_PTR(err);
+
+ return register_user_hw_breakpoint(&attr, ptrace_triggered,
+ NULL, tsk);
+}

- attr = bp->attr;
- attr.bp_len = gen_len;
- attr.bp_type = gen_type;
- attr.disabled = disabled;
+static int ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
+ int disabled)
+{
+ struct perf_event_attr attr = bp->attr;
+ int err;
+
+ err = ptrace_fill_bp_fields(&attr, len, type, disabled);
+ if (err)
+ return err;

return modify_user_hw_breakpoint(bp, &attr);
}
@@ -653,7 +679,7 @@ restore:
break;
}

- rc = ptrace_modify_breakpoint(bp, len, type, tsk, disabled);
+ rc = ptrace_modify_breakpoint(bp, len, type, disabled);
if (rc)
break;
}
@@ -693,26 +719,14 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
unsigned long addr)
{
- struct perf_event *bp;
struct thread_struct *t = &tsk->thread;
- struct perf_event_attr attr;
+ struct perf_event *bp = t->ptrace_bps[nr];
int err = 0;

- if (!t->ptrace_bps[nr]) {
- ptrace_breakpoint_init(&attr);
- /*
- * Put stub len and type to register (reserve) an inactive but
- * correct bp
- */
- attr.bp_addr = addr;
- attr.bp_len = HW_BREAKPOINT_LEN_1;
- attr.bp_type = HW_BREAKPOINT_W;
- attr.disabled = 1;
-
- bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
- NULL, tsk);
-
+ if (!bp) {
/*
+ * Put stub len and type to create an inactive but correct bp.
+ *
* CHECKME: the previous code returned -EIO if the addr wasn't
* a valid task virtual addr. The new one will return -EINVAL in
* this case.
@@ -721,20 +735,20 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
* writing for the user. And anyway this is the previous
* behaviour.
*/
- if (IS_ERR(bp)) {
+ bp = ptrace_register_breakpoint(tsk,
+ X86_BREAKPOINT_LEN_1, X86_BREAKPOINT_WRITE,
+ addr, true);
+ if (IS_ERR(bp))
err = PTR_ERR(bp);
- goto out;
- }
-
- t->ptrace_bps[nr] = bp;
+ else
+ t->ptrace_bps[nr] = bp;
} else {
- bp = t->ptrace_bps[nr];
+ struct perf_event_attr attr = bp->attr;

- attr = bp->attr;
attr.bp_addr = addr;
err = modify_user_hw_breakpoint(bp, &attr);
}
-out:
+
return err;
}

--
1.5.5.1

2013-04-18 19:04:42

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [PATCH 3/6] ptrace/x86: introduce ptrace_register_breakpoint()

On 04/18, Oleg Nesterov wrote:
>
> No functional changes, preparation.
>
> Extract the "register breakpoint" code from ptrace_get_debugreg()
> into the new/generic helper, ptrace_register_breakpoint(). It will
> have more users.
>
> The patch also adds another simple helper, ptrace_fill_bp_fields(),
> to factor out the arch_bp_generic_fields() logic in register/modify.

This patch is hardly readable. To simplify the review, I attached the
new/modified code below.

Oleg.

static int ptrace_fill_bp_fields(struct perf_event_attr *attr,
int len, int type, bool disabled)
{
int err, bp_len, bp_type;

err = arch_bp_generic_fields(len, type, &bp_len, &bp_type);
if (!err) {
attr->bp_len = bp_len;
attr->bp_type = bp_type;
attr->disabled = disabled;
}

return err;
}

static struct perf_event *
ptrace_register_breakpoint(struct task_struct *tsk, int len, int type,
unsigned long addr, bool disabled)
{
struct perf_event_attr attr;
int err;

ptrace_breakpoint_init(&attr);
attr.bp_addr = addr;

err = ptrace_fill_bp_fields(&attr, len, type, disabled);
if (err)
return ERR_PTR(err);

return register_user_hw_breakpoint(&attr, ptrace_triggered,
NULL, tsk);
}

static int ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
int disabled)
{
struct perf_event_attr attr = bp->attr;
int err;

err = ptrace_fill_bp_fields(&attr, len, type, disabled);
if (err)
return err;

return modify_user_hw_breakpoint(bp, &attr);
}

static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
unsigned long addr)
{
struct thread_struct *t = &tsk->thread;
struct perf_event *bp = t->ptrace_bps[nr];
int err = 0;

if (!bp) {
/*
* Put stub len and type to create an inactive but correct bp.
*
* CHECKME: the previous code returned -EIO if the addr wasn't
* a valid task virtual addr. The new one will return -EINVAL in
* this case.
* -EINVAL may be what we want for in-kernel breakpoints users,
* but -EIO looks better for ptrace, since we refuse a register
* writing for the user. And anyway this is the previous
* behaviour.
*/
bp = ptrace_register_breakpoint(tsk,
X86_BREAKPOINT_LEN_1, X86_BREAKPOINT_WRITE,
addr, true);
if (IS_ERR(bp))
err = PTR_ERR(bp);
else
t->ptrace_bps[nr] = bp;
} else {
struct perf_event_attr attr = bp->attr;

attr.bp_addr = addr;
err = modify_user_hw_breakpoint(bp, &attr);
}

return err;
}

2013-04-29 15:56:57

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH 3/6] ptrace/x86: introduce ptrace_register_breakpoint()

On Thu, Apr 18, 2013 at 08:44:17PM +0200, Oleg Nesterov wrote:
> No functional changes, preparation.
>
> Extract the "register breakpoint" code from ptrace_get_debugreg()
> into the new/generic helper, ptrace_register_breakpoint(). It will
> have more users.
>
> The patch also adds another simple helper, ptrace_fill_bp_fields(),
> to factor out the arch_bp_generic_fields() logic in register/modify.
>
> Signed-off-by: Oleg Nesterov <[email protected]>

Acked-by: Frederic Weisbecker <[email protected]>

2013-04-29 15:59:17

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH 4/6] ptrace/x86: ptrace_write_dr7() should create bp if !disabled

On Thu, Apr 18, 2013 at 08:44:19PM +0200, Oleg Nesterov wrote:
> 24f1e32c "hw-breakpoints: Rewrite the hw-breakpoints layer on top
> of perf events" introduced the minor regression. Before this commit
>
> PTRACE_POKEUSER DR7, enableDR0
> PTRACE_POKEUSER DR0, address
>
> was perfectly valid, now PTRACE_POKEUSER(DR7) fails if DR0 was not
> previously initialized by PTRACE_POKEUSER(DR0).
>
> Change ptrace_write_dr7() to do ptrace_register_breakpoint(addr => 0)
> if !bp && !disabled. This fixes watchpoint-zeroaddr from ptrace-tests,
> see https://bugzilla.redhat.com/show_bug.cgi?id=660204.
>
> Reported-by: Jan Kratochvil <[email protected]>
> Signed-off-by: Oleg Nesterov <[email protected]>

Acked-by: Frederic Weisbecker <[email protected]>

2013-04-29 16:00:45

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH 5/6] ptrace/x86: cleanup ptrace_set_debugreg()

On Thu, Apr 18, 2013 at 08:44:22PM +0200, Oleg Nesterov wrote:
> ptrace_set_debugreg() is trivial but looks horrible. Kill the
> unnecessary goto's and return's to cleanup the code.
>
> This matches ptrace_get_debugreg() which also needs the trivial
> whitespace cleanups.
>
> Signed-off-by: Oleg Nesterov <[email protected]>

Acked-by: Frederic Weisbecker <[email protected]>

2013-04-29 16:09:40

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH 6/6] ptrace: PTRACE_DETACH should do flush_ptrace_hw_breakpoint(child)

On Thu, Apr 18, 2013 at 08:44:25PM +0200, Oleg Nesterov wrote:
> Change ptrace_detach() to call flush_ptrace_hw_breakpoint(child).
> This frees the slots for non-ptrace PERF_TYPE_BREAKPOINT users, and
> this ensures that the tracee won't be killed by SIGTRAP triggered by
> the active breakpoints.
>
> Test-case:
>
> unsigned long encode_dr7(int drnum, int enable, unsigned int type, unsigned int len)
> {
> unsigned long dr7;
>
> dr7 = ((len | type) & 0xf)
> << (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
> if (enable)
> dr7 |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));
>
> return dr7;
> }
>
> int write_dr(int pid, int dr, unsigned long val)
> {
> return ptrace(PTRACE_POKEUSER, pid,
> offsetof (struct user, u_debugreg[dr]),
> val);
> }
>
> void func(void)
> {
> }
>
> int main(void)
> {
> int pid, stat;
> unsigned long dr7;
>
> pid = fork();
> if (!pid) {
> assert(ptrace(PTRACE_TRACEME, 0,0,0) == 0);
> kill(getpid(), SIGHUP);
>
> func();
> return 0x13;
> }
>
> assert(pid == waitpid(-1, &stat, 0));
> assert(WSTOPSIG(stat) == SIGHUP);
>
> assert(write_dr(pid, 0, (long)func) == 0);
> dr7 = encode_dr7(0, 1, DR_RW_EXECUTE, DR_LEN_1);
> assert(write_dr(pid, 7, dr7) == 0);
>
> assert(ptrace(PTRACE_DETACH, pid, 0,0) == 0);
> assert(pid == waitpid(-1, &stat, 0));
> assert(stat == 0x1300);
>
> return 0;
> }
>
> Before this patch the child is killed after PTRACE_DETACH.
>
> Signed-off-by: Oleg Nesterov <[email protected]>
> ---
> kernel/ptrace.c | 1 +
> 1 files changed, 1 insertions(+), 0 deletions(-)
>
> diff --git a/kernel/ptrace.c b/kernel/ptrace.c
> index 776ab3b..33752d9 100644
> --- a/kernel/ptrace.c
> +++ b/kernel/ptrace.c
> @@ -467,6 +467,7 @@ static int ptrace_detach(struct task_struct *child, unsigned int data)
> /* Architecture-specific hardware disable .. */
> ptrace_disable(child);
> clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
> + flush_ptrace_hw_breakpoint(child);

So I assume the tracee is still guaranteed to be stopped at that time, right? Or already
killed? But it can't be concurrently killed given the patch you did that prevented that?

I'm just asking to make sure we can't have two concurrent calls to flush_ptrace_hw_breakpoint()
at the same time.

Also it seems to be a regression since we brought the breakpoint/perf infrastructure. But
backporting this patch prior to "ptrace: ensure arch_ptrace/ptrace_request can never race with SIGKILL"
might be racy. Hmm...

Thanks.

>
> write_lock_irq(&tasklist_lock);
> /*
> --
> 1.5.5.1
>

2013-04-29 16:44:49

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [PATCH 6/6] ptrace: PTRACE_DETACH should do flush_ptrace_hw_breakpoint(child)

On 04/29, Frederic Weisbecker wrote:
>
> On Thu, Apr 18, 2013 at 08:44:25PM +0200, Oleg Nesterov wrote:
> > index 776ab3b..33752d9 100644
> > --- a/kernel/ptrace.c
> > +++ b/kernel/ptrace.c
> > @@ -467,6 +467,7 @@ static int ptrace_detach(struct task_struct *child, unsigned int data)
> > /* Architecture-specific hardware disable .. */
> > ptrace_disable(child);
> > clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
> > + flush_ptrace_hw_breakpoint(child);
>
> So I assume the tracee is still guaranteed to be stopped at that time, right?

Yes.

This is only called by PTRACE_DETACH which requires the stopped tracee,
like all ptrace requests except PTRACE_KILL/INTERRUPT. And only one
thread (the tracer) can do this.

> But it can't be concurrently killed given the patch you did that prevented that?

No, it can't. To clarify, the tracee can't run even if killed.

And just in case... If the tracer exits and does the implicit detach,
ptrace_detach() (and thus flush_ptrace_hw_breakpoint()) is not called,
that would be wrong exactly because we can race with the tracee.

> Also it seems to be a regression since we brought the breakpoint/perf
> infrastructure.

No, I think this (minor) problem is very old... At least, when I look
at 2.6.26 code I do not see anything which coould clear db regs on
detach.

> backporting this patch prior to "ptrace: ensure arch_ptrace/ptrace_request can never race with SIGKILL"
> might be racy.

Yes, unlikely this is possible or even makes sense, the problem is
minor.



Btw. perhaps flush_ptrace_hw_breakpoint() should also clear the
virtual registers like thread.debugreg7 ? Even without this patch,
flush_ is also called exec.

Oleg.

2013-04-29 23:36:54

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH 6/6] ptrace: PTRACE_DETACH should do flush_ptrace_hw_breakpoint(child)

On Mon, Apr 29, 2013 at 06:40:38PM +0200, Oleg Nesterov wrote:
> On 04/29, Frederic Weisbecker wrote:
> >
> > On Thu, Apr 18, 2013 at 08:44:25PM +0200, Oleg Nesterov wrote:
> > > index 776ab3b..33752d9 100644
> > > --- a/kernel/ptrace.c
> > > +++ b/kernel/ptrace.c
> > > @@ -467,6 +467,7 @@ static int ptrace_detach(struct task_struct *child, unsigned int data)
> > > /* Architecture-specific hardware disable .. */
> > > ptrace_disable(child);
> > > clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
> > > + flush_ptrace_hw_breakpoint(child);
> >
> > So I assume the tracee is still guaranteed to be stopped at that time, right?
>
> Yes.
>
> This is only called by PTRACE_DETACH which requires the stopped tracee,
> like all ptrace requests except PTRACE_KILL/INTERRUPT. And only one
> thread (the tracer) can do this.

Ok.

>
> > But it can't be concurrently killed given the patch you did that prevented that?
>
> No, it can't. To clarify, the tracee can't run even if killed.
>
> And just in case... If the tracer exits and does the implicit detach,
> ptrace_detach() (and thus flush_ptrace_hw_breakpoint()) is not called,
> that would be wrong exactly because we can race with the tracee.

Great!

>
> > Also it seems to be a regression since we brought the breakpoint/perf
> > infrastructure.
>
> No, I think this (minor) problem is very old... At least, when I look
> at 2.6.26 code I do not see anything which coould clear db regs on
> detach.

Ok, if so then the conversion to perf hasn't changed much the picture I think.
Also we are not holding a reference to the tracer from the event (event->owner is NULL)
so I guess we haven't made it buggier. The breakpoints have just stayed persistent across
tracers.

>
> > backporting this patch prior to "ptrace: ensure arch_ptrace/ptrace_request can never race with SIGKILL"
> > might be racy.
>
> Yes, unlikely this is possible or even makes sense, the problem is
> minor.

Ok.

> Btw. perhaps flush_ptrace_hw_breakpoint() should also clear the
> virtual registers like thread.debugreg7 ? Even without this patch,
> flush_ is also called exec.

Yeah makes sense.

Thanks.

>
> Oleg.
>

2013-04-29 23:37:57

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH 6/6] ptrace: PTRACE_DETACH should do flush_ptrace_hw_breakpoint(child)

On Thu, Apr 18, 2013 at 08:44:25PM +0200, Oleg Nesterov wrote:
> Change ptrace_detach() to call flush_ptrace_hw_breakpoint(child).
> This frees the slots for non-ptrace PERF_TYPE_BREAKPOINT users, and
> this ensures that the tracee won't be killed by SIGTRAP triggered by
> the active breakpoints.
>
> Test-case:
>
> unsigned long encode_dr7(int drnum, int enable, unsigned int type, unsigned int len)
> {
> unsigned long dr7;
>
> dr7 = ((len | type) & 0xf)
> << (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
> if (enable)
> dr7 |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE));
>
> return dr7;
> }
>
> int write_dr(int pid, int dr, unsigned long val)
> {
> return ptrace(PTRACE_POKEUSER, pid,
> offsetof (struct user, u_debugreg[dr]),
> val);
> }
>
> void func(void)
> {
> }
>
> int main(void)
> {
> int pid, stat;
> unsigned long dr7;
>
> pid = fork();
> if (!pid) {
> assert(ptrace(PTRACE_TRACEME, 0,0,0) == 0);
> kill(getpid(), SIGHUP);
>
> func();
> return 0x13;
> }
>
> assert(pid == waitpid(-1, &stat, 0));
> assert(WSTOPSIG(stat) == SIGHUP);
>
> assert(write_dr(pid, 0, (long)func) == 0);
> dr7 = encode_dr7(0, 1, DR_RW_EXECUTE, DR_LEN_1);
> assert(write_dr(pid, 7, dr7) == 0);
>
> assert(ptrace(PTRACE_DETACH, pid, 0,0) == 0);
> assert(pid == waitpid(-1, &stat, 0));
> assert(stat == 0x1300);
>
> return 0;
> }
>
> Before this patch the child is killed after PTRACE_DETACH.
>
> Signed-off-by: Oleg Nesterov <[email protected]>

ACK.

Thanks a lot for this very nice series!

2013-04-30 17:55:15

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [PATCH 6/6] ptrace: PTRACE_DETACH should do flush_ptrace_hw_breakpoint(child)

On 04/30, Frederic Weisbecker wrote:
>
> On Mon, Apr 29, 2013 at 06:40:38PM +0200, Oleg Nesterov wrote:
>
> > No, I think this (minor) problem is very old... At least, when I look
> > at 2.6.26 code I do not see anything which coould clear db regs on
> > detach.
>
> Ok, if so then the conversion to perf hasn't changed much the picture I think.

God knows ;) afaik there are not too much users.

But fyi, this conversion fixed some problems. For example, the content
of db registers we copied by copy_process iirc.

And, I didn't verify this, it seems that the old code didn't set _RF
bit so PTRACE_CONT should probably trigger the same bp again...

> The breakpoints have just stayed persistent across
> tracers.

Yes.

And. This conversion allows us to implement the generic arch-independent
PTRACE_GET/SET_HWBP api, the current PTRACE_{PEEK,POKE}USR(u_debugreg)
api is really awkward.

> > Btw. perhaps flush_ptrace_hw_breakpoint() should also clear the
> > virtual registers like thread.debugreg7 ? Even without this patch,
> > flush_ is also called exec.
>
> Yeah makes sense.

OK, and probably debugreg6 too, but I need to recheck.


Thanks Frederic for your review!

Oleg.

2013-04-30 18:56:58

by Oleg Nesterov

[permalink] [raw]
Subject: [PATCH 0/2] (Was: ptrace: PTRACE_DETACH should do flush_ptrace_hw_breakpoint(child))

On 04/30, Oleg Nesterov wrote:
>
> On 04/30, Frederic Weisbecker wrote:
> >
> > > Btw. perhaps flush_ptrace_hw_breakpoint() should also clear the
> > > virtual registers like thread.debugreg7 ? Even without this patch,
> > > flush_ is also called exec.
> >
> > Yeah makes sense.
>
> OK, and probably debugreg6 too, but I need to recheck.

Yes, looks like it should be cleared and we did this before.

And I think this change makes sense in this series, so could you
review this patch too?

Oleg.

2013-04-30 18:57:16

by Oleg Nesterov

[permalink] [raw]
Subject: [PATCH 1/2] ptrace/x86: flush_ptrace_hw_breakpoint() shoule clear the virtual debug registers

flush_ptrace_hw_breakpoint() destroys the counters set by ptrace, but
"leaks" ->debugreg6 and ->ptrace_dr7.

The problem is minor, but still it doesn't look right and flush_thread()
did this until 66cb5917. Now that PTRACE_DETACH does flush_ too this makes
even more sense.

Signed-off-by: Oleg Nesterov <[email protected]>
---
arch/x86/kernel/hw_breakpoint.c | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 02f0763..f66ff16 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -393,6 +393,9 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
unregister_hw_breakpoint(t->ptrace_bps[i]);
t->ptrace_bps[i] = NULL;
}
+
+ t->debugreg6 = 0;
+ t->ptrace_dr7 = 0;
}

void hw_breakpoint_restore(void)
--
1.5.5.1

2013-04-30 18:57:37

by Oleg Nesterov

[permalink] [raw]
Subject: [PATCH 2/2] x86: kill TIF_DEBUG

Because it is not used.

Signed-off-by: Oleg Nesterov <[email protected]>
---
arch/x86/include/asm/thread_info.h | 4 +---
1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 2cd056e..af71d98 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -89,7 +89,6 @@ struct thread_info {
#define TIF_FORK 18 /* ret_from_fork */
#define TIF_NOHZ 19 /* in adaptive nohz mode */
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
-#define TIF_DEBUG 21 /* uses debug registers */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
@@ -113,7 +112,6 @@ struct thread_info {
#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_FORK (1 << TIF_FORK)
#define _TIF_NOHZ (1 << TIF_NOHZ)
-#define _TIF_DEBUG (1 << TIF_DEBUG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
@@ -154,7 +152,7 @@ struct thread_info {
(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)

#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
-#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
+#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)

#define PREEMPT_ACTIVE 0x10000000

--
1.5.5.1