For userspace checkpoint and restore (C/R) a way of getting process state
containing RSEQ configuration is needed.
There are two ways this information is going to be used:
- to re-enable RSEQ for threads which had it enabled before C/R
- to detect if a thread was in a critical section during C/R
Since C/R preserves TLS memory and addresses, the RSEQ ABI will be restored
using the address registered before C/R.
Detection whether the thread is in a critical section during C/R is needed
to enforce behavior of RSEQ abort during C/R. Attaching with ptrace()
before registers are dumped doesn't by itself cause an RSEQ abort.
Restoring the instruction pointer within the critical section is
problematic because rseq_cs may get cleared before the control is passed
to the migrated application code leading to RSEQ invariants not being
preserved. C/R code will use RSEQ ABI address to find the abort handler
to which the instruction pointer needs to be set.
To achieve the above goals, expose the RSEQ ABI address and the signature value
with the new ptrace request PTRACE_GET_RSEQ_CONFIGURATION.
This new ptrace request can also be used by debuggers so they are aware
of stops within restartable sequences in progress.
Signed-off-by: Piotr Figiel <[email protected]>
Reviewed-by: Michal Miroslaw <[email protected]>
---
v2:
Applied review comments:
- changed return value from the ptrace request to the size of the
configuration structure
- expanded configuration structure with the flags field and
the rseq abi structure size
v1:
https://lore.kernel.org/lkml/[email protected]/
---
include/uapi/linux/ptrace.h | 10 ++++++++++
kernel/ptrace.c | 25 +++++++++++++++++++++++++
2 files changed, 35 insertions(+)
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index 83ee45fa634b..3747bf816f9a 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -102,6 +102,16 @@ struct ptrace_syscall_info {
};
};
+#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f
+
+struct ptrace_rseq_configuration {
+ __u64 rseq_abi_pointer;
+ __u32 rseq_abi_size;
+ __u32 signature;
+ __u32 flags;
+ __u32 pad;
+};
+
/*
* These values are stored in task->ptrace_message
* by tracehook_report_syscall_* to describe the current syscall-stop.
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 61db50f7ca86..76f09456ec4b 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -31,6 +31,7 @@
#include <linux/cn_proc.h>
#include <linux/compat.h>
#include <linux/sched/signal.h>
+#include <linux/minmax.h>
#include <asm/syscall.h> /* for syscall_get_* */
@@ -779,6 +780,24 @@ static int ptrace_peek_siginfo(struct task_struct *child,
return ret;
}
+#ifdef CONFIG_RSEQ
+static long ptrace_get_rseq_configuration(struct task_struct *task,
+ unsigned long size, void __user *data)
+{
+ struct ptrace_rseq_configuration conf = {
+ .rseq_abi_pointer = (u64)(uintptr_t)task->rseq,
+ .rseq_abi_size = sizeof(*task->rseq),
+ .signature = task->rseq_sig,
+ .flags = 0,
+ };
+
+ size = min_t(unsigned long, size, sizeof(conf));
+ if (copy_to_user(data, &conf, size))
+ return -EFAULT;
+ return sizeof(conf);
+}
+#endif
+
#ifdef PTRACE_SINGLESTEP
#define is_singlestep(request) ((request) == PTRACE_SINGLESTEP)
#else
@@ -1222,6 +1241,12 @@ int ptrace_request(struct task_struct *child, long request,
ret = seccomp_get_metadata(child, addr, datavp);
break;
+#ifdef CONFIG_RSEQ
+ case PTRACE_GET_RSEQ_CONFIGURATION:
+ ret = ptrace_get_rseq_configuration(child, addr, datavp);
+ break;
+#endif
+
default:
break;
}
--
2.30.1.766.gb4fecdf3b7-goog
----- On Feb 26, 2021, at 8:51 AM, Piotr Figiel [email protected] wrote:
[...]
> ---
> v2:
> Applied review comments:
> - changed return value from the ptrace request to the size of the
> configuration structure
> - expanded configuration structure with the flags field and
> the rseq abi structure size
>
[...]
> +#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f
> +
> +struct ptrace_rseq_configuration {
> + __u64 rseq_abi_pointer;
> + __u32 rseq_abi_size;
> + __u32 signature;
> + __u32 flags;
> + __u32 pad;
> +};
> +
[...]
> +#ifdef CONFIG_RSEQ
> +static long ptrace_get_rseq_configuration(struct task_struct *task,
> + unsigned long size, void __user *data)
> +{
> + struct ptrace_rseq_configuration conf = {
> + .rseq_abi_pointer = (u64)(uintptr_t)task->rseq,
> + .rseq_abi_size = sizeof(*task->rseq),
> + .signature = task->rseq_sig,
> + .flags = 0,
> + };
> +
> + size = min_t(unsigned long, size, sizeof(conf));
> + if (copy_to_user(data, &conf, size))
> + return -EFAULT;
> + return sizeof(conf);
> +}
I think what Florian was after would be:
struct ptrace_rseq_configuration {
__u32 size; /* size of struct ptrace_rseq_configuration */
__u32 flags;
__u64 rseq_abi_pointer;
__u32 signature;
__u32 pad;
};
where:
.size = sizeof(struct ptrace_rseq_configuration),
This way, the configuration structure can be expanded in the future. The
rseq ABI structure is by definition fixed-size, so there is no point in
having its size here.
Florian, did I understand your request correctly, or am I missing your point ?
Thanks,
Mathieu
--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
On Fri, 26 Feb 2021 at 16:32, Mathieu Desnoyers
<[email protected]> wrote:
>
> ----- On Feb 26, 2021, at 8:51 AM, Piotr Figiel [email protected] wrote:
> [...]
> > ---
> > v2:
> > Applied review comments:
> > - changed return value from the ptrace request to the size of the
> > configuration structure
> > - expanded configuration structure with the flags field and
> > the rseq abi structure size
> >
> [...]
> > +#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f
> > +
> > +struct ptrace_rseq_configuration {
> > + __u64 rseq_abi_pointer;
> > + __u32 rseq_abi_size;
> > + __u32 signature;
> > + __u32 flags;
> > + __u32 pad;
> > +};
> > +
> [...]
> > +#ifdef CONFIG_RSEQ
> > +static long ptrace_get_rseq_configuration(struct task_struct *task,
> > + unsigned long size, void __user *data)
> > +{
> > + struct ptrace_rseq_configuration conf = {
> > + .rseq_abi_pointer = (u64)(uintptr_t)task->rseq,
> > + .rseq_abi_size = sizeof(*task->rseq),
> > + .signature = task->rseq_sig,
> > + .flags = 0,
> > + };
> > +
> > + size = min_t(unsigned long, size, sizeof(conf));
> > + if (copy_to_user(data, &conf, size))
> > + return -EFAULT;
> > + return sizeof(conf);
> > +}
>
> I think what Florian was after would be:
>
> struct ptrace_rseq_configuration {
> __u32 size; /* size of struct ptrace_rseq_configuration */
> __u32 flags;
> __u64 rseq_abi_pointer;
> __u32 signature;
> __u32 pad;
> };
>
> where:
>
> .size = sizeof(struct ptrace_rseq_configuration),
>
> This way, the configuration structure can be expanded in the future. The
> rseq ABI structure is by definition fixed-size, so there is no point in
> having its size here.
>
> Florian, did I understand your request correctly, or am I missing your point ?
In this case returning sizeof(conf) would serve the same purpose, wouldn't it?
Best Regards
Michał Mirosław
[Resent because of HTML mail misfeature...]
Hi,
On Fri, Feb 26, 2021 at 10:32:35AM -0500, Mathieu Desnoyers wrote:
> > +static long ptrace_get_rseq_configuration(struct task_struct *task,
> > + unsigned long size, void __user *data)
> > +{
> > + struct ptrace_rseq_configuration conf = {
> > + .rseq_abi_pointer = (u64)(uintptr_t)task->rseq,
> > + .rseq_abi_size = sizeof(*task->rseq),
> > + .signature = task->rseq_sig,
> > + .flags = 0,
> > + };
> > +
> > + size = min_t(unsigned long, size, sizeof(conf));
> > + if (copy_to_user(data, &conf, size))
> > + return -EFAULT;
> > + return sizeof(conf);
> > +}
>
> I think what Florian was after would be:
>
> struct ptrace_rseq_configuration {
> __u32 size; /* size of struct ptrace_rseq_configuration */
> __u32 flags;
> __u64 rseq_abi_pointer;
> __u32 signature;
> __u32 pad;
> };
>
> where:
>
> .size = sizeof(struct ptrace_rseq_configuration),
>
> This way, the configuration structure can be expanded in the future. The
> rseq ABI structure is by definition fixed-size, so there is no point in
> having its size here.
Still, the rseq syscall accepts the rseq ABI structure size as a parameter.
I think this way the information returned from ptrace is consistent with
the userspace view of the rseq state and allows expansion in case the
ABI structure would have to be extended (in spite of its current
definition).
The configuration structure still can be expanded as its size is
reported to userspace as return value from the request (in line with
Dmitry's comments).
Best regards, Piotr.
----- On Feb 26, 2021, at 11:04 AM, emmir [email protected] wrote:
> On Fri, 26 Feb 2021 at 16:32, Mathieu Desnoyers
> <[email protected]> wrote:
>>
>> ----- On Feb 26, 2021, at 8:51 AM, Piotr Figiel [email protected] wrote:
>> [...]
>> > ---
>> > v2:
>> > Applied review comments:
>> > - changed return value from the ptrace request to the size of the
>> > configuration structure
>> > - expanded configuration structure with the flags field and
>> > the rseq abi structure size
>> >
>> [...]
>> > +#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f
>> > +
>> > +struct ptrace_rseq_configuration {
>> > + __u64 rseq_abi_pointer;
>> > + __u32 rseq_abi_size;
>> > + __u32 signature;
>> > + __u32 flags;
>> > + __u32 pad;
>> > +};
>> > +
>> [...]
>> > +#ifdef CONFIG_RSEQ
>> > +static long ptrace_get_rseq_configuration(struct task_struct *task,
>> > + unsigned long size, void __user *data)
>> > +{
>> > + struct ptrace_rseq_configuration conf = {
>> > + .rseq_abi_pointer = (u64)(uintptr_t)task->rseq,
>> > + .rseq_abi_size = sizeof(*task->rseq),
>> > + .signature = task->rseq_sig,
>> > + .flags = 0,
>> > + };
>> > +
>> > + size = min_t(unsigned long, size, sizeof(conf));
>> > + if (copy_to_user(data, &conf, size))
>> > + return -EFAULT;
>> > + return sizeof(conf);
>> > +}
>>
>> I think what Florian was after would be:
>>
>> struct ptrace_rseq_configuration {
>> __u32 size; /* size of struct ptrace_rseq_configuration */
>> __u32 flags;
>> __u64 rseq_abi_pointer;
>> __u32 signature;
>> __u32 pad;
>> };
>>
>> where:
>>
>> .size = sizeof(struct ptrace_rseq_configuration),
>>
>> This way, the configuration structure can be expanded in the future. The
>> rseq ABI structure is by definition fixed-size, so there is no point in
>> having its size here.
>>
>> Florian, did I understand your request correctly, or am I missing your point ?
>
> In this case returning sizeof(conf) would serve the same purpose, wouldn't it?
If the size is received as input from user-space as well, this can be used to
make sure the kernel detects what size is expected by user-space and acts accordingly.
Thanks,
Mathieu
--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
* Mathieu Desnoyers:
> This way, the configuration structure can be expanded in the future. The
> rseq ABI structure is by definition fixed-size, so there is no point in
> having its size here.
>
> Florian, did I understand your request correctly, or am I missing your
> point ?
No, the idea was that if the kernel ever supports different rseq ABI
sizes on registration (it could as there's a size argument to the rseq
system call), that needs to be communicated to CRIU, so that it restores
with the right size.
I haven't thought about whether it makes sense to make the ptrace
argument struct extensible.
Thanks,
Florian
----- On Feb 26, 2021, at 11:06 AM, Piotr Figiel [email protected] wrote:
> Hi,
>
> On Fri, Feb 26, 2021 at 10:32:35AM -0500, Mathieu Desnoyers wrote:
>> > +static long ptrace_get_rseq_configuration(struct task_struct *task,
>> > + unsigned long size, void __user *data)
>> > +{
>> > + struct ptrace_rseq_configuration conf = {
>> > + .rseq_abi_pointer = (u64)(uintptr_t)task->rseq,
>> > + .rseq_abi_size = sizeof(*task->rseq),
>> > + .signature = task->rseq_sig,
>> > + .flags = 0,
>> > + };
>> > +
>> > + size = min_t(unsigned long, size, sizeof(conf));
>> > + if (copy_to_user(data, &conf, size))
>> > + return -EFAULT;
>> > + return sizeof(conf);
>> > +}
>>
>> I think what Florian was after would be:
>>
>> struct ptrace_rseq_configuration {
>> __u32 size; /* size of struct ptrace_rseq_configuration */
>> __u32 flags;
>> __u64 rseq_abi_pointer;
>> __u32 signature;
>> __u32 pad;
>> };
>>
>> where:
>>
>> .size = sizeof(struct ptrace_rseq_configuration),
>>
>> This way, the configuration structure can be expanded in the future. The
>> rseq ABI structure is by definition fixed-size, so there is no point in
>> having its size here.
>
> Still, the rseq syscall accepts the rseq ABI structure size as a parameter.
> I think this way the information returned from ptrace is consistent with
> the userspace view of the rseq state and allows expansion in case the
> ABI structure would have to be extended (in spite of its current
> definition).
>
> The configuration structure still can be expanded as its size is
> reported to userspace as return value from the request (in line with
> Dmitry's comments).
Fair enough. And now with the reply from Florian I see that I misunderstood his
point.
Thanks,
Mathieu
>
> Best regards, Piotr.
--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
I don't think I can review this patch, I don't understand the problem space
well enough. But just in case, I see nothing wrong in this simple patch.
Feel free to add
Acked-by: Oleg Nesterov <[email protected]>
On 02/26, Piotr Figiel wrote:
>
> For userspace checkpoint and restore (C/R) a way of getting process state
> containing RSEQ configuration is needed.
>
> There are two ways this information is going to be used:
> - to re-enable RSEQ for threads which had it enabled before C/R
> - to detect if a thread was in a critical section during C/R
>
> Since C/R preserves TLS memory and addresses RSEQ ABI will be restored
> using the address registered before C/R.
>
> Detection whether the thread is in a critical section during C/R is needed
> to enforce behavior of RSEQ abort during C/R. Attaching with ptrace()
> before registers are dumped itself doesn't cause RSEQ abort.
> Restoring the instruction pointer within the critical section is
> problematic because rseq_cs may get cleared before the control is passed
> to the migrated application code leading to RSEQ invariants not being
> preserved. C/R code will use RSEQ ABI address to find the abort handler
> to which the instruction pointer needs to be set.
>
> To achieve above goals expose the RSEQ ABI address and the signature value
> with the new ptrace request PTRACE_GET_RSEQ_CONFIGURATION.
>
> This new ptrace request can also be used by debuggers so they are aware
> of stops within restartable sequences in progress.
>
> Signed-off-by: Piotr Figiel <[email protected]>
> Reviewed-by: Michal Miroslaw <[email protected]>
>
> ---
> v2:
> Applied review comments:
> - changed return value from the ptrace request to the size of the
> configuration structure
> - expanded configuration structure with the flags field and
> the rseq abi structure size
>
> v1:
> https://lore.kernel.org/lkml/[email protected]/
>
> ---
> include/uapi/linux/ptrace.h | 10 ++++++++++
> kernel/ptrace.c | 25 +++++++++++++++++++++++++
> 2 files changed, 35 insertions(+)
>
> diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
> index 83ee45fa634b..3747bf816f9a 100644
> --- a/include/uapi/linux/ptrace.h
> +++ b/include/uapi/linux/ptrace.h
> @@ -102,6 +102,16 @@ struct ptrace_syscall_info {
> };
> };
>
> +#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f
> +
> +struct ptrace_rseq_configuration {
> + __u64 rseq_abi_pointer;
> + __u32 rseq_abi_size;
> + __u32 signature;
> + __u32 flags;
> + __u32 pad;
> +};
> +
> /*
> * These values are stored in task->ptrace_message
> * by tracehook_report_syscall_* to describe the current syscall-stop.
> diff --git a/kernel/ptrace.c b/kernel/ptrace.c
> index 61db50f7ca86..76f09456ec4b 100644
> --- a/kernel/ptrace.c
> +++ b/kernel/ptrace.c
> @@ -31,6 +31,7 @@
> #include <linux/cn_proc.h>
> #include <linux/compat.h>
> #include <linux/sched/signal.h>
> +#include <linux/minmax.h>
>
> #include <asm/syscall.h> /* for syscall_get_* */
>
> @@ -779,6 +780,24 @@ static int ptrace_peek_siginfo(struct task_struct *child,
> return ret;
> }
>
> +#ifdef CONFIG_RSEQ
> +static long ptrace_get_rseq_configuration(struct task_struct *task,
> + unsigned long size, void __user *data)
> +{
> + struct ptrace_rseq_configuration conf = {
> + .rseq_abi_pointer = (u64)(uintptr_t)task->rseq,
> + .rseq_abi_size = sizeof(*task->rseq),
> + .signature = task->rseq_sig,
> + .flags = 0,
> + };
> +
> + size = min_t(unsigned long, size, sizeof(conf));
> + if (copy_to_user(data, &conf, size))
> + return -EFAULT;
> + return sizeof(conf);
> +}
> +#endif
> +
> #ifdef PTRACE_SINGLESTEP
> #define is_singlestep(request) ((request) == PTRACE_SINGLESTEP)
> #else
> @@ -1222,6 +1241,12 @@ int ptrace_request(struct task_struct *child, long request,
> ret = seccomp_get_metadata(child, addr, datavp);
> break;
>
> +#ifdef CONFIG_RSEQ
> + case PTRACE_GET_RSEQ_CONFIGURATION:
> + ret = ptrace_get_rseq_configuration(child, addr, datavp);
> + break;
> +#endif
> +
> default:
> break;
> }
> --
> 2.30.1.766.gb4fecdf3b7-goog
>
----- On Feb 26, 2021, at 8:51 AM, Piotr Figiel [email protected] wrote:
> For userspace checkpoint and restore (C/R) a way of getting process state
> containing RSEQ configuration is needed.
>
> There are two ways this information is going to be used:
> - to re-enable RSEQ for threads which had it enabled before C/R
> - to detect if a thread was in a critical section during C/R
>
> Since C/R preserves TLS memory and addresses RSEQ ABI will be restored
> using the address registered before C/R.
>
> Detection whether the thread is in a critical section during C/R is needed
> to enforce behavior of RSEQ abort during C/R. Attaching with ptrace()
> before registers are dumped itself doesn't cause RSEQ abort.
> Restoring the instruction pointer within the critical section is
> problematic because rseq_cs may get cleared before the control is passed
> to the migrated application code leading to RSEQ invariants not being
> preserved. C/R code will use RSEQ ABI address to find the abort handler
> to which the instruction pointer needs to be set.
>
> To achieve above goals expose the RSEQ ABI address and the signature value
> with the new ptrace request PTRACE_GET_RSEQ_CONFIGURATION.
>
> This new ptrace request can also be used by debuggers so they are aware
> of stops within restartable sequences in progress.
>
> Signed-off-by: Piotr Figiel <[email protected]>
> Reviewed-by: Michal Miroslaw <[email protected]>
Reviewed-by: Mathieu Desnoyers <[email protected]>
Thanks!
Mathieu
>
> ---
> v2:
> Applied review comments:
> - changed return value from the ptrace request to the size of the
> configuration structure
> - expanded configuration structure with the flags field and
> the rseq abi structure size
>
> v1:
> https://lore.kernel.org/lkml/[email protected]/
>
> ---
> include/uapi/linux/ptrace.h | 10 ++++++++++
> kernel/ptrace.c | 25 +++++++++++++++++++++++++
> 2 files changed, 35 insertions(+)
>
> diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
> index 83ee45fa634b..3747bf816f9a 100644
> --- a/include/uapi/linux/ptrace.h
> +++ b/include/uapi/linux/ptrace.h
> @@ -102,6 +102,16 @@ struct ptrace_syscall_info {
> };
> };
>
> +#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f
> +
> +struct ptrace_rseq_configuration {
> + __u64 rseq_abi_pointer;
> + __u32 rseq_abi_size;
> + __u32 signature;
> + __u32 flags;
> + __u32 pad;
> +};
> +
> /*
> * These values are stored in task->ptrace_message
> * by tracehook_report_syscall_* to describe the current syscall-stop.
> diff --git a/kernel/ptrace.c b/kernel/ptrace.c
> index 61db50f7ca86..76f09456ec4b 100644
> --- a/kernel/ptrace.c
> +++ b/kernel/ptrace.c
> @@ -31,6 +31,7 @@
> #include <linux/cn_proc.h>
> #include <linux/compat.h>
> #include <linux/sched/signal.h>
> +#include <linux/minmax.h>
>
> #include <asm/syscall.h> /* for syscall_get_* */
>
> @@ -779,6 +780,24 @@ static int ptrace_peek_siginfo(struct task_struct *child,
> return ret;
> }
>
> +#ifdef CONFIG_RSEQ
> +static long ptrace_get_rseq_configuration(struct task_struct *task,
> + unsigned long size, void __user *data)
> +{
> + struct ptrace_rseq_configuration conf = {
> + .rseq_abi_pointer = (u64)(uintptr_t)task->rseq,
> + .rseq_abi_size = sizeof(*task->rseq),
> + .signature = task->rseq_sig,
> + .flags = 0,
> + };
> +
> + size = min_t(unsigned long, size, sizeof(conf));
> + if (copy_to_user(data, &conf, size))
> + return -EFAULT;
> + return sizeof(conf);
> +}
> +#endif
> +
> #ifdef PTRACE_SINGLESTEP
> #define is_singlestep(request) ((request) == PTRACE_SINGLESTEP)
> #else
> @@ -1222,6 +1241,12 @@ int ptrace_request(struct task_struct *child, long
> request,
> ret = seccomp_get_metadata(child, addr, datavp);
> break;
>
> +#ifdef CONFIG_RSEQ
> + case PTRACE_GET_RSEQ_CONFIGURATION:
> + ret = ptrace_get_rseq_configuration(child, addr, datavp);
> + break;
> +#endif
> +
> default:
> break;
> }
> --
> 2.30.1.766.gb4fecdf3b7-goog
--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
On Thu, Mar 11, 2021 at 09:51:56AM -0500, Mathieu Desnoyers wrote:
>
>
> ----- On Feb 26, 2021, at 8:51 AM, Piotr Figiel [email protected] wrote:
>
> > For userspace checkpoint and restore (C/R) a way of getting process state
> > containing RSEQ configuration is needed.
> >
> > There are two ways this information is going to be used:
> > - to re-enable RSEQ for threads which had it enabled before C/R
> > - to detect if a thread was in a critical section during C/R
> >
> > Since C/R preserves TLS memory and addresses RSEQ ABI will be restored
> > using the address registered before C/R.
> >
> > Detection whether the thread is in a critical section during C/R is needed
> > to enforce behavior of RSEQ abort during C/R. Attaching with ptrace()
> > before registers are dumped itself doesn't cause RSEQ abort.
> > Restoring the instruction pointer within the critical section is
> > problematic because rseq_cs may get cleared before the control is passed
> > to the migrated application code leading to RSEQ invariants not being
> > preserved. C/R code will use RSEQ ABI address to find the abort handler
> > to which the instruction pointer needs to be set.
> >
> > To achieve above goals expose the RSEQ ABI address and the signature value
> > with the new ptrace request PTRACE_GET_RSEQ_CONFIGURATION.
> >
> > This new ptrace request can also be used by debuggers so they are aware
> > of stops within restartable sequences in progress.
> >
> > Signed-off-by: Piotr Figiel <[email protected]>
> > Reviewed-by: Michal Miroslaw <[email protected]>
>
> Reviewed-by: Mathieu Desnoyers <[email protected]>
How do we route this? Do I stick this in tip/sched/core as being an rseq
patch?
----- On Mar 11, 2021, at 11:51 AM, Peter Zijlstra [email protected] wrote:
> On Thu, Mar 11, 2021 at 09:51:56AM -0500, Mathieu Desnoyers wrote:
>>
>>
>> ----- On Feb 26, 2021, at 8:51 AM, Piotr Figiel [email protected] wrote:
>>
>> > For userspace checkpoint and restore (C/R) a way of getting process state
>> > containing RSEQ configuration is needed.
>> >
>> > There are two ways this information is going to be used:
>> > - to re-enable RSEQ for threads which had it enabled before C/R
>> > - to detect if a thread was in a critical section during C/R
>> >
>> > Since C/R preserves TLS memory and addresses RSEQ ABI will be restored
>> > using the address registered before C/R.
>> >
>> > Detection whether the thread is in a critical section during C/R is needed
>> > to enforce behavior of RSEQ abort during C/R. Attaching with ptrace()
>> > before registers are dumped itself doesn't cause RSEQ abort.
>> > Restoring the instruction pointer within the critical section is
>> > problematic because rseq_cs may get cleared before the control is passed
>> > to the migrated application code leading to RSEQ invariants not being
>> > preserved. C/R code will use RSEQ ABI address to find the abort handler
>> > to which the instruction pointer needs to be set.
>> >
>> > To achieve above goals expose the RSEQ ABI address and the signature value
>> > with the new ptrace request PTRACE_GET_RSEQ_CONFIGURATION.
>> >
>> > This new ptrace request can also be used by debuggers so they are aware
>> > of stops within restartable sequences in progress.
>> >
>> > Signed-off-by: Piotr Figiel <[email protected]>
>> > Reviewed-by: Michal Miroslaw <[email protected]>
>>
>> Reviewed-by: Mathieu Desnoyers <[email protected]>
>
> How do we route this? Do I stick this in tip/sched/core as being an rseq
> patch?
Sure, it's fine with me, thanks!
Mathieu
--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
The following commit has been merged into the sched/core branch of tip:
Commit-ID: 2c406d3f436db1deea55ec44cc4c3c0861c3c185
Gitweb: https://git.kernel.org/tip/2c406d3f436db1deea55ec44cc4c3c0861c3c185
Author: Piotr Figiel <[email protected]>
AuthorDate: Fri, 26 Feb 2021 14:51:56 +01:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Wed, 17 Mar 2021 14:05:40 +01:00
rseq, ptrace: Add PTRACE_GET_RSEQ_CONFIGURATION request
For userspace checkpoint and restore (C/R) a way of getting process state
containing RSEQ configuration is needed.
There are two ways this information is going to be used:
- to re-enable RSEQ for threads which had it enabled before C/R
- to detect if a thread was in a critical section during C/R
Since C/R preserves TLS memory and addresses RSEQ ABI will be restored
using the address registered before C/R.
Detection whether the thread is in a critical section during C/R is needed
to enforce behavior of RSEQ abort during C/R. Attaching with ptrace()
before registers are dumped itself doesn't cause RSEQ abort.
Restoring the instruction pointer within the critical section is
problematic because rseq_cs may get cleared before the control is passed
to the migrated application code leading to RSEQ invariants not being
preserved. C/R code will use RSEQ ABI address to find the abort handler
to which the instruction pointer needs to be set.
To achieve above goals expose the RSEQ ABI address and the signature value
with the new ptrace request PTRACE_GET_RSEQ_CONFIGURATION.
This new ptrace request can also be used by debuggers so they are aware
of stops within restartable sequences in progress.
Signed-off-by: Piotr Figiel <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Michal Miroslaw <[email protected]>
Reviewed-by: Mathieu Desnoyers <[email protected]>
Acked-by: Oleg Nesterov <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
include/uapi/linux/ptrace.h | 10 ++++++++++
kernel/ptrace.c | 25 +++++++++++++++++++++++++
2 files changed, 35 insertions(+)
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index 83ee45f..3747bf8 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -102,6 +102,16 @@ struct ptrace_syscall_info {
};
};
+#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f
+
+struct ptrace_rseq_configuration {
+ __u64 rseq_abi_pointer;
+ __u32 rseq_abi_size;
+ __u32 signature;
+ __u32 flags;
+ __u32 pad;
+};
+
/*
* These values are stored in task->ptrace_message
* by tracehook_report_syscall_* to describe the current syscall-stop.
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 821cf17..c71270a 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -31,6 +31,7 @@
#include <linux/cn_proc.h>
#include <linux/compat.h>
#include <linux/sched/signal.h>
+#include <linux/minmax.h>
#include <asm/syscall.h> /* for syscall_get_* */
@@ -779,6 +780,24 @@ static int ptrace_peek_siginfo(struct task_struct *child,
return ret;
}
+#ifdef CONFIG_RSEQ
+static long ptrace_get_rseq_configuration(struct task_struct *task,
+ unsigned long size, void __user *data)
+{
+ struct ptrace_rseq_configuration conf = {
+ .rseq_abi_pointer = (u64)(uintptr_t)task->rseq,
+ .rseq_abi_size = sizeof(*task->rseq),
+ .signature = task->rseq_sig,
+ .flags = 0,
+ };
+
+ size = min_t(unsigned long, size, sizeof(conf));
+ if (copy_to_user(data, &conf, size))
+ return -EFAULT;
+ return sizeof(conf);
+}
+#endif
+
#ifdef PTRACE_SINGLESTEP
#define is_singlestep(request) ((request) == PTRACE_SINGLESTEP)
#else
@@ -1222,6 +1241,12 @@ int ptrace_request(struct task_struct *child, long request,
ret = seccomp_get_metadata(child, addr, datavp);
break;
+#ifdef CONFIG_RSEQ
+ case PTRACE_GET_RSEQ_CONFIGURATION:
+ ret = ptrace_get_rseq_configuration(child, addr, datavp);
+ break;
+#endif
+
default:
break;
}
The following commit has been merged into the sched/core branch of tip:
Commit-ID: 90f093fa8ea48e5d991332cee160b761423d55c1
Gitweb: https://git.kernel.org/tip/90f093fa8ea48e5d991332cee160b761423d55c1
Author: Piotr Figiel <[email protected]>
AuthorDate: Fri, 26 Feb 2021 14:51:56 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Wed, 17 Mar 2021 16:15:39 +01:00
rseq, ptrace: Add PTRACE_GET_RSEQ_CONFIGURATION request
For userspace checkpoint and restore (C/R) a way of getting process state
containing RSEQ configuration is needed.
There are two ways this information is going to be used:
- to re-enable RSEQ for threads which had it enabled before C/R
- to detect if a thread was in a critical section during C/R
Since C/R preserves TLS memory and addresses RSEQ ABI will be restored
using the address registered before C/R.
Detection whether the thread is in a critical section during C/R is needed
to enforce behavior of RSEQ abort during C/R. Attaching with ptrace()
before registers are dumped itself doesn't cause RSEQ abort.
Restoring the instruction pointer within the critical section is
problematic because rseq_cs may get cleared before the control is passed
to the migrated application code leading to RSEQ invariants not being
preserved. C/R code will use RSEQ ABI address to find the abort handler
to which the instruction pointer needs to be set.
To achieve above goals expose the RSEQ ABI address and the signature value
with the new ptrace request PTRACE_GET_RSEQ_CONFIGURATION.
This new ptrace request can also be used by debuggers so they are aware
of stops within restartable sequences in progress.
Signed-off-by: Piotr Figiel <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Michal Miroslaw <[email protected]>
Reviewed-by: Mathieu Desnoyers <[email protected]>
Acked-by: Oleg Nesterov <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
include/uapi/linux/ptrace.h | 10 ++++++++++
kernel/ptrace.c | 25 +++++++++++++++++++++++++
2 files changed, 35 insertions(+)
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index 83ee45f..3747bf8 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -102,6 +102,16 @@ struct ptrace_syscall_info {
};
};
+#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f
+
+struct ptrace_rseq_configuration {
+ __u64 rseq_abi_pointer;
+ __u32 rseq_abi_size;
+ __u32 signature;
+ __u32 flags;
+ __u32 pad;
+};
+
/*
* These values are stored in task->ptrace_message
* by tracehook_report_syscall_* to describe the current syscall-stop.
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 821cf17..c71270a 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -31,6 +31,7 @@
#include <linux/cn_proc.h>
#include <linux/compat.h>
#include <linux/sched/signal.h>
+#include <linux/minmax.h>
#include <asm/syscall.h> /* for syscall_get_* */
@@ -779,6 +780,24 @@ static int ptrace_peek_siginfo(struct task_struct *child,
return ret;
}
+#ifdef CONFIG_RSEQ
+static long ptrace_get_rseq_configuration(struct task_struct *task,
+ unsigned long size, void __user *data)
+{
+ struct ptrace_rseq_configuration conf = {
+ .rseq_abi_pointer = (u64)(uintptr_t)task->rseq,
+ .rseq_abi_size = sizeof(*task->rseq),
+ .signature = task->rseq_sig,
+ .flags = 0,
+ };
+
+ size = min_t(unsigned long, size, sizeof(conf));
+ if (copy_to_user(data, &conf, size))
+ return -EFAULT;
+ return sizeof(conf);
+}
+#endif
+
#ifdef PTRACE_SINGLESTEP
#define is_singlestep(request) ((request) == PTRACE_SINGLESTEP)
#else
@@ -1222,6 +1241,12 @@ int ptrace_request(struct task_struct *child, long request,
ret = seccomp_get_metadata(child, addr, datavp);
break;
+#ifdef CONFIG_RSEQ
+ case PTRACE_GET_RSEQ_CONFIGURATION:
+ ret = ptrace_get_rseq_configuration(child, addr, datavp);
+ break;
+#endif
+
default:
break;
}